From 06e5a0f94ba809b1c07e7e1b122c8f24e26043c8 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 5 Oct 2024 14:41:52 +0800 Subject: [PATCH] big update --- script/S112D/S112D.sh | 305 ++++++++++++++++++++++++++++-------------- 1 file changed, 202 insertions(+), 103 deletions(-) diff --git a/script/S112D/S112D.sh b/script/S112D/S112D.sh index 5f65e59..46a0a69 100755 --- a/script/S112D/S112D.sh +++ b/script/S112D/S112D.sh @@ -10,13 +10,14 @@ if [ -n "$GMXRC_PATH" ]; then fi GPU_ID=${GPU_ID:-"0"} -LIGAND_GRO="ligand.gro" # 配体的 .gro 文件 -LIGAND_ITP="ligand.itp" # 配体的 .itp 文件 +NAME="protein" +LIGAND_GRO="mole.gro" # 配体的 .gro 文件 +LIGAND_ITP="mole.itp" # 配体的 .itp 文件 # FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"} FORCEFIELD=${FORCEFIELD:-"amber99sb"} WATERMODEL=${WATERMODEL:-"tip3p"} WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"} -BOXTYPE=${BOXTYPE:-"tric"} +BOXTYPE=${BOXTYPE:-"tric"} # or dodecahedron BOXORIENTATION=${BOXORIENTATION:-"1.0"} # NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns NSTEPS=${NSTEPS:-50000000} # 100 ns 模拟 @@ -62,11 +63,11 @@ gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p # 使用 Sobtop(http://sobereva.com/soft/Sobtop)提取 # Step 2: Merge protein and ligand .gro files into a complex -cat $NAME.gro mol.gro > complex.gro +cat $NAME.gro mole.gro > complex.gro # 获取蛋白质和小分子的原子数 PROTEIN_ATOMS=$(head -n 2 $NAME.gro | tail -n 1 | tr -d ' ') -LIGAND_ATOMS=$(head -n 2 mol.gro | tail -n 1 | tr -d ' ') +LIGAND_ATOMS=$(head -n 2 mole.gro | tail -n 1 | tr -d ' ') # 打印原子数,确保正确 echo "Protein atoms: $PROTEIN_ATOMS" @@ -75,41 +76,23 @@ echo "Ligand atoms: $LIGAND_ATOMS" # 计算总原子数 TOTAL_ATOMS=$(($PROTEIN_ATOMS + $LIGAND_ATOMS)) -# 打印总原子数 +# 打印总原子数 7482 echo "Total atoms: $TOTAL_ATOMS" -# Update the atom count in the complex.gro file +# 更新原子数目 # ! 手动实现 sed -i "2s/\(^[[:space:]]*\)[0-9]\+/\1$TOTAL_ATOMS/" complex.gro -echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp - -cat << EOF >> mol.itp - -#ifdef POSRES -#include "posre_mol.itp" -#endif -EOF - -# Modify the topology file to include the ligand -# echo "#include \"$LIGAND_ITP\"" >> topol.top - -# Step 6: Modify the topology file to include the ligand and its parameters - -# 6.1 在topol.top顶部添加小分子的参数文件 -sed -i '/^; Include forcefield parameters/a #include "jz4.prm"' topol.top - -# 6.2 在position restraint后添加小分子的拓扑文件 -sed -i '/^; Include Position restraint file/!b;n;/#endif/a ; Include ligand topology\n#include "jz4.itp"' topol.top - -# 6.3 在[molecules]部分添加小分子 -sed -i '/^Protein_chain_A/a JZ4 1' topol.top +# 定义盒子、添加溶剂和离子、能量最小化 +# 定义盒子并向盒子充满水 # Step 3: Define the simulation box gmx_mpi editconf -f complex.gro -o complex-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION - +# gmx_mpi editconf -f complex.gro -o complex-box.gro -bt dodecahedron -c -d 1.0 # Step 4: Add solvate gmx_mpi solvate -cp complex-box.gro -cs $WATERTOPFILE -o complex-solv.gro -p topol.top +# gmx_mpi solvate -cp complex-box.gro -cs spc216.gro -o complex-solv.gro -p topol.top +# 添加离子 # add icons # ! ions.mdp add by manual # --- ions.mdp file content --- # cat << EOF > ions.mdp @@ -129,7 +112,14 @@ rvdw = 1.0 ; Short-range Van der Waals cut-off pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions EOF gmx_mpi grompp -f ions.mdp -c complex-solv.gro -p topol.top -o ions.tpr -maxwarn 1 +# 添加NaCl echo SOL | gmx_mpi genion -s ions.tpr -o complex-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral +# -pname 阳离子的名称 +# -nname 阴离子的名称 +# -np 阳离子个数 +# -nn 阴离子个数 + +# 能量最小化 # energy minimization of the structure in solvate # ! minim.mdp add by manual # --- minim.mdp file content --- # cat << EOF > minim.mdp @@ -150,46 +140,117 @@ pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions EOF gmx_mpi grompp -f minim.mdp -c complex-solv-ions.gro -p topol.top -o em.tpr gmx_mpi mdrun -v -deffnm em + # optional em, you will need the Xmgrace plotting too #gmx_mpi energy -f em.edr -o potential.xvg -#position restrain +# 查看系统的系统的总势能变化情况 +echo "12\n" | gmx_mpi energy -f em.edr -o potential.xvg +# 系统的总势能变化绘图 +./plot_potential.sh potential.xvg potential.svg +# 位置约束(Position Restraint) +# 位置约束是指在进行模拟时,约束某些原子的位置,使它们不会偏离初始坐标。在能量最小化之后的位置约束步骤用于对溶剂和其他环境进行平衡,同时保持蛋白质或其他分子的位置不变。通常用于溶液系统的准备过程,以避免对生物分子结构造成过大影响。 + +# 位置约束步骤解释 +# posre.mdp 文件是位置约束的输入文件,它会约束特定原子的运动。 +# grompp -r em.gro:此命令中的 -r em.gro 参数指明位置约束应基于已经能量最小化的结构 em.gro 进行。 +# mdrun -deffnm posre:执行模拟时,会生成位置约束下的模拟结果。 +# 如何使用 +# 首先准备好 posre.mdp 文件,然后执行以下命令: + # gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro # gmx_mpi mdrun -v -deffnm posre + +# 这将生成一个应用位置约束后的系统,用于后续的分子动力学模拟步骤。 +# 位置限制的目的: 加速体系平衡, 减少计算资源浪费 +# 对配体施加位置限制(position restraint),通常是为了在分子动力学模拟的早期阶段保持配体在特定位置,以帮助系统更快达到平衡,尤其是在 NVT(恒体积恒温)和 NPT(恒压恒温)平衡的过程中。这种约束有助于避免在系统能量未平衡时,配体位置大幅偏离而影响蛋白-配体复合物的稳定性。 + +# 位置限制通常在以下情况中使用: + +# 能量最小化后,NVT 和 NPT 平衡之前: + +# 在模拟的平衡阶段(NVT 和 NPT),施加配体限制可以保证配体在溶剂环境中不产生大幅度的偏移,特别是在系统温度和压力稳定之前。这时配体的限制作用是关键的。 +# 平衡后,生产阶段去除限制: + +# 在生产阶段(即实际的分子动力学生产模拟),通常应去除位置限制,以观察蛋白-配体复合物的真实动态行为。如果限制保留在生产阶段,配体的运动将受到不必要的约束,无法得到真实的配体运动轨迹。 + +# 限制可做可不做的情况: +# 如果体系比较简单,且配体与蛋白质的相互作用较强,位置限制可以选择不做,直接让配体在系统中自由运动。 +# 如果配体是比较松散或弱结合到蛋白质上的,或者你关心配体的初期运动轨迹,初期的短时间平衡阶段可以不施加限制。 + +# 限制配体 +# 平衡蛋白配体复合物与平衡水中只包含蛋白的体系非常类似,值得注意的是: +# 1、对配体施加限定。 +# 2、对于温度耦合群的处理。 +# 为了限定配体,需要为他生成一个位置限定拓扑。首先,为JZ4创建一个只包含非氢原子的index group。 + +gmx_mpi make_ndx -f mole.gro -o index_mole.ndx << EOF +0 & ! a H* +q +EOF + +# 执行genrestr模块,选择新创建的index group(在index_mole.ndx文件中是group 3) +# echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp +gmx_mpi genrestr -f mole.gro -n index_mole.ndx -o posre_mole.itp -fc 1000 1000 1000 << EOF +3 +EOF + +# 下一步需要把这个信息包括在拓扑文件中。有几种方式,取决于我们希望使用的条件。这里介绍第一种:如果我们想在无论蛋白是否被限定的条件下对配体施加限定,在拓扑文件的Include ligand topology之后中加入如下内容: +# 在拓扑文件的Include ligand topology之后中加入如下内容 + +: << EOF +; Ligand position restraints +#ifdef POSRES +#include "posre_mole.itp" +#endif +EOF + +# 必须要把限定这段话加在Include ligand topology之后,Include water topology之前,加在其他任何地方都会报错。 + +# 由于对只有几个原子的组在控制其动能的涨落时, 温度耦合算法不够稳定,不要对体系中的每一单个物种使用独立的耦合。创建一个特殊的组, 其中包含蛋白质和配体。 + +gmx_mpi make_ndx -f em.gro -o index.ndx << EOF +1 | 13 +q +EOF + +# NVT、NPT平衡 # nvt # gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr # --- nvt.mdp file content --- # cat << EOF > nvt.mdp -title = OPLS Lysozyme NVT equilibration -define = -DPOSRES ; position restrain the protein +title = Protein-ligand complex NVT equilibration +define = -DPOSRES ; position restrain the protein and ligand ; Run parameters integrator = md ; leap-frog integrator nsteps = 50000 ; 2 * 50000 = 100 ps dt = 0.002 ; 2 fs ; Output control -nstxout = 500 ; save coordinates every 1.0 ps -nstvout = 500 ; save velocities every 1.0 ps nstenergy = 500 ; save energies every 1.0 ps nstlog = 500 ; update log file every 1.0 ps +nstxout-compressed = 500 ; save coordinates every 1.0 ps ; Bond parameters continuation = no ; first dynamics run constraint_algorithm = lincs ; holonomic constraints constraints = h-bonds ; bonds involving H are constrained lincs_iter = 1 ; accuracy of LINCS lincs_order = 4 ; also related to accuracy -; Nonbonded settings -cutoff-scheme = Verlet ; Buffered neighbor searching +; Neighbor searching and vdW +cutoff-scheme = Verlet ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) -DispCorr = EnerPres ; account for cut-off vdW scheme +nstlist = 20 ; largely irrelevant with Verlet +rlist = 1.2 +vdwtype = cutoff +vdw-modifier = force-switch +rvdw-switch = 1.0 +rvdw = 1.2 ; short-range van der Waals cutoff (in nm) ; Electrostatics coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +rcoulomb = 1.2 ; short-range electrostatic cutoff (in nm) pme_order = 4 ; cubic interpolation fourierspacing = 0.16 ; grid spacing for FFT ; Temperature coupling is on tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate tau_t = 0.1 0.1 ; time constant, in ps ref_t = 310 310 ; reference temperature, one for each group, in K ; Pressure coupling is off @@ -201,118 +262,156 @@ gen_vel = yes ; assign velocities from Maxwell distributio gen_temp = 300 ; temperature for Maxwell distribution gen_seed = -1 ; generate a random seed EOF -gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr +gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr -n index.ndx gmx_mpi mdrun -v -deffnm nvt + # optional : Let's analyze the temperature progression, again using energy: -# gmx_mpi energy -f nvt.edr -o temperature.xvg -# npt +# 提取并保存温度数据 +gmx_mpi energy -f nvt.edr -o temperature.xvg << EOF +17 39 40 +EOF +# 拆分 .xvg 文件 +# 提取整体温度 +grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $2}' > system_temperature.xvg + +# 提取 Protein_MOL 组的温度 +grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $3}' > protein_mol_temperature.xvg + +# 提取 Water_and_ions 组的温度 +grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $4}' > water_and_ions_temperature.xvg + +# 绘制温度变化曲线 +./plot_potential.sh system_temperature.xvg system_temperature.svg +./plot_potential.sh protein_mol_temperature.xvg protein_mol_temperature.svg +./plot_potential.sh water_and_ions_temperature.xvg water_and_ions_temperature.svg + +# NPT平衡 # gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr # --- npt.mdp file content --- # cat << EOF > npt.mdp -title = OPLS Lysozyme NPT equilibration -define = -DPOSRES ; position restrain the protein +title = Protein-ligand complex NPT equilibration +define = -DPOSRES ; position restrain the protein and ligand ; Run parameters integrator = md ; leap-frog integrator nsteps = 50000 ; 2 * 50000 = 100 ps dt = 0.002 ; 2 fs ; Output control -nstxout = 500 ; save coordinates every 1.0 ps -nstvout = 500 ; save velocities every 1.0 ps nstenergy = 500 ; save energies every 1.0 ps nstlog = 500 ; update log file every 1.0 ps +nstxout-compressed = 500 ; save coordinates every 1.0 ps ; Bond parameters -continuation = yes ; Restarting after NVT +continuation = yes ; continuing from NVT constraint_algorithm = lincs ; holonomic constraints -constraints = h-bonds ; bonds involving H are constrained +constraints = h-bonds ; bonds to H are constrained lincs_iter = 1 ; accuracy of LINCS lincs_order = 4 ; also related to accuracy -; Nonbonded settings -cutoff-scheme = Verlet ; Buffered neighbor searching +; Neighbor searching and vdW +cutoff-scheme = Verlet ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) -DispCorr = EnerPres ; account for cut-off vdW scheme +nstlist = 20 ; largely irrelevant with Verlet +rlist = 1.2 +vdwtype = cutoff +vdw-modifier = force-switch +rvdw-switch = 1.0 +rvdw = 1.2 ; short-range van der Waals cutoff (in nm) ; Electrostatics coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +rcoulomb = 1.2 pme_order = 4 ; cubic interpolation fourierspacing = 0.16 ; grid spacing for FFT -; Temperature coupling is on -tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate -tau_t = 0.1 0.1 ; time constant, in ps -ref_t = 300 300 ; reference temperature, one for each group, in K -; Pressure coupling is on -pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT -pcoupltype = isotropic ; uniform scaling of box vectors -tau_p = 2.0 ; time constant, in ps -ref_p = 1.0 ; reference pressure, in bar -compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 +; Temperature coupling +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling +pcoupl = Parrinello-Rahman ; Parrinello-Rahman or C-rescale for NPT production +pcoupltype = isotropic ; uniform scaling of box vectors +tau_p = 2.0 ; time constant, in ps +ref_p = 1.0 ; reference pressure, in bar +compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 refcoord_scaling = com ; Periodic boundary conditions pbc = xyz ; 3-D PBC +; Dispersion correction is not used for proteins with the C36 additive FF +DispCorr = no ; Velocity generation -gen_vel = no ; Velocity generation is off +gen_vel = no ; velocity generation off after NVT EOF -gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr +gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr -n index.ndx gmx_mpi mdrun -v -deffnm npt # Optional: Let's analyze the pressure progression, again using energy: type 18 0 -# gmx energy -f npt.edr -o pressure.xvg +gmx_mpi energy -f npt.edr -o pressure.xvg << EOF +18 +0 +EOF + # Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt. -# gmx energy -f npt.edr -o density.xvg +gmx_mpi energy -f npt.edr -o density.xvg << EOF +24 +0 +EOF + +# 绘制 SVG 图像 +# 绘制压力图像 +./plot_potential.sh pressure.xvg pressure.svg + +# 绘制密度图像 +./plot_potential.sh density.xvg density.svg + # md # --- md.mdp file content --- # cat << EOF > ${MDRUN_NAME}.mdp -title = OPLS Lysozyme NPT equilibration +title = Protein-ligand complex MD simulation ; Run parameters integrator = md ; leap-frog integrator -nsteps = ${NSTEPS} ; steps for simulation -dt = ${DT} ; time step in fs +nsteps = 5000000 ; 2 * 5000000 = 10000 ps (10 ns) +dt = 0.002 ; 2 fs ; Output control -nstxout = 0 ; suppress bulky .trr file by specifying -nstvout = 0 ; 0 for output frequency of nstxout, -nstfout = 0 ; nstvout, and nstfout nstenergy = 5000 ; save energies every 10.0 ps nstlog = 5000 ; update log file every 10.0 ps -nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps -compressed-x-grps = System ; save the whole system +nstxout-compressed = 5000 ; save coordinates every 10.0 ps ; Bond parameters -continuation = yes ; Restarting after NPT -constraint_algorithm = lincs ; holonomic constraints -constraints = h-bonds ; bonds involving H are constrained +continuation = yes ; continuing from NPT +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds to H are constrained lincs_iter = 1 ; accuracy of LINCS lincs_order = 4 ; also related to accuracy -; Neighborsearching -cutoff-scheme = Verlet ; Buffered neighbor searching +; Neighbor searching and vdW +cutoff-scheme = Verlet ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +nstlist = 20 ; largely irrelevant with Verlet +rlist = 1.2 +vdwtype = cutoff +vdw-modifier = force-switch +rvdw-switch = 1.0 +rvdw = 1.2 ; short-range van der Waals cutoff (in nm) ; Electrostatics coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +rcoulomb = 1.2 pme_order = 4 ; cubic interpolation fourierspacing = 0.16 ; grid spacing for FFT -; Temperature coupling is on -tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate -tau_t = 0.1 0.1 ; time constant, in ps -ref_t = 310 310 ; reference temperature, one for each group, in K -; Pressure coupling is on -pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT -pcoupltype = isotropic ; uniform scaling of box vectors -tau_p = 2.0 ; time constant, in ps -ref_p = 1.0 ; reference pressure, in bar -compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 +; Temperature coupling +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 310 310 ; reference temperature, one for each group, in K +; Pressure coupling +pcoupl = Parrinello-Rahman ; pressure coupling is on for NPT +pcoupltype = isotropic ; uniform scaling of box vectors +tau_p = 2.0 ; time constant, in ps +ref_p = 1.0 ; reference pressure, in bar +compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 ; Periodic boundary conditions pbc = xyz ; 3-D PBC -; Dispersion correction -DispCorr = EnerPres ; account for cut-off vdW scheme +; Dispersion correction is not used for proteins with the C36 additive FF +DispCorr = no ; Velocity generation -gen_vel = no ; Velocity generation is off +gen_vel = no ; continuing from NPT equilibration EOF # Generate GROMACS .tpr file for the simulation -gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} +gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} -n index.ndx # Run the simulation gmx_mpi mdrun -deffnm ${MDRUN_NAME} -update gpu -ntomp 1 -gpu_id ${GPU_ID}