big update

This commit is contained in:
root
2024-10-05 14:41:52 +08:00
parent dbd3e452be
commit 06e5a0f94b

View File

@@ -10,13 +10,14 @@ if [ -n "$GMXRC_PATH" ]; then
fi
GPU_ID=${GPU_ID:-"0"}
LIGAND_GRO="ligand.gro" # 配体的 .gro 文件
LIGAND_ITP="ligand.itp" # 配体的 .itp 文件
NAME="protein"
LIGAND_GRO="mole.gro" # 配体的 .gro 文件
LIGAND_ITP="mole.itp" # 配体的 .itp 文件
# FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"}
FORCEFIELD=${FORCEFIELD:-"amber99sb"}
WATERMODEL=${WATERMODEL:-"tip3p"}
WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"}
BOXTYPE=${BOXTYPE:-"tric"}
BOXTYPE=${BOXTYPE:-"tric"} # or dodecahedron
BOXORIENTATION=${BOXORIENTATION:-"1.0"}
# NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns
NSTEPS=${NSTEPS:-50000000} # 100 ns 模拟
@@ -62,11 +63,11 @@ gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p
# 使用 Sobtophttp://sobereva.com/soft/Sobtop提取
# Step 2: Merge protein and ligand .gro files into a complex
cat $NAME.gro mol.gro > complex.gro
cat $NAME.gro mole.gro > complex.gro
# 获取蛋白质和小分子的原子数
PROTEIN_ATOMS=$(head -n 2 $NAME.gro | tail -n 1 | tr -d ' ')
LIGAND_ATOMS=$(head -n 2 mol.gro | tail -n 1 | tr -d ' ')
LIGAND_ATOMS=$(head -n 2 mole.gro | tail -n 1 | tr -d ' ')
# 打印原子数,确保正确
echo "Protein atoms: $PROTEIN_ATOMS"
@@ -75,41 +76,23 @@ echo "Ligand atoms: $LIGAND_ATOMS"
# 计算总原子数
TOTAL_ATOMS=$(($PROTEIN_ATOMS + $LIGAND_ATOMS))
# 打印总原子数
# 打印总原子数 7482
echo "Total atoms: $TOTAL_ATOMS"
# Update the atom count in the complex.gro file
# 更新原子数目 # ! 手动实现
sed -i "2s/\(^[[:space:]]*\)[0-9]\+/\1$TOTAL_ATOMS/" complex.gro
echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp
cat << EOF >> mol.itp
#ifdef POSRES
#include "posre_mol.itp"
#endif
EOF
# Modify the topology file to include the ligand
# echo "#include \"$LIGAND_ITP\"" >> topol.top
# Step 6: Modify the topology file to include the ligand and its parameters
# 6.1 在topol.top顶部添加小分子的参数文件
sed -i '/^; Include forcefield parameters/a #include "jz4.prm"' topol.top
# 6.2 在position restraint后添加小分子的拓扑文件
sed -i '/^; Include Position restraint file/!b;n;/#endif/a ; Include ligand topology\n#include "jz4.itp"' topol.top
# 6.3 在[molecules]部分添加小分子
sed -i '/^Protein_chain_A/a JZ4 1' topol.top
# 定义盒子、添加溶剂和离子、能量最小化
# 定义盒子并向盒子充满水
# Step 3: Define the simulation box
gmx_mpi editconf -f complex.gro -o complex-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION
# gmx_mpi editconf -f complex.gro -o complex-box.gro -bt dodecahedron -c -d 1.0
# Step 4: Add solvate
gmx_mpi solvate -cp complex-box.gro -cs $WATERTOPFILE -o complex-solv.gro -p topol.top
# gmx_mpi solvate -cp complex-box.gro -cs spc216.gro -o complex-solv.gro -p topol.top
# 添加离子
# add icons # ! ions.mdp add by manual
# --- ions.mdp file content --- #
cat << EOF > ions.mdp
@@ -129,7 +112,14 @@ rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f ions.mdp -c complex-solv.gro -p topol.top -o ions.tpr -maxwarn 1
# 添加NaCl
echo SOL | gmx_mpi genion -s ions.tpr -o complex-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral
# -pname 阳离子的名称
# -nname 阴离子的名称
# -np 阳离子个数
# -nn 阴离子个数
# 能量最小化
# energy minimization of the structure in solvate # ! minim.mdp add by manual
# --- minim.mdp file content --- #
cat << EOF > minim.mdp
@@ -150,46 +140,117 @@ pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f minim.mdp -c complex-solv-ions.gro -p topol.top -o em.tpr
gmx_mpi mdrun -v -deffnm em
# optional em, you will need the Xmgrace plotting too
#gmx_mpi energy -f em.edr -o potential.xvg
#position restrain
# 查看系统的系统的总势能变化情况
echo "12\n" | gmx_mpi energy -f em.edr -o potential.xvg
# 系统的总势能变化绘图
./plot_potential.sh potential.xvg potential.svg
# 位置约束Position Restraint
# 位置约束是指在进行模拟时,约束某些原子的位置,使它们不会偏离初始坐标。在能量最小化之后的位置约束步骤用于对溶剂和其他环境进行平衡,同时保持蛋白质或其他分子的位置不变。通常用于溶液系统的准备过程,以避免对生物分子结构造成过大影响。
# 位置约束步骤解释
# posre.mdp 文件是位置约束的输入文件,它会约束特定原子的运动。
# grompp -r em.gro此命令中的 -r em.gro 参数指明位置约束应基于已经能量最小化的结构 em.gro 进行。
# mdrun -deffnm posre执行模拟时会生成位置约束下的模拟结果。
# 如何使用
# 首先准备好 posre.mdp 文件,然后执行以下命令:
# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro
# gmx_mpi mdrun -v -deffnm posre
# 这将生成一个应用位置约束后的系统,用于后续的分子动力学模拟步骤。
# 位置限制的目的: 加速体系平衡, 减少计算资源浪费
# 对配体施加位置限制position restraint通常是为了在分子动力学模拟的早期阶段保持配体在特定位置以帮助系统更快达到平衡尤其是在 NVT恒体积恒温和 NPT恒压恒温平衡的过程中。这种约束有助于避免在系统能量未平衡时配体位置大幅偏离而影响蛋白-配体复合物的稳定性。
# 位置限制通常在以下情况中使用:
# 能量最小化后NVT 和 NPT 平衡之前:
# 在模拟的平衡阶段NVT 和 NPT施加配体限制可以保证配体在溶剂环境中不产生大幅度的偏移特别是在系统温度和压力稳定之前。这时配体的限制作用是关键的。
# 平衡后,生产阶段去除限制:
# 在生产阶段(即实际的分子动力学生产模拟),通常应去除位置限制,以观察蛋白-配体复合物的真实动态行为。如果限制保留在生产阶段,配体的运动将受到不必要的约束,无法得到真实的配体运动轨迹。
# 限制可做可不做的情况:
# 如果体系比较简单,且配体与蛋白质的相互作用较强,位置限制可以选择不做,直接让配体在系统中自由运动。
# 如果配体是比较松散或弱结合到蛋白质上的,或者你关心配体的初期运动轨迹,初期的短时间平衡阶段可以不施加限制。
# 限制配体
# 平衡蛋白配体复合物与平衡水中只包含蛋白的体系非常类似,值得注意的是:
# 1、对配体施加限定。
# 2、对于温度耦合群的处理。
# 为了限定配体需要为他生成一个位置限定拓扑。首先为JZ4创建一个只包含非氢原子的index group。
gmx_mpi make_ndx -f mole.gro -o index_mole.ndx << EOF
0 & ! a H*
q
EOF
# 执行genrestr模块选择新创建的index group在index_mole.ndx文件中是group 3
# echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp
gmx_mpi genrestr -f mole.gro -n index_mole.ndx -o posre_mole.itp -fc 1000 1000 1000 << EOF
3
EOF
# 下一步需要把这个信息包括在拓扑文件中。有几种方式取决于我们希望使用的条件。这里介绍第一种如果我们想在无论蛋白是否被限定的条件下对配体施加限定在拓扑文件的Include ligand topology之后中加入如下内容
# 在拓扑文件的Include ligand topology之后中加入如下内容
: << EOF
; Ligand position restraints
#ifdef POSRES
#include "posre_mole.itp"
#endif
EOF
# 必须要把限定这段话加在Include ligand topology之后Include water topology之前加在其他任何地方都会报错。
# 由于对只有几个原子的组在控制其动能的涨落时, 温度耦合算法不够稳定,不要对体系中的每一单个物种使用独立的耦合。创建一个特殊的组, 其中包含蛋白质和配体。
gmx_mpi make_ndx -f em.gro -o index.ndx << EOF
1 | 13
q
EOF
# NVT、NPT平衡
# nvt
# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr
# --- nvt.mdp file content --- #
cat << EOF > nvt.mdp
title = OPLS Lysozyme NVT equilibration
define = -DPOSRES ; position restrain the protein
title = Protein-ligand complex NVT equilibration
define = -DPOSRES ; position restrain the protein and ligand
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstxout = 500 ; save coordinates every 1.0 ps
nstvout = 500 ; save velocities every 1.0 ps
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
nstxout-compressed = 500 ; save coordinates every 1.0 ps
; Bond parameters
continuation = no ; first dynamics run
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Nonbonded settings
cutoff-scheme = Verlet ; Buffered neighbor searching
; Neighbor searching and vdW
cutoff-scheme = Verlet
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet
rcoulomb = 1.0 ; short-range electrostatic cutoff in nm
rvdw = 1.0 ; short-range van der Waals cutoff in nm
DispCorr = EnerPres ; account for cut-off vdW scheme
nstlist = 20 ; largely irrelevant with Verlet
rlist = 1.2
vdwtype = cutoff
vdw-modifier = force-switch
rvdw-switch = 1.0
rvdw = 1.2 ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
rcoulomb = 1.2 ; short-range electrostatic cutoff (in nm)
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling is off
@@ -201,118 +262,156 @@ gen_vel = yes ; assign velocities from Maxwell distributio
gen_temp = 300 ; temperature for Maxwell distribution
gen_seed = -1 ; generate a random seed
EOF
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr -n index.ndx
gmx_mpi mdrun -v -deffnm nvt
# optional : Let's analyze the temperature progression, again using energy:
# gmx_mpi energy -f nvt.edr -o temperature.xvg
# npt
# 提取并保存温度数据
gmx_mpi energy -f nvt.edr -o temperature.xvg << EOF
17 39 40
EOF
# 拆分 .xvg 文件
# 提取整体温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $2}' > system_temperature.xvg
# 提取 Protein_MOL 组的温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $3}' > protein_mol_temperature.xvg
# 提取 Water_and_ions 组的温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $4}' > water_and_ions_temperature.xvg
# 绘制温度变化曲线
./plot_potential.sh system_temperature.xvg system_temperature.svg
./plot_potential.sh protein_mol_temperature.xvg protein_mol_temperature.svg
./plot_potential.sh water_and_ions_temperature.xvg water_and_ions_temperature.svg
# NPT平衡
# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
# --- npt.mdp file content --- #
cat << EOF > npt.mdp
title = OPLS Lysozyme NPT equilibration
define = -DPOSRES ; position restrain the protein
title = Protein-ligand complex NPT equilibration
define = -DPOSRES ; position restrain the protein and ligand
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstxout = 500 ; save coordinates every 1.0 ps
nstvout = 500 ; save velocities every 1.0 ps
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
nstxout-compressed = 500 ; save coordinates every 1.0 ps
; Bond parameters
continuation = yes ; Restarting after NVT
continuation = yes ; continuing from NVT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
constraints = h-bonds ; bonds to H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Nonbonded settings
cutoff-scheme = Verlet ; Buffered neighbor searching
; Neighbor searching and vdW
cutoff-scheme = Verlet
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme
rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm)
rvdw = 1.0 ; short-range van der Waals cutoff (in nm)
DispCorr = EnerPres ; account for cut-off vdW scheme
nstlist = 20 ; largely irrelevant with Verlet
rlist = 1.2
vdwtype = cutoff
vdw-modifier = force-switch
rvdw-switch = 1.0
rvdw = 1.2 ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
rcoulomb = 1.2
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling is on
pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
; Temperature coupling
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling
pcoupl = Parrinello-Rahman ; Parrinello-Rahman or C-rescale for NPT production
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
refcoord_scaling = com
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Dispersion correction is not used for proteins with the C36 additive FF
DispCorr = no
; Velocity generation
gen_vel = no ; Velocity generation is off
gen_vel = no ; velocity generation off after NVT
EOF
gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr -n index.ndx
gmx_mpi mdrun -v -deffnm npt
# Optional: Let's analyze the pressure progression, again using energy: type 18 0
# gmx energy -f npt.edr -o pressure.xvg
gmx_mpi energy -f npt.edr -o pressure.xvg << EOF
18
0
EOF
# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt.
# gmx energy -f npt.edr -o density.xvg
gmx_mpi energy -f npt.edr -o density.xvg << EOF
24
0
EOF
# 绘制 SVG 图像
# 绘制压力图像
./plot_potential.sh pressure.xvg pressure.svg
# 绘制密度图像
./plot_potential.sh density.xvg density.svg
# md
# --- md.mdp file content --- #
cat << EOF > ${MDRUN_NAME}.mdp
title = OPLS Lysozyme NPT equilibration
title = Protein-ligand complex MD simulation
; Run parameters
integrator = md ; leap-frog integrator
nsteps = ${NSTEPS} ; steps for simulation
dt = ${DT} ; time step in fs
nsteps = 5000000 ; 2 * 5000000 = 10000 ps (10 ns)
dt = 0.002 ; 2 fs
; Output control
nstxout = 0 ; suppress bulky .trr file by specifying
nstvout = 0 ; 0 for output frequency of nstxout,
nstfout = 0 ; nstvout, and nstfout
nstenergy = 5000 ; save energies every 10.0 ps
nstlog = 5000 ; update log file every 10.0 ps
nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps
compressed-x-grps = System ; save the whole system
nstxout-compressed = 5000 ; save coordinates every 10.0 ps
; Bond parameters
continuation = yes ; Restarting after NPT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
continuation = yes ; continuing from NPT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds to H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Neighborsearching
cutoff-scheme = Verlet ; Buffered neighbor searching
; Neighbor searching and vdW
cutoff-scheme = Verlet
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme
rcoulomb = 1.0 ; short-range electrostatic cutoff in nm
rvdw = 1.0 ; short-range van der Waals cutoff in nm
nstlist = 20 ; largely irrelevant with Verlet
rlist = 1.2
vdwtype = cutoff
vdw-modifier = force-switch
rvdw-switch = 1.0
rvdw = 1.2 ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
rcoulomb = 1.2
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling is on
pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
; Temperature coupling
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling
pcoupl = Parrinello-Rahman ; pressure coupling is on for NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Dispersion correction
DispCorr = EnerPres ; account for cut-off vdW scheme
; Dispersion correction is not used for proteins with the C36 additive FF
DispCorr = no
; Velocity generation
gen_vel = no ; Velocity generation is off
gen_vel = no ; continuing from NPT equilibration
EOF
# Generate GROMACS .tpr file for the simulation
gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE}
gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} -n index.ndx
# Run the simulation
gmx_mpi mdrun -deffnm ${MDRUN_NAME} -update gpu -ntomp 1 -gpu_id ${GPU_ID}