Files
gromacs_docker/script/S112D/S112D.sh
2024-10-05 19:28:24 +08:00

595 lines
27 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#This script needs to be edited for each run.
#Define PDB Filename & GROMACS Pameters
# reference: http://www.mdtutorials.com/gmx/lysozyme/
# NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh
# 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000对应 100ns然后运行 md_gromacs.sh 脚本
# Check if GMXRC_PATH is provided and source it
if [ -n "$GMXRC_PATH" ]; then
source "$GMXRC_PATH" # source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC
fi
GPU_ID=${GPU_ID:-"0"}
NAME="protein"
LIGAND_GRO="mole.gro" # 配体的 .gro 文件
LIGAND_ITP="mole.itp" # 配体的 .itp 文件
# FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"}
FORCEFIELD=${FORCEFIELD:-"amber99sb"}
WATERMODEL=${WATERMODEL:-"tip3p"}
WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"}
BOXTYPE=${BOXTYPE:-"tric"} # or dodecahedron
BOXORIENTATION=${BOXORIENTATION:-"1.0"}
# NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns
NSTEPS=${NSTEPS:-50000000} # 100 ns 模拟
DT=${DT:-0.002} # 2 fs
#BOXSIZE="5.0"
#BOXCENTER="2.5"
# Define simulation name variable
MDRUN_NAME=${MDRUN_NAME:-"md"}
NDX_NAME=${NDX_NAME:-"index"}
# Define analysis parameters
# Define other filenames based on MDRUN_NAME
TPR_FILE="${MDRUN_NAME}.tpr"
XTC_FILE="${MDRUN_NAME}.xtc"
NDX_FILE="${NDX_NAME}.ndx"
NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc"
OUTPUT_FOLDER=${OUTPUT_FOLDER:-"frame_extraction_output"}
TEMP_FOLDER=${TEMP_FOLDER:-"temp"}
# Define variables for frame extraction
EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-100} # Default to 100 ps if not set
# Print the current settings
echo "Current settings:"
echo "NAME: $NAME"
echo "LIGAND_GRO: $LIGAND_GRO"
echo "FORCEFIELD: $FORCEFIELD"
echo "WATERMODEL: $WATERMODEL"
echo "WATERTOPFILE: $WATERTOPFILE"
echo "BOXTYPE: $BOXTYPE"
echo "BOXORIENTATION: $BOXORIENTATION"
echo "NSTEPS: $NSTEPS"
echo "DT: $DT"
echo "MDRUN_NAME: $MDRUN_NAME"
echo "TPR_FILE: $TPR_FILE"
echo "XTC_FILE: $XTC_FILE"
echo "NO_PBC_XTC_FILE: $NO_PBC_XTC_FILE"
echo "OUTPUT_FOLDER: $OUTPUT_FOLDER"
echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS"
# Step 1: Generate GROMACS .gro file for the protein
gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p topol.top -ignh
# gmx_mpi pdb2gmx -f protein.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p topol.top -ignh
# gmx_mpi pdb2gmx -f protein.pdb -o protein.gro -ff amber99sb -water tip3p -p unk.top -ignh
# 使用 Sobtophttp://sobereva.com/soft/Sobtop提取
# Step 2: Merge protein and ligand .gro files into a complex
cat $NAME.gro mole.gro > complex.gro
# 获取蛋白质和小分子的原子数
PROTEIN_ATOMS=$(head -n 2 $NAME.gro | tail -n 1 | tr -d ' ')
LIGAND_ATOMS=$(head -n 2 mole.gro | tail -n 1 | tr -d ' ')
# 打印原子数,确保正确
echo "Protein atoms: $PROTEIN_ATOMS"
echo "Ligand atoms: $LIGAND_ATOMS"
# 计算总原子数
TOTAL_ATOMS=$(($PROTEIN_ATOMS + $LIGAND_ATOMS))
# 打印总原子数 7482
echo "Total atoms: $TOTAL_ATOMS"
# 更新原子数目
sed -i "2s/\(^[[:space:]]*\)[0-9]\+/\1$TOTAL_ATOMS/" complex.gro
# 定义盒子、添加溶剂和离子、能量最小化
# 定义盒子并向盒子充满水
# Step 3: Define the simulation box
gmx_mpi editconf -f complex.gro -o complex-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION
# gmx_mpi editconf -f complex.gro -o complex-box.gro -bt dodecahedron -c -d 1.0
# Step 4: Add solvate
gmx_mpi solvate -cp complex-box.gro -cs $WATERTOPFILE -o complex-solv.gro -p topol.top
# gmx_mpi solvate -cp complex-box.gro -cs spc216.gro -o complex-solv.gro -p topol.top
# 添加离子
# add icons # ! ions.mdp add by manual
# --- ions.mdp file content --- #
cat << EOF > ions.mdp
; ions.mdp - used as input into grompp to generate ions.tpr
; Parameters describing what to do, when to stop and what to save
integrator = steep ; Algorithm steep = steepest descent minimization
emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep = 0.01 ; Minimization step size
nsteps = 50000 ; Maximum number of minimization steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist = 1 ; Frequency to update the neighbor list and long range forces
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; Method to determine neighbor list simple, grid
coulombtype = cutoff ; Treatment of long range electrostatic interactions
rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f ions.mdp -c complex-solv.gro -p topol.top -o ions.tpr -maxwarn 1
# 添加NaCl
echo SOL | gmx_mpi genion -s ions.tpr -o complex-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral
# -pname 阳离子的名称
# -nname 阴离子的名称
# -np 阳离子个数
# -nn 阴离子个数
# 能量最小化
# energy minimization of the structure in solvate # ! minim.mdp add by manual
# --- minim.mdp file content --- #
cat << EOF > minim.mdp
; minim.mdp - used as input into grompp to generate em.tpr
; Parameters describing what to do, when to stop and what to save
integrator = steep ; Algorithm steep = steepest descent minimization
emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep = 0.01 ; Minimization step size
nsteps = 50000 ; Maximum number of minimization steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist = 1 ; Frequency to update the neighbor list and long range forces
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; Method to determine neighbor list simple, grid
coulombtype = PME ; Treatment of long range electrostatic interactions
rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f minim.mdp -c complex-solv-ions.gro -p topol.top -o em.tpr
gmx_mpi mdrun -v -deffnm em
# optional em, you will need the Xmgrace plotting too
#gmx_mpi energy -f em.edr -o potential.xvg
# 查看系统的系统的总势能变化情况
echo "12\n" | gmx_mpi energy -f em.edr -o potential.xvg
# 系统的总势能变化绘图
./plot_potential.sh potential.xvg potential.svg
# 位置约束Position Restraint
# 位置约束是指在进行模拟时,约束某些原子的位置,使它们不会偏离初始坐标。在能量最小化之后的位置约束步骤用于对溶剂和其他环境进行平衡,同时保持蛋白质或其他分子的位置不变。通常用于溶液系统的准备过程,以避免对生物分子结构造成过大影响。
# 位置约束步骤解释
# posre.mdp 文件是位置约束的输入文件,它会约束特定原子的运动。
# grompp -r em.gro此命令中的 -r em.gro 参数指明位置约束应基于已经能量最小化的结构 em.gro 进行。
# mdrun -deffnm posre执行模拟时会生成位置约束下的模拟结果。
# 如何使用
# 首先准备好 posre.mdp 文件,然后执行以下命令:
# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro
# gmx_mpi mdrun -v -deffnm posre
# 这将生成一个应用位置约束后的系统,用于后续的分子动力学模拟步骤。
# 位置限制的目的: 加速体系平衡, 减少计算资源浪费
# 对配体施加位置限制position restraint通常是为了在分子动力学模拟的早期阶段保持配体在特定位置以帮助系统更快达到平衡尤其是在 NVT恒体积恒温和 NPT恒压恒温平衡的过程中。这种约束有助于避免在系统能量未平衡时配体位置大幅偏离而影响蛋白-配体复合物的稳定性。
# 位置限制通常在以下情况中使用:
# 能量最小化后NVT 和 NPT 平衡之前:
# 在模拟的平衡阶段NVT 和 NPT施加配体限制可以保证配体在溶剂环境中不产生大幅度的偏移特别是在系统温度和压力稳定之前。这时配体的限制作用是关键的。
# 平衡后,生产阶段去除限制:
# 在生产阶段(即实际的分子动力学生产模拟),通常应去除位置限制,以观察蛋白-配体复合物的真实动态行为。如果限制保留在生产阶段,配体的运动将受到不必要的约束,无法得到真实的配体运动轨迹。
# 限制可做可不做的情况:
# 如果体系比较简单,且配体与蛋白质的相互作用较强,位置限制可以选择不做,直接让配体在系统中自由运动。
# 如果配体是比较松散或弱结合到蛋白质上的,或者你关心配体的初期运动轨迹,初期的短时间平衡阶段可以不施加限制。
# 限制配体
# 平衡蛋白配体复合物与平衡水中只包含蛋白的体系非常类似,值得注意的是:
# 1、对配体施加限定。
# 2、对于温度耦合群的处理。
# 为了限定配体需要为他生成一个位置限定拓扑。首先为JZ4创建一个只包含非氢原子的index group。
gmx_mpi make_ndx -f mole.gro -o index_mole.ndx << EOF
0 & ! a H*
q
EOF
# 执行genrestr模块选择新创建的index group在index_mole.ndx文件中是group 3
# echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp
gmx_mpi genrestr -f mole.gro -n index_mole.ndx -o posre_mole.itp -fc 1000 1000 1000 << EOF
3
EOF
# 下一步需要把这个信息包括在拓扑文件中。有几种方式取决于我们希望使用的条件。这里介绍第一种如果我们想在无论蛋白是否被限定的条件下对配体施加限定在拓扑文件的Include ligand topology之后中加入如下内容
# 在拓扑文件的Include ligand topology之后中加入如下内容
: << EOF
; Ligand position restraints
#ifdef POSRES
#include "posre_mole.itp"
#endif
EOF
# 必须要把限定这段话加在Include ligand topology之后Include water topology之前加在其他任何地方都会报错。
# 由于对只有几个原子的组在控制其动能的涨落时, 温度耦合算法不够稳定,不要对体系中的每一单个物种使用独立的耦合。创建一个特殊的组, 其中包含蛋白质和配体。
gmx_mpi make_ndx -f em.gro -o index.ndx << EOF
1 | 13
q
EOF
# NVT、NPT平衡
# nvt
# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr
# --- nvt.mdp file content --- #
cat << EOF > nvt.mdp
title = Protein-ligand complex NVT equilibration
define = -DPOSRES ; position restrain the protein and ligand
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
nstxout-compressed = 500 ; save coordinates every 1.0 ps
; Bond parameters
continuation = no ; first dynamics run
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Neighbor searching and vdW
cutoff-scheme = Verlet
ns_type = grid ; search neighboring grid cells
nstlist = 20 ; largely irrelevant with Verlet
rlist = 1.2
vdwtype = cutoff
vdw-modifier = force-switch
rvdw-switch = 1.0
rvdw = 1.2 ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
rcoulomb = 1.2 ; short-range electrostatic cutoff (in nm)
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling is off
pcoupl = no ; no pressure coupling in NVT
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Velocity generation
gen_vel = yes ; assign velocities from Maxwell distribution
gen_temp = 300 ; temperature for Maxwell distribution
gen_seed = -1 ; generate a random seed
EOF
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr -n index.ndx
gmx_mpi mdrun -v -deffnm nvt
# optional : Let's analyze the temperature progression, again using energy:
# 提取并保存温度数据
gmx_mpi energy -f nvt.edr -o temperature.xvg << EOF
17 39 40
EOF
# 拆分 .xvg 文件
# 提取整体温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $2}' > system_temperature.xvg
# 提取 Protein_MOL 组的温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $3}' > protein_mol_temperature.xvg
# 提取 Water_and_ions 组的温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $4}' > water_and_ions_temperature.xvg
# 绘制温度变化曲线
./plot_potential.sh system_temperature.xvg system_temperature.svg
./plot_potential.sh protein_mol_temperature.xvg protein_mol_temperature.svg
./plot_potential.sh water_and_ions_temperature.xvg water_and_ions_temperature.svg
# NPT平衡
# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
# --- npt.mdp file content --- #
cat << EOF > npt.mdp
title = Protein-ligand complex NPT equilibration
define = -DPOSRES ; position restrain the protein and ligand
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
nstxout-compressed = 500 ; save coordinates every 1.0 ps
; Bond parameters
continuation = yes ; continuing from NVT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds to H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Neighbor searching and vdW
cutoff-scheme = Verlet
ns_type = grid ; search neighboring grid cells
nstlist = 20 ; largely irrelevant with Verlet
rlist = 1.2
vdwtype = cutoff
vdw-modifier = force-switch
rvdw-switch = 1.0
rvdw = 1.2 ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
rcoulomb = 1.2
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling
pcoupl = Parrinello-Rahman ; Parrinello-Rahman or C-rescale for NPT production
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
refcoord_scaling = com
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Dispersion correction is not used for proteins with the C36 additive FF
DispCorr = no
; Velocity generation
gen_vel = no ; velocity generation off after NVT
EOF
gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr -n index.ndx
gmx_mpi mdrun -v -deffnm npt
# Optional: Let's analyze the pressure progression, again using energy: type 18 0
gmx_mpi energy -f npt.edr -o pressure.xvg << EOF
18
0
EOF
# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt.
gmx_mpi energy -f npt.edr -o density.xvg << EOF
24
0
EOF
# 绘制 SVG 图像
# 绘制压力图像
./plot_potential.sh pressure.xvg pressure.svg
# 绘制密度图像
./plot_potential.sh density.xvg density.svg
# md
# --- md.mdp file content --- #
cat << EOF > ${MDRUN_NAME}.mdp
title = Protein-ligand complex MD simulation
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 5000000 ; 2 * 5000000 = 10000 ps (10 ns)
dt = 0.002 ; 2 fs
; Output control
nstenergy = 5000 ; save energies every 10.0 ps
nstlog = 5000 ; update log file every 10.0 ps
nstxout-compressed = 5000 ; save coordinates every 10.0 ps
; Bond parameters
continuation = yes ; continuing from NPT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds to H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Neighbor searching and vdW
cutoff-scheme = Verlet
ns_type = grid ; search neighboring grid cells
nstlist = 20 ; largely irrelevant with Verlet
rlist = 1.2
vdwtype = cutoff
vdw-modifier = force-switch
rvdw-switch = 1.0
rvdw = 1.2 ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
rcoulomb = 1.2
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein_MOL Water_and_ions ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling
pcoupl = Parrinello-Rahman ; pressure coupling is on for NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Dispersion correction is not used for proteins with the C36 additive FF
DispCorr = no
; Velocity generation
gen_vel = no ; continuing from NPT equilibration
EOF
# Generate GROMACS .tpr file for the simulation
gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} -n index.ndx
# Run the simulation
gmx_mpi mdrun -deffnm ${MDRUN_NAME} -update gpu -ntomp 1 -gpu_id ${GPU_ID}
# 中心化并修正 PBC
gmx_mpi trjconv -s ${TPR_FILE} -f ${MDRUN_NAME}.xtc -o ${MDRUN_NAME}_center.xtc -center -pbc mol -ur compact << EOF
1
0
EOF
# 旋转和平移拟合
gmx_mpi trjconv -s ${TPR_FILE} -f ${MDRUN_NAME}_center.xtc -o ${MDRUN_NAME}_fit.xtc -fit rot+trans << EOF
4
0
EOF
# 提取轨迹帧
# -dt 100 表示每隔 100 ps 提取一个帧。即提取的时间间隔是 500 ps。 或者用控制变量EXTRACT_EVERY_PS
# 选择组分 21 Protein_MOL
gmx_mpi trjconv -s ${TPR_FILE} -f ${MDRUN_NAME}_fit.xtc -o whole.pdb -dt 100 -b 1000 -e 20000 -n index.ndx << EOF
21
EOF
# RMSD 分析
# 分析 Protein_MOL 的 RMSD 并绘制图像
gmx_mpi rms -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -tu ns -o rmsd_protein_mol.xvg -n index.ndx << EOF
21 # 选择 "Protein_MOL" 进行拟合
21 # 选择 "Protein_MOL" 进行 RMSD 计算
EOF
./plot_rmsd.sh rmsd_protein_mol.xvg rmsd_protein_mol.svg
# 分析蛋白骨架 (Backbone) 的 RMSD 并绘制图像
gmx_mpi rms -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -tu ns -o rmsd_backbone.xvg -n index.ndx << EOF
4 # 选择 "Backbone" 进行拟合
4 # 选择 "Backbone" 进行 RMSD 计算
EOF
./plot_rmsd.sh rmsd_backbone.xvg rmsd_backbone.svg
# 执行 RMSF 分析
# 对 Protein 进行 RMSF 分析
gmx_mpi rmsf -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -o rmsf_protein.xvg -res -ox avg_structure_protein.pdb -n index.ndx << EOF
1 # 选择 Protein 进行 RMSF 分析
EOF
# 使用 plot_rmsf.sh 绘制 Protein 的 RMSF 图像
./plot_rmsf.sh rmsf_protein.xvg rmsf_protein.svg
# 对 Backbone 进行 RMSF 分析
gmx_mpi rmsf -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -o rmsf_backbone.xvg -res -ox avg_structure_backbone.pdb -n index.ndx << EOF
4 # 选择 Backbone 进行 RMSF 分析
EOF
# 使用 plot_rmsf.sh 绘制 Backbone 的 RMSF 图像
./plot_rmsf.sh rmsf_backbone.xvg rmsf_backbone.svg
# 执行氢键分析
gmx_mpi hbond -s ${MDRUN_NAME}.tpr -n index.ndx -f ${MDRUN_NAME}_center.xtc -num hbond_protein_ligand.xvg << EOF
1 # 选择 Protein
13 # 选择 Ligand (配体)
EOF
# 使用 plot_hbond.sh 绘制氢键数量变化图像
./plot_hbond.sh hbond_protein_ligand.xvg hbond_protein_ligand.svg
# 执行回旋半径分析
# 对 Protein 进行回旋半径分析
gmx_mpi gyrate -s ${MDRUN_NAME}.tpr -n index.ndx -f ${MDRUN_NAME}_center.xtc -o gyration_protein.xvg << EOF
1 # 选择 Protein 进行回旋半径分析
EOF
# 使用 plot_gyration.sh 绘制 Protein 的回旋半径图像
./plot_gyration.sh gyration_protein.xvg gyration_protein.svg
# 对 Protein_MOL 进行回旋半径分析
gmx_mpi gyrate -s ${MDRUN_NAME}.tpr -n index.ndx -f ${MDRUN_NAME}_center.xtc -o gyration_protein_mol.xvg << EOF
21 # 选择 Protein_MOL 进行回旋半径分析
EOF
# 使用 plot_gyration.sh 绘制 Protein_MOL 的回旋半径图像
./plot_gyration.sh gyration_protein_mol.xvg gyration_protein_mol.svg
: << EOF
MMPBSA计算结合能计算
采集40-50ns的模拟数据这里通常采集体系平衡的数据
gmx trjconv -f md_0_10_center.xtc -o trj_40-50ns.xtc -b 40000 -e 50000
检查轨迹路径
gmx check -f trj_40-50ns.xtc
检查index.ndx 配体的索引只能是一个多的必须删除ctrl+F搜索配体名称剩一个配体即可。删除后保存。
下载 gmx_mmpbsa.bash网址https://github.com/Jerkwin/gmxtools/blob/master/gmx_mmpbsa/gmx_mmpbsa.bsh
修改文件:
尚未完成参考:
链接https://www.jianshu.com/p/d1ae60c96b33
EOF
# gmx_mpi rms -f md.xtc -s md.tpr -o rmsd_protein.xvg
# 两次都选骨架部分protein
# 注意观测在模拟过程中骨架的RMSD波动是否较大
# gmx_mpi rms -f md.xtc -s md.tpr -o rmsd_protein.xvg
# 第一次选择骨架部分protein第二次选择配体MOL
# 可以消除蛋白质整体的运动,观察小分子配体相对于蛋白质的运动
# extra ndx file , select protein
echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE}
# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx
# Create extraction output directory
mkdir -p ${OUTPUT_FOLDER}
# Create temp output directory
mkdir -p ${TEMP_FOLDER}
echo -e "1\nq" | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc
# echo -e "1\nq" | gmx_mpi trjconv -dt 100 -s md.tpr -f md.xtc -n index.ndx -pbc mol -o temp/temp.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -o temp/traj_show.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -b 0 -e 0 -o temp/tarj_show.pdb
# Group 1 ( Protein)
# ---
# Step 1: Extract frames every 1000 ps
gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <<EOF
0
EOF
# Step 2: Center and fit the trajectory
# Centering the protein and fitting to the initial frame
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -pbc mol -center <<EOF
1
1
EOF
# Step 3: Output PDB format file
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${MDRUN_NAME}.pdb -pbc mol -center <<EOF
1
0
EOF
# Continue with further analysis like RMSD calculation...
# ... [other analysis commands] ...
# End of script
# command reference
# Command 1: 提取蛋白质
command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc'
# echo "Protein": 选择蛋白质组,用于告诉 gmx trjconv 要处理哪个部分。
# -dt 1000: 指定时间间隔这里是1000 picoseconds用于从 .xtc 文件中抽取帧。
# -s {tpr_file}: 指定拓扑文件(.tpr它包含了模拟系统的完整描述。
# -f {xtc_file}: 指定原始的 .xtc 轨迹文件。
# -n {temp_folder}/tarj_show.ndx: 指定索引文件,其中包含各种原子群的定义。
# -pbc mol: 处理周期性边界条件,确保分子不会被分割。
# -o {temp_folder}/temp.xtc: 指定输出文件名和位置。
# Command 2: 中心对齐蛋白质
command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc'
# echo "Protein\nProtein\nProtein": 三次选择蛋白质组,分别用于中心化、拟合和输出。
# -center: 将蛋白质移动到框架的中心。
# -fit rot+trans: 对齐蛋白质,通过旋转和平移来最佳拟合。
# -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。
# Command 3: 抽取帧生成 .pdb 文件
command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb'
# -b 0 -e 0: 指定开始和结束时间这里设置为0表示只取第一帧。
# -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式,存储在指定的位置。