gromacs_docker/script/S112D/S112D.sh

#!/bin/bash
#This script needs to be edited for each run.
#Define PDB Filename & GROMACS Pameters
# reference: http://www.mdtutorials.com/gmx/lysozyme/
# NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh
# 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000（对应 100ns），然后运行 md_gromacs.sh 脚本
# Check if GMXRC_PATH is provided and source it
if [ -n "$GMXRC_PATH" ]; then
    source "$GMXRC_PATH" # source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC
fi
GPU_ID=${GPU_ID:-"0"}

NAME="protein"
LIGAND_GRO="mole.gro"          # 配体的 .gro 文件
LIGAND_ITP="mole.itp"           # 配体的 .itp 文件
# FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"}
FORCEFIELD=${FORCEFIELD:-"amber99sb"}
WATERMODEL=${WATERMODEL:-"tip3p"}
WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"}
BOXTYPE=${BOXTYPE:-"tric"} # or dodecahedron
BOXORIENTATION=${BOXORIENTATION:-"1.0"}
# NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns
NSTEPS=${NSTEPS:-50000000}  # 100 ns 模拟
DT=${DT:-0.002} # 2 fs
#BOXSIZE="5.0"
#BOXCENTER="2.5"
# Define simulation name variable
MDRUN_NAME=${MDRUN_NAME:-"md"}
NDX_NAME=${NDX_NAME:-"index"}
# Define analysis parameters
# Define other filenames based on MDRUN_NAME
TPR_FILE="${MDRUN_NAME}.tpr"
XTC_FILE="${MDRUN_NAME}.xtc"
NDX_FILE="${NDX_NAME}.ndx"
NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc"
OUTPUT_FOLDER=${OUTPUT_FOLDER:-"frame_extraction_output"}
TEMP_FOLDER=${TEMP_FOLDER:-"temp"}
# Define variables for frame extraction
EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-100} # Default to 100 ps if not set

# Print the current settings
echo "Current settings:"
echo "NAME: $NAME"
echo "LIGAND_GRO: $LIGAND_GRO"
echo "FORCEFIELD: $FORCEFIELD"
echo "WATERMODEL: $WATERMODEL"
echo "WATERTOPFILE: $WATERTOPFILE"
echo "BOXTYPE: $BOXTYPE"
echo "BOXORIENTATION: $BOXORIENTATION"
echo "NSTEPS: $NSTEPS"
echo "DT: $DT"
echo "MDRUN_NAME: $MDRUN_NAME"
echo "TPR_FILE: $TPR_FILE"
echo "XTC_FILE: $XTC_FILE"
echo "NO_PBC_XTC_FILE: $NO_PBC_XTC_FILE"
echo "OUTPUT_FOLDER: $OUTPUT_FOLDER"
echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS"

# Step 1: Generate GROMACS .gro file for the protein
gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p topol.top -ignh
# gmx_mpi pdb2gmx -f protein.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p topol.top -ignh
# gmx_mpi pdb2gmx -f protein.pdb -o protein.gro -ff amber99sb -water tip3p -p unk.top -ignh

# 使用 Sobtop（http://sobereva.com/soft/Sobtop）提取
# Step 2: Merge protein and ligand .gro files into a complex
cat $NAME.gro mole.gro > complex.gro

# 获取蛋白质和小分子的原子数
PROTEIN_ATOMS=$(head -n 2 $NAME.gro | tail -n 1 | tr -d ' ')
LIGAND_ATOMS=$(head -n 2 mole.gro | tail -n 1 | tr -d ' ')

# 打印原子数，确保正确
echo "Protein atoms: $PROTEIN_ATOMS"
echo "Ligand atoms: $LIGAND_ATOMS"

# 计算总原子数
TOTAL_ATOMS=$(($PROTEIN_ATOMS + $LIGAND_ATOMS))

# 打印总原子数 7482
echo "Total atoms: $TOTAL_ATOMS"

# 更新原子数目
sed -i "2s/\(^[[:space:]]*\)[0-9]\+/\1$TOTAL_ATOMS/" complex.gro

# 定义盒子、添加溶剂和离子、能量最小化

# 定义盒子并向盒子充满水
# Step 3: Define the simulation box
gmx_mpi editconf -f complex.gro -o complex-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION
# gmx_mpi editconf -f complex.gro -o complex-box.gro -bt dodecahedron -c -d 1.0
# Step 4: Add solvate
gmx_mpi solvate -cp complex-box.gro -cs $WATERTOPFILE -o complex-solv.gro -p topol.top
# gmx_mpi solvate -cp complex-box.gro -cs spc216.gro -o complex-solv.gro -p topol.top

# 添加离子
# add icons # ! ions.mdp add by manual
# --- ions.mdp file content --- #
cat << EOF > ions.mdp
; ions.mdp - used as input into grompp to generate ions.tpr
; Parameters describing what to do, when to stop and what to save
integrator  = steep         ; Algorithm （steep = steepest descent minimization）
emtol       = 1000.0        ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep      = 0.01          ; Minimization step size
nsteps      = 50000         ; Maximum number of （minimization） steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist         = 1         ; Frequency to update the neighbor list and long range forces
cutoff-scheme  = Verlet    ; Buffered neighbor searching
ns_type         = grid      ; Method to determine neighbor list （simple, grid）
coulombtype     = cutoff    ; Treatment of long range electrostatic interactions
rcoulomb        = 1.0       ; Short-range electrostatic cut-off
rvdw            = 1.0       ; Short-range Van der Waals cut-off
pbc             = xyz       ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f ions.mdp -c complex-solv.gro -p topol.top -o ions.tpr -maxwarn 1
# 添加NaCl
echo SOL | gmx_mpi genion -s ions.tpr -o complex-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral
# -pname 阳离子的名称
# -nname 阴离子的名称
# -np 阳离子个数
# -nn 阴离子个数

# 能量最小化
# energy minimization of the structure in solvate # ! minim.mdp add by manual
# --- minim.mdp file content --- #
cat << EOF > minim.mdp
; minim.mdp - used as input into grompp to generate em.tpr
; Parameters describing what to do, when to stop and what to save
integrator  = steep         ; Algorithm （steep = steepest descent minimization）
emtol       = 1000.0        ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep      = 0.01          ; Minimization step size
nsteps      = 50000         ; Maximum number of （minimization） steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist         = 1         ; Frequency to update the neighbor list and long range forces
cutoff-scheme   = Verlet    ; Buffered neighbor searching
ns_type         = grid      ; Method to determine neighbor list （simple, grid）
coulombtype     = PME       ; Treatment of long range electrostatic interactions
rcoulomb        = 1.0       ; Short-range electrostatic cut-off
rvdw            = 1.0       ; Short-range Van der Waals cut-off
pbc             = xyz       ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f minim.mdp -c complex-solv-ions.gro -p topol.top -o em.tpr
gmx_mpi mdrun -v -deffnm em

# optional em, you will need the Xmgrace plotting too
#gmx_mpi energy -f em.edr -o potential.xvg
# 查看系统的系统的总势能变化情况
echo "12\n" | gmx_mpi energy -f em.edr -o potential.xvg
# 系统的总势能变化绘图
./plot_potential.sh potential.xvg potential.svg
# 位置约束（Position Restraint）
# 位置约束是指在进行模拟时，约束某些原子的位置，使它们不会偏离初始坐标。在能量最小化之后的位置约束步骤用于对溶剂和其他环境进行平衡，同时保持蛋白质或其他分子的位置不变。通常用于溶液系统的准备过程，以避免对生物分子结构造成过大影响。

# 位置约束步骤解释
# posre.mdp 文件是位置约束的输入文件，它会约束特定原子的运动。
# grompp -r em.gro：此命令中的 -r em.gro 参数指明位置约束应基于已经能量最小化的结构 em.gro 进行。
# mdrun -deffnm posre：执行模拟时，会生成位置约束下的模拟结果。
# 如何使用
# 首先准备好 posre.mdp 文件，然后执行以下命令：

# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro
# gmx_mpi mdrun -v -deffnm posre

# 这将生成一个应用位置约束后的系统，用于后续的分子动力学模拟步骤。
# 位置限制的目的: 加速体系平衡, 减少计算资源浪费
# 对配体施加位置限制（position restraint），通常是为了在分子动力学模拟的早期阶段保持配体在特定位置，以帮助系统更快达到平衡，尤其是在 NVT（恒体积恒温）和 NPT（恒压恒温）平衡的过程中。这种约束有助于避免在系统能量未平衡时，配体位置大幅偏离而影响蛋白-配体复合物的稳定性。

# 位置限制通常在以下情况中使用：

# 能量最小化后，NVT 和 NPT 平衡之前：

# 在模拟的平衡阶段（NVT 和 NPT），施加配体限制可以保证配体在溶剂环境中不产生大幅度的偏移，特别是在系统温度和压力稳定之前。这时配体的限制作用是关键的。
# 平衡后，生产阶段去除限制：

# 在生产阶段（即实际的分子动力学生产模拟），通常应去除位置限制，以观察蛋白-配体复合物的真实动态行为。如果限制保留在生产阶段，配体的运动将受到不必要的约束，无法得到真实的配体运动轨迹。

# 限制可做可不做的情况：
# 如果体系比较简单，且配体与蛋白质的相互作用较强，位置限制可以选择不做，直接让配体在系统中自由运动。
# 如果配体是比较松散或弱结合到蛋白质上的，或者你关心配体的初期运动轨迹，初期的短时间平衡阶段可以不施加限制。

# 限制配体
# 平衡蛋白配体复合物与平衡水中只包含蛋白的体系非常类似，值得注意的是：
# 1、对配体施加限定。
# 2、对于温度耦合群的处理。
# 为了限定配体，需要为他生成一个位置限定拓扑。首先，为JZ4创建一个只包含非氢原子的index group。

gmx_mpi make_ndx -f mole.gro -o index_mole.ndx << EOF
0 & ! a H*
q
EOF

# 执行genrestr模块，选择新创建的index group（在index_mole.ndx文件中是group 3）
# echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp
gmx_mpi genrestr -f mole.gro -n index_mole.ndx -o posre_mole.itp -fc 1000 1000 1000 << EOF
3
EOF

# 下一步需要把这个信息包括在拓扑文件中。有几种方式，取决于我们希望使用的条件。这里介绍第一种：如果我们想在无论蛋白是否被限定的条件下对配体施加限定，在拓扑文件的Include ligand topology之后中加入如下内容：
# 在拓扑文件的Include ligand topology之后中加入如下内容

: << EOF
; Ligand position restraints
#ifdef POSRES
#include "posre_mole.itp"
#endif
EOF

# 必须要把限定这段话加在Include ligand topology之后，Include water topology之前，加在其他任何地方都会报错。

# 由于对只有几个原子的组在控制其动能的涨落时, 温度耦合算法不够稳定，不要对体系中的每一单个物种使用独立的耦合。创建一个特殊的组, 其中包含蛋白质和配体。

gmx_mpi make_ndx -f em.gro -o index.ndx << EOF
1 | 13
q
EOF

# NVT、NPT平衡
# nvt
# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr
# --- nvt.mdp file content --- #
cat << EOF > nvt.mdp
title                   = Protein-ligand complex NVT equilibration
define                  = -DPOSRES  ; position restrain the protein and ligand
; Run parameters
integrator              = md        ; leap-frog integrator
nsteps                  = 50000     ; 2 * 50000 = 100 ps
dt                      = 0.002     ; 2 fs
; Output control
nstenergy               = 500       ; save energies every 1.0 ps
nstlog                  = 500       ; update log file every 1.0 ps
nstxout-compressed      = 500       ; save coordinates every 1.0 ps
; Bond parameters
continuation            = no        ; first dynamics run
constraint_algorithm    = lincs     ; holonomic constraints
constraints             = h-bonds   ; bonds involving H are constrained
lincs_iter              = 1         ; accuracy of LINCS
lincs_order             = 4         ; also related to accuracy
; Neighbor searching and vdW
cutoff-scheme           = Verlet
ns_type                 = grid      ; search neighboring grid cells
nstlist                 = 20        ; largely irrelevant with Verlet
rlist                   = 1.2
vdwtype                 = cutoff
vdw-modifier            = force-switch
rvdw-switch             = 1.0
rvdw                    = 1.2       ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype             = PME       ; Particle Mesh Ewald for long-range electrostatics
rcoulomb                = 1.2       ; short-range electrostatic cutoff (in nm)
pme_order               = 4         ; cubic interpolation
fourierspacing          = 0.16      ; grid spacing for FFT
; Temperature coupling is on
tcoupl                  = V-rescale             ; modified Berendsen thermostat
tc-grps                 = Protein_MOL Water_and_ions    ; two coupling groups - more accurate
tau_t                   = 0.1     0.1           ; time constant, in ps
ref_t                   = 310     310           ; reference temperature, one for each group, in K
; Pressure coupling is off
pcoupl                  = no        ; no pressure coupling in NVT
; Periodic boundary conditions
pbc                     = xyz       ; 3-D PBC
; Velocity generation
gen_vel                 = yes       ; assign velocities from Maxwell distribution
gen_temp                = 300       ; temperature for Maxwell distribution
gen_seed                = -1        ; generate a random seed
EOF
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr -n index.ndx
gmx_mpi mdrun -v -deffnm nvt

# optional : Let's analyze the temperature progression, again using energy:
# 提取并保存温度数据
gmx_mpi energy -f nvt.edr -o temperature.xvg << EOF
17 39 40
EOF
# 拆分 .xvg 文件
# 提取整体温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $2}' > system_temperature.xvg

# 提取 Protein_MOL 组的温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $3}' > protein_mol_temperature.xvg

# 提取 Water_and_ions 组的温度
grep -v '^@' temperature.xvg | grep -v '^#' | awk '{print $1, $4}' > water_and_ions_temperature.xvg

# 绘制温度变化曲线
./plot_potential.sh system_temperature.xvg system_temperature.svg
./plot_potential.sh protein_mol_temperature.xvg protein_mol_temperature.svg
./plot_potential.sh water_and_ions_temperature.xvg water_and_ions_temperature.svg

# NPT平衡
# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
# --- npt.mdp file content --- #
cat << EOF > npt.mdp
title                   = Protein-ligand complex NPT equilibration
define                  = -DPOSRES  ; position restrain the protein and ligand
; Run parameters
integrator              = md        ; leap-frog integrator
nsteps                  = 50000     ; 2 * 50000 = 100 ps
dt                      = 0.002     ; 2 fs
; Output control
nstenergy               = 500       ; save energies every 1.0 ps
nstlog                  = 500       ; update log file every 1.0 ps
nstxout-compressed      = 500       ; save coordinates every 1.0 ps
; Bond parameters
continuation            = yes       ; continuing from NVT
constraint_algorithm    = lincs     ; holonomic constraints
constraints             = h-bonds   ; bonds to H are constrained
lincs_iter              = 1         ; accuracy of LINCS
lincs_order             = 4         ; also related to accuracy
; Neighbor searching and vdW
cutoff-scheme           = Verlet
ns_type                 = grid      ; search neighboring grid cells
nstlist                 = 20        ; largely irrelevant with Verlet
rlist                   = 1.2
vdwtype                 = cutoff
vdw-modifier            = force-switch
rvdw-switch             = 1.0
rvdw                    = 1.2       ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype             = PME       ; Particle Mesh Ewald for long-range electrostatics
rcoulomb                = 1.2
pme_order               = 4         ; cubic interpolation
fourierspacing          = 0.16      ; grid spacing for FFT
; Temperature coupling
tcoupl                  = V-rescale                     ; modified Berendsen thermostat
tc-grps                 = Protein_MOL Water_and_ions    ; two coupling groups - more accurate
tau_t                   = 0.1   0.1                     ; time constant, in ps
ref_t                   = 300   300                     ; reference temperature, one for each group, in K
; Pressure coupling
pcoupl                  = Parrinello-Rahman             ; Parrinello-Rahman or C-rescale for NPT production
pcoupltype              = isotropic                     ; uniform scaling of box vectors
tau_p                   = 2.0                           ; time constant, in ps
ref_p                   = 1.0                           ; reference pressure, in bar
compressibility         = 4.5e-5                        ; isothermal compressibility of water, bar^-1
refcoord_scaling        = com
; Periodic boundary conditions
pbc                     = xyz       ; 3-D PBC
; Dispersion correction is not used for proteins with the C36 additive FF
DispCorr                = no
; Velocity generation
gen_vel                 = no        ; velocity generation off after NVT
EOF
gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr -n index.ndx
gmx_mpi mdrun -v -deffnm npt
# Optional: Let's analyze the pressure progression, again using energy: type 18 0
gmx_mpi energy -f npt.edr -o pressure.xvg << EOF
18
0
EOF

# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt.
gmx_mpi energy -f npt.edr -o density.xvg << EOF
24
0
EOF

# 绘制 SVG 图像
# 绘制压力图像
./plot_potential.sh pressure.xvg pressure.svg

# 绘制密度图像
./plot_potential.sh density.xvg density.svg

# md
# --- md.mdp file content --- #
cat << EOF > ${MDRUN_NAME}.mdp
title                   = Protein-ligand complex MD simulation
; Run parameters
integrator              = md        ; leap-frog integrator
nsteps                  = 5000000   ; 2 * 5000000 = 10000 ps (10 ns)
dt                      = 0.002     ; 2 fs
; Output control
nstenergy               = 5000      ; save energies every 10.0 ps
nstlog                  = 5000      ; update log file every 10.0 ps
nstxout-compressed      = 5000      ; save coordinates every 10.0 ps
; Bond parameters
continuation            = yes       ; continuing from NPT
constraint_algorithm    = lincs     ; holonomic constraints
constraints             = h-bonds   ; bonds to H are constrained
lincs_iter              = 1         ; accuracy of LINCS
lincs_order             = 4         ; also related to accuracy
; Neighbor searching and vdW
cutoff-scheme           = Verlet
ns_type                 = grid      ; search neighboring grid cells
nstlist                 = 20        ; largely irrelevant with Verlet
rlist                   = 1.2
vdwtype                 = cutoff
vdw-modifier            = force-switch
rvdw-switch             = 1.0
rvdw                    = 1.2       ; short-range van der Waals cutoff (in nm)
; Electrostatics
coulombtype             = PME       ; Particle Mesh Ewald for long-range electrostatics
rcoulomb                = 1.2
pme_order               = 4         ; cubic interpolation
fourierspacing          = 0.16      ; grid spacing for FFT
; Temperature coupling
tcoupl                  = V-rescale                     ; modified Berendsen thermostat
tc-grps                 = Protein_MOL Water_and_ions    ; two coupling groups - more accurate
tau_t                   = 0.1   0.1                     ; time constant, in ps
ref_t                   = 310   310                     ; reference temperature, one for each group, in K
; Pressure coupling
pcoupl                  = Parrinello-Rahman             ; pressure coupling is on for NPT
pcoupltype              = isotropic                     ; uniform scaling of box vectors
tau_p                   = 2.0                           ; time constant, in ps
ref_p                   = 1.0                           ; reference pressure, in bar
compressibility         = 4.5e-5                        ; isothermal compressibility of water, bar^-1
; Periodic boundary conditions
pbc                     = xyz       ; 3-D PBC
; Dispersion correction is not used for proteins with the C36 additive FF
DispCorr                = no
; Velocity generation
gen_vel                 = no        ; continuing from NPT equilibration
EOF

# Generate GROMACS .tpr file for the simulation
gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} -n index.ndx

# Run the simulation
gmx_mpi mdrun -deffnm ${MDRUN_NAME} -update gpu  -ntomp 1 -gpu_id ${GPU_ID}

# 中心化并修正 PBC

gmx_mpi trjconv -s ${TPR_FILE} -f ${MDRUN_NAME}.xtc -o ${MDRUN_NAME}_center.xtc -center -pbc mol -ur compact << EOF
1
0
EOF

# 旋转和平移拟合

gmx_mpi trjconv -s ${TPR_FILE} -f ${MDRUN_NAME}_center.xtc -o ${MDRUN_NAME}_fit.xtc -fit rot+trans << EOF
4
0
EOF

# 提取轨迹帧
# -dt 100 表示每隔 100 ps 提取一个帧。即提取的时间间隔是 500 ps。  或者用控制变量：EXTRACT_EVERY_PS
# 选择组分 21 Protein_MOL
gmx_mpi trjconv -s ${TPR_FILE} -f ${MDRUN_NAME}_fit.xtc -o whole.pdb -dt 100 -b 1000 -e 20000 -n index.ndx << EOF
21
EOF

# RMSD 分析
# 分析 Protein_MOL 的 RMSD 并绘制图像
gmx_mpi rms -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -tu ns -o rmsd_protein_mol.xvg -n index.ndx << EOF
21  # 选择 "Protein_MOL" 进行拟合
21  # 选择 "Protein_MOL" 进行 RMSD 计算
EOF
./plot_rmsd.sh rmsd_protein_mol.xvg rmsd_protein_mol.svg

# 分析蛋白骨架 (Backbone) 的 RMSD 并绘制图像
gmx_mpi rms -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -tu ns -o rmsd_backbone.xvg -n index.ndx << EOF
4  # 选择 "Backbone" 进行拟合
4  # 选择 "Backbone" 进行 RMSD 计算
EOF
./plot_rmsd.sh rmsd_backbone.xvg rmsd_backbone.svg

# 执行 RMSF 分析
# 对 Protein 进行 RMSF 分析
gmx_mpi rmsf -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -o rmsf_protein.xvg -res -ox avg_structure_protein.pdb -n index.ndx << EOF
1  # 选择 Protein 进行 RMSF 分析
EOF

# 使用 plot_rmsf.sh 绘制 Protein 的 RMSF 图像
./plot_rmsf.sh rmsf_protein.xvg rmsf_protein.svg

# 对 Backbone 进行 RMSF 分析
gmx_mpi rmsf -s ${MDRUN_NAME}.tpr -f ${MDRUN_NAME}_center.xtc -o rmsf_backbone.xvg -res -ox avg_structure_backbone.pdb -n index.ndx << EOF
4  # 选择 Backbone 进行 RMSF 分析
EOF

# 使用 plot_rmsf.sh 绘制 Backbone 的 RMSF 图像
./plot_rmsf.sh rmsf_backbone.xvg rmsf_backbone.svg


# 执行氢键分析
gmx_mpi hbond -s ${MDRUN_NAME}.tpr -n index.ndx -f ${MDRUN_NAME}_center.xtc -num hbond_protein_ligand.xvg << EOF
1  # 选择 Protein
13 # 选择 Ligand (配体)
EOF

# 使用 plot_hbond.sh 绘制氢键数量变化图像
./plot_hbond.sh hbond_protein_ligand.xvg hbond_protein_ligand.svg

# 执行回旋半径分析
# 对 Protein 进行回旋半径分析
gmx_mpi gyrate -s ${MDRUN_NAME}.tpr -n index.ndx -f ${MDRUN_NAME}_center.xtc -o gyration_protein.xvg << EOF
1  # 选择 Protein 进行回旋半径分析
EOF

# 使用 plot_gyration.sh 绘制 Protein 的回旋半径图像
./plot_gyration.sh gyration_protein.xvg gyration_protein.svg

# 对 Protein_MOL 进行回旋半径分析
gmx_mpi gyrate -s ${MDRUN_NAME}.tpr -n index.ndx -f ${MDRUN_NAME}_center.xtc -o gyration_protein_mol.xvg << EOF
21  # 选择 Protein_MOL 进行回旋半径分析
EOF

# 使用 plot_gyration.sh 绘制 Protein_MOL 的回旋半径图像
./plot_gyration.sh gyration_protein_mol.xvg gyration_protein_mol.svg

: << EOF
MMPBSA计算（结合能计算）
采集40-50ns的模拟数据（这里通常采集体系平衡的数据）。
gmx trjconv -f md_0_10_center.xtc -o trj_40-50ns.xtc -b 40000 -e 50000

检查轨迹路径
gmx check -f trj_40-50ns.xtc

检查index.ndx， 配体的索引只能是一个，多的必须删除（ctrl+F搜索配体名称，剩一个配体即可）。删除后保存。

下载 gmx_mmpbsa.bash，网址：https://github.com/Jerkwin/gmxtools/blob/master/gmx_mmpbsa/gmx_mmpbsa.bsh
修改文件：

尚未完成参考：
链接：https://www.jianshu.com/p/d1ae60c96b33
EOF


# gmx_mpi rms -f md.xtc -s md.tpr -o rmsd_protein.xvg
# 两次都选骨架部分（protein）
# 注意观测在模拟过程中骨架的RMSD波动是否较大

# gmx_mpi rms -f md.xtc -s md.tpr -o rmsd_protein.xvg
# 第一次选择骨架部分（protein）第二次选择配体（MOL）
# 可以消除蛋白质整体的运动，观察小分子配体相对于蛋白质的运动

# extra ndx file , select protein
echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE}
# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx

# Create extraction output directory
mkdir -p ${OUTPUT_FOLDER}

# Create temp output directory
mkdir -p ${TEMP_FOLDER}

echo -e "1\nq" | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc

# echo -e "1\nq" | gmx_mpi trjconv -dt 100 -s md.tpr -f md.xtc -n index.ndx -pbc mol -o temp/temp.xtc

echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc

# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -o temp/traj_show.xtc

echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb

# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -b 0 -e 0 -o temp/tarj_show.pdb

# Group     1 (        Protein)

# ---
# Step 1: Extract frames every 1000 ps
gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <<EOF
0
EOF

# Step 2: Center and fit the trajectory
# Centering the protein and fitting to the initial frame
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -pbc mol -center <<EOF
1
1
EOF

# Step 3: Output PDB format file
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${MDRUN_NAME}.pdb -pbc mol -center <<EOF
1
0
EOF

# Continue with further analysis like RMSD calculation...
# ... [other analysis commands] ...

# End of script

# command reference

# Command 1: 提取蛋白质
command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc'
# echo "Protein": 选择蛋白质组，用于告诉 gmx trjconv 要处理哪个部分。
# -dt 1000: 指定时间间隔（这里是1000 picoseconds），用于从 .xtc 文件中抽取帧。
# -s {tpr_file}: 指定拓扑文件（.tpr），它包含了模拟系统的完整描述。
# -f {xtc_file}: 指定原始的 .xtc 轨迹文件。
# -n {temp_folder}/tarj_show.ndx: 指定索引文件，其中包含各种原子群的定义。
# -pbc mol: 处理周期性边界条件，确保分子不会被分割。
# -o {temp_folder}/temp.xtc: 指定输出文件名和位置。
# Command 2: 中心对齐蛋白质
command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc'
# echo "Protein\nProtein\nProtein": 三次选择蛋白质组，分别用于中心化、拟合和输出。
# -center: 将蛋白质移动到框架的中心。
# -fit rot+trans: 对齐蛋白质，通过旋转和平移来最佳拟合。
# -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。
# Command 3: 抽取帧生成 .pdb 文件
command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb'
# -b 0 -e 0: 指定开始和结束时间，这里设置为0表示只取第一帧。
# -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式，存储在指定的位置。