add protein and ligand

This commit is contained in:
2024-09-19 21:12:59 +08:00
parent 363d9b2fe1
commit 308333d97e
8 changed files with 8370 additions and 0 deletions

394
script/S112D/S112D.sh Executable file
View File

@@ -0,0 +1,394 @@
#!/bin/bash
#This script needs to be edited for each run.
#Define PDB Filename & GROMACS Pameters
# reference: http://www.mdtutorials.com/gmx/lysozyme/
# NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh
# 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000对应 100ns然后运行 md_gromacs.sh 脚本
# Check if GMXRC_PATH is provided and source it
if [ -n "$GMXRC_PATH" ]; then
source "$GMXRC_PATH" # source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC
fi
GPU_ID=${GPU_ID:-"0"}
NAME=${NAME:-"S112D-Alphafold2"}
LIGAND_GRO="ligand.gro" # 配体的 .gro 文件
LIGAND_ITP="ligand.itp" # 配体的 .itp 文件
# FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"}
FORCEFIELD=${FORCEFIELD:-"amber99sb"}
WATERMODEL=${WATERMODEL:-"tip3p"}
WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"}
BOXTYPE=${BOXTYPE:-"tric"}
BOXORIENTATION=${BOXORIENTATION:-"1.0"}
# NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns
NSTEPS=${NSTEPS:-50000000} # 100 ns 模拟
DT=${DT:-0.002} # 2 fs
#BOXSIZE="5.0"
#BOXCENTER="2.5"
# Define simulation name variable
MDRUN_NAME=${MDRUN_NAME:-"md"}
NDX_NAME=${NDX_NAME:-"index"}
# Define analysis parameters
# Define other filenames based on MDRUN_NAME
TPR_FILE="${MDRUN_NAME}.tpr"
XTC_FILE="${MDRUN_NAME}.xtc"
NDX_FILE="${NDX_NAME}.ndx"
NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc"
OUTPUT_FOLDER=${OUTPUT_FOLDER:-"frame_extraction_output"}
TEMP_FOLDER=${TEMP_FOLDER:-"temp"}
# Define variables for frame extraction
EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-100} # Default to 100 ps if not set
# Print the current settings
echo "Current settings:"
echo "NAME: $NAME"
echo "LIGAND_GRO: $LIGAND_GRO"
echo "FORCEFIELD: $FORCEFIELD"
echo "WATERMODEL: $WATERMODEL"
echo "WATERTOPFILE: $WATERTOPFILE"
echo "BOXTYPE: $BOXTYPE"
echo "BOXORIENTATION: $BOXORIENTATION"
echo "NSTEPS: $NSTEPS"
echo "DT: $DT"
echo "MDRUN_NAME: $MDRUN_NAME"
echo "TPR_FILE: $TPR_FILE"
echo "XTC_FILE: $XTC_FILE"
echo "NO_PBC_XTC_FILE: $NO_PBC_XTC_FILE"
echo "OUTPUT_FOLDER: $OUTPUT_FOLDER"
echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS"
# Step 1: Generate GROMACS .gro file for the protein
gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -p topol.top -ignh
# gmx_mpi pdb2gmx -f protein.pdb -o protein.gro -ff $FORCEFIELD -water $WATERMODEL -p topol.top -ignh
# Step 2: Merge protein and ligand .gro files into a complex
cat protein.gro mol.gro > complex.gro
# 获取蛋白质和小分子的原子数
PROTEIN_ATOMS=$(head -n 2 protein.gro | tail -n 1 | tr -d ' ')
LIGAND_ATOMS=$(head -n 2 mol.gro | tail -n 1 | tr -d ' ')
# 打印原子数,确保正确
echo "Protein atoms: $PROTEIN_ATOMS"
echo "Ligand atoms: $LIGAND_ATOMS"
# 计算总原子数
TOTAL_ATOMS=$(($PROTEIN_ATOMS + $LIGAND_ATOMS))
# 打印总原子数
echo "Total atoms: $TOTAL_ATOMS"
# Update the atom count in the complex.gro file
sed -i "2s/\(^[[:space:]]*\)[0-9]\+/\1$TOTAL_ATOMS/" complex.gro
echo -e "0\nq" | gmx_mpi genrestr -f mol.gro -o posre_mol.itp
cat << EOF >> mol.itp
#ifdef POSRES
#include "posre_mol.itp"
#endif
EOF
# Modify the topology file to include the ligand
# echo "#include \"$LIGAND_ITP\"" >> topol.top
# Step 6: Modify the topology file to include the ligand and its parameters
# 6.1 在topol.top顶部添加小分子的参数文件
sed -i '/^; Include forcefield parameters/a #include "jz4.prm"' topol.top
# 6.2 在position restraint后添加小分子的拓扑文件
sed -i '/^; Include Position restraint file/!b;n;/#endif/a ; Include ligand topology\n#include "jz4.itp"' topol.top
# 6.3 在[molecules]部分添加小分子
sed -i '/^Protein_chain_A/a JZ4 1' topol.top
# Step 3: Define the simulation box
gmx_mpi editconf -f complex.gro -o complex-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION
# Step 4: Add solvate
gmx_mpi solvate -cp complex-box.gro -cs $WATERTOPFILE -o complex-solv.gro -p topol.top
# add icons # ! ions.mdp add by manual
# --- ions.mdp file content --- #
cat << EOF > ions.mdp
; ions.mdp - used as input into grompp to generate ions.tpr
; Parameters describing what to do, when to stop and what to save
integrator = steep ; Algorithm steep = steepest descent minimization
emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep = 0.01 ; Minimization step size
nsteps = 50000 ; Maximum number of minimization steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist = 1 ; Frequency to update the neighbor list and long range forces
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; Method to determine neighbor list simple, grid
coulombtype = cutoff ; Treatment of long range electrostatic interactions
rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f ions.mdp -c complex-solv.gro -p topol.top -o ions.tpr -maxwarn 1
echo SOL | gmx_mpi genion -s ions.tpr -o complex-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral
# energy minimization of the structure in solvate # ! minim.mdp add by manual
# --- minim.mdp file content --- #
cat << EOF > minim.mdp
; minim.mdp - used as input into grompp to generate em.tpr
; Parameters describing what to do, when to stop and what to save
integrator = steep ; Algorithm steep = steepest descent minimization
emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep = 0.01 ; Minimization step size
nsteps = 50000 ; Maximum number of minimization steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist = 1 ; Frequency to update the neighbor list and long range forces
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; Method to determine neighbor list simple, grid
coulombtype = PME ; Treatment of long range electrostatic interactions
rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f minim.mdp -c complex-solv-ions.gro -p topol.top -o em.tpr
gmx_mpi mdrun -v -deffnm em
# optional em, you will need the Xmgrace plotting too
#gmx_mpi energy -f em.edr -o potential.xvg
#position restrain
# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro
# gmx_mpi mdrun -v -deffnm posre
# nvt
# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr
# --- nvt.mdp file content --- #
cat << EOF > nvt.mdp
title = OPLS Lysozyme NVT equilibration
define = -DPOSRES ; position restrain the protein
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstxout = 500 ; save coordinates every 1.0 ps
nstvout = 500 ; save velocities every 1.0 ps
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
; Bond parameters
continuation = no ; first dynamics run
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Nonbonded settings
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet
rcoulomb = 1.0 ; short-range electrostatic cutoff in nm
rvdw = 1.0 ; short-range van der Waals cutoff in nm
DispCorr = EnerPres ; account for cut-off vdW scheme
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling is off
pcoupl = no ; no pressure coupling in NVT
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Velocity generation
gen_vel = yes ; assign velocities from Maxwell distribution
gen_temp = 300 ; temperature for Maxwell distribution
gen_seed = -1 ; generate a random seed
EOF
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr
gmx_mpi mdrun -v -deffnm nvt
# optional : Let's analyze the temperature progression, again using energy:
# gmx_mpi energy -f nvt.edr -o temperature.xvg
# npt
# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
# --- npt.mdp file content --- #
cat << EOF > npt.mdp
title = OPLS Lysozyme NPT equilibration
define = -DPOSRES ; position restrain the protein
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstxout = 500 ; save coordinates every 1.0 ps
nstvout = 500 ; save velocities every 1.0 ps
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
; Bond parameters
continuation = yes ; Restarting after NVT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Nonbonded settings
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme
rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm)
rvdw = 1.0 ; short-range van der Waals cutoff (in nm)
DispCorr = EnerPres ; account for cut-off vdW scheme
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling is on
pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
refcoord_scaling = com
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Velocity generation
gen_vel = no ; Velocity generation is off
EOF
gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
gmx_mpi mdrun -v -deffnm npt
# Optional: Let's analyze the pressure progression, again using energy: type 18 0
# gmx energy -f npt.edr -o pressure.xvg
# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt.
# gmx energy -f npt.edr -o density.xvg
# md
# --- md.mdp file content --- #
cat << EOF > ${MDRUN_NAME}.mdp
title = OPLS Lysozyme NPT equilibration
; Run parameters
integrator = md ; leap-frog integrator
nsteps = ${NSTEPS} ; steps for simulation
dt = ${DT} ; time step in fs
; Output control
nstxout = 0 ; suppress bulky .trr file by specifying
nstvout = 0 ; 0 for output frequency of nstxout,
nstfout = 0 ; nstvout, and nstfout
nstenergy = 5000 ; save energies every 10.0 ps
nstlog = 5000 ; update log file every 10.0 ps
nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps
compressed-x-grps = System ; save the whole system
; Bond parameters
continuation = yes ; Restarting after NPT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Neighborsearching
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme
rcoulomb = 1.0 ; short-range electrostatic cutoff in nm
rvdw = 1.0 ; short-range van der Waals cutoff in nm
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 310 310 ; reference temperature, one for each group, in K
; Pressure coupling is on
pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Dispersion correction
DispCorr = EnerPres ; account for cut-off vdW scheme
; Velocity generation
gen_vel = no ; Velocity generation is off
EOF
# Generate GROMACS .tpr file for the simulation
gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE}
# Run the simulation
gmx_mpi mdrun -deffnm ${MDRUN_NAME} -update gpu -ntomp 1 -gpu_id ${GPU_ID}
# gmx_mpi rms -f md.xtc -s md.tpr -o rmsd_protein.xvg
# 两次都选骨架部分protein
# 注意观测在模拟过程中骨架的RMSD波动是否较大
# gmx_mpi rms -f md.xtc -s md.tpr -o rmsd_protein.xvg
# 第一次选择骨架部分protein第二次选择配体MOL
# 可以消除蛋白质整体的运动,观察小分子配体相对于蛋白质的运动
# extra ndx file , select protein
echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE}
# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx
# Create extraction output directory
mkdir -p ${OUTPUT_FOLDER}
# Create temp output directory
mkdir -p ${TEMP_FOLDER}
echo -e "1\nq" | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc
# echo -e "1\nq" | gmx_mpi trjconv -dt 100 -s md.tpr -f md.xtc -n index.ndx -pbc mol -o temp/temp.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -o temp/traj_show.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -b 0 -e 0 -o temp/tarj_show.pdb
# Group 1 ( Protein)
# ---
# Step 1: Extract frames every 1000 ps
gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <<EOF
0
EOF
# Step 2: Center and fit the trajectory
# Centering the protein and fitting to the initial frame
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -pbc mol -center <<EOF
1
1
EOF
# Step 3: Output PDB format file
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${MDRUN_NAME}.pdb -pbc mol -center <<EOF
1
0
EOF
# Continue with further analysis like RMSD calculation...
# ... [other analysis commands] ...
# End of script
# command reference
# Command 1: 提取蛋白质
command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc'
# echo "Protein": 选择蛋白质组,用于告诉 gmx trjconv 要处理哪个部分。
# -dt 1000: 指定时间间隔这里是1000 picoseconds用于从 .xtc 文件中抽取帧。
# -s {tpr_file}: 指定拓扑文件(.tpr它包含了模拟系统的完整描述。
# -f {xtc_file}: 指定原始的 .xtc 轨迹文件。
# -n {temp_folder}/tarj_show.ndx: 指定索引文件,其中包含各种原子群的定义。
# -pbc mol: 处理周期性边界条件,确保分子不会被分割。
# -o {temp_folder}/temp.xtc: 指定输出文件名和位置。
# Command 2: 中心对齐蛋白质
command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc'
# echo "Protein\nProtein\nProtein": 三次选择蛋白质组,分别用于中心化、拟合和输出。
# -center: 将蛋白质移动到框架的中心。
# -fit rot+trans: 对齐蛋白质,通过旋转和平移来最佳拟合。
# -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。
# Command 3: 抽取帧生成 .pdb 文件
command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb'
# -b 0 -e 0: 指定开始和结束时间这里设置为0表示只取第一帧。
# -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式,存储在指定的位置。