Files
analysis_pdb/md_gromacs.sh.bak
2024-01-05 14:22:17 +08:00

333 lines
17 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#This script needs to be edited for each run.
#Define PDB Filename & GROMACS Pameters
# reference: http://www.mdtutorials.com/gmx/lysozyme/
# NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh
# 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000对应 100ns然后运行 md_gromacs.sh 脚本
# Check if GMXRC_PATH is provided and source it
if [ -n "$GMXRC_PATH" ]; then
source "$GMXRC_PATH" # source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC
fi
NAME=${NAME:-"5sws_fixer"}
FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"}
WATERMODEL=${WATERMODEL:-"tip3p"}
WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"}
BOXTYPE=${BOXTYPE:-"tric"}
BOXORIENTATION=${BOXORIENTATION:-"1.0"}
NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns
DT=${DT:-0.002} # 2 fs
#BOXSIZE="5.0"
#BOXCENTER="2.5"
# Define simulation name variable
MDRUN_NAME=${MDRUN_NAME:-"md"}
NDX_NAME=${NDX_NAME:-"index"}
# Define analysis parameters
# Define other filenames based on MDRUN_NAME
TPR_FILE="${MDRUN_NAME}.tpr"
XTC_FILE="${MDRUN_NAME}.xtc"
NDX_FILE="${NDX_NAME}.ndx"
NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc"
OUTPUT_FOLDER=${OUTPUT_FOLDER:-"frame_extraction_output"}
TEMP_FOLDER=${TEMP_FOLDER:-"temp"}
# Define variables for frame extraction
EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-100} # Default to 100 ps if not set
# Print the current settings
echo "Current settings:"
echo "NAME: $NAME"
echo "FORCEFIELD: $FORCEFIELD"
echo "WATERMODEL: $WATERMODEL"
echo "WATERTOPFILE: $WATERTOPFILE"
echo "BOXTYPE: $BOXTYPE"
echo "BOXORIENTATION: $BOXORIENTATION"
echo "NSTEPS: $NSTEPS"
echo "DT: $DT"
echo "MDRUN_NAME: $MDRUN_NAME"
echo "TPR_FILE: $TPR_FILE"
echo "XTC_FILE: $XTC_FILE"
echo "NO_PBC_XTC_FILE: $NO_PBC_XTC_FILE"
echo "OUTPUT_FOLDER: $OUTPUT_FOLDER"
echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS"
# generate GROMACS .gro file
gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -ignh -p topol.top
# define the box
gmx_mpi editconf -f $NAME.gro -o $NAME-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION
# add solvate
gmx_mpi solvate -cp $NAME-box.gro -cs $WATERTOPFILE -o $NAME-solv.gro -p topol.top
# add icons # ! ions.mdp add by manual
# --- ions.mdp file content --- #
cat << EOF > ions.mdp
; ions.mdp - used as input into grompp to generate ions.tpr
; Parameters describing what to do, when to stop and what to save
integrator = steep ; Algorithm steep = steepest descent minimization
emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep = 0.01 ; Minimization step size
nsteps = 50000 ; Maximum number of minimization steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist = 1 ; Frequency to update the neighbor list and long range forces
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; Method to determine neighbor list simple, grid
coulombtype = cutoff ; Treatment of long range electrostatic interactions
rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f ions.mdp -c $NAME-solv.gro -p topol.top -o ions.tpr -maxwarn 1
echo SOL | gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral
# energy minimization of the structure in solvate # ! minim.mdp add by manual
# --- minim.mdp file content --- #
cat << EOF > minim.mdp
; minim.mdp - used as input into grompp to generate em.tpr
; Parameters describing what to do, when to stop and what to save
integrator = steep ; Algorithm steep = steepest descent minimization
emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm
emstep = 0.01 ; Minimization step size
nsteps = 50000 ; Maximum number of minimization steps to perform
; Parameters describing how to find the neighbors of each atom and how to calculate the interactions
nstlist = 1 ; Frequency to update the neighbor list and long range forces
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; Method to determine neighbor list simple, grid
coulombtype = PME ; Treatment of long range electrostatic interactions
rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
gmx_mpi grompp -f minim.mdp -c $NAME-solv-ions.gro -p topol.top -o em.tpr
gmx_mpi mdrun -v -deffnm em
# optional em, you will need the Xmgrace plotting too
#gmx_mpi energy -f em.edr -o potential.xvg
#position restrain
# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro
# gmx_mpi mdrun -v -deffnm posre
# nvt
# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr
# --- nvt.mdp file content --- #
cat << EOF > nvt.mdp
title = OPLS Lysozyme NVT equilibration
define = -DPOSRES ; position restrain the protein
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstxout = 500 ; save coordinates every 1.0 ps
nstvout = 500 ; save velocities every 1.0 ps
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
; Bond parameters
continuation = no ; first dynamics run
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Nonbonded settings
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet
rcoulomb = 1.0 ; short-range electrostatic cutoff in nm
rvdw = 1.0 ; short-range van der Waals cutoff in nm
DispCorr = EnerPres ; account for cut-off vdW scheme
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling is off
pcoupl = no ; no pressure coupling in NVT
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Velocity generation
gen_vel = yes ; assign velocities from Maxwell distribution
gen_temp = 300 ; temperature for Maxwell distribution
gen_seed = -1 ; generate a random seed
EOF
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr
gmx_mpi mdrun -v -deffnm nvt
# optional : Let's analyze the temperature progression, again using energy:
# gmx_mpi energy -f nvt.edr -o temperature.xvg
# npt
# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
# --- npt.mdp file content --- #
cat << EOF > npt.mdp
title = OPLS Lysozyme NPT equilibration
define = -DPOSRES ; position restrain the protein
; Run parameters
integrator = md ; leap-frog integrator
nsteps = 50000 ; 2 * 50000 = 100 ps
dt = 0.002 ; 2 fs
; Output control
nstxout = 500 ; save coordinates every 1.0 ps
nstvout = 500 ; save velocities every 1.0 ps
nstenergy = 500 ; save energies every 1.0 ps
nstlog = 500 ; update log file every 1.0 ps
; Bond parameters
continuation = yes ; Restarting after NVT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Nonbonded settings
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme
rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm)
rvdw = 1.0 ; short-range van der Waals cutoff (in nm)
DispCorr = EnerPres ; account for cut-off vdW scheme
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling is on
pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
refcoord_scaling = com
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Velocity generation
gen_vel = no ; Velocity generation is off
EOF
gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
gmx_mpi mdrun -v -deffnm npt
# Optional: Let's analyze the pressure progression, again using energy: type 18 0
# gmx energy -f npt.edr -o pressure.xvg
# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt.
# gmx energy -f npt.edr -o density.xvg
# md
# --- md.mdp file content --- #
cat << EOF > ${MDRUN_NAME}.mdp
title = OPLS Lysozyme NPT equilibration
; Run parameters
integrator = md ; leap-frog integrator
nsteps = ${NSTEPS} ; steps for simulation
dt = ${DT} ; time step in fs
; Output control
nstxout = 0 ; suppress bulky .trr file by specifying
nstvout = 0 ; 0 for output frequency of nstxout,
nstfout = 0 ; nstvout, and nstfout
nstenergy = 5000 ; save energies every 10.0 ps
nstlog = 5000 ; update log file every 10.0 ps
nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps
compressed-x-grps = System ; save the whole system
; Bond parameters
continuation = yes ; Restarting after NPT
constraint_algorithm = lincs ; holonomic constraints
constraints = h-bonds ; bonds involving H are constrained
lincs_iter = 1 ; accuracy of LINCS
lincs_order = 4 ; also related to accuracy
; Neighborsearching
cutoff-scheme = Verlet ; Buffered neighbor searching
ns_type = grid ; search neighboring grid cells
nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme
rcoulomb = 1.0 ; short-range electrostatic cutoff in nm
rvdw = 1.0 ; short-range van der Waals cutoff in nm
; Electrostatics
coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics
pme_order = 4 ; cubic interpolation
fourierspacing = 0.16 ; grid spacing for FFT
; Temperature coupling is on
tcoupl = V-rescale ; modified Berendsen thermostat
tc-grps = Protein Non-Protein ; two coupling groups - more accurate
tau_t = 0.1 0.1 ; time constant, in ps
ref_t = 300 300 ; reference temperature, one for each group, in K
; Pressure coupling is on
pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT
pcoupltype = isotropic ; uniform scaling of box vectors
tau_p = 2.0 ; time constant, in ps
ref_p = 1.0 ; reference pressure, in bar
compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1
; Periodic boundary conditions
pbc = xyz ; 3-D PBC
; Dispersion correction
DispCorr = EnerPres ; account for cut-off vdW scheme
; Velocity generation
gen_vel = no ; Velocity generation is off
EOF
# Generate GROMACS .tpr file for the simulation
gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE}
# Run the simulation
gmx_mpi mdrun -deffnm ${MDRUN_NAME}
# mpirun -np $(ls | egrep "Scaled[0-9]+$" | wc -l) gmx_mpi mdrun -v --deffnm md -cpi Scaled.cpt -multidir $(ls -v | egrep "Scaled[0-9]+$") -plumed plumed.dat -hrex -replex 1000 >& run_$(date "+%H%M%S_%d%m%Y").log || { echo "mdrun failed at line ${LINENO} "; exit -1; }
# extra ndx file , select protein
echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE}
# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx
# Create extraction output directory
mkdir -p ${OUTPUT_FOLDER}
# Create temp output directory
mkdir -p ${TEMP_FOLDER}
echo -e "1\nq" | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc
# echo -e "1\nq" | gmx_mpi trjconv -dt 100 -s md.tpr -f md.xtc -n index.ndx -pbc mol -o temp/temp.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -o temp/traj_show.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -b 0 -e 0 -o temp/tarj_show.pdb
# Group 1 ( Protein)
# ---
# Step 1: Extract frames every 1000 ps
gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <<EOF
0
EOF
# Step 2: Center and fit the trajectory
# Centering the protein and fitting to the initial frame
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -pbc mol -center <<EOF
1
1
EOF
# Step 3: Output PDB format file
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${MDRUN_NAME}.pdb -pbc mol -center <<EOF
1
0
EOF
# Continue with further analysis like RMSD calculation...
# ... [other analysis commands] ...
# End of script
# command reference
# Command 1: 提取蛋白质
command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc'
# echo "Protein": 选择蛋白质组,用于告诉 gmx trjconv 要处理哪个部分。
# -dt 1000: 指定时间间隔这里是1000 picoseconds用于从 .xtc 文件中抽取帧。
# -s {tpr_file}: 指定拓扑文件(.tpr它包含了模拟系统的完整描述。
# -f {xtc_file}: 指定原始的 .xtc 轨迹文件。
# -n {temp_folder}/tarj_show.ndx: 指定索引文件,其中包含各种原子群的定义。
# -pbc mol: 处理周期性边界条件,确保分子不会被分割。
# -o {temp_folder}/temp.xtc: 指定输出文件名和位置。
# Command 2: 中心对齐蛋白质
command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc'
# echo "Protein\nProtein\nProtein": 三次选择蛋白质组,分别用于中心化、拟合和输出。
# -center: 将蛋白质移动到框架的中心。
# -fit rot+trans: 对齐蛋白质,通过旋转和平移来最佳拟合。
# -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。
# Command 3: 抽取帧生成 .pdb 文件
command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb'
# -b 0 -e 0: 指定开始和结束时间这里设置为0表示只取第一帧。
# -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式,存储在指定的位置。