From c24fdc3283ab35b97136b2218504b8d6e37f765f Mon Sep 17 00:00:00 2001 From: hotwa Date: Mon, 4 Dec 2023 09:19:19 +0800 Subject: [PATCH] add gromacs shell script --- md_gromacs.sh | 285 +++++++++++++++++++++++++++++++++++++++++++++++++ process_xtc.sh | 29 +++++ 2 files changed, 314 insertions(+) create mode 100644 md_gromacs.sh create mode 100644 process_xtc.sh diff --git a/md_gromacs.sh b/md_gromacs.sh new file mode 100644 index 0000000..6190d3d --- /dev/null +++ b/md_gromacs.sh @@ -0,0 +1,285 @@ +#!/bin/bash +#This script needs to be edited for each run. +#Define PDB Filename & GROMACS Pameters +# reference: http://www.mdtutorials.com/gmx/lysozyme/ +# NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh +# 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000(对应 100ns),然后运行 md_gromacs.sh 脚本 +export NAME="5sws_fixer" +export FORCEFIELD="amber99sb-ildn" +export WATERMODEL="tip3p" +export WATERTOPFILE="spc216.gro" +export BOXTYPE="tric" +export BOXORIENTATION="1.0" +NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns +DT=${DT:-0.002} # 2 fs +#BOXSIZE="5.0" +#BOXCENTER="2.5" +# Define simulation name variable +MDRUN_NAME="md" +# Define analysis parameters +# Define other filenames based on MDRUN_NAME +TPR_FILE="${MDRUN_NAME}.tpr" +XTC_FILE="${MDRUN_NAME}.xtc" +NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc" +OUTPUT_FOLDER="analysis_results" +# Define variables for frame extraction +EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-1000} # Default to 1000 ps if not set +# source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC +# generate GROMACS .gro file +gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -ignh -p topol.top +# define the box +gmx_mpi editconf -f $NAME.gro -o $NAME-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION +# add solvate +gmx_mpi solvate -cp $NAME-box.gro -cs $WATERTOPFILE -o $NAME-solv.gro -p topol.top +# add icons # ! ions.mdp add by manual +# --- ions.mdp file content --- # +cat << EOF > ions.mdp +; ions.mdp - used as input into grompp to generate ions.tpr +; Parameters describing what to do, when to stop and what to save +integrator = steep ; Algorithm (steep = steepest descent minimization) +emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm +emstep = 0.01 ; Minimization step size +nsteps = 50000 ; Maximum number of (minimization) steps to perform +; Parameters describing how to find the neighbors of each atom and how to calculate the interactions +nstlist = 1 ; Frequency to update the neighbor list and long range forces +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; Method to determine neighbor list (simple, grid) +coulombtype = cutoff ; Treatment of long range electrostatic interactions +rcoulomb = 1.0 ; Short-range electrostatic cut-off +rvdw = 1.0 ; Short-range Van der Waals cut-off +pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions +EOF +gmx_mpi grompp -f ions.mdp -c $NAME-solv.gro -p topol.top -o ions.tpr -maxwarn 1 +echo SOL | gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral +# energy minimization of the structure in solvate # ! minim.mdp add by manual +# --- minim.mdp file content --- # +cat << EOF > minim.mdp +; minim.mdp - used as input into grompp to generate em.tpr +; Parameters describing what to do, when to stop and what to save +integrator = steep ; Algorithm (steep = steepest descent minimization) +emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm +emstep = 0.01 ; Minimization step size +nsteps = 50000 ; Maximum number of (minimization) steps to perform +; Parameters describing how to find the neighbors of each atom and how to calculate the interactions +nstlist = 1 ; Frequency to update the neighbor list and long range forces +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; Method to determine neighbor list (simple, grid) +coulombtype = PME ; Treatment of long range electrostatic interactions +rcoulomb = 1.0 ; Short-range electrostatic cut-off +rvdw = 1.0 ; Short-range Van der Waals cut-off +pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions +EOF +gmx_mpi grompp -f minim.mdp -c $NAME-solv-ions.gro -p topol.top -o em.tpr +gmx_mpi mdrun -v -deffnm em +# optional em, you will need the Xmgrace plotting too +#gmx_mpi energy -f em.edr -o potential.xvg +#position restrain +# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro +# gmx_mpi mdrun -v -deffnm posre +# nvt +# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr +# --- nvt.mdp file content --- # +cat << EOF > nvt.mdp +title = OPLS Lysozyme NVT equilibration +define = -DPOSRES ; position restrain the protein +; Run parameters +integrator = md ; leap-frog integrator +nsteps = 50000 ; 2 * 50000 = 100 ps +dt = 0.002 ; 2 fs +; Output control +nstxout = 500 ; save coordinates every 1.0 ps +nstvout = 500 ; save velocities every 1.0 ps +nstenergy = 500 ; save energies every 1.0 ps +nstlog = 500 ; update log file every 1.0 ps +; Bond parameters +continuation = no ; first dynamics run +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds involving H are constrained +lincs_iter = 1 ; accuracy of LINCS +lincs_order = 4 ; also related to accuracy +; Nonbonded settings +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; search neighboring grid cells +nstlist = 10 ; 20 fs, largely irrelevant with Verlet +rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) +rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +DispCorr = EnerPres ; account for cut-off vdW scheme +; Electrostatics +coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +pme_order = 4 ; cubic interpolation +fourierspacing = 0.16 ; grid spacing for FFT +; Temperature coupling is on +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling is off +pcoupl = no ; no pressure coupling in NVT +; Periodic boundary conditions +pbc = xyz ; 3-D PBC +; Velocity generation +gen_vel = yes ; assign velocities from Maxwell distribution +gen_temp = 300 ; temperature for Maxwell distribution +gen_seed = -1 ; generate a random seed +EOF +gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr +gmx_mpi mdrun -v -deffnm nvt +# optional : Let's analyze the temperature progression, again using energy: +# gmx_mpi energy -f nvt.edr -o temperature.xvg +# npt +# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr +# --- npt.mdp file content --- # +cat << EOF > npt.mdp +title = OPLS Lysozyme NPT equilibration +define = -DPOSRES ; position restrain the protein +; Run parameters +integrator = md ; leap-frog integrator +nsteps = 50000 ; 2 * 50000 = 100 ps +dt = 0.002 ; 2 fs +; Output control +nstxout = 500 ; save coordinates every 1.0 ps +nstvout = 500 ; save velocities every 1.0 ps +nstenergy = 500 ; save energies every 1.0 ps +nstlog = 500 ; update log file every 1.0 ps +; Bond parameters +continuation = yes ; Restarting after NVT +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds involving H are constrained +lincs_iter = 1 ; accuracy of LINCS +lincs_order = 4 ; also related to accuracy +; Nonbonded settings +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; search neighboring grid cells +nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme +rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) +rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +DispCorr = EnerPres ; account for cut-off vdW scheme +; Electrostatics +coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +pme_order = 4 ; cubic interpolation +fourierspacing = 0.16 ; grid spacing for FFT +; Temperature coupling is on +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling is on +pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT +pcoupltype = isotropic ; uniform scaling of box vectors +tau_p = 2.0 ; time constant, in ps +ref_p = 1.0 ; reference pressure, in bar +compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 +refcoord_scaling = com +; Periodic boundary conditions +pbc = xyz ; 3-D PBC +; Velocity generation +gen_vel = no ; Velocity generation is off +EOF +gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr +gmx_mpi mdrun -v -deffnm npt +# Optional: Let's analyze the pressure progression, again using energy: type 18 0 +# gmx energy -f npt.edr -o pressure.xvg +# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt. +# gmx energy -f npt.edr -o density.xvg +# md +# --- md.mdp file content --- # +cat << EOF > md.mdp +title = OPLS Lysozyme NPT equilibration +; Run parameters +integrator = md ; leap-frog integrator +nsteps = ${NSTEPS} ; steps for simulation +dt = ${DT} ; time step in fs +; Output control +nstxout = 0 ; suppress bulky .trr file by specifying +nstvout = 0 ; 0 for output frequency of nstxout, +nstfout = 0 ; nstvout, and nstfout +nstenergy = 5000 ; save energies every 10.0 ps +nstlog = 5000 ; update log file every 10.0 ps +nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps +compressed-x-grps = System ; save the whole system +; Bond parameters +continuation = yes ; Restarting after NPT +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds involving H are constrained +lincs_iter = 1 ; accuracy of LINCS +lincs_order = 4 ; also related to accuracy +; Neighborsearching +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; search neighboring grid cells +nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme +rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) +rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +; Electrostatics +coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +pme_order = 4 ; cubic interpolation +fourierspacing = 0.16 ; grid spacing for FFT +; Temperature coupling is on +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling is on +pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT +pcoupltype = isotropic ; uniform scaling of box vectors +tau_p = 2.0 ; time constant, in ps +ref_p = 1.0 ; reference pressure, in bar +compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 +; Periodic boundary conditions +pbc = xyz ; 3-D PBC +; Dispersion correction +DispCorr = EnerPres ; account for cut-off vdW scheme +; Velocity generation +gen_vel = no ; Velocity generation is off +EOF + +# Generate GROMACS .tpr file for the simulation +gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} + +# Run the simulation +gmx_mpi mdrun -deffnm ${MDRUN_NAME} +# analysis +# Create analysis output directory +mkdir -p ${OUTPUT_FOLDER} + +# Command 1: 提取蛋白质 +command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc' +# echo "Protein": 选择蛋白质组,用于告诉 gmx trjconv 要处理哪个部分。 +# -dt 1000: 指定时间间隔(这里是1000 picoseconds),用于从 .xtc 文件中抽取帧。 +# -s {tpr_file}: 指定拓扑文件(.tpr),它包含了模拟系统的完整描述。 +# -f {xtc_file}: 指定原始的 .xtc 轨迹文件。 +# -n {temp_folder}/tarj_show.ndx: 指定索引文件,其中包含各种原子群的定义。 +# -pbc mol: 处理周期性边界条件,确保分子不会被分割。 +# -o {temp_folder}/temp.xtc: 指定输出文件名和位置。 +# Command 2: 中心对齐蛋白质 +command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc' +# echo "Protein\nProtein\nProtein": 三次选择蛋白质组,分别用于中心化、拟合和输出。 +# -center: 将蛋白质移动到框架的中心。 +# -fit rot+trans: 对齐蛋白质,通过旋转和平移来最佳拟合。 +# -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。 +# Command 3: 抽取帧生成 .pdb 文件 +command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb' +# -b 0 -e 0: 指定开始和结束时间,这里设置为0表示只取第一帧。 +# -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式,存储在指定的位置。 + +# Step 1: Extract frames every 1000 ps +gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <