#!/bin/bash #This script needs to be edited for each run. #Define PDB Filename & GROMACS Pameters # reference: http://www.mdtutorials.com/gmx/lysozyme/ # NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh # 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000(对应 100ns),然后运行 md_gromacs.sh 脚本 # Check if GMXRC_PATH is provided and source it if [ -n "$GMXRC_PATH" ]; then source "$GMXRC_PATH" # source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC fi NAME=${NAME:-"5sws_fixer"} FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"} WATERMODEL=${WATERMODEL:-"tip3p"} WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"} BOXTYPE=${BOXTYPE:-"tric"} BOXORIENTATION=${BOXORIENTATION:-"1.0"} NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns DT=${DT:-0.002} # 2 fs #BOXSIZE="5.0" #BOXCENTER="2.5" # Define simulation name variable MDRUN_NAME=${MDRUN_NAME:-"md"} # Define analysis parameters # Define other filenames based on MDRUN_NAME TPR_FILE="${MDRUN_NAME}.tpr" XTC_FILE="${MDRUN_NAME}.xtc" NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc" OUTPUT_FOLDER=${OUTPUT_FOLDER:-"analysis_results"} # Define variables for frame extraction EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-1000} # Default to 1000 ps if not set # Print the current settings echo "Current settings:" echo "NAME: $NAME" echo "FORCEFIELD: $FORCEFIELD" echo "WATERMODEL: $WATERMODEL" echo "WATERTOPFILE: $WATERTOPFILE" echo "BOXTYPE: $BOXTYPE" echo "BOXORIENTATION: $BOXORIENTATION" echo "NSTEPS: $NSTEPS" echo "DT: $DT" echo "MDRUN_NAME: $MDRUN_NAME" echo "TPR_FILE: $TPR_FILE" echo "XTC_FILE: $XTC_FILE" echo "NO_PBC_XTC_FILE: $NO_PBC_XTC_FILE" echo "OUTPUT_FOLDER: $OUTPUT_FOLDER" echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS" # generate GROMACS .gro file gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -ignh -p topol.top # define the box gmx_mpi editconf -f $NAME.gro -o $NAME-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION # add solvate gmx_mpi solvate -cp $NAME-box.gro -cs $WATERTOPFILE -o $NAME-solv.gro -p topol.top # add icons # ! ions.mdp add by manual # --- ions.mdp file content --- # cat << EOF > ions.mdp ; ions.mdp - used as input into grompp to generate ions.tpr ; Parameters describing what to do, when to stop and what to save integrator = steep ; Algorithm (steep = steepest descent minimization) emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm emstep = 0.01 ; Minimization step size nsteps = 50000 ; Maximum number of (minimization) steps to perform ; Parameters describing how to find the neighbors of each atom and how to calculate the interactions nstlist = 1 ; Frequency to update the neighbor list and long range forces cutoff-scheme = Verlet ; Buffered neighbor searching ns_type = grid ; Method to determine neighbor list (simple, grid) coulombtype = cutoff ; Treatment of long range electrostatic interactions rcoulomb = 1.0 ; Short-range electrostatic cut-off rvdw = 1.0 ; Short-range Van der Waals cut-off pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions EOF gmx_mpi grompp -f ions.mdp -c $NAME-solv.gro -p topol.top -o ions.tpr -maxwarn 1 echo SOL | gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral # energy minimization of the structure in solvate # ! minim.mdp add by manual # --- minim.mdp file content --- # cat << EOF > minim.mdp ; minim.mdp - used as input into grompp to generate em.tpr ; Parameters describing what to do, when to stop and what to save integrator = steep ; Algorithm (steep = steepest descent minimization) emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm emstep = 0.01 ; Minimization step size nsteps = 50000 ; Maximum number of (minimization) steps to perform ; Parameters describing how to find the neighbors of each atom and how to calculate the interactions nstlist = 1 ; Frequency to update the neighbor list and long range forces cutoff-scheme = Verlet ; Buffered neighbor searching ns_type = grid ; Method to determine neighbor list (simple, grid) coulombtype = PME ; Treatment of long range electrostatic interactions rcoulomb = 1.0 ; Short-range electrostatic cut-off rvdw = 1.0 ; Short-range Van der Waals cut-off pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions EOF gmx_mpi grompp -f minim.mdp -c $NAME-solv-ions.gro -p topol.top -o em.tpr gmx_mpi mdrun -v -deffnm em # optional em, you will need the Xmgrace plotting too #gmx_mpi energy -f em.edr -o potential.xvg #position restrain # gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro # gmx_mpi mdrun -v -deffnm posre # nvt # gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr # --- nvt.mdp file content --- # cat << EOF > nvt.mdp title = OPLS Lysozyme NVT equilibration define = -DPOSRES ; position restrain the protein ; Run parameters integrator = md ; leap-frog integrator nsteps = 50000 ; 2 * 50000 = 100 ps dt = 0.002 ; 2 fs ; Output control nstxout = 500 ; save coordinates every 1.0 ps nstvout = 500 ; save velocities every 1.0 ps nstenergy = 500 ; save energies every 1.0 ps nstlog = 500 ; update log file every 1.0 ps ; Bond parameters continuation = no ; first dynamics run constraint_algorithm = lincs ; holonomic constraints constraints = h-bonds ; bonds involving H are constrained lincs_iter = 1 ; accuracy of LINCS lincs_order = 4 ; also related to accuracy ; Nonbonded settings cutoff-scheme = Verlet ; Buffered neighbor searching ns_type = grid ; search neighboring grid cells nstlist = 10 ; 20 fs, largely irrelevant with Verlet rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) rvdw = 1.0 ; short-range van der Waals cutoff (in nm) DispCorr = EnerPres ; account for cut-off vdW scheme ; Electrostatics coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics pme_order = 4 ; cubic interpolation fourierspacing = 0.16 ; grid spacing for FFT ; Temperature coupling is on tcoupl = V-rescale ; modified Berendsen thermostat tc-grps = Protein Non-Protein ; two coupling groups - more accurate tau_t = 0.1 0.1 ; time constant, in ps ref_t = 300 300 ; reference temperature, one for each group, in K ; Pressure coupling is off pcoupl = no ; no pressure coupling in NVT ; Periodic boundary conditions pbc = xyz ; 3-D PBC ; Velocity generation gen_vel = yes ; assign velocities from Maxwell distribution gen_temp = 300 ; temperature for Maxwell distribution gen_seed = -1 ; generate a random seed EOF gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr gmx_mpi mdrun -v -deffnm nvt # optional : Let's analyze the temperature progression, again using energy: # gmx_mpi energy -f nvt.edr -o temperature.xvg # npt # gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr # --- npt.mdp file content --- # cat << EOF > npt.mdp title = OPLS Lysozyme NPT equilibration define = -DPOSRES ; position restrain the protein ; Run parameters integrator = md ; leap-frog integrator nsteps = 50000 ; 2 * 50000 = 100 ps dt = 0.002 ; 2 fs ; Output control nstxout = 500 ; save coordinates every 1.0 ps nstvout = 500 ; save velocities every 1.0 ps nstenergy = 500 ; save energies every 1.0 ps nstlog = 500 ; update log file every 1.0 ps ; Bond parameters continuation = yes ; Restarting after NVT constraint_algorithm = lincs ; holonomic constraints constraints = h-bonds ; bonds involving H are constrained lincs_iter = 1 ; accuracy of LINCS lincs_order = 4 ; also related to accuracy ; Nonbonded settings cutoff-scheme = Verlet ; Buffered neighbor searching ns_type = grid ; search neighboring grid cells nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) rvdw = 1.0 ; short-range van der Waals cutoff (in nm) DispCorr = EnerPres ; account for cut-off vdW scheme ; Electrostatics coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics pme_order = 4 ; cubic interpolation fourierspacing = 0.16 ; grid spacing for FFT ; Temperature coupling is on tcoupl = V-rescale ; modified Berendsen thermostat tc-grps = Protein Non-Protein ; two coupling groups - more accurate tau_t = 0.1 0.1 ; time constant, in ps ref_t = 300 300 ; reference temperature, one for each group, in K ; Pressure coupling is on pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT pcoupltype = isotropic ; uniform scaling of box vectors tau_p = 2.0 ; time constant, in ps ref_p = 1.0 ; reference pressure, in bar compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 refcoord_scaling = com ; Periodic boundary conditions pbc = xyz ; 3-D PBC ; Velocity generation gen_vel = no ; Velocity generation is off EOF gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr gmx_mpi mdrun -v -deffnm npt # Optional: Let's analyze the pressure progression, again using energy: type 18 0 # gmx energy -f npt.edr -o pressure.xvg # Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt. # gmx energy -f npt.edr -o density.xvg # md # --- md.mdp file content --- # cat << EOF > md.mdp title = OPLS Lysozyme NPT equilibration ; Run parameters integrator = md ; leap-frog integrator nsteps = ${NSTEPS} ; steps for simulation dt = ${DT} ; time step in fs ; Output control nstxout = 0 ; suppress bulky .trr file by specifying nstvout = 0 ; 0 for output frequency of nstxout, nstfout = 0 ; nstvout, and nstfout nstenergy = 5000 ; save energies every 10.0 ps nstlog = 5000 ; update log file every 10.0 ps nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps compressed-x-grps = System ; save the whole system ; Bond parameters continuation = yes ; Restarting after NPT constraint_algorithm = lincs ; holonomic constraints constraints = h-bonds ; bonds involving H are constrained lincs_iter = 1 ; accuracy of LINCS lincs_order = 4 ; also related to accuracy ; Neighborsearching cutoff-scheme = Verlet ; Buffered neighbor searching ns_type = grid ; search neighboring grid cells nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) rvdw = 1.0 ; short-range van der Waals cutoff (in nm) ; Electrostatics coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics pme_order = 4 ; cubic interpolation fourierspacing = 0.16 ; grid spacing for FFT ; Temperature coupling is on tcoupl = V-rescale ; modified Berendsen thermostat tc-grps = Protein Non-Protein ; two coupling groups - more accurate tau_t = 0.1 0.1 ; time constant, in ps ref_t = 300 300 ; reference temperature, one for each group, in K ; Pressure coupling is on pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT pcoupltype = isotropic ; uniform scaling of box vectors tau_p = 2.0 ; time constant, in ps ref_p = 1.0 ; reference pressure, in bar compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 ; Periodic boundary conditions pbc = xyz ; 3-D PBC ; Dispersion correction DispCorr = EnerPres ; account for cut-off vdW scheme ; Velocity generation gen_vel = no ; Velocity generation is off EOF # Generate GROMACS .tpr file for the simulation gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} # Run the simulation gmx_mpi mdrun -deffnm ${MDRUN_NAME} # analysis # Create analysis output directory mkdir -p ${OUTPUT_FOLDER} # Command 1: 提取蛋白质 command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc' # echo "Protein": 选择蛋白质组,用于告诉 gmx trjconv 要处理哪个部分。 # -dt 1000: 指定时间间隔(这里是1000 picoseconds),用于从 .xtc 文件中抽取帧。 # -s {tpr_file}: 指定拓扑文件(.tpr),它包含了模拟系统的完整描述。 # -f {xtc_file}: 指定原始的 .xtc 轨迹文件。 # -n {temp_folder}/tarj_show.ndx: 指定索引文件,其中包含各种原子群的定义。 # -pbc mol: 处理周期性边界条件,确保分子不会被分割。 # -o {temp_folder}/temp.xtc: 指定输出文件名和位置。 # Command 2: 中心对齐蛋白质 command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc' # echo "Protein\nProtein\nProtein": 三次选择蛋白质组,分别用于中心化、拟合和输出。 # -center: 将蛋白质移动到框架的中心。 # -fit rot+trans: 对齐蛋白质,通过旋转和平移来最佳拟合。 # -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。 # Command 3: 抽取帧生成 .pdb 文件 command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb' # -b 0 -e 0: 指定开始和结束时间,这里设置为0表示只取第一帧。 # -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式,存储在指定的位置。 # Step 1: Extract frames every 1000 ps gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <