From b44ea7119a551687af022b443c21eacd8e5a5bf1 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 5 Jan 2024 14:22:17 +0800 Subject: [PATCH] add --- .gitignore | 3 +- md_gromacs.sh.bak | 333 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 335 insertions(+), 1 deletion(-) create mode 100755 md_gromacs.sh.bak diff --git a/.gitignore b/.gitignore index 5b5202f..5e652f7 100755 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,5 @@ fixed/ test/ fixed/ nohup.out -pdb_test \ No newline at end of file +pdb_test +pdb_* \ No newline at end of file diff --git a/md_gromacs.sh.bak b/md_gromacs.sh.bak new file mode 100755 index 0000000..fb4c444 --- /dev/null +++ b/md_gromacs.sh.bak @@ -0,0 +1,333 @@ +#!/bin/bash +#This script needs to be edited for each run. +#Define PDB Filename & GROMACS Pameters +# reference: http://www.mdtutorials.com/gmx/lysozyme/ +# NAME=1ao7 NSTEPS=50000000 ./md_gromacs.sh +# 命令会临时设置 NAME 为 1ao7 和 NSTEPS 为 50000000(对应 100ns),然后运行 md_gromacs.sh 脚本 +# Check if GMXRC_PATH is provided and source it +if [ -n "$GMXRC_PATH" ]; then + source "$GMXRC_PATH" # source /home/lingyuzeng/software/gmx2023.2/bin/GMXRC +fi +NAME=${NAME:-"5sws_fixer"} +FORCEFIELD=${FORCEFIELD:-"amber99sb-ildn"} +WATERMODEL=${WATERMODEL:-"tip3p"} +WATERTOPFILE=${WATERTOPFILE:-"spc216.gro"} +BOXTYPE=${BOXTYPE:-"tric"} +BOXORIENTATION=${BOXORIENTATION:-"1.0"} +NSTEPS=${NSTEPS:-500000} # 50,000 steps for 1 ns +DT=${DT:-0.002} # 2 fs +#BOXSIZE="5.0" +#BOXCENTER="2.5" +# Define simulation name variable +MDRUN_NAME=${MDRUN_NAME:-"md"} +NDX_NAME=${NDX_NAME:-"index"} +# Define analysis parameters +# Define other filenames based on MDRUN_NAME +TPR_FILE="${MDRUN_NAME}.tpr" +XTC_FILE="${MDRUN_NAME}.xtc" +NDX_FILE="${NDX_NAME}.ndx" +NO_PBC_XTC_FILE="${MDRUN_NAME}_noPBC.xtc" +OUTPUT_FOLDER=${OUTPUT_FOLDER:-"frame_extraction_output"} +TEMP_FOLDER=${TEMP_FOLDER:-"temp"} +# Define variables for frame extraction +EXTRACT_EVERY_PS=${EXTRACT_EVERY_PS:-100} # Default to 100 ps if not set + +# Print the current settings +echo "Current settings:" +echo "NAME: $NAME" +echo "FORCEFIELD: $FORCEFIELD" +echo "WATERMODEL: $WATERMODEL" +echo "WATERTOPFILE: $WATERTOPFILE" +echo "BOXTYPE: $BOXTYPE" +echo "BOXORIENTATION: $BOXORIENTATION" +echo "NSTEPS: $NSTEPS" +echo "DT: $DT" +echo "MDRUN_NAME: $MDRUN_NAME" +echo "TPR_FILE: $TPR_FILE" +echo "XTC_FILE: $XTC_FILE" +echo "NO_PBC_XTC_FILE: $NO_PBC_XTC_FILE" +echo "OUTPUT_FOLDER: $OUTPUT_FOLDER" +echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS" + +# generate GROMACS .gro file +gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -ignh -p topol.top +# define the box +gmx_mpi editconf -f $NAME.gro -o $NAME-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION +# add solvate +gmx_mpi solvate -cp $NAME-box.gro -cs $WATERTOPFILE -o $NAME-solv.gro -p topol.top +# add icons # ! ions.mdp add by manual +# --- ions.mdp file content --- # +cat << EOF > ions.mdp +; ions.mdp - used as input into grompp to generate ions.tpr +; Parameters describing what to do, when to stop and what to save +integrator = steep ; Algorithm (steep = steepest descent minimization) +emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm +emstep = 0.01 ; Minimization step size +nsteps = 50000 ; Maximum number of (minimization) steps to perform +; Parameters describing how to find the neighbors of each atom and how to calculate the interactions +nstlist = 1 ; Frequency to update the neighbor list and long range forces +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; Method to determine neighbor list (simple, grid) +coulombtype = cutoff ; Treatment of long range electrostatic interactions +rcoulomb = 1.0 ; Short-range electrostatic cut-off +rvdw = 1.0 ; Short-range Van der Waals cut-off +pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions +EOF +gmx_mpi grompp -f ions.mdp -c $NAME-solv.gro -p topol.top -o ions.tpr -maxwarn 1 +echo SOL | gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral +# energy minimization of the structure in solvate # ! minim.mdp add by manual +# --- minim.mdp file content --- # +cat << EOF > minim.mdp +; minim.mdp - used as input into grompp to generate em.tpr +; Parameters describing what to do, when to stop and what to save +integrator = steep ; Algorithm (steep = steepest descent minimization) +emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm +emstep = 0.01 ; Minimization step size +nsteps = 50000 ; Maximum number of (minimization) steps to perform +; Parameters describing how to find the neighbors of each atom and how to calculate the interactions +nstlist = 1 ; Frequency to update the neighbor list and long range forces +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; Method to determine neighbor list (simple, grid) +coulombtype = PME ; Treatment of long range electrostatic interactions +rcoulomb = 1.0 ; Short-range electrostatic cut-off +rvdw = 1.0 ; Short-range Van der Waals cut-off +pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions +EOF +gmx_mpi grompp -f minim.mdp -c $NAME-solv-ions.gro -p topol.top -o em.tpr +gmx_mpi mdrun -v -deffnm em +# optional em, you will need the Xmgrace plotting too +#gmx_mpi energy -f em.edr -o potential.xvg +#position restrain +# gmx_mpi grompp -f posre.mdp -c em.gro -p topol.top -o posre.tpr -r em.gro +# gmx_mpi mdrun -v -deffnm posre +# nvt +# gmx_mpi grompp -f nvt.mdp -c posre.gro -p topol.top -o nvt.tpr +# --- nvt.mdp file content --- # +cat << EOF > nvt.mdp +title = OPLS Lysozyme NVT equilibration +define = -DPOSRES ; position restrain the protein +; Run parameters +integrator = md ; leap-frog integrator +nsteps = 50000 ; 2 * 50000 = 100 ps +dt = 0.002 ; 2 fs +; Output control +nstxout = 500 ; save coordinates every 1.0 ps +nstvout = 500 ; save velocities every 1.0 ps +nstenergy = 500 ; save energies every 1.0 ps +nstlog = 500 ; update log file every 1.0 ps +; Bond parameters +continuation = no ; first dynamics run +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds involving H are constrained +lincs_iter = 1 ; accuracy of LINCS +lincs_order = 4 ; also related to accuracy +; Nonbonded settings +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; search neighboring grid cells +nstlist = 10 ; 20 fs, largely irrelevant with Verlet +rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) +rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +DispCorr = EnerPres ; account for cut-off vdW scheme +; Electrostatics +coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +pme_order = 4 ; cubic interpolation +fourierspacing = 0.16 ; grid spacing for FFT +; Temperature coupling is on +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling is off +pcoupl = no ; no pressure coupling in NVT +; Periodic boundary conditions +pbc = xyz ; 3-D PBC +; Velocity generation +gen_vel = yes ; assign velocities from Maxwell distribution +gen_temp = 300 ; temperature for Maxwell distribution +gen_seed = -1 ; generate a random seed +EOF +gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr +gmx_mpi mdrun -v -deffnm nvt +# optional : Let's analyze the temperature progression, again using energy: +# gmx_mpi energy -f nvt.edr -o temperature.xvg +# npt +# gmx_mpi grompp -f npt.mdp -c nvt.gro -t nvt.cpt -p topol.top -o npt.tpr +# --- npt.mdp file content --- # +cat << EOF > npt.mdp +title = OPLS Lysozyme NPT equilibration +define = -DPOSRES ; position restrain the protein +; Run parameters +integrator = md ; leap-frog integrator +nsteps = 50000 ; 2 * 50000 = 100 ps +dt = 0.002 ; 2 fs +; Output control +nstxout = 500 ; save coordinates every 1.0 ps +nstvout = 500 ; save velocities every 1.0 ps +nstenergy = 500 ; save energies every 1.0 ps +nstlog = 500 ; update log file every 1.0 ps +; Bond parameters +continuation = yes ; Restarting after NVT +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds involving H are constrained +lincs_iter = 1 ; accuracy of LINCS +lincs_order = 4 ; also related to accuracy +; Nonbonded settings +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; search neighboring grid cells +nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme +rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) +rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +DispCorr = EnerPres ; account for cut-off vdW scheme +; Electrostatics +coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +pme_order = 4 ; cubic interpolation +fourierspacing = 0.16 ; grid spacing for FFT +; Temperature coupling is on +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling is on +pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT +pcoupltype = isotropic ; uniform scaling of box vectors +tau_p = 2.0 ; time constant, in ps +ref_p = 1.0 ; reference pressure, in bar +compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 +refcoord_scaling = com +; Periodic boundary conditions +pbc = xyz ; 3-D PBC +; Velocity generation +gen_vel = no ; Velocity generation is off +EOF +gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr +gmx_mpi mdrun -v -deffnm npt +# Optional: Let's analyze the pressure progression, again using energy: type 18 0 +# gmx energy -f npt.edr -o pressure.xvg +# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt. +# gmx energy -f npt.edr -o density.xvg +# md +# --- md.mdp file content --- # +cat << EOF > ${MDRUN_NAME}.mdp +title = OPLS Lysozyme NPT equilibration +; Run parameters +integrator = md ; leap-frog integrator +nsteps = ${NSTEPS} ; steps for simulation +dt = ${DT} ; time step in fs +; Output control +nstxout = 0 ; suppress bulky .trr file by specifying +nstvout = 0 ; 0 for output frequency of nstxout, +nstfout = 0 ; nstvout, and nstfout +nstenergy = 5000 ; save energies every 10.0 ps +nstlog = 5000 ; update log file every 10.0 ps +nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps +compressed-x-grps = System ; save the whole system +; Bond parameters +continuation = yes ; Restarting after NPT +constraint_algorithm = lincs ; holonomic constraints +constraints = h-bonds ; bonds involving H are constrained +lincs_iter = 1 ; accuracy of LINCS +lincs_order = 4 ; also related to accuracy +; Neighborsearching +cutoff-scheme = Verlet ; Buffered neighbor searching +ns_type = grid ; search neighboring grid cells +nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme +rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) +rvdw = 1.0 ; short-range van der Waals cutoff (in nm) +; Electrostatics +coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics +pme_order = 4 ; cubic interpolation +fourierspacing = 0.16 ; grid spacing for FFT +; Temperature coupling is on +tcoupl = V-rescale ; modified Berendsen thermostat +tc-grps = Protein Non-Protein ; two coupling groups - more accurate +tau_t = 0.1 0.1 ; time constant, in ps +ref_t = 300 300 ; reference temperature, one for each group, in K +; Pressure coupling is on +pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT +pcoupltype = isotropic ; uniform scaling of box vectors +tau_p = 2.0 ; time constant, in ps +ref_p = 1.0 ; reference pressure, in bar +compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 +; Periodic boundary conditions +pbc = xyz ; 3-D PBC +; Dispersion correction +DispCorr = EnerPres ; account for cut-off vdW scheme +; Velocity generation +gen_vel = no ; Velocity generation is off +EOF + +# Generate GROMACS .tpr file for the simulation +gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE} + +# Run the simulation +gmx_mpi mdrun -deffnm ${MDRUN_NAME} +# mpirun -np $(ls | egrep "Scaled[0-9]+$" | wc -l) gmx_mpi mdrun -v --deffnm md -cpi Scaled.cpt -multidir $(ls -v | egrep "Scaled[0-9]+$") -plumed plumed.dat -hrex -replex 1000 >& run_$(date "+%H%M%S_%d%m%Y").log || { echo "mdrun failed at line ${LINENO} "; exit -1; } +# extra ndx file , select protein +echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE} +# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx + +# Create extraction output directory +mkdir -p ${OUTPUT_FOLDER} + +# Create temp output directory +mkdir -p ${TEMP_FOLDER} + +echo -e "1\nq" | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc + +# echo -e "1\nq" | gmx_mpi trjconv -dt 100 -s md.tpr -f md.xtc -n index.ndx -pbc mol -o temp/temp.xtc + +echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc + +# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -o temp/traj_show.xtc + +echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb + +# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -b 0 -e 0 -o temp/tarj_show.pdb + +# Group 1 ( Protein) + +# --- +# Step 1: Extract frames every 1000 ps +gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <