加强 log

This commit is contained in:
root
2024-01-11 11:04:38 +08:00
parent 660f741274
commit e23905ce84
2 changed files with 83 additions and 95 deletions

View File

@@ -52,12 +52,29 @@ echo "OUTPUT_FOLDER: $OUTPUT_FOLDER"
echo "EXTRACT_EVERY_PS: $EXTRACT_EVERY_PS"
echo "NUM_CORES: $NUM_CORES"
# Print a step description and its command, run the command, and abort the
# script if it fails.
# Arguments:
#   $1 - human-readable description of the step
#   $2 - the command line to run, as a single string (it is passed to eval,
#        so pipes and quoting inside the string are honoured)
# Exits the whole script with the command's status when that status is non-zero.
run_command() {
    local step_description=$1
    local command=$2
    echo "Starting: $step_description"
    echo "Command: $command"
    # Quote the expansion: unquoted, the string is word-split and glob-expanded
    # before eval sees it, which collapses whitespace inside quoted arguments
    # and expands any wildcard characters against the current directory.
    eval "$command"
    local status=$?
    if [ $status -ne 0 ]; then
        echo "Error in $step_description. Command: $command"
        exit $status
    fi
    # Plain echo does not interpret "\n", so the old message printed a literal
    # backslash-n; keep the completion message on one line like "Starting:".
    echo "Completed: $step_description"
}
# generate GROMACS .gro file
mpirun -np $NUM_CORES gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -ignh -p topol.top
run_command "Generating GROMACS .gro file" "mpirun -np $NUM_CORES gmx_mpi pdb2gmx -f $NAME.pdb -o $NAME.gro -ff $FORCEFIELD -water $WATERMODEL -ignh -p topol.top"
# define the box
mpirun -np $NUM_CORES gmx_mpi editconf -f $NAME.gro -o $NAME-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION
run_command "Defining the box" "mpirun -np $NUM_CORES gmx_mpi editconf -f $NAME.gro -o $NAME-box.gro -bt $BOXTYPE -c -d $BOXORIENTATION"
# add solvate
mpirun -np $NUM_CORES gmx_mpi solvate -cp $NAME-box.gro -cs $WATERTOPFILE -o $NAME-solv.gro -p topol.top
run_command "Adding solvate" "mpirun -np $NUM_CORES gmx_mpi solvate -cp $NAME-box.gro -cs $WATERTOPFILE -o $NAME-solv.gro -p topol.top"
# add ions # ! ions.mdp added manually
# --- ions.mdp file content --- #
cat << EOF > ions.mdp
@@ -76,9 +93,10 @@ rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
mpirun -np $NUM_CORES gmx_mpi grompp -f ions.mdp -c $NAME-solv.gro -p topol.top -o ions.tpr -maxwarn 1
echo "SOL" > ions_input.txt
mpirun -np $NUM_CORES gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral < ions_input.txt
run_command "Adding ions" "mpirun -np $NUM_CORES gmx_mpi grompp -f ions.mdp -c $NAME-solv.gro -p topol.top -o ions.tpr -maxwarn 1"
# echo "SOL" > ions_input.txt
# mpirun -np $NUM_CORES gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral < ions_input.txt
run_command "Generating ions" "echo 'SOL' | mpirun -np $NUM_CORES gmx_mpi genion -s ions.tpr -o $NAME-solv-ions.gro -p topol.top -pname NA -nname CL -conc 0.125 -neutral"
# energy minimization of the structure in solvate # ! minim.mdp add by manual
# --- minim.mdp file content --- #
cat << EOF > minim.mdp
@@ -97,8 +115,8 @@ rcoulomb = 1.0 ; Short-range electrostatic cut-off
rvdw = 1.0 ; Short-range Van der Waals cut-off
pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions
EOF
mpirun -np $NUM_CORES gmx_mpi grompp -f minim.mdp -c $NAME-solv-ions.gro -p topol.top -o em.tpr
mpirun -np $NUM_CORES gmx_mpi mdrun -v -deffnm em
run_command "Energy minimization" "mpirun -np $NUM_CORES gmx_mpi grompp -f minim.mdp -c $NAME-solv-ions.gro -p topol.top -o em.tpr"
run_command "Running energy minimization" "mpirun -np $NUM_CORES gmx_mpi mdrun -v -deffnm em"
# optional em, you will need the Xmgrace plotting tool
#gmx_mpi energy -f em.edr -o potential.xvg
#position restrain
@@ -150,8 +168,8 @@ gen_vel = yes ; assign velocities from Maxwell distributio
gen_temp = 300 ; temperature for Maxwell distribution
gen_seed = -1 ; generate a random seed
EOF
mpirun -np $NUM_CORES gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr
mpirun -np $NUM_CORES gmx_mpi mdrun -v -deffnm nvt
run_command "Preparing NVT simulation" "mpirun -np $NUM_CORES gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr"
run_command "Running NVT simulation" "mpirun -np $NUM_CORES gmx_mpi mdrun -v -deffnm nvt"
# optional : Let's analyze the temperature progression, again using energy:
# gmx_mpi energy -f nvt.edr -o temperature.xvg
# npt
@@ -203,8 +221,8 @@ pbc = xyz ; 3-D PBC
; Velocity generation
gen_vel = no ; Velocity generation is off
EOF
mpirun -np $NUM_CORES gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr
mpirun -np $NUM_CORES gmx_mpi mdrun -v -deffnm npt
run_command "Preparing NPT simulation" "mpirun -np $NUM_CORES gmx_mpi grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr"
run_command "Running NPT simulation" "mpirun -np $NUM_CORES gmx_mpi mdrun -v -deffnm npt"
# Optional: Let's analyze the pressure progression, again using energy: type 18 0
# gmx energy -f npt.edr -o pressure.xvg
# Optional: Let's take a look at density as well, this time using energy and entering "24 0" at the prompt.
@@ -261,8 +279,7 @@ gen_vel = no ; Velocity generation is off
EOF
# Generate GROMACS .tpr file for the simulation
mpirun -np $NUM_CORES gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE}
run_command "Preparing MD simulation" "mpirun -np $NUM_CORES gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_FILE}"
# Run the simulation
mpirun -np $NUM_CORES gmx_mpi mdrun -deffnm ${MDRUN_NAME}
# mpirun -np $(ls | egrep "Scaled[0-9]+$" | wc -l) gmx_mpi mdrun -v --deffnm md -cpi Scaled.cpt -multidir $(ls -v | egrep "Scaled[0-9]+$") -plumed plumed.dat -hrex -replex 1000 >& run_$(date "+%H%M%S_%d%m%Y").log || { echo "mdrun failed at line ${LINENO} "; exit -1; }
@@ -270,44 +287,15 @@ mpirun -np $NUM_CORES gmx_mpi mdrun -deffnm ${MDRUN_NAME}
echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE}
# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx
# Create extraction output directory
mkdir -p ${OUTPUT_FOLDER}
# Create the extraction output directory and the temp output directory
run_command "Creating output directories" "mkdir -p ${OUTPUT_FOLDER} && mkdir -p ${TEMP_FOLDER}"
# Create temp output directory
mkdir -p ${TEMP_FOLDER}
echo -e "1\nq" | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc
# echo -e "1\nq" | gmx_mpi trjconv -dt 100 -s md.tpr -f md.xtc -n index.ndx -pbc mol -o temp/temp.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -o temp/traj_show.xtc
echo -e "1\n1\n1" | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb
# echo -e "1\n1\n1" | gmx_mpi trjconv -s md.tpr -f temp/temp.xtc -n index.ndx -center -fit rot+trans -b 0 -e 0 -o temp/tarj_show.pdb
run_command "Extracting frames" "echo -e '1\nq' | gmx_mpi trjconv -dt ${EXTRACT_EVERY_PS} -s ${TPR_FILE} -f ${XTC_FILE} -n ${NDX_FILE} -pbc mol -o ${TEMP_FOLDER}/temp.xtc"
run_command "Centering and fitting trajectory" "echo -e '1\n1\n1' | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -o ${TEMP_FOLDER}/traj_show.xtc"
run_command "Generating PDB file" "echo -e '1\n1\n1' | gmx_mpi trjconv -s ${TPR_FILE} -f ${TEMP_FOLDER}/temp.xtc -n ${NDX_FILE} -center -fit rot+trans -b 0 -e 0 -o ${TEMP_FOLDER}/tarj_show.pdb"
# Group 1 ( Protein)
# ---
# Step 1: Extract frames every 1000 ps
gmx_mpi trjconv -s ${TPR_FILE} -f ${XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -dt ${EXTRACT_EVERY_PS} -pbc mol <<EOF
0
EOF
# Step 2: Center and fit the trajectory
# Centering the protein and fitting to the initial frame
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -pbc mol -center <<EOF
1
1
EOF
# Step 3: Output PDB format file
gmx_mpi trjconv -s ${TPR_FILE} -f ${OUTPUT_FOLDER}/${NO_PBC_XTC_FILE} -o ${OUTPUT_FOLDER}/${MDRUN_NAME}.pdb -pbc mol -center <<EOF
1
0
EOF
# Continue with further analysis like RMSD calculation...
# ... [other analysis commands] ...
@@ -317,7 +305,7 @@ EOF
# command reference
# Command 1: 提取蛋白质
command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc'
# command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc'
# echo "Protein": 选择蛋白质组,用于告诉 gmx trjconv 要处理哪个部分。
# -dt 1000: 指定时间间隔这里是1000 picoseconds用于从 .xtc 文件中抽取帧。
# -s {tpr_file}: 指定拓扑文件(.tpr它包含了模拟系统的完整描述。
@@ -326,12 +314,12 @@ command_1 = f'echo "Protein" | gmx trjconv -dt 1000 -s {tpr_file} -f {xtc_file}
# -pbc mol: 处理周期性边界条件,确保分子不会被分割。
# -o {temp_folder}/temp.xtc: 指定输出文件名和位置。
# Command 2: 中心对齐蛋白质
command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc'
# command_2 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -o {output_folder}/traj_show.xtc'
# echo "Protein\nProtein\nProtein": 三次选择蛋白质组,分别用于中心化、拟合和输出。
# -center: 将蛋白质移动到框架的中心。
# -fit rot+trans: 对齐蛋白质,通过旋转和平移来最佳拟合。
# -o {output_folder}/traj_show.xtc: 指定输出文件名和位置。
# Command 3: 抽取帧生成 .pdb 文件
command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb'
# command_3 = f'echo "Protein\nProtein\nProtein" | gmx trjconv -s {tpr_file} -f {temp_folder}/temp.xtc -n {temp_folder}/tarj_show.ndx -center -fit rot+trans -b 0 -e 0 -o {output_folder}/tarj_show.pdb'
# -b 0 -e 0: 指定开始和结束时间这里设置为0表示只取第一帧。
# -o {output_folder}/tarj_show.pdb: 输出为 .pdb 格式,存储在指定的位置。

View File

@@ -18,12 +18,7 @@ import multiprocessing
import shutil
import os
if Path('simulation_log.log').exists():
Path('simulation_log.log').unlink()
# 设置日志记录
logging.basicConfig(level=logging.INFO, filename='simulation_log.log', filemode='a',
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
@dataclass
class SimulationRunner:
@@ -31,13 +26,14 @@ class SimulationRunner:
nsteps: int
dt: float
base_folder: Path
bash_script: Path = None # Bash脚本路径作为可选参数
gmxrc_path: Path = None # GMXRC文件路径作为可选参数
bash_script: Path = None
gmxrc_path: Path = None
gpu_id: int = None
temp_folder: Path = field(init=False) # 用于存储临时数据和结果数据的路径(轨迹数据处理中间数据等)
temp_folder: Path = field(init=False)
runner_folder: Path = field(init=False)
tpr_file: Path = field(init=False)
xtc_file: Path = field(init=False)
logger: logging.Logger = field(init=False)
def __post_init__(self):
# 初始化文件夹和文件路径
@@ -50,6 +46,18 @@ class SimulationRunner:
self.bash_script = self.bash_script.absolute() if self.bash_script else Path(__file__).resolve().parent / "md_gromacs.sh"
# 设置 GMXRC_PATH 环境变量
self.gmxrc_path = self.gmxrc_path
pdb_id = self.pdb_file.stem
self.logger = self.setup_logging(pdb_id, self.base_folder)
@staticmethod
def setup_logging(pdb_id, log_folder):
log_file = log_folder / f"{pdb_id}_simulation_log.log"
if log_file.exists():
log_file.unlink()
logging.basicConfig(level=logging.INFO, filename=log_file, filemode='a',
format='%(asctime)s - %(levelname)s - %(message)s')
return logging.getLogger(pdb_id)
def copy_pdb(self):
shutil.copy(self.pdb_file, self.runner_folder / self.pdb_file.name)
@@ -120,39 +128,29 @@ class SimulationRunner:
"HOME": os.environ["HOME"],
"CUDA_VISIBLE_DEVICES": str(self.gpu_id) if self.gpu_id is not None else ""
}
logger.info(f"pdb_file: {self.pdb_file.name}")
logger.info(f"Executing script at: {self.bash_script}")
self.logger.info(f"pdb_file: {self.pdb_file.name}")
self.logger.info(f"Executing script at: {self.bash_script}")
result = subprocess.run(["bash", str(self.bash_script)], env=env_vars, cwd=self.runner_folder,
capture_output=True, text=True, check=True)
except subprocess.CalledProcessError as e:
logger.error(f"Error in simulation for {self.pdb_file.name}: {e}")
self.logger.error(f"Error in simulation for {self.pdb_file.name}: {e}")
if e.stdout:
logger.error(f"Standard Output:\n{e.stdout}")
self.logger.error(f"Standard Output:\n{e.stdout}")
if e.stderr:
logger.error(f"Standard Error:\n{e.stderr}")
self.logger.error(f"Standard Error:\n{e.stderr}")
end_time = time.time()
duration = end_time - start_time
if result:
logger.info(f"Simulation for {self.pdb_file.name} completed successfully in {duration:.2f} seconds.")
self.logger.info(f"Simulation for {self.pdb_file.name} completed successfully in {duration:.2f} seconds.")
if result.stdout:
logger.info(f"Shell Script Output:\n{result.stdout}")
self.logger.info(f"Shell Script Output:\n{result.stdout}")
if result.stderr:
logger.error(f"Shell Script Error Output:\n{result.stderr}")
self.logger.error(f"Shell Script Error Output:\n{result.stderr}")
else:
logger.error(f"Simulation for {self.pdb_file.name} failed in {duration:.2f} seconds.")
self.logger.error(f"Simulation for {self.pdb_file.name} failed in {duration:.2f} seconds.")
# def main(simulation_steps, time_step, pdb_folder_path, bash_script_path, gmxrc_path):
# pdb_folder = Path(pdb_folder_path).resolve()
# for pdb_file in pdb_folder.glob("*.pdb"):
# runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path)
# runner.copy_pdb()
# logger.info(f"Running simulation for {pdb_file.name} in {runner.runner_folder}...")
# runner.run_simulation()
# logger.info(f"Finished simulation for {pdb_file.name}.")
# runner.process_trajectory(extract_interval=100) # 例如每100ps抽取一次轨迹
# logger.info(f"Finished processing trajectory for {pdb_file.name}. per 100ps")
def detect_gpus():
"""检测系统上的GPU数量。"""
try:
@@ -161,37 +159,39 @@ def detect_gpus():
except subprocess.CalledProcessError:
return 0
def setup_global_logging(log_folder):
    """Point ``logging.basicConfig`` at ``simulation_log.log`` in *log_folder*.

    An existing ``simulation_log.log`` in that folder is deleted first.
    Returns the root logger.
    """
    global_log_path = log_folder / "simulation_log.log"
    if global_log_path.exists():
        global_log_path.unlink()
    basic_config_options = {
        "level": logging.INFO,
        "filename": global_log_path,
        "filemode": 'a',
        "format": '%(asctime)s - %(levelname)s - %(message)s',
    }
    logging.basicConfig(**basic_config_options)
    return logging.getLogger()
# Run the pipeline for a single PDB file: build a SimulationRunner, copy the
# PDB into place, run the simulation, then process the trajectory with
# extract_interval=100.
# NOTE(review): this diff view interleaves removed and added lines (two
# `runner = ...` assignments appear back to back); confirm which one is live.
# NOTE(review): `logger` is not defined inside this function — verify that a
# module-level `logger` still exists after this change.
def run_simulation_task(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id):
runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path)
runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id)
runner.set_gpu(gpu_id) # set the GPU to use
runner.copy_pdb()
logger.info(f"Running simulation for {pdb_file.name} on GPU {gpu_id}...")
runner.run_simulation()
runner.process_trajectory(extract_interval=100)
logger.info(f"Finished processing trajectory for {pdb_file.name} on GPU {gpu_id}.")
# Entry point: set up global logging in the PDB folder, collect every *.pdb
# file there, and submit one run_simulation_task per file to a process pool.
# Returns early (after logging an error) when detect_gpus() reports 0 GPUs.
# NOTE(review): this diff view interleaves removed and added lines (duplicate
# `pdb_folder = ...`, error-logging and `with ... Pool` lines); confirm which
# of each pair is live.
def main(simulation_steps, time_step, pdb_folder_path, bash_script_path, gmxrc_path):
pdb_folder = Path(pdb_folder_path).resolve()
pdb_folder = Path(pdb_folder_path)
setup_global_logging(pdb_folder) # set up the global logger
pdb_files = list(pdb_folder.glob("*.pdb"))
num_gpus = detect_gpus()
if num_gpus == 0:
logger.error("No GPUs detected, exiting.")
logging.error("No GPUs detected, exiting.")
return
# Create a process pool, running 1 task per GPU
# with multiprocessing.Pool(processes=num_gpus * 1) as pool: # num_gpus * 1
# for i, pdb_file in enumerate(pdb_files):
# gpu_id = i % num_gpus # 分配GPU
# pool.apply_async(run_simulation_task, (pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id))
# pool.close()
# pool.join()
# NOTE(review): the pool is created with processes=1, so tasks run one at a
# time regardless of num_gpus.
with multiprocessing.Pool(processes=1) as pool: # num_gpus * 1
with multiprocessing.Pool(processes=1) as pool:
for i, pdb_file in enumerate(pdb_files):
gpu_id = i % num_gpus # assign a GPU
# NOTE(review): gpu_id computed above is not passed on; every task receives
# the literal '0'. apply_async results are never read, so an exception raised
# inside a worker would not surface here — confirm this is intended.
pool.apply_async(run_simulation_task, (pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, '0'))
pool.close()
pool.join()