From 11a97fb80ce81fc0338f9b133ee593d02ddc2209 Mon Sep 17 00:00:00 2001 From: hotwa Date: Wed, 3 Jan 2024 11:17:37 +0800 Subject: [PATCH] update to env path --- .gitignore | 3 +- README.md | 0 analysis_pdb.py | 0 build_modellel.py | 0 build_modeller.py | 0 fixed/fixed.tar.gz | Bin md_gromacs.sh | 2 +- process_trajectory.py | 0 runner.py | 78 ++++++++++++++++++++++++++++++++---------- 9 files changed, 63 insertions(+), 20 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 README.md mode change 100644 => 100755 analysis_pdb.py mode change 100644 => 100755 build_modellel.py mode change 100644 => 100755 build_modeller.py mode change 100644 => 100755 fixed/fixed.tar.gz mode change 100644 => 100755 process_trajectory.py mode change 100644 => 100755 runner.py diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 2df45e5..5b5202f --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ fixed/ *.log test/ fixed/ -nohup.out \ No newline at end of file +nohup.out +pdb_test \ No newline at end of file diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/analysis_pdb.py b/analysis_pdb.py old mode 100644 new mode 100755 diff --git a/build_modellel.py b/build_modellel.py old mode 100644 new mode 100755 diff --git a/build_modeller.py b/build_modeller.py old mode 100644 new mode 100755 diff --git a/fixed/fixed.tar.gz b/fixed/fixed.tar.gz old mode 100644 new mode 100755 diff --git a/md_gromacs.sh b/md_gromacs.sh index 56dc8b0..fb4c444 100755 --- a/md_gromacs.sh +++ b/md_gromacs.sh @@ -261,7 +261,7 @@ gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_ # Run the simulation gmx_mpi mdrun -deffnm ${MDRUN_NAME} - +# mpirun -np $(ls | egrep "Scaled[0-9]+$" | wc -l) gmx_mpi mdrun -v --deffnm md -cpi Scaled.cpt -multidir $(ls -v | egrep "Scaled[0-9]+$") -plumed plumed.dat -hrex -replex 1000 >& run_$(date "+%H%M%S_%d%m%Y").log || { echo "mdrun failed at line ${LINENO} "; exit -1; } # extra ndx file , select protein echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE} # echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx diff --git a/process_trajectory.py b/process_trajectory.py old mode 100644 new mode 100755 diff --git a/runner.py b/runner.py old mode 100644 new mode 100755 index 747463d..4f2ac8c --- a/runner.py +++ b/runner.py @@ -14,6 +14,7 @@ import logging from dataclasses import dataclass, field from pathlib import Path import subprocess +import multiprocessing import shutil import os @@ -22,6 +23,7 @@ logging.basicConfig(level=logging.INFO, filename='simulation_log.log', filemode= format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger() +@dataclass class SimulationRunner: pdb_file: Path nsteps: int @@ -29,7 +31,8 @@ class SimulationRunner: base_folder: Path bash_script: Path = None # Bash脚本路径作为可选参数 gmxrc_path: Path = None # GMXRC文件路径作为可选参数 - temp_folder: Path # 用于存储临时数据和结果数据的路径(轨迹数据处理中间数据等) + gpu_id: int = None + temp_folder: Path = field(init=False) # 用于存储临时数据和结果数据的路径(轨迹数据处理中间数据等) runner_folder: Path = field(init=False) tpr_file: Path = field(init=False) xtc_file: Path = field(init=False) @@ -44,11 +47,15 @@ class SimulationRunner: self.temp_folder.mkdir(exist_ok=True) self.bash_script = self.bash_script.absolute() if self.bash_script else Path(__file__).resolve().parent / "md_gromacs.sh" # 设置 GMXRC_PATH 环境变量 - self.gmxrc_path = self.gmxrc_path or Path("/home/lingyuzeng/software/gmx2023.2/bin/GMXRC") + self.gmxrc_path = self.gmxrc_path def copy_pdb(self): shutil.copy(self.pdb_file, self.runner_folder / self.pdb_file.name) + def set_gpu(self, gpu_id): + """设置要使用的GPU。""" + self.gpu_id = gpu_id + @staticmethod def read_ndx_file(filename): ndx_dict = {} @@ -67,11 +74,9 @@ class SimulationRunner: return ndx_dict - @staticmethod -echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc - # 新增处理轨迹的方法 def process_trajectory(self, extract_interval): + # echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc # 根据提供的脚本逻辑读取和保存索引文件 ndx_dict = self.read_ndx_file(f'{self.runner_folder}/index.ndx') @@ -101,12 +106,15 @@ echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_fi result = None try: env_vars = { - "NAME": self.pdb_file.stem, # 首先将 NAME 设置为文件的 stem + "NAME": self.pdb_file.stem, "NSTEPS": str(self.nsteps), "DT": str(self.dt), - "GMXRC_PATH": str(self.gmxrc_path) + "GMXRC_PATH": str(self.gmxrc_path), + "PATH": os.environ.get("PATH", ""), + "LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH", ""), + "HOME": os.environ["HOME"], + "CUDA_VISIBLE_DEVICES": str(self.gpu_id) if self.gpu_id is not None else "" } - env_vars["HOME"] = os.environ["HOME"] logger.info(f"pdb_file: {self.pdb_file.name}") logger.info(f"Executing script at: {self.bash_script}") result = subprocess.run(["bash", str(self.bash_script)], env=env_vars, cwd=self.runner_folder, @@ -130,25 +138,59 @@ echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_fi else: logger.error(f"Simulation for {self.pdb_file.name} failed in {duration:.2f} seconds.") +# def main(simulation_steps, time_step, pdb_folder_path, bash_script_path, gmxrc_path): +# pdb_folder = Path(pdb_folder_path).resolve() +# for pdb_file in pdb_folder.glob("*.pdb"): +# runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path) +# runner.copy_pdb() +# logger.info(f"Running simulation for {pdb_file.name} in {runner.runner_folder}...") +# runner.run_simulation() +# logger.info(f"Finished simulation for {pdb_file.name}.") +# runner.process_trajectory(extract_interval=100) # 例如,每100ps抽取一次轨迹 +# logger.info(f"Finished processing trajectory for {pdb_file.name}. per 100ps") +def detect_gpus(): + """检测系统上的GPU数量。""" + try: + output = subprocess.check_output("nvidia-smi -L", shell=True).decode('utf-8') + return len(output.strip().split('\n')) + except subprocess.CalledProcessError: + return 0 + +def run_simulation_task(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id): + runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path) + runner.set_gpu(gpu_id) # 设置要使用的GPU + runner.copy_pdb() + logger.info(f"Running simulation for {pdb_file.name} on GPU {gpu_id}...") + runner.run_simulation() + runner.process_trajectory(extract_interval=100) + logger.info(f"Finished processing trajectory for {pdb_file.name} on GPU {gpu_id}.") + def main(simulation_steps, time_step, pdb_folder_path, bash_script_path, gmxrc_path): pdb_folder = Path(pdb_folder_path).resolve() - for pdb_file in pdb_folder.glob("*.pdb"): - runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path) - runner.copy_pdb() - logger.info(f"Running simulation for {pdb_file.name} in {runner.runner_folder}...") - runner.run_simulation() - logger.info(f"Finished simulation for {pdb_file.name}.") - runner.process_trajectory(extract_interval=100) # 例如,每100ps抽取一次轨迹 - logger.info(f"Finished processing trajectory for {pdb_file.name}. per 100ps") + pdb_files = list(pdb_folder.glob("*.pdb")) + num_gpus = detect_gpus() + + if num_gpus == 0: + logger.error("No GPUs detected, exiting.") + return + + # 创建一个进程池,每个GPU运行两个任务 + with multiprocessing.Pool(processes=num_gpus * 2) as pool: + for i, pdb_file in enumerate(pdb_files): + gpu_id = i % num_gpus # 分配GPU + pool.apply_async(run_simulation_task, (pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id)) + + pool.close() + pool.join() if __name__ == "__main__": NSTEPS = 50000000 # Example: 50000000 steps DT = 0.002 # Example: 2 fs time step - PDB_FOLDER_PATH = Path("./pdb_files") # Assuming the PDB files are in a folder named 'pdb_files' in the current directory + PDB_FOLDER_PATH = Path("./pdb_test") # Assuming the PDB files are in a folder named 'pdb_files' in the current directory # 传入自定义的bash脚本路径 CUSTOM_BASH_SCRIPT_PATH = Path('md_gromacs.sh') # 传入 GMXRC 文件的路径 - GMXRC_PATH = Path('/root/software/gmx2023.2/bin/GMXRC') + GMXRC_PATH = Path('/usr/local/gromacs-2021.4-plumed-2.8.0/bin/GMXRC') main(NSTEPS, DT, PDB_FOLDER_PATH, CUSTOM_BASH_SCRIPT_PATH, GMXRC_PATH)