update to env path

This commit is contained in:
2024-01-03 11:17:37 +08:00
parent 4257c2041a
commit 11a97fb80c
9 changed files with 63 additions and 20 deletions

3
.gitignore vendored Normal file → Executable file
View File

@@ -15,4 +15,5 @@ fixed/
*.log *.log
test/ test/
fixed/ fixed/
nohup.out nohup.out
pdb_test

0
README.md Normal file → Executable file
View File

0
analysis_pdb.py Normal file → Executable file
View File

0
build_modellel.py Normal file → Executable file
View File

0
build_modeller.py Normal file → Executable file
View File

0
fixed/fixed.tar.gz Normal file → Executable file
View File

View File

@@ -261,7 +261,7 @@ gmx_mpi grompp -f ${MDRUN_NAME}.mdp -c npt.gro -t npt.cpt -p topol.top -o ${TPR_
# Run the simulation # Run the simulation
gmx_mpi mdrun -deffnm ${MDRUN_NAME} gmx_mpi mdrun -deffnm ${MDRUN_NAME}
# mpirun -np $(ls | egrep "Scaled[0-9]+$" | wc -l) gmx_mpi mdrun -v --deffnm md -cpi Scaled.cpt -multidir $(ls -v | egrep "Scaled[0-9]+$") -plumed plumed.dat -hrex -replex 1000 >& run_$(date "+%H%M%S_%d%m%Y").log || { echo "mdrun failed at line ${LINENO} "; exit -1; }
# extra ndx file , select protein # extra ndx file , select protein
echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE} echo -e "1\nq" | gmx_mpi make_ndx -f ${MDRUN_NAME}.gro -o ${NDX_FILE}
# echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx # echo -e "1\nq" | gmx_mpi make_ndx -f md.gro -o index.ndx

0
process_trajectory.py Normal file → Executable file
View File

78
runner.py Normal file → Executable file
View File

@@ -14,6 +14,7 @@ import logging
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
import subprocess import subprocess
import multiprocessing
import shutil import shutil
import os import os
@@ -22,6 +23,7 @@ logging.basicConfig(level=logging.INFO, filename='simulation_log.log', filemode=
format='%(asctime)s - %(levelname)s - %(message)s') format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger() logger = logging.getLogger()
@dataclass
class SimulationRunner: class SimulationRunner:
pdb_file: Path pdb_file: Path
nsteps: int nsteps: int
@@ -29,7 +31,8 @@ class SimulationRunner:
base_folder: Path base_folder: Path
bash_script: Path = None # Bash脚本路径作为可选参数 bash_script: Path = None # Bash脚本路径作为可选参数
gmxrc_path: Path = None # GMXRC文件路径作为可选参数 gmxrc_path: Path = None # GMXRC文件路径作为可选参数
temp_folder: Path # 用于存储临时数据和结果数据的路径(轨迹数据处理中间数据等) gpu_id: int = None
temp_folder: Path = field(init=False) # 用于存储临时数据和结果数据的路径(轨迹数据处理中间数据等)
runner_folder: Path = field(init=False) runner_folder: Path = field(init=False)
tpr_file: Path = field(init=False) tpr_file: Path = field(init=False)
xtc_file: Path = field(init=False) xtc_file: Path = field(init=False)
@@ -44,11 +47,15 @@ class SimulationRunner:
self.temp_folder.mkdir(exist_ok=True) self.temp_folder.mkdir(exist_ok=True)
self.bash_script = self.bash_script.absolute() if self.bash_script else Path(__file__).resolve().parent / "md_gromacs.sh" self.bash_script = self.bash_script.absolute() if self.bash_script else Path(__file__).resolve().parent / "md_gromacs.sh"
# 设置 GMXRC_PATH 环境变量 # 设置 GMXRC_PATH 环境变量
self.gmxrc_path = self.gmxrc_path or Path("/home/lingyuzeng/software/gmx2023.2/bin/GMXRC") self.gmxrc_path = self.gmxrc_path
def copy_pdb(self): def copy_pdb(self):
shutil.copy(self.pdb_file, self.runner_folder / self.pdb_file.name) shutil.copy(self.pdb_file, self.runner_folder / self.pdb_file.name)
def set_gpu(self, gpu_id):
"""设置要使用的GPU。"""
self.gpu_id = gpu_id
@staticmethod @staticmethod
def read_ndx_file(filename): def read_ndx_file(filename):
ndx_dict = {} ndx_dict = {}
@@ -67,11 +74,9 @@ class SimulationRunner:
return ndx_dict return ndx_dict
@staticmethod
echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc
# 新增处理轨迹的方法 # 新增处理轨迹的方法
def process_trajectory(self, extract_interval): def process_trajectory(self, extract_interval):
# echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_file} -n {temp_folder}/tarj_show.ndx -pbc mol -o {temp_folder}/temp.xtc
# 根据提供的脚本逻辑读取和保存索引文件 # 根据提供的脚本逻辑读取和保存索引文件
ndx_dict = self.read_ndx_file(f'{self.runner_folder}/index.ndx') ndx_dict = self.read_ndx_file(f'{self.runner_folder}/index.ndx')
@@ -101,12 +106,15 @@ echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_fi
result = None result = None
try: try:
env_vars = { env_vars = {
"NAME": self.pdb_file.stem, # 首先将 NAME 设置为文件的 stem "NAME": self.pdb_file.stem,
"NSTEPS": str(self.nsteps), "NSTEPS": str(self.nsteps),
"DT": str(self.dt), "DT": str(self.dt),
"GMXRC_PATH": str(self.gmxrc_path) "GMXRC_PATH": str(self.gmxrc_path),
"PATH": os.environ.get("PATH", ""),
"LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH", ""),
"HOME": os.environ["HOME"],
"CUDA_VISIBLE_DEVICES": str(self.gpu_id) if self.gpu_id is not None else ""
} }
env_vars["HOME"] = os.environ["HOME"]
logger.info(f"pdb_file: {self.pdb_file.name}") logger.info(f"pdb_file: {self.pdb_file.name}")
logger.info(f"Executing script at: {self.bash_script}") logger.info(f"Executing script at: {self.bash_script}")
result = subprocess.run(["bash", str(self.bash_script)], env=env_vars, cwd=self.runner_folder, result = subprocess.run(["bash", str(self.bash_script)], env=env_vars, cwd=self.runner_folder,
@@ -130,25 +138,59 @@ echo "Protein" | gmx_mpi trjconv -dt {extract_interval} -s {tpr_file} -f {xtc_fi
else: else:
logger.error(f"Simulation for {self.pdb_file.name} failed in {duration:.2f} seconds.") logger.error(f"Simulation for {self.pdb_file.name} failed in {duration:.2f} seconds.")
# def main(simulation_steps, time_step, pdb_folder_path, bash_script_path, gmxrc_path):
# pdb_folder = Path(pdb_folder_path).resolve()
# for pdb_file in pdb_folder.glob("*.pdb"):
# runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path)
# runner.copy_pdb()
# logger.info(f"Running simulation for {pdb_file.name} in {runner.runner_folder}...")
# runner.run_simulation()
# logger.info(f"Finished simulation for {pdb_file.name}.")
# runner.process_trajectory(extract_interval=100) # 例如每100ps抽取一次轨迹
# logger.info(f"Finished processing trajectory for {pdb_file.name}. per 100ps")
def detect_gpus():
    """Return the number of NVIDIA GPUs reported by ``nvidia-smi -L``.

    Only output lines that start with ``"GPU "`` are counted, so empty
    output yields 0 and any indented sub-device lines (e.g. MIG instances)
    are not counted as additional GPUs.

    Returns:
        int: Number of GPUs listed, or 0 when ``nvidia-smi`` is missing,
        cannot be executed, or exits with a non-zero status.
    """
    try:
        # Fixed command: pass an argument list so no shell is involved.
        raw_output = subprocess.check_output(["nvidia-smi", "-L"])
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable nvidia-smi binary.
        return 0

    listing = raw_output.decode("utf-8", errors="replace")
    return sum(1 for line in listing.splitlines() if line.startswith("GPU "))
def run_simulation_task(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id):
    """Run one simulation and process its trajectory on the given GPU.

    Builds a ``SimulationRunner`` for ``pdb_file``, assigns ``gpu_id`` to it,
    copies the PDB file into the runner folder, runs the simulation and then
    processes the trajectory with ``extract_interval=100``.

    This function is submitted to a ``multiprocessing.Pool`` via
    ``apply_async``; an exception raised here is only visible to the parent
    if it reads the async result. To keep failures from disappearing, the
    error is logged in the worker and then re-raised unchanged.

    Args:
        pdb_file: Path of the PDB file to simulate.
        simulation_steps: Number of simulation steps passed to the runner.
        time_step: Time step passed to the runner.
        pdb_folder: Base folder passed to the runner.
        bash_script_path: Bash script path passed to the runner.
        gmxrc_path: GMXRC file path passed to the runner.
        gpu_id: Identifier of the GPU this task should use.

    Raises:
        Exception: Whatever the runner raises, after it has been logged.
    """
    try:
        runner = SimulationRunner(pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path)
        runner.set_gpu(gpu_id)  # select the GPU before anything is launched
        runner.copy_pdb()
        logger.info(f"Running simulation for {pdb_file.name} on GPU {gpu_id}...")
        runner.run_simulation()
        runner.process_trajectory(extract_interval=100)
        logger.info(f"Finished processing trajectory for {pdb_file.name} on GPU {gpu_id}.")
    except Exception:
        # Log with traceback inside the worker, then propagate to the caller.
        logger.exception(f"Simulation task for {pdb_file} on GPU {gpu_id} failed.")
        raise
def main(simulation_steps, time_step, pdb_folder_path, bash_script_path, gmxrc_path):
    """Run simulations for every ``*.pdb`` file in a folder across the GPUs.

    The number of GPUs comes from ``detect_gpus()``. When none are found an
    error is logged and the function returns without running anything.
    Otherwise a process pool with two workers per GPU is created and each
    PDB file is submitted as a ``run_simulation_task`` job, with GPUs
    assigned round-robin by file index.

    Every async result is collected and read back so that an exception
    raised inside a worker is logged instead of being silently dropped.

    Args:
        simulation_steps: Number of simulation steps for each run.
        time_step: Time step for each run.
        pdb_folder_path: Folder that is searched for ``*.pdb`` files.
        bash_script_path: Bash script path forwarded to each task.
        gmxrc_path: GMXRC file path forwarded to each task.
    """
    pdb_folder = Path(pdb_folder_path).resolve()
    pdb_files = list(pdb_folder.glob("*.pdb"))
    num_gpus = detect_gpus()

    if num_gpus == 0:
        logger.error("No GPUs detected, exiting.")
        return

    # Create a process pool that runs two tasks per GPU.
    with multiprocessing.Pool(processes=num_gpus * 2) as pool:
        submitted = []
        for i, pdb_file in enumerate(pdb_files):
            gpu_id = i % num_gpus  # round-robin GPU assignment
            async_result = pool.apply_async(
                run_simulation_task,
                (pdb_file, simulation_steps, time_step, pdb_folder, bash_script_path, gmxrc_path, gpu_id),
            )
            submitted.append((pdb_file, async_result))
        pool.close()

        # Reading each result re-raises any worker exception here, where it
        # can be logged; one failed task must not hide the others.
        for pdb_file, async_result in submitted:
            try:
                async_result.get()
            except Exception:
                logger.exception(f"Simulation task for {pdb_file.name} failed.")
        pool.join()
if __name__ == "__main__": if __name__ == "__main__":
NSTEPS = 50000000 # Example: 50000000 steps NSTEPS = 50000000 # Example: 50000000 steps
DT = 0.002 # Example: 2 fs time step DT = 0.002 # Example: 2 fs time step
PDB_FOLDER_PATH = Path("./pdb_files") # Assuming the PDB files are in a folder named 'pdb_files' in the current directory PDB_FOLDER_PATH = Path("./pdb_test") # Assuming the PDB files are in a folder named 'pdb_files' in the current directory
# 传入自定义的bash脚本路径 # 传入自定义的bash脚本路径
CUSTOM_BASH_SCRIPT_PATH = Path('md_gromacs.sh') CUSTOM_BASH_SCRIPT_PATH = Path('md_gromacs.sh')
# 传入 GMXRC 文件的路径 # 传入 GMXRC 文件的路径
GMXRC_PATH = Path('/root/software/gmx2023.2/bin/GMXRC') GMXRC_PATH = Path('/usr/local/gromacs-2021.4-plumed-2.8.0/bin/GMXRC')
main(NSTEPS, DT, PDB_FOLDER_PATH, CUSTOM_BASH_SCRIPT_PATH, GMXRC_PATH) main(NSTEPS, DT, PDB_FOLDER_PATH, CUSTOM_BASH_SCRIPT_PATH, GMXRC_PATH)