Files
vina_docking_batch/vina_split_and_submit.py

94 lines
2.6 KiB
Python

import os
import shutil
from pathlib import Path
import sys
import subprocess
import argparse
import time
# Command-line interface: the number of splits, plus the receptor and box
# config paths that are substituted verbatim into every generated job script.
parser = argparse.ArgumentParser(
    description="Split pdbqt files and generate & submit vina shell scripts."
)
parser.add_argument(
    '-n', '--n_splits', type=int, default=12, help="Number of splits/folders. Default: 12"
)
parser.add_argument(
    '-r', '--receptor', type=str, required=True, help="Path to receptor pdbqt file"
)
parser.add_argument(
    '-c', '--config', type=str, required=True, help="Path to box config txt file"
)
args = parser.parse_args()
n_splits = args.n_splits
if n_splits < 1:
    # Files cannot be distributed over zero or a negative number of folders;
    # reject the value up front with a usage error instead of failing (or
    # silently doing nothing) further down the script.
    parser.error("--n_splits must be a positive integer")
receptor_path = args.receptor
config_path = args.config

# Ligands are read from ./ligand/pdbqt and the per-split folders are created
# next to it under ./ligand. Sorting makes the split deterministic.
src_dir = Path('./ligand/pdbqt')
dst_root = Path('./ligand')
all_files = sorted(src_dir.glob('*.pdbqt'))
total = len(all_files)
if not all_files:
    raise ValueError("No pdbqt files found in ./ligand/pdbqt")
def split_list_evenly(lst, n):
    """Split *lst* into *n* contiguous chunks whose sizes differ by at most one.

    The first ``len(lst) % n`` chunks each receive one extra element. When
    *n* is larger than ``len(lst)`` the trailing chunks are empty.

    Args:
        lst: Sequence to split; must support ``len()`` and slicing.
        n: Number of chunks to produce; must be at least 1.

    Returns:
        A list of exactly *n* slices of *lst*, in the original order.

    Raises:
        ValueError: If *n* is less than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")
    # k is the base chunk size, m is how many leading chunks get one extra item.
    k, m = divmod(len(lst), n)
    return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]
# Distribute the ligand files: the i-th chunk (1-based) is copied, with
# metadata preserved by copy2, into ./ligand/pdbqt<i>.
chunks = split_list_evenly(all_files, n_splits)
for part_no, members in enumerate(chunks, start=1):
    target_dir = dst_root / f"pdbqt{part_no}"
    # The folder is created even when its chunk turns out to be empty.
    target_dir.mkdir(parents=True, exist_ok=True)
    for ligand in members:
        destination = target_dir / ligand.name
        shutil.copy2(ligand, destination)
# Template for one per-split job script. It is rendered with str.format(),
# which fills in {idx} (1-based split number), {receptor_path} and
# {config_path}. Each "\\" below is an escaped backslash, so the generated
# script contains a single "\" shell line continuation at those positions.
# NOTE(review): the "#DSUB" lines are presumably directives read by the dsub
# scheduler, and the working directory is hard-coded to one user's home --
# confirm both against the cluster setup before reusing this elsewhere.
sh_template = """#!/bin/bash
#DSUB -n vina_job{idx}
#DSUB -R 'cpu=32'
#DSUB -aa
#DSUB --label arm
#DSUB -o vina_docking{idx}_cpu_job_%J.out
date
echo "autodock vina docking task{idx}, version: 1.2.7"
cd /share/home/lyzeng24/rdkit_script/vina
./scripts/batch_docking.sh {receptor_path} \\
{config_path} \\
./ligand/pdbqt{idx} \\
./result/poses{idx} \\
./result/batch_docking{idx}.log ./vina
cd ./result/poses{idx}
module load mamba
micromamba run -n vina mk_export.py ./*_out.pdbqt --suffix _converted
date
"""
# Render one executable submit script per split into the current directory
# and remember the paths, in order, for the submission step.
submit_sh_list = []
for job_no in range(1, n_splits + 1):
    sh_path = Path(f'./submit_vina{job_no}.sh')
    rendered = sh_template.format(
        idx=job_no,
        receptor_path=receptor_path,
        config_path=config_path,
    )
    sh_path.write_text(rendered)
    sh_path.chmod(0o755)  # rwxr-xr-x so the script can be executed
    submit_sh_list.append(sh_path)
print(f"Done! {total} pdbqt files split to {n_splits} folders, {n_splits} shell scripts generated.")
# Automatically submit every generated script with `dsub -s`, one at a time.
failed_submissions = []
for sh_path in submit_sh_list:
    print(f"提交: dsub -s {sh_path}")
    completed = subprocess.run(['dsub', '-s', str(sh_path)])
    if completed.returncode != 0:
        # The exit status is the only signal here that dsub rejected the
        # job; report it and keep going so the remaining scripts are still
        # submitted.
        print(
            f"Warning: dsub exited with code {completed.returncode} for {sh_path}",
            file=sys.stderr,
        )
        failed_submissions.append(sh_path)
    time.sleep(1)  # wait 1 second after each submission
if failed_submissions:
    print(
        f"{len(failed_submissions)} of {len(submit_sh_list)} submissions failed.",
        file=sys.stderr,
    )

# Usage example. The block below is a bare string literal kept as trailing
# documentation; it is evaluated and discarded, so it has no runtime effect.
"""
使用示例:
python vina_split_and_submit.py \
-n 128 \
-r ./receptor/FgBar1_cut_proteinprep.pdbqt \
-c ./config/FgBar1_entry_1.box.txt
"""