138 lines
5.3 KiB
Python
Executable File
138 lines
5.3 KiB
Python
Executable File
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from modeller import *
|
|
from modeller.automodel import *
|
|
import time
|
|
from typing import List
|
|
import sys
|
|
import glob
|
|
|
|
@dataclass
class PDBModeler:
    """Single-template homology/loop modelling driver built on Modeller.

    On construction the target FASTA file is converted to a PIR (``.ali``)
    file inside ``outdir``; ``make_model`` then aligns the target sequence
    against one chain of the template structure and runs ``LoopModel``.

    Attributes:
        structure_file: Template structure file (e.g. a PDB file).
        fasta_file: FASTA file holding the target sequence.
        outdir: Directory that receives the ``.ali`` / ``.pap`` files
            (created if missing).
        chain: Chain identifier of the template segment to use.
        num_loop: Index of the last loop model to build (first is 1).
        md_level: One of ``'refine.very_fast'``, ``'refine.fast'`` or
            ``'refine.slow'``.
    """

    structure_file: Path
    fasta_file: Path
    outdir: Path
    chain: str
    num_loop: int = 2
    md_level: str = 'refine.fast'  # refine.very_fast or refine.slow optional

    def __post_init__(self):
        # The file stems double as the alignment codes of template and target.
        self.structure = self.structure_file.stem
        self.sequence = self.fasta_file.stem
        self.ali_file = self.fasta_to_ali()

    def make_model(self):
        """Align target to template and build loop models (single template).

        Returns:
            List of paths of the loop models that were built without failure.
        """
        print("***************************************************")
        print("md_level ====", self.md_level)
        print("***************************************************")

        time_start = time.time()

        # Step 1: align the target sequence to the chosen template chain and
        # write the alignment in both PIR and PAP formats.
        env1 = Environ()
        template_path = self.structure_file.as_posix()
        mdl = Model(env1, file=template_path,
                    model_segment=(f'FIRST:{self.chain}', f'LAST:{self.chain}'))
        aln = Alignment(env1)
        aln.append_model(mdl, align_codes=self.structure, atom_files=template_path)
        aln.append(file=self.ali_file.as_posix(), align_codes=self.sequence)
        aln.align2d()
        alignment_pir = (self.outdir / 'alignment.ali').as_posix()
        aln.write(file=alignment_pir, alignment_format='PIR')
        aln.write(file=(self.outdir / 'alignment.pap').as_posix(), alignment_format='PAP')

        log.verbose()

        # Step 2: loop modelling from the alignment written above.
        env2 = Environ()
        env2.io.atom_files_directory = ['.']
        loop_model = LoopModel(env2,
                               alnfile=alignment_pir,
                               knowns=self.structure,
                               sequence=self.sequence,
                               loop_assess_methods=(assess.DOPE, assess.GA341))

        # Number of loop models built: (ending_model - starting_model) + 1.
        loop_model.loop.starting_model = 1
        loop_model.loop.ending_model = self.num_loop

        # Select the MD refinement level. A missing or unrecognised value
        # leaves the LoopModel setting untouched instead of crashing on
        # ``None.strip()``.
        level_name = (self.md_level or '').strip()
        if level_name in ('refine.slow', 'refine.very_fast', 'refine.fast'):
            loop_model.loop.md_level = getattr(refine, level_name.split('.', 1)[1])
        else:
            print(f"Unknown md_level {self.md_level!r}; keeping the LoopModel default.")

        loop_model.make()
        end_time = time.time()
        print(f"Time cost: {end_time - time_start}s")

        # Collect the paths of all successfully generated loop models.
        model_files = self.get_model_files(loop_model)
        if model_files:
            print(f"Model files: {[file.name for file in model_files]}")
        else:
            print("No model files found.")

        return model_files

    def find_pdb95_fsa_file(self, env_root: 'Path | None' = None) -> Path:
        """Locate ``pdb95.fsa`` inside the Modeller installation of an environment.

        Args:
            env_root: Root of the (Conda) environment to search. Defaults to
                the grandparent directory of the running Python executable.

        Returns:
            ``<env_root>/lib/modeller-<version>/examples/commands/pdb95.fsa``
            for the highest installed version. The file's existence is not
            checked.

        Raises:
            FileNotFoundError: If no Modeller examples directory is found.
        """
        if env_root is None:
            # <env>/bin/python -> <env>
            env_root = Path(sys.executable).parent.parent

        def version_key(commands_dir: Path):
            # commands_dir is .../lib/modeller-<version>/examples/commands.
            version = commands_dir.parents[1].name.partition('-')[2]
            # Compare versions numerically so that 10.4 ranks above 9.25;
            # a plain string sort would order them the other way round.
            digit_runs = ''.join(c if c.isdigit() else ' ' for c in version).split()
            return tuple(int(run) for run in digit_runs), commands_dir.as_posix()

        modeller_dirs = list(Path(env_root).glob("lib/modeller-*/examples/commands"))
        if not modeller_dirs:
            raise FileNotFoundError("Modeller directory not found.")
        return max(modeller_dirs, key=version_key) / "pdb95.fsa"

    def get_model_files(self, loop_model) -> List[Path]:
        """Return the paths of the loop models that were built successfully.

        Args:
            loop_model: Object exposing ``loop.outputs``, an iterable of
                dict-like records with ``'name'`` and ``'failure'`` entries.

        Returns:
            ``Path(name)`` for every record whose ``'failure'`` is ``None``.
        """
        return [
            Path(output.get('name'))
            for output in loop_model.loop.outputs
            if output.get('failure') is None
        ]

    def fasta_to_ali(self) -> Path:
        """Convert the first record of the FASTA file into a PIR ``.ali`` file.

        All sequence lines of the first record are joined, so sequences
        wrapped over several lines are kept in full. Any existing ``.ali``
        file of the same name is overwritten.

        Returns:
            Path of the written file, ``<outdir>/<fasta stem>.ali``.

        Raises:
            ValueError: If the FASTA file contains no sequence data.
        """
        self.outdir.mkdir(parents=True, exist_ok=True)
        ali_file = self.outdir / f'{self.sequence}.ali'

        # Read the input before touching the output, so the input is never
        # destroyed should both paths happen to coincide.
        seq_parts = []
        seen_header = False
        with open(self.fasta_file, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                if line.startswith('>'):
                    if seen_header:
                        break  # a second record starts here; use only the first
                    seen_header = True
                    continue
                seq_parts.append(line)

        seq = ''.join(seq_parts)
        if not seq:
            raise ValueError(f"No sequence found in FASTA file: {self.fasta_file}")

        with open(ali_file, 'w') as f:
            f.write(f'>P1;{self.sequence}\n')
            f.write(f'sequence:{self.sequence}:::::::0.00: 0.00\n')
            f.write(f'{seq}*')

        return ali_file
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: build loop models for one template chain.
    import argparse

    parser = argparse.ArgumentParser(description="Build model by Modeller")
    # Structure, output directory, FASTA file and chain have no sensible
    # default, so argparse reports a missing one instead of Path(None) failing.
    parser.add_argument("-s", "--structure", required=True, type=Path, help="Structure file")
    parser.add_argument("-o", "--outdir", required=True, type=Path, help="Output directory")
    parser.add_argument("-f", "--fasta", required=True, type=Path, help="Fasta file")
    # The defaults below mirror the PDBModeler field defaults.
    parser.add_argument("-n", "--num_loop", type=int, default=2, help="Number of loop model")
    parser.add_argument("-m", "--md_level", default="refine.fast", help="MD level")
    parser.add_argument("-c", "--chain", required=True, help="Chain ID")
    args = parser.parse_args()

    modeler = PDBModeler(args.structure, args.fasta, args.outdir,
                         args.chain, args.num_loop, args.md_level)
    modeler.make_model()

# test command
# python build_modeller.py -s ../5sws_fixer.pdb -o ./5swsmodellerfix -f ../rcsb_pdb_5SWS.fasta -n 1 -m refine.very_fast -c D
# python build_modeller.py -s 1j8h.pdb -o ./1j8hmodellerfix -f ./1j8h_D.fasta -n 1 -m refine.very_fast -c D