"""Build loop-refined models of one chain of a PDB structure with MODELLER."""

# NOTE: the import order is kept as in the original file so that the wildcard
# imports from MODELLER cannot shadow the names imported after them.
from dataclasses import dataclass, field
from pathlib import Path
from modeller import *
from modeller.automodel import *
import time
from typing import List


@dataclass
class PDBModeler:
    """Align a target sequence to one chain of a structure and loop-model it.

    Attributes:
        structure_file: Template structure file; its stem is used as the
            template alignment code.
        fasta_file: FASTA file holding the target sequence; its stem is used
            as the target alignment code.
        outdir: Directory that receives the generated PIR/PAP alignment files
            (created if missing).
        chain: Chain identifier selecting the template segment.
        num_loop: Index of the last loop model; models 1..num_loop are built.
        md_level: One of 'refine.very_fast', 'refine.fast' or 'refine.slow'.
    """

    structure_file: Path
    fasta_file: Path
    outdir: Path
    chain: str
    num_loop: int = 2
    md_level: str = 'refine.fast'  # 'refine.very_fast' or 'refine.slow' also accepted

    def __post_init__(self):
        # Alignment codes are derived from the file names (without suffix).
        self.structure = self.structure_file.stem
        self.sequence = self.fasta_file.stem
        # The PIR file for the target sequence is written eagerly so that
        # make_model() can append it to the alignment.
        self.ali_file = self.fasta_to_ali()

    def make_model(self):
        """Run the 2D alignment and loop modelling.

        Returns:
            List[Path]: paths of the loop models that MODELLER reported
            without a failure (possibly empty).
        """
        print("***************************************************")
        print("md_level ====", self.md_level)
        print("***************************************************")
        time_start = time.time()

        alignment_ali = (self.outdir / 'alignment.ali').as_posix()
        alignment_pap = (self.outdir / 'alignment.pap').as_posix()
        structure_path = self.structure_file.as_posix()

        # Step 1: align the target sequence against the selected chain.
        env1 = Environ()
        mdl = Model(env1, file=structure_path,
                    model_segment=(f'FIRST:{self.chain}', f'LAST:{self.chain}'))
        aln = Alignment(env1)
        aln.append_model(mdl, align_codes=self.structure,
                         atom_files=structure_path)
        aln.append(file=self.ali_file.as_posix(), align_codes=self.sequence)
        aln.align2d()
        aln.write(file=alignment_ali, alignment_format='PIR')
        aln.write(file=alignment_pap, alignment_format='PAP')

        # Step 2: build and loop-refine models from that alignment.
        log.verbose()
        env2 = Environ()
        env2.io.atom_files_directory = ['.']
        loop_model = LoopModel(env2,
                               alnfile=alignment_ali,
                               knowns=self.structure,
                               sequence=self.sequence,
                               loop_assess_methods=(assess.DOPE, assess.GA341))

        # Number of loop models built = (ending_model - starting_model) + 1.
        loop_model.loop.starting_model = 1
        loop_model.loop.ending_model = self.num_loop

        self._apply_md_level(loop_model)

        loop_model.make()
        end_time = time.time()
        print(f"Time cost: {end_time - time_start}s")

        # Collect the paths of all successfully generated model files.
        model_files = self.get_model_files(loop_model)
        if model_files:
            print(f"Model files: {[file.name for file in model_files]}")
        else:
            print("No model files found.")
        return model_files

    def _apply_md_level(self, loop_model) -> None:
        """Set the loop MD refinement level on *loop_model* from ``md_level``.

        An unrecognised value leaves the level untouched (as before) but is
        now reported instead of being ignored silently.
        """
        levels = {
            'refine.slow': refine.slow,
            'refine.very_fast': refine.very_fast,
            'refine.fast': refine.fast,
        }
        requested = (self.md_level or '').strip()
        if requested in levels:
            loop_model.loop.md_level = levels[requested]
        else:
            print(f"Warning: unknown md_level {self.md_level!r}; expected one of "
                  f"{sorted(levels)}. Leaving the LoopModel refinement level unchanged.")

    def get_model_files(self, loop_model) -> List[Path]:
        """Return the paths of loop models whose output record has no failure.

        NOTE(review): paths are built from the bare ``name`` entry of each
        output record, so they are presumably relative to the working
        directory rather than ``outdir`` -- confirm against MODELLER's output.
        """
        return [
            Path(output.get('name'))
            for output in loop_model.loop.outputs
            if output.get('failure') is None
        ]

    @staticmethod
    def _read_first_sequence(fasta_file: Path) -> str:
        """Return the full sequence of the first record in *fasta_file*.

        The first non-blank line is treated as the header. All following
        non-blank lines up to the next '>' header (or end of file) are
        concatenated, so line-wrapped sequences are read completely.

        Raises:
            ValueError: if the first record contains no sequence data.
        """
        chunks = []
        header_seen = False
        with open(fasta_file, 'r') as handle:
            for raw_line in handle:
                line = raw_line.strip()
                if not line:
                    continue
                if not header_seen:
                    header_seen = True  # first non-blank line is the header
                    continue
                if line.startswith('>'):
                    break  # start of the next record
                chunks.append(line)
        if not chunks:
            raise ValueError(f"No sequence found in FASTA file: {fasta_file}")
        return ''.join(chunks)

    def fasta_to_ali(self) -> Path:
        """Convert the FASTA target sequence into a PIR file inside ``outdir``.

        Returns:
            Path: the written ``<sequence>.ali`` file.

        Raises:
            ValueError: if the FASTA file holds no sequence data.
        """
        self.outdir.mkdir(parents=True, exist_ok=True)
        ali_file = self.outdir / f'{self.sequence}.ali'
        # Remove any stale file first (kept from the original behaviour).
        if ali_file.exists():
            ali_file.unlink()

        # A trailing '*' in the input would otherwise be duplicated by the
        # PIR terminator written below.
        seq = self._read_first_sequence(self.fasta_file).rstrip('*')

        with open(ali_file, 'w') as f:
            f.write(f'>P1;{self.sequence}\n')
            f.write(f'sequence:{self.sequence}:::::::0.00: 0.00\n')
            f.write(f'{seq}*')
        return ali_file


def main():
    """Parse the command line and run the modelling pipeline."""
    import argparse

    parser = argparse.ArgumentParser(description="Build model by Modeller")
    parser.add_argument("-s", "--structure", required=True, help="Structure file")
    parser.add_argument("-o", "--outdir", required=True, help="Output directory")
    parser.add_argument("-f", "--fasta", required=True, help="Fasta file")
    parser.add_argument("-n", "--num_loop", type=int, default=2,
                        help="Number of loop model")
    parser.add_argument("-m", "--md_level", default='refine.fast', help="MD level")
    parser.add_argument("-c", "--chain", required=True, help="Chain ID")
    args = parser.parse_args()

    modeler = PDBModeler(Path(args.structure), Path(args.fasta), Path(args.outdir),
                         args.chain, args.num_loop, args.md_level)
    modeler.make_model()


if __name__ == "__main__":
    main()

# test command
# python build_modeller.py -s ../5sws_fixer.pdb -o ./5swsmodellerfix -f ../rcsb_pdb_5SWS.fasta -n 1 -m refine.very_fast -c D
# python build_modeller.py -s 1j8h.pdb -o ./1j8hmodellerfix -f ./1j8h_D.fasta -n 1 -m refine.very_fast -c D