diff --git a/modelbuilder.py b/modelbuilder.py index dd405d3..f0381a5 100644 --- a/modelbuilder.py +++ b/modelbuilder.py @@ -19,6 +19,8 @@ from modeller import ModellerError import pymol from typing import Dict from Bio.SeqRecord import SeqRecord +from concurrent.futures import ProcessPoolExecutor +from functools import partial @dataclass class PDBAlign: @@ -34,7 +36,7 @@ class PDBAlign: self.pymol_instance = pymol.cmd self.pymol_instance.reinitialize() - def align(self): + def align(self) -> str: self.pymol_instance.reinitialize() # 首先,加载模板结构 self.pymol_instance.load(self.template_file.as_posix(), "template") @@ -44,6 +46,9 @@ class PDBAlign: self.pymol_instance.align("target", "template") return self.pymol_instance.get_pdbstr('target') + + def save(self, out_file: Path): + self.pymol_instance.save(out_file.as_posix(), "target") @dataclass class LoopModelBuilder: @@ -124,16 +129,21 @@ class LoopModelBuilder: if not self.missing_info: self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.') return mc_dict - for mc in self.missing_info: - out_file = f'{self.pdb_id}_{mc}.pdb' - self.analyzer_instance.split_chain(mc).to_pdb(out_file) # get misschain pdb file + self.logger.info(f'Missing residues info for {self.pdb_file}:\n {self.missing_info}') + # create workdir + for mc in self.missing_info.keys(): + self.logger.info(f'Building model for chain {mc}') + workdir = self.pdb_file.parent.joinpath(f'./{self.pdb_id}modellerfix_{mc}') + workdir.mkdir(exist_ok=True, parents=True) mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file + # write fasta file mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}') - out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta') + out_fasta_file = workdir.joinpath(f'{self.analyzer_instance.pid}_{mc}.fasta') self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file) - self.logger.info(f'>{self.pdb_id}|{mc}|{self.missing_info[mc]}|{len(mc_fasta)}') + self.logger.info(f'>{self.pdb_id}|{mc}|missing site:{self.missing_info[mc]}|length:{len(mc_fasta)}') self.logger.info(mc_fasta) - modeller = PDBModeler(self.pdb_file, out_fasta_file, Path(f'./{self.pdb_id}modellerfix_{mc}'), mc, buildnumber, typestr) + # build model + modeller = PDBModeler(self.pdb_file, out_fasta_file, workdir, mc, buildnumber, typestr) try: modeller_results = modeller.make_model() except ModellerError as mod_err: @@ -147,23 +157,29 @@ class LoopModelBuilder: self.logger.info(f'Model files: {[file.name for file in modeller_results]}') # change id to original for i in modeller_results: - manalyzer = PDBAnalyzer(i) - manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i) + self.change_chain_identifier(i, 'A', mc, split=False) + # use pymol to align and merge if len(modeller_results) == 1: # use pymol to align - aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb')) + aligner = PDBAlign(self.pdb_file, modeller_results[0]) pdbstr = aligner.align() - mc_dict[mc] = pdbstr - return mc_dict + mc_dict.update({mc: pdbstr}) else: self.logger.warning('more than one model file, please set num_loop to 1') return mc_dict - - def run(self, typestr:str = 'refine.very_fast'): + + @staticmethod + def change_chain_identifier(pdb_file: Path, chain_id:str, new_chain_id:str, split:bool = True) -> Path: + o = PDBAnalyzer(pdb_file) + o.change_chain_identifier(chain_id, new_chain_id, split=split).to_pdb(pdb_file) + return pdb_file + + def run(self, typestr:str = 'refine.very_fast', buildnumber: str = 1) -> Path: split_dict = self.split_all_chains() - mc_dict = self.model_missing_loops(typestr=typestr) + mc_dict = self.model_missing_loops(typestr=typestr, buildnumber=buildnumber) split_dict.update(mc_dict) # 更新 split_dict - self.import_and_merge_pdb_strings(split_dict, "merged_object", self.output_dir.joinpath(f'{self.analyzer_instance.pid}.modellerfix.pdb').as_posix()) + out_file = self.output_dir.joinpath(f'{self.analyzer_instance.pid}.modellerfix.pdb') + self.import_and_merge_pdb_strings(split_dict, "merged_object", out_file.as_posix()) def import_and_merge_pdb_strings(self, pdb_strings, merged_object_name, output_file): # 使用 PyMOL 实例导入和合并 PDB