#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file        :modelbuilder.py
@Description :Split a PDB file into chains, rebuild chains with missing
              residues through PDBModeler and merge everything with PyMOL.
@Date        :2024/01/11 15:29:28
@Author      :lyzeng
@Email       :pylyzeng@gmail.com
@version     :1.0
'''

from dataclasses import dataclass, field
from pathlib import Path
import os
import logging
from analysis_pdb import PDBAnalyzer
from build_modeller import PDBModeler
from modeller import ModellerError
import pymol
from typing import Dict, Optional


@dataclass
class PDBAlign:
    """Align a target structure onto a template structure with PyMOL.

    Attributes:
        template_file: PDB file loaded as the PyMOL object ``template``.
        target_file: PDB file loaded as the PyMOL object ``target``.
        pymol_instance: The ``pymol.cmd`` module, set in ``__post_init__``.
        out_file: Output file path. It is stored but not used by any method
            of this class.
    """

    template_file: Path
    target_file: Path
    pymol_instance: object = field(init=False)
    out_file: Optional[Path] = field(default=None)  # output file path

    def __post_init__(self):
        # Accept plain strings as well as Path objects.
        self.template_file = Path(self.template_file)
        self.target_file = Path(self.target_file)
        self.initialize_pymol()

    def initialize_pymol(self):
        """Bind ``pymol.cmd`` and reset the PyMOL session."""
        self.pymol_instance = pymol.cmd
        self.pymol_instance.reinitialize()

    def align(self):
        """Align ``target`` onto ``template`` and return it as a PDB string.

        The PyMOL session is reinitialized first, so objects loaded earlier
        through ``pymol.cmd`` are discarded.
        """
        self.pymol_instance.reinitialize()
        # First, load the template structure.
        self.pymol_instance.load(self.template_file.as_posix(), "template")
        # Then load the target structure and align it onto the template.
        self.pymol_instance.load(self.target_file.as_posix(), "target")
        self.pymol_instance.align("target", "template")
        return self.pymol_instance.get_pdbstr('target')


@dataclass
class LoopModelBuilder:
    """Rebuild chains with missing residues and merge them with the others.

    Attributes:
        pdb_file: Input PDB file.
        output_dir: Directory for the log file and the runner directory;
            defaults to the directory that contains ``pdb_file``.
        logger: Logger named ``LoopModelBuilder``, set in ``setup_logging``.
        pymol_instance: The ``pymol.cmd`` module.
        analyzer_instance: ``PDBAnalyzer`` built from ``pdb_file``.
        runner_dir: ``output_dir / "runner_<pdb stem>"``, created on init.
        pdb_id: Stem of ``pdb_file``.
    """

    pdb_file: Path
    output_dir: Optional[Path] = field(default=None)
    logger: logging.Logger = field(init=False)
    pymol_instance: object = field(init=False)
    analyzer_instance: PDBAnalyzer = field(init=False)
    runner_dir: Path = field(init=False)
    pdb_id: str = field(init=False)

    def __post_init__(self):
        # Accept plain strings as well as Path objects.
        self.pdb_file = Path(self.pdb_file)
        self.pdb_id = self.pdb_file.stem
        if self.output_dir is None:
            self.output_dir = self.pdb_file.parent
        else:
            self.output_dir = Path(self.output_dir)
        self.runner_dir = self.output_dir / f"runner_{self.pdb_id}"
        self.analyzer_instance = PDBAnalyzer(self.pdb_file)
        self.initialize_workdir()
        self.setup_logging()
        self.initialize_pymol()

    def setup_logging(self):
        """Start a fresh ``loop_modeling.log`` in ``output_dir``.

        An existing log file is deleted before ``logging.basicConfig`` is
        called, so every builder starts with an empty log.
        """
        log_file = self.output_dir / "loop_modeling.log"
        if log_file.exists():
            log_file.unlink()
        logging.basicConfig(level=logging.INFO, filename=log_file, filemode='a',
                            format='%(asctime)s - %(levelname)s - %(message)s')
        self.logger = logging.getLogger('LoopModelBuilder')

    def initialize_pymol(self):
        """Bind ``pymol.cmd`` and reset the PyMOL session."""
        self.pymol_instance = pymol.cmd
        self.pymol_instance.reinitialize()

    def initialize_workdir(self):
        """Create ``output_dir / "runner_<pdb stem>"`` and store it."""
        runner_dir = self.output_dir / f"runner_{self.pdb_file.stem}"
        runner_dir.mkdir(exist_ok=True, parents=True)
        self.runner_dir = runner_dir

    def split_chains(self) -> Dict[str, str]:
        """Write every chain to ``runner_dir`` and return the PDB texts.

        Returns:
            Mapping of chain id to the text of the written chain file.
        """
        split_dict = {}
        for chain_id in self.analyzer_instance.chain_id_list:
            chain_file = self.runner_dir / f"{self.analyzer_instance.pid}_{chain_id}.pdb"
            self.analyzer_instance.split_chain(chain_id).to_pdb(chain_file.as_posix())
            split_dict[chain_id] = chain_file.read_text()
        return split_dict

    def fix_loops(self, pdb_file: Path):
        """Split, remodel and merge the chains, saving into ``runner_dir``.

        The helper methods work on the builder's own analyzer, so
        ``pdb_file`` only supplies the stem of the merged output file.
        NOTE(review): callers are presumably expected to pass
        ``self.pdb_file`` -- confirm.

        Args:
            pdb_file: PDB file whose stem names ``<stem>_merged.pdb``.
        """
        pdb_id = Path(pdb_file).stem
        split_dict = self.split_chains()
        mc_dict = self.model_missing_loops()
        # Remodelled chains replace their original counterparts.
        split_dict.update(mc_dict)
        self.merge_and_save_pdb(split_dict, pdb_id)

    def merge_and_save_pdb(self, pdb_strings: dict, pdb_id: str):
        """Merge the PDB strings and save ``<pdb_id>_merged.pdb`` in ``runner_dir``."""
        merged_file = self.runner_dir / f"{pdb_id}_merged.pdb"
        self.import_and_merge_pdb_strings(pdb_strings, "merged_object", merged_file)

    @property
    def sequences(self) -> Dict:
        """Result of ``extract_sequences`` with ``'-'`` for missing residues."""
        return self.analyzer_instance.extract_sequences(missing_char='-')  # or 'X', or ''

    @property
    def missing_info(self) -> Dict:
        """Result of the analyzer's ``extract_sequences_info``."""
        return self.analyzer_instance.extract_sequences_info()

    def split_all_chains(self):
        """Log sequence information and split every chain into its own file.

        Chain files are written relative to the current working directory as
        ``<pdb stem>_<chain id>.pdb``.

        Returns:
            Mapping of chain id to the text of the written chain file.
        """
        sequences = self.analyzer_instance.extract_sequences(missing_char='-')  # or 'X', or ''
        # The value is interpolated into the message; passing it as a stray
        # positional argument makes logging fail to format the record.
        self.logger.info(f'Residues info for {self.pdb_file}: \n{sequences}')
        missing_info = self.analyzer_instance.extract_sequences_info()
        self.logger.info(f'Missing residues info for {self.pdb_file}:\n {missing_info}')
        split_dict = {}
        # Split all chains.
        for chain_id in self.analyzer_instance.chain_id_list:
            chain_file = Path(f'{self.pdb_file.stem}_{chain_id}.pdb')
            self.analyzer_instance.split_chain(chain_id).to_pdb(chain_file.as_posix())
            split_dict[chain_id] = chain_file.read_text()
        return split_dict

    def model_missing_loops(self, typestr: str = 'refine.very_fast') -> dict:
        """Rebuild every chain listed in ``missing_info``.

        For each chain the chain PDB and a single-line FASTA file are written
        to the current working directory, ``PDBModeler.make_model`` is run,
        the model's chain ``A`` is relabelled to the original chain id and the
        model is aligned onto the input structure.

        Args:
            typestr: Passed through to ``PDBModeler`` unchanged.

        Returns:
            Mapping of chain id to the aligned model as a PDB string. Chains
            that could not be modelled are left out; the mapping is empty
            when no chain was modelled.

        Raises:
            ValueError: If the analyzer returns more than one sequence for a
                chain.
        """
        mc_dict = {}
        pid = self.analyzer_instance.pid
        for mc in self.missing_info:
            # Named after the analysed structure instead of a fixed PDB id.
            out_file = f'{pid}_{mc}.pdb'
            self.analyzer_instance.split_chain(mc).to_pdb(out_file)  # chain PDB file
            mc_fasta = self.analyzer_instance.filter_sequences(mc)  # chain FASTA records
            if len(mc_fasta) == 0:
                self.logger.warning(f'No chain {mc} found in PDB fasta file. Skipping chain {mc}.')
                continue
            if len(mc_fasta) != 1:
                raise ValueError(f'only can fix one chain content: {mc_fasta}')

            mc_fasta = mc_fasta[0]
            out_fasta_file = Path(f'{pid}_{mc}.fasta')
            self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta, out_fasta_file)
            self.logger.info(f'>{mc_fasta.description}')
            self.logger.info(mc_fasta.seq)

            modeller = PDBModeler(self.pdb_file, out_fasta_file, Path('.'), mc, 1, typestr)
            try:
                modeller_results = modeller.make_model()
            except ModellerError as mod_err:
                self.logger.info(f'Failed to build model for chain {mc}')
                self.logger.info(f'No loops detected in {out_fasta_file.name}')
                self.logger.info('may pdb file sequence is not correct')
                self.logger.error(f'Modeller error for chain {mc}: {mod_err}')
                continue
            except Exception as e:
                self.logger.error(f'Unexpected error in model_missing_loops: {e}')
                # Without a result there is nothing to post-process; going on
                # would hit an unbound or stale ``modeller_results``.
                continue

            self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
            # Change the chain id back to the original one.
            for model_file in modeller_results:
                manalyzer = PDBAnalyzer(model_file)
                manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(model_file)

            if len(modeller_results) == 1:
                # Use PyMOL to align the model onto the input structure.
                aligner = PDBAlign(self.pdb_file, modeller_results[0],
                                   Path(f'{pid}_merge_model.pdb'))
                mc_dict[mc] = aligner.align()
            else:
                self.logger.warning('more than one model file, please set num_loop to 1')
        # Returned after the loop so that every chain is processed and the
        # caller always receives a dict.
        return mc_dict

    def run(self, typestr: str = 'refine.very_fast'):
        """Split all chains, rebuild the incomplete ones and save the merge.

        The merged structure is saved as ``<pid>.modellerfix.pdb`` relative
        to the current working directory.

        Args:
            typestr: Passed through to ``model_missing_loops``.
        """
        split_dict = self.split_all_chains()
        mc_dict = self.model_missing_loops(typestr=typestr)
        split_dict.update(mc_dict)  # remodelled chains replace the originals
        self.import_and_merge_pdb_strings(split_dict, "merged_object",
                                          f'{self.analyzer_instance.pid}.modellerfix.pdb')

    def import_and_merge_pdb_strings(self, pdb_strings, merged_object_name, output_file):
        """Load PDB strings into PyMOL, merge them into one object and save it.

        Args:
            pdb_strings: Mapping of chain id to PDB text; each entry is loaded
                as the object ``chain_<chain id>``.
            merged_object_name: Name of the merged PyMOL object.
            output_file: Destination file, as a string or ``Path``.
        """
        # Use the PyMOL instance to import and merge the PDB strings.
        object_names = []
        for chain_id, pdb_str in pdb_strings.items():
            object_name = f"chain_{chain_id}"
            self.pymol_instance.read_pdbstr(pdb_str, object_name)
            object_names.append(object_name)
        self.pymol_instance.create(merged_object_name, ' or '.join(object_names))
        # merge_and_save_pdb passes a Path; hand PyMOL a plain string.
        self.pymol_instance.save(str(output_file), merged_object_name)


if __name__ == "__main__":
    builder = LoopModelBuilder(Path('./pdb_test1/1ao7.pdb'))
    builder.run()