diff --git a/modelbuilder.py b/modelbuilder.py index 3b7c93c..dd405d3 100644 --- a/modelbuilder.py +++ b/modelbuilder.py @@ -18,6 +18,7 @@ from build_modeller import PDBModeler from modeller import ModellerError import pymol from typing import Dict +from Bio.SeqRecord import SeqRecord @dataclass class PDBAlign: @@ -118,7 +119,7 @@ class LoopModelBuilder: split_dict[j]=fn.read_text() return split_dict - def model_missing_loops(self, typestr:str = 'refine.very_fast') -> dict: + def model_missing_loops(self, typestr:str = 'refine.very_fast', buildnumber: str = 1) -> dict: mc_dict = {} if not self.missing_info: self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.') @@ -126,42 +127,37 @@ class LoopModelBuilder: for mc in self.missing_info: out_file = f'{self.pdb_id}_{mc}.pdb' self.analyzer_instance.split_chain(mc).to_pdb(out_file) # get misschain pdb file - mc_fasta = self.analyzer_instance.filter_sequences(mc) # get misschain fasta file - if len(mc_fasta) == 1: - mc_fasta = mc_fasta[0] - out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta') - self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta, out_fasta_file) - self.logger.info(f'>{mc_fasta.description}') - self.logger.info(mc_fasta.seq) - modeller = PDBModeler(self.pdb_file, out_fasta_file, Path('.'), mc, 1, typestr) - try: - modeller_results = modeller.make_model() - except ModellerError as mod_err: - self.logger.info(f'Failed to build model for chain {mc}') - self.logger.info(f'No loops detected in {out_fasta_file.name}') - self.logger.info(f'may pdb file sequence is not correct') - self.logger.error(f'Modeller error for chain {mc}: {mod_err}') - continue - except Exception as e: - self.logger.error(f'Unexpected error in model_missing_loops: {e}') - self.logger.info(f'Model files: {[file.name for file in modeller_results]}') - # change id to original - for i in modeller_results: - manalyzer = PDBAnalyzer(i) - manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i) - if len(modeller_results) == 1: - # use pymol to align - aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb')) - pdbstr = aligner.align() - mc_dict[mc] = pdbstr - return mc_dict - else: - self.logger.warning('more than one model file, please set num_loop to 1') - elif len(mc_fasta) == 0: - self.logger.warning(f'No chain {mc} found in PDB fasta file. Skipping chain {mc}.') + mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file + mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}') + out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta') + self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file) + self.logger.info(f'>{self.pdb_id}|{mc}|{self.missing_info[mc]}|{len(mc_fasta)}') + self.logger.info(mc_fasta) + modeller = PDBModeler(self.pdb_file, out_fasta_file, Path(f'./{self.pdb_id}modellerfix_{mc}'), mc, buildnumber, typestr) + try: + modeller_results = modeller.make_model() + except ModellerError as mod_err: + self.logger.info(f'Failed to build model for chain {mc}') + self.logger.info(f'No loops detected in {out_fasta_file.name}') + self.logger.info(f'may pdb file sequence is not correct') + self.logger.error(f'Modeller error for chain {mc}: {mod_err}') continue + except Exception as e: + self.logger.error(f'Unexpected error in model_missing_loops: {e}') + self.logger.info(f'Model files: {[file.name for file in modeller_results]}') + # change id to original + for i in modeller_results: + manalyzer = PDBAnalyzer(i) + manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i) + if len(modeller_results) == 1: + # use pymol to align + aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb')) + pdbstr = aligner.align() + mc_dict[mc] = pdbstr + return mc_dict else: - raise ValueError(f'only can fix one chain content: {mc_fasta}') + self.logger.warning('more than one model file, please set num_loop to 1') + return mc_dict def run(self, typestr:str = 'refine.very_fast'): split_dict = self.split_all_chains()