Use MAS to search for the best-fitting sequence

This commit is contained in:
root
2024-01-16 15:39:26 +08:00
parent bcf5445f04
commit 504b7b4d5b

View File

@@ -18,6 +18,7 @@ from build_modeller import PDBModeler
from modeller import ModellerError
import pymol
from typing import Dict
from Bio.SeqRecord import SeqRecord
@dataclass
class PDBAlign:
@@ -118,7 +119,7 @@ class LoopModelBuilder:
split_dict[j]=fn.read_text()
return split_dict
def model_missing_loops(self, typestr:str = 'refine.very_fast') -> dict:
def model_missing_loops(self, typestr:str = 'refine.very_fast', buildnumber: str = 1) -> dict:
mc_dict = {}
if not self.missing_info:
self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.')
@@ -126,42 +127,37 @@ class LoopModelBuilder:
for mc in self.missing_info:
out_file = f'{self.pdb_id}_{mc}.pdb'
self.analyzer_instance.split_chain(mc).to_pdb(out_file) # get misschain pdb file
mc_fasta = self.analyzer_instance.filter_sequences(mc) # get misschain fasta file
if len(mc_fasta) == 1:
mc_fasta = mc_fasta[0]
out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta')
self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta, out_fasta_file)
self.logger.info(f'>{mc_fasta.description}')
self.logger.info(mc_fasta.seq)
modeller = PDBModeler(self.pdb_file, out_fasta_file, Path('.'), mc, 1, typestr)
try:
modeller_results = modeller.make_model()
except ModellerError as mod_err:
self.logger.info(f'Failed to build model for chain {mc}')
self.logger.info(f'No loops detected in {out_fasta_file.name}')
self.logger.info(f'may pdb file sequence is not correct')
self.logger.error(f'Modeller error for chain {mc}: {mod_err}')
continue
except Exception as e:
self.logger.error(f'Unexpected error in model_missing_loops: {e}')
self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
# change id to original
for i in modeller_results:
manalyzer = PDBAnalyzer(i)
manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i)
if len(modeller_results) == 1:
# use pymol to align
aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb'))
pdbstr = aligner.align()
mc_dict[mc] = pdbstr
return mc_dict
else:
self.logger.warning('more than one model file, please set num_loop to 1')
elif len(mc_fasta) == 0:
self.logger.warning(f'No chain {mc} found in PDB fasta file. Skipping chain {mc}.')
mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file
mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}')
out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta')
self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file)
self.logger.info(f'>{self.pdb_id}|{mc}|{self.missing_info[mc]}|{len(mc_fasta)}')
self.logger.info(mc_fasta)
modeller = PDBModeler(self.pdb_file, out_fasta_file, Path(f'./{self.pdb_id}modellerfix_{mc}'), mc, buildnumber, typestr)
try:
modeller_results = modeller.make_model()
except ModellerError as mod_err:
self.logger.info(f'Failed to build model for chain {mc}')
self.logger.info(f'No loops detected in {out_fasta_file.name}')
self.logger.info(f'may pdb file sequence is not correct')
self.logger.error(f'Modeller error for chain {mc}: {mod_err}')
continue
except Exception as e:
self.logger.error(f'Unexpected error in model_missing_loops: {e}')
self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
# change id to original
for i in modeller_results:
manalyzer = PDBAnalyzer(i)
manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i)
if len(modeller_results) == 1:
# use pymol to align
aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb'))
pdbstr = aligner.align()
mc_dict[mc] = pdbstr
return mc_dict
else:
raise ValueError(f'only can fix one chain content: {mc_fasta}')
self.logger.warning('more than one model file, please set num_loop to 1')
return mc_dict
def run(self, typestr:str = 'refine.very_fast'):
split_dict = self.split_all_chains()