Use MAS to search for a fitting sequence
This commit is contained in:
@@ -18,6 +18,7 @@ from build_modeller import PDBModeler
|
||||
from modeller import ModellerError
|
||||
import pymol
|
||||
from typing import Dict
|
||||
from Bio.SeqRecord import SeqRecord
|
||||
|
||||
@dataclass
|
||||
class PDBAlign:
|
||||
@@ -118,7 +119,7 @@ class LoopModelBuilder:
|
||||
split_dict[j]=fn.read_text()
|
||||
return split_dict
|
||||
|
||||
def model_missing_loops(self, typestr:str = 'refine.very_fast') -> dict:
|
||||
def model_missing_loops(self, typestr:str = 'refine.very_fast', buildnumber: str = 1) -> dict:
|
||||
mc_dict = {}
|
||||
if not self.missing_info:
|
||||
self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.')
|
||||
@@ -126,42 +127,37 @@ class LoopModelBuilder:
|
||||
for mc in self.missing_info:
|
||||
out_file = f'{self.pdb_id}_{mc}.pdb'
|
||||
self.analyzer_instance.split_chain(mc).to_pdb(out_file) # get misschain pdb file
|
||||
mc_fasta = self.analyzer_instance.filter_sequences(mc) # get misschain fasta file
|
||||
if len(mc_fasta) == 1:
|
||||
mc_fasta = mc_fasta[0]
|
||||
out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta')
|
||||
self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta, out_fasta_file)
|
||||
self.logger.info(f'>{mc_fasta.description}')
|
||||
self.logger.info(mc_fasta.seq)
|
||||
modeller = PDBModeler(self.pdb_file, out_fasta_file, Path('.'), mc, 1, typestr)
|
||||
try:
|
||||
modeller_results = modeller.make_model()
|
||||
except ModellerError as mod_err:
|
||||
self.logger.info(f'Failed to build model for chain {mc}')
|
||||
self.logger.info(f'No loops detected in {out_fasta_file.name}')
|
||||
self.logger.info(f'may pdb file sequence is not correct')
|
||||
self.logger.error(f'Modeller error for chain {mc}: {mod_err}')
|
||||
continue
|
||||
except Exception as e:
|
||||
self.logger.error(f'Unexpected error in model_missing_loops: {e}')
|
||||
self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
|
||||
# change id to original
|
||||
for i in modeller_results:
|
||||
manalyzer = PDBAnalyzer(i)
|
||||
manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i)
|
||||
if len(modeller_results) == 1:
|
||||
# use pymol to align
|
||||
aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb'))
|
||||
pdbstr = aligner.align()
|
||||
mc_dict[mc] = pdbstr
|
||||
return mc_dict
|
||||
else:
|
||||
self.logger.warning('more than one model file, please set num_loop to 1')
|
||||
elif len(mc_fasta) == 0:
|
||||
self.logger.warning(f'No chain {mc} found in PDB fasta file. Skipping chain {mc}.')
|
||||
mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file
|
||||
mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}')
|
||||
out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta')
|
||||
self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file)
|
||||
self.logger.info(f'>{self.pdb_id}|{mc}|{self.missing_info[mc]}|{len(mc_fasta)}')
|
||||
self.logger.info(mc_fasta)
|
||||
modeller = PDBModeler(self.pdb_file, out_fasta_file, Path(f'./{self.pdb_id}modellerfix_{mc}'), mc, buildnumber, typestr)
|
||||
try:
|
||||
modeller_results = modeller.make_model()
|
||||
except ModellerError as mod_err:
|
||||
self.logger.info(f'Failed to build model for chain {mc}')
|
||||
self.logger.info(f'No loops detected in {out_fasta_file.name}')
|
||||
self.logger.info(f'may pdb file sequence is not correct')
|
||||
self.logger.error(f'Modeller error for chain {mc}: {mod_err}')
|
||||
continue
|
||||
except Exception as e:
|
||||
self.logger.error(f'Unexpected error in model_missing_loops: {e}')
|
||||
self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
|
||||
# change id to original
|
||||
for i in modeller_results:
|
||||
manalyzer = PDBAnalyzer(i)
|
||||
manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i)
|
||||
if len(modeller_results) == 1:
|
||||
# use pymol to align
|
||||
aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb'))
|
||||
pdbstr = aligner.align()
|
||||
mc_dict[mc] = pdbstr
|
||||
return mc_dict
|
||||
else:
|
||||
raise ValueError(f'only can fix one chain content: {mc_fasta}')
|
||||
self.logger.warning('more than one model file, please set num_loop to 1')
|
||||
return mc_dict
|
||||
|
||||
def run(self, typestr:str = 'refine.very_fast'):
|
||||
split_dict = self.split_all_chains()
|
||||
|
||||
Reference in New Issue
Block a user