big update

This commit is contained in:
root
2024-01-16 17:35:23 +08:00
parent 504b7b4d5b
commit 349555f5bf

View File

@@ -19,6 +19,8 @@ from modeller import ModellerError
import pymol import pymol
from typing import Dict from typing import Dict
from Bio.SeqRecord import SeqRecord from Bio.SeqRecord import SeqRecord
from concurrent.futures import ProcessPoolExecutor
from functools import partial
@dataclass @dataclass
class PDBAlign: class PDBAlign:
@@ -34,7 +36,7 @@ class PDBAlign:
self.pymol_instance = pymol.cmd self.pymol_instance = pymol.cmd
self.pymol_instance.reinitialize() self.pymol_instance.reinitialize()
def align(self): def align(self) -> str:
self.pymol_instance.reinitialize() self.pymol_instance.reinitialize()
# 首先,加载模板结构 # 首先,加载模板结构
self.pymol_instance.load(self.template_file.as_posix(), "template") self.pymol_instance.load(self.template_file.as_posix(), "template")
@@ -45,6 +47,9 @@ class PDBAlign:
return self.pymol_instance.get_pdbstr('target') return self.pymol_instance.get_pdbstr('target')
def save(self, out_file: Path):
    """Write the PyMOL object named "target" to ``out_file``.

    Args:
        out_file: Destination path; it is handed to PyMOL as a POSIX-style
            string.
    """
    destination = out_file.as_posix()
    self.pymol_instance.save(destination, "target")
@dataclass @dataclass
class LoopModelBuilder: class LoopModelBuilder:
pdb_file: Path pdb_file: Path
@@ -124,16 +129,21 @@ class LoopModelBuilder:
if not self.missing_info: if not self.missing_info:
self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.') self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.')
return mc_dict return mc_dict
for mc in self.missing_info: self.logger.info(f'Missing residues info for {self.pdb_file}:\n {self.missing_info}')
out_file = f'{self.pdb_id}_{mc}.pdb' # create workdir
self.analyzer_instance.split_chain(mc).to_pdb(out_file) # get misschain pdb file for mc in self.missing_info.keys():
self.logger.info(f'Building model for chain {mc}')
workdir = self.pdb_file.parent.joinpath(f'./{self.pdb_id}modellerfix_{mc}')
workdir.mkdir(exist_ok=True, parents=True)
mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file
# write fasta file
mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}') mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}')
out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta') out_fasta_file = workdir.joinpath(f'{self.analyzer_instance.pid}_{mc}.fasta')
self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file) self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file)
self.logger.info(f'>{self.pdb_id}|{mc}|{self.missing_info[mc]}|{len(mc_fasta)}') self.logger.info(f'>{self.pdb_id}|{mc}|missing site:{self.missing_info[mc]}|length:{len(mc_fasta)}')
self.logger.info(mc_fasta) self.logger.info(mc_fasta)
modeller = PDBModeler(self.pdb_file, out_fasta_file, Path(f'./{self.pdb_id}modellerfix_{mc}'), mc, buildnumber, typestr) # build model
modeller = PDBModeler(self.pdb_file, out_fasta_file, workdir, mc, buildnumber, typestr)
try: try:
modeller_results = modeller.make_model() modeller_results = modeller.make_model()
except ModellerError as mod_err: except ModellerError as mod_err:
@@ -147,23 +157,29 @@ class LoopModelBuilder:
self.logger.info(f'Model files: {[file.name for file in modeller_results]}') self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
# change id to original # change id to original
for i in modeller_results: for i in modeller_results:
manalyzer = PDBAnalyzer(i) self.change_chain_identifier(i, 'A', mc, split=False)
manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i) # use pymol to align and merge
if len(modeller_results) == 1: if len(modeller_results) == 1:
# use pymol to align # use pymol to align
aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb')) aligner = PDBAlign(self.pdb_file, modeller_results[0])
pdbstr = aligner.align() pdbstr = aligner.align()
mc_dict[mc] = pdbstr mc_dict.update({mc: pdbstr})
return mc_dict
else: else:
self.logger.warning('more than one model file, please set num_loop to 1') self.logger.warning('more than one model file, please set num_loop to 1')
return mc_dict return mc_dict
def run(self, typestr:str = 'refine.very_fast'): @staticmethod
def change_chain_identifier(pdb_file: Path, chain_id:str, new_chain_id:str, split:bool = True) -> Path:
    """Rename a chain in ``pdb_file`` and write the result back to the same path.

    Args:
        pdb_file: PDB file that is loaded into ``PDBAnalyzer`` and then used
            again as the output target.
        chain_id: Chain identifier to replace.
        new_chain_id: Identifier to use instead.
        split: Passed through unchanged to
            ``PDBAnalyzer.change_chain_identifier``.

    Returns:
        The same ``pdb_file`` path that was passed in.
    """
    analyzer = PDBAnalyzer(pdb_file)
    renamed = analyzer.change_chain_identifier(chain_id, new_chain_id, split=split)
    # The input path doubles as the output path.
    renamed.to_pdb(pdb_file)
    return pdb_file
def run(self, typestr: str = 'refine.very_fast', buildnumber: int = 1) -> Path:
    """Run the whole pipeline and return the path of the merged PDB file.

    Calls ``split_all_chains`` and ``model_missing_loops``, merges both
    results into one dict and passes it to ``import_and_merge_pdb_strings``.

    Args:
        typestr: Forwarded unchanged to ``model_missing_loops``.
        buildnumber: Forwarded unchanged to ``model_missing_loops``.

    Returns:
        ``<output_dir>/<pid>.modellerfix.pdb``, the path given to
        ``import_and_merge_pdb_strings`` as the output file.
    """
    split_dict = self.split_all_chains()
    mc_dict = self.model_missing_loops(typestr=typestr, buildnumber=buildnumber)
    # Entries from the modelling step override split-chain entries that
    # share the same key.
    split_dict.update(mc_dict)
    out_file = self.output_dir.joinpath(f'{self.analyzer_instance.pid}.modellerfix.pdb')
    self.import_and_merge_pdb_strings(split_dict, "merged_object", out_file.as_posix())
    # The signature promises a Path; previously the method fell off the end
    # and returned None.
    return out_file
def import_and_merge_pdb_strings(self, pdb_strings, merged_object_name, output_file): def import_and_merge_pdb_strings(self, pdb_strings, merged_object_name, output_file):
# 使用 PyMOL 实例导入和合并 PDB # 使用 PyMOL 实例导入和合并 PDB