big update

This commit is contained in:
root
2024-01-16 17:35:23 +08:00
parent 504b7b4d5b
commit 349555f5bf

View File

@@ -19,6 +19,8 @@ from modeller import ModellerError
import pymol
from typing import Dict
from Bio.SeqRecord import SeqRecord
from concurrent.futures import ProcessPoolExecutor
from functools import partial
@dataclass
class PDBAlign:
@@ -34,7 +36,7 @@ class PDBAlign:
self.pymol_instance = pymol.cmd
self.pymol_instance.reinitialize()
def align(self):
def align(self) -> str:
self.pymol_instance.reinitialize()
# 首先,加载模板结构
self.pymol_instance.load(self.template_file.as_posix(), "template")
@@ -44,6 +46,9 @@ class PDBAlign:
self.pymol_instance.align("target", "template")
return self.pymol_instance.get_pdbstr('target')
def save(self, out_file: Path):
    """Save the PyMOL object named "target" to ``out_file``.

    Args:
        out_file: Destination path; it is handed to PyMOL as a POSIX-style
            string.
    """
    destination = out_file.as_posix()
    self.pymol_instance.save(destination, "target")
@dataclass
class LoopModelBuilder:
@@ -124,16 +129,21 @@ class LoopModelBuilder:
if not self.missing_info:
self.logger.info(f'No missing residues found in {self.pdb_file}. Skipping model_missing_loops.')
return mc_dict
for mc in self.missing_info:
out_file = f'{self.pdb_id}_{mc}.pdb'
self.analyzer_instance.split_chain(mc).to_pdb(out_file) # get misschain pdb file
self.logger.info(f'Missing residues info for {self.pdb_file}:\n {self.missing_info}')
# create workdir
for mc in self.missing_info.keys():
self.logger.info(f'Building model for chain {mc}')
workdir = self.pdb_file.parent.joinpath(f'./{self.pdb_id}modellerfix_{mc}')
workdir.mkdir(exist_ok=True, parents=True)
mc_fasta = self.analyzer_instance.find_most_similar(self.sequences[mc]) # get misschain fasta file
# write fasta file
mc_fasta_record = SeqRecord(mc_fasta, id=f'{mc}', description=f'{self.pdb_id}|{mc}')
out_fasta_file = Path(f'{self.analyzer_instance.pid}_{mc}.fasta')
out_fasta_file = workdir.joinpath(f'{self.analyzer_instance.pid}_{mc}.fasta')
self.analyzer_instance.write_seq_to_fasta_single_line(mc_fasta_record, out_fasta_file)
self.logger.info(f'>{self.pdb_id}|{mc}|{self.missing_info[mc]}|{len(mc_fasta)}')
self.logger.info(f'>{self.pdb_id}|{mc}|missing site:{self.missing_info[mc]}|length:{len(mc_fasta)}')
self.logger.info(mc_fasta)
modeller = PDBModeler(self.pdb_file, out_fasta_file, Path(f'./{self.pdb_id}modellerfix_{mc}'), mc, buildnumber, typestr)
# build model
modeller = PDBModeler(self.pdb_file, out_fasta_file, workdir, mc, buildnumber, typestr)
try:
modeller_results = modeller.make_model()
except ModellerError as mod_err:
@@ -147,23 +157,29 @@ class LoopModelBuilder:
self.logger.info(f'Model files: {[file.name for file in modeller_results]}')
# change id to original
for i in modeller_results:
manalyzer = PDBAnalyzer(i)
manalyzer.change_chain_identifier('A', mc, split=False).to_pdb(i)
self.change_chain_identifier(i, 'A', mc, split=False)
# use pymol to align and merge
if len(modeller_results) == 1:
# use pymol to align
aligner = PDBAlign(self.pdb_file, modeller_results[0],Path(f'{self.analyzer_instance.pid}_merge_model.pdb'))
aligner = PDBAlign(self.pdb_file, modeller_results[0])
pdbstr = aligner.align()
mc_dict[mc] = pdbstr
return mc_dict
mc_dict.update({mc: pdbstr})
else:
self.logger.warning('more than one model file, please set num_loop to 1')
return mc_dict
def run(self, typestr:str = 'refine.very_fast'):
@staticmethod
def change_chain_identifier(pdb_file: Path, chain_id:str, new_chain_id:str, split:bool = True) -> Path:
    """Rename a chain in ``pdb_file`` and write the result back to the same path.

    Args:
        pdb_file: PDB file to load with ``PDBAnalyzer``; also the path passed
            to ``to_pdb`` for the result.
        chain_id: Identifier of the chain to rename.
        new_chain_id: Identifier the chain should carry afterwards.
        split: Forwarded unchanged to ``PDBAnalyzer.change_chain_identifier``;
            its meaning is defined there.

    Returns:
        The same ``pdb_file`` path that was passed in.
    """
    analyzer = PDBAnalyzer(pdb_file)
    renamed = analyzer.change_chain_identifier(chain_id, new_chain_id, split=split)
    # NOTE(review): presumably to_pdb overwrites the input file in place - confirm.
    renamed.to_pdb(pdb_file)
    return pdb_file
def run(self, typestr: str = 'refine.very_fast', buildnumber: int = 1) -> Path:
    """Model the missing loops and write the merged structure to disk.

    Splits the structure into chains, rebuilds the chains reported by
    ``model_missing_loops``, merges everything and saves the result as
    ``<pid>.modellerfix.pdb`` inside ``self.output_dir``.

    Args:
        typestr: Forwarded to ``model_missing_loops``.
        buildnumber: Forwarded to ``model_missing_loops``. Annotated as
            ``int`` to match the integer default.

    Returns:
        Path of the merged PDB file that was written.
    """
    split_dict = self.split_all_chains()
    # Model exactly once: the superseded call without ``buildnumber`` is gone,
    # so the loop-building step is no longer executed twice.
    mc_dict = self.model_missing_loops(typestr=typestr, buildnumber=buildnumber)
    # Update split_dict: rebuilt chains replace entries stored under the same key.
    split_dict.update(mc_dict)
    out_file = self.output_dir.joinpath(f'{self.analyzer_instance.pid}.modellerfix.pdb')
    self.import_and_merge_pdb_strings(split_dict, "merged_object", out_file.as_posix())
    # The signature promises a Path; previously the method fell off the end
    # and returned None.
    return out_file
def import_and_merge_pdb_strings(self, pdb_strings, merged_object_name, output_file):
# 使用 PyMOL 实例导入和合并 PDB