add log_numbering_error
This commit is contained in:
@@ -106,6 +106,7 @@ class PDBAnalyzer:
|
|||||||
log_file: Path = field(init=False)
|
log_file: Path = field(init=False)
|
||||||
logger: logging.Logger = field(init=False)
|
logger: logging.Logger = field(init=False)
|
||||||
ca_distances: List[float] = field(init=False)
|
ca_distances: List[float] = field(init=False)
|
||||||
|
# One record per detected numbering problem, appended by log_numbering_error.
# Each record is a flat dict with the keys "chain_id" (str), "start_residue",
# "end_residue" and "estimated_missing" (ints).
renumber_errors: List[Dict[str, Union[str, int]]] = field(default_factory=list)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
"""
|
"""
|
||||||
@@ -197,6 +198,55 @@ class PDBAnalyzer:
|
|||||||
# Return a new instance of PDBAnalyzer pointing to the cleaned file
|
# Return a new instance of PDBAnalyzer pointing to the cleaned file
|
||||||
return cls(out_file)
|
return cls(out_file)
|
||||||
|
|
||||||
|
def collect_renumbering_info(self):
    """
    Collect the residue renumbering map for every chain.

    The result is stored in ``self.renumbering_info``: a dict keyed by chain
    ID whose values are lists of ``(original_res_num, new_res_num)`` tuples.
    Within each chain the distinct original residue numbers are taken in
    ascending order and paired with consecutive new numbers starting at 1.
    """
    info = {}
    self.renumbering_info = info
    for chain_id in self.chain_id_list:
        atom_table = self.biodata.df['ATOM']
        chain_atoms = atom_table[atom_table['chain_id'] == chain_id]
        ordered_numbers = sorted(chain_atoms['residue_number'].unique())

        # enumerate yields (new, original); flip to (original, new).
        info[chain_id] = [
            (orig_res_num, new_res_num)
            for new_res_num, orig_res_num in enumerate(ordered_numbers, start=1)
        ]
|
||||||
|
|
||||||
|
def log_numbering_error(self, chain_id: str, start_residue: int, end_residue: int, estimated_missing: int):
    """
    Record one residue-numbering error.

    Args:
        chain_id (str): ID of the chain the error was found in.
        start_residue (int): Residue number at which the error starts.
        end_residue (int): Residue number at which the error ends.
        estimated_missing (int): Estimated number of missing residues.
    """
    error_record = {
        "chain_id": chain_id,
        "start_residue": start_residue,
        "end_residue": end_residue,
        "estimated_missing": estimated_missing,
    }
    self.renumber_errors.append(error_record)
|
||||||
|
|
||||||
|
# def renumber_residues(self):
|
||||||
|
# """
|
||||||
|
# 根据收集的残基信息重新编号所有残基。
|
||||||
|
# """
|
||||||
|
# for chain_id, res_pairs in self.renumbering_info.items():
|
||||||
|
# for orig_res_num, new_res_num in res_pairs:
|
||||||
|
# self.modify_residue_number(chain_id, orig_res_num, new_res_num)
|
||||||
|
|
||||||
|
# def modify_residue_number(self, chain_id, original_res_num, new_res_num):
|
||||||
|
# """
|
||||||
|
# 修改指定链上的残基编号。
|
||||||
|
# """
|
||||||
|
# mask = (self.biodata.df['ATOM']['chain_id'] == chain_id) & \
|
||||||
|
# (self.biodata.df['ATOM']['residue_number'] == original_res_num)
|
||||||
|
# self.biodata.df['ATOM'].loc[mask, 'residue_number'] = new_res_num
|
||||||
|
|
||||||
def check_and_log_sequence_issues(self):
|
def check_and_log_sequence_issues(self):
|
||||||
"""
|
"""
|
||||||
检测并记录每条链的编号问题,并计算相邻残基间的距离。
|
检测并记录每条链的编号问题,并计算相邻残基间的距离。
|
||||||
@@ -224,6 +274,7 @@ class PDBAnalyzer:
|
|||||||
else:
|
else:
|
||||||
missing_number = int(np.round(distance / 3.8 ) - 1)
|
missing_number = int(np.round(distance / 3.8 ) - 1)
|
||||||
self.logger.warning(f"Wrong sequence numbering in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}, distance: {distance:.2f} Å, missing residue number: {missing_number}")
|
self.logger.warning(f"Wrong sequence numbering in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}, distance: {distance:.2f} Å, missing residue number: {missing_number}")
|
||||||
|
self.log_numbering_error(chain_id, prev_res[0], next_res[0], missing_number)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def renumber_residues_based_on_issues_and_clean(cls, input_file: Path, out_ext: str = ".renumbered.pdb", chains: Union[List[str], str, None] = None) -> 'PDBAnalyzer':
|
def renumber_residues_based_on_issues_and_clean(cls, input_file: Path, out_ext: str = ".renumbered.pdb", chains: Union[List[str], str, None] = None) -> 'PDBAnalyzer':
|
||||||
|
|||||||
Reference in New Issue
Block a user