add log_numbering_error
This commit is contained in:
@@ -106,6 +106,7 @@ class PDBAnalyzer:
|
||||
log_file: Path = field(init=False)
|
||||
logger: logging.Logger = field(init=False)
|
||||
ca_distances: List[float] = field(init=False)
|
||||
# Numbering-error records appended by log_numbering_error: each dict holds
# "chain_id" (str) plus "start_residue", "end_residue" and "estimated_missing" (int).
renumber_errors: List[Dict[str, Union[str, int]]] = field(default_factory=list)
|
||||
|
||||
def __post_init__(self):
|
||||
"""
|
||||
@@ -197,6 +198,55 @@ class PDBAnalyzer:
|
||||
# Return a new instance of PDBAnalyzer pointing to the cleaned file
|
||||
return cls(out_file)
|
||||
|
||||
def collect_renumbering_info(self):
    """
    Build a per-chain mapping from original residue numbers to new ones.

    For every chain ID in ``self.chain_id_list``, the distinct values of the
    ``residue_number`` column among that chain's rows of
    ``self.biodata.df['ATOM']`` are sorted in ascending order and paired with
    consecutive new numbers starting at 1.

    The result is stored on ``self.renumbering_info`` as a dict keyed by
    chain ID whose values are lists of ``(original_res_num, new_res_num)``
    tuples. Nothing is returned. A chain ID that matches no rows maps to an
    empty list.
    """
    self.renumbering_info = {}
    for chain_id in self.chain_id_list:
        # Bind the ATOM table once per iteration instead of looking it up twice.
        atom_df = self.biodata.df['ATOM']
        chain_atoms = atom_df[atom_df['chain_id'] == chain_id]
        ordered_residues = sorted(chain_atoms['residue_number'].unique())

        # enumerate(start=1) replaces the hand-maintained running counter.
        self.renumbering_info[chain_id] = [
            (orig_res_num, new_res_num)
            for new_res_num, orig_res_num in enumerate(ordered_residues, start=1)
        ]
|
||||
|
||||
def log_numbering_error(self, chain_id: str, start_residue: int, end_residue: int, estimated_missing: int):
    """
    Append one numbering-error record to ``self.renumber_errors``.

    Args:
        chain_id (str): ID of the chain.
        start_residue (int): Residue number at which the error starts.
        end_residue (int): Residue number at which the error ends.
        estimated_missing (int): Estimated number of missing residues.
    """
    # Keys and values are listed in parallel so the record keeps this key order.
    record_keys = ("chain_id", "start_residue", "end_residue", "estimated_missing")
    record_values = (chain_id, start_residue, end_residue, estimated_missing)
    self.renumber_errors.append(dict(zip(record_keys, record_values)))
|
||||
|
||||
# def renumber_residues(self):
|
||||
# """
|
||||
# 根据收集的残基信息重新编号所有残基。
|
||||
# """
|
||||
# for chain_id, res_pairs in self.renumbering_info.items():
|
||||
# for orig_res_num, new_res_num in res_pairs:
|
||||
# self.modify_residue_number(chain_id, orig_res_num, new_res_num)
|
||||
|
||||
# def modify_residue_number(self, chain_id, original_res_num, new_res_num):
|
||||
# """
|
||||
# 修改指定链上的残基编号。
|
||||
# """
|
||||
# mask = (self.biodata.df['ATOM']['chain_id'] == chain_id) & \
|
||||
# (self.biodata.df['ATOM']['residue_number'] == original_res_num)
|
||||
# self.biodata.df['ATOM'].loc[mask, 'residue_number'] = new_res_num
|
||||
|
||||
def check_and_log_sequence_issues(self):
|
||||
"""
|
||||
检测并记录每条链的编号问题,并计算相邻残基间的距离。
|
||||
@@ -224,6 +274,7 @@ class PDBAnalyzer:
|
||||
else:
|
||||
missing_number = int(np.round(distance / 3.8 ) - 1)
|
||||
self.logger.warning(f"Wrong sequence numbering in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}, distance: {distance:.2f} Å, missing residue number: {missing_number}")
|
||||
self.log_numbering_error(chain_id, prev_res[0], next_res[0], missing_number)
|
||||
|
||||
@classmethod
|
||||
def renumber_residues_based_on_issues_and_clean(cls, input_file: Path, out_ext: str = ".renumbered.pdb", chains: Union[List[str], str, None] = None) -> 'PDBAnalyzer':
|
||||
|
||||
Reference in New Issue
Block a user