From 26552e81a551dd0bfcf53359d4ccfb5ca62cc90f Mon Sep 17 00:00:00 2001 From: lingyuzeng Date: Thu, 7 Mar 2024 17:10:13 +0800 Subject: [PATCH] update check_and_log_sequence_issues --- analysis_pdb.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/analysis_pdb.py b/analysis_pdb.py index 854f06b..7d9992e 100755 --- a/analysis_pdb.py +++ b/analysis_pdb.py @@ -214,24 +214,28 @@ class PDBAnalyzer: 检测并记录每条链的编号问题,并计算相邻残基间的距离。 """ for chain_id, detailed_seq in self.extract_sequences(detailed=True).items(): - # 检测起始编号是否为1 if detailed_seq[0][0] != 1: self.logger.warning(f"Chain {chain_id} does not start with residue number 1.") - - # 检测编号连续性和错误,同时计算距离 - for i in range(len(detailed_seq) - 1): - diff = detailed_seq[i + 1][0] - detailed_seq[i][0] - # if diff > 1: - # 计算两个相邻残基的Cα原子之间的距离 - distance = self.calculate_distance(chain_id, detailed_seq[i][0], detailed_seq[i + 1][0]) - # 如果编号相差很大,但Cα原子距离不符合预期(既不符合正常距离,也不符合预期的缺失距离) - if diff > 10 or (distance < 3 or distance > 5 * diff): - self.logger.warning(f"Potential numbering error or unexpected distance in chain {chain_id} between residues {detailed_seq[i][0]} ({detailed_seq[i][1]}) and {detailed_seq[i + 1][0]} ({detailed_seq[i + 1][1]}). Distance: {distance:.2f} Å") - elif 3.3 <= distance <= 4.8 * diff: - # 距离在合理范围内,可能是正常的残基缺失 - self.ca_distances.append(distance) - # self.logger.info(f"Missing residues detected in chain {chain_id} between residues {detailed_seq[i][0]} ({detailed_seq[i][1]}) and {detailed_seq[i + 1][0]} ({detailed_seq[i + 1][1]}). Estimated missing residues based on distance: {round(distance / 5)}. Distance: {distance:.2f} Å") + for i in range(len(detailed_seq) - 1): + prev_res, next_res = detailed_seq[i], detailed_seq[i + 1] + diff = next_res[0] - prev_res[0] + distance = self.calculate_distance(chain_id, prev_res[0], next_res[0]) + self.ca_distances.append(distance) + + min_expected_dist = 3.3 * (diff - 0.5) + max_expected_dist = 4.3 * (diff + 0.5) + + # 简化条件判断 + if diff == 1: + if not (3.3 <= distance <= 4.3): + self.logger.warning(f"Potential numbering error or unexpected distance in chain {chain_id} between residues {prev_res[0]} ({prev_res[1]}) and {next_res[0]} ({next_res[1]}). Distance: {distance:.2f} Å") + elif diff > 1: + if (min_expected_dist <= distance <= max_expected_dist): + self.logger.info(f"Potential missing residue in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}") + else: + self.logger.warning(f"Wrong sequence numbering in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}") + @classmethod def renumber_residues_based_on_issues_and_clean(cls, input_file: Path, out_ext: str = ".renumbered.pdb", chains: Union[List[str], str, None] = None) -> 'PDBAnalyzer': """