update check_and_log_sequence_issues

This commit is contained in:
2024-03-07 17:10:13 +08:00
parent b66761ad9b
commit 26552e81a5

View File

@@ -214,24 +214,28 @@ class PDBAnalyzer:
检测并记录每条链的编号问题,并计算相邻残基间的距离。
"""
for chain_id, detailed_seq in self.extract_sequences(detailed=True).items():
# 检测起始编号是否为1
if detailed_seq[0][0] != 1:
self.logger.warning(f"Chain {chain_id} does not start with residue number 1.")
# 检测编号连续性和错误,同时计算距离
for i in range(len(detailed_seq) - 1):
diff = detailed_seq[i + 1][0] - detailed_seq[i][0]
# if diff > 1:
# 计算两个相邻残基的Cα原子之间的距离
distance = self.calculate_distance(chain_id, detailed_seq[i][0], detailed_seq[i + 1][0])
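# Flag the pair when the numbering gap is greater than 10, or when the Cα distance is below 3 Å or above 5 Å times the numbering gap, i.e. it fits neither a normal neighbour distance nor the distance expected across missing residues.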
if diff > 10 or (distance < 3 or distance > 5 * diff):
self.logger.warning(f"Potential numbering error or unexpected distance in chain {chain_id} between residues {detailed_seq[i][0]} ({detailed_seq[i][1]}) and {detailed_seq[i + 1][0]} ({detailed_seq[i + 1][1]}). Distance: {distance:.2f} Å")
elif 3.3 <= distance <= 4.8 * diff:
# 距离在合理范围内,可能是正常的残基缺失
self.ca_distances.append(distance)
# self.logger.info(f"Missing residues detected in chain {chain_id} between residues {detailed_seq[i][0]} ({detailed_seq[i][1]}) and {detailed_seq[i + 1][0]} ({detailed_seq[i + 1][1]}). Estimated missing residues based on distance: {round(distance / 5)}. Distance: {distance:.2f} Å")
for i in range(len(detailed_seq) - 1):
prev_res, next_res = detailed_seq[i], detailed_seq[i + 1]
diff = next_res[0] - prev_res[0]
distance = self.calculate_distance(chain_id, prev_res[0], next_res[0])
self.ca_distances.append(distance)
min_expected_dist = 3.3 * (diff - 0.5)
max_expected_dist = 4.3 * (diff + 0.5)
# 简化条件判断
if diff == 1:
if not (3.3 <= distance <= 4.3):
self.logger.warning(f"Potential numbering error or unexpected distance in chain {chain_id} between residues {prev_res[0]} ({prev_res[1]}) and {next_res[0]} ({next_res[1]}). Distance: {distance:.2f} Å")
elif diff > 1:
if (min_expected_dist <= distance <= max_expected_dist):
self.logger.info(f"Potential missing residue in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}")
else:
self.logger.warning(f"Wrong sequence numbering in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}")
@classmethod
def renumber_residues_based_on_issues_and_clean(cls, input_file: Path, out_ext: str = ".renumbered.pdb", chains: Union[List[str], str, None] = None) -> 'PDBAnalyzer':
"""