update check_and_log_sequence_issues
This commit is contained in:
@@ -214,24 +214,28 @@ class PDBAnalyzer:
|
||||
检测并记录每条链的编号问题,并计算相邻残基间的距离。
|
||||
"""
|
||||
for chain_id, detailed_seq in self.extract_sequences(detailed=True).items():
|
||||
# 检测起始编号是否为1
|
||||
if detailed_seq[0][0] != 1:
|
||||
self.logger.warning(f"Chain {chain_id} does not start with residue number 1.")
|
||||
|
||||
# 检测编号连续性和错误,同时计算距离
|
||||
for i in range(len(detailed_seq) - 1):
|
||||
diff = detailed_seq[i + 1][0] - detailed_seq[i][0]
|
||||
# if diff > 1:
|
||||
# 计算两个相邻残基的Cα原子之间的距离
|
||||
distance = self.calculate_distance(chain_id, detailed_seq[i][0], detailed_seq[i + 1][0])
|
||||
# 如果编号相差很大,但Cα原子距离不符合预期(既不符合正常距离,也不符合预期的缺失距离)
|
||||
if diff > 10 or (distance < 3 or distance > 5 * diff):
|
||||
self.logger.warning(f"Potential numbering error or unexpected distance in chain {chain_id} between residues {detailed_seq[i][0]} ({detailed_seq[i][1]}) and {detailed_seq[i + 1][0]} ({detailed_seq[i + 1][1]}). Distance: {distance:.2f} Å")
|
||||
elif 3.3 <= distance <= 4.8 * diff:
|
||||
# 距离在合理范围内,可能是正常的残基缺失
|
||||
self.ca_distances.append(distance)
|
||||
# self.logger.info(f"Missing residues detected in chain {chain_id} between residues {detailed_seq[i][0]} ({detailed_seq[i][1]}) and {detailed_seq[i + 1][0]} ({detailed_seq[i + 1][1]}). Estimated missing residues based on distance: {round(distance / 5)}. Distance: {distance:.2f} Å")
|
||||
|
||||
for i in range(len(detailed_seq) - 1):
|
||||
prev_res, next_res = detailed_seq[i], detailed_seq[i + 1]
|
||||
diff = next_res[0] - prev_res[0]
|
||||
distance = self.calculate_distance(chain_id, prev_res[0], next_res[0])
|
||||
self.ca_distances.append(distance)
|
||||
|
||||
min_expected_dist = 3.3 * (diff - 0.5)
|
||||
max_expected_dist = 4.3 * (diff + 0.5)
|
||||
|
||||
# 简化条件判断
|
||||
if diff == 1:
|
||||
if not (3.3 <= distance <= 4.3):
|
||||
self.logger.warning(f"Potential numbering error or unexpected distance in chain {chain_id} between residues {prev_res[0]} ({prev_res[1]}) and {next_res[0]} ({next_res[1]}). Distance: {distance:.2f} Å")
|
||||
elif diff > 1:
|
||||
if (min_expected_dist <= distance <= max_expected_dist):
|
||||
self.logger.info(f"Potential missing residue in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}")
|
||||
else:
|
||||
self.logger.warning(f"Wrong sequence numbering in chain {chain_id} between residues {prev_res[0]} and {next_res[0]}")
|
||||
|
||||
@classmethod
|
||||
def renumber_residues_based_on_issues_and_clean(cls, input_file: Path, out_ext: str = ".renumbered.pdb", chains: Union[List[str], str, None] = None) -> 'PDBAnalyzer':
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user