diff --git a/analysis_pdb.py b/analysis_pdb.py index 37aa257..1896246 100755 --- a/analysis_pdb.py +++ b/analysis_pdb.py @@ -13,6 +13,7 @@ from dataclasses import dataclass, field from Bio.PDB import PDBParser from Bio.SeqUtils import seq1 +from Bio.Data import IUPACData from typing import List, Dict, Tuple, Optional from functools import reduce, partial from Bio.PDB import MMCIFIO, PDBIO, Chain, Structure @@ -26,6 +27,8 @@ from copy import deepcopy from pymol import cmd import pymol import os +# Use BioPython's IUPAC data to obtain the one-letter amino-acid codes +AMINO_ACIDS = set(IUPACData.protein_letters) @dataclass class PDBAnalyzer: @@ -78,7 +81,23 @@ class PDBAnalyzer: with open(out_file, "w") as fid: fid.writelines(good) return Path(out_file) + + def sequence_similarity(self, seq1: str, seq2: str) -> float: + """ + Calculate the similarity between two sequences as the global alignment score divided by the maximum match score of the shorter sequence. + Args: + seq1 (str): First sequence (inside this method the parameter shadows the module-level seq1 imported from Bio.SeqUtils). + seq2 (str): Second sequence. + + Returns: + float: Alignment score normalised by the best possible score for the shorter sequence; intended to lie between 0 and 1, where 1 is identical (NOTE(review): the range depends on the aligner's default scoring - confirm). + """ + aligner = PairwiseAligner() + aligner.mode = 'global' + score = aligner.score(seq1, seq2) + max_score = min(len(seq1), len(seq2)) * aligner.match_score + return score / max_score def check_continuity(self, chain, missing_char): """