add sequence_similarity func

2024-01-19 16:31:30 +08:00
parent 426bbddf95
commit 3400b6a3da
1 changed files with 19 additions and 0 deletions
--- a/analysis_pdb.py
+++ b/analysis_pdb.py
@@ -13,6 +13,7 @@
 from dataclasses import dataclass, field
 from Bio.PDB import PDBParser
 from Bio.SeqUtils import seq1
+from Bio.Data import IUPACData
 from typing import List, Dict, Tuple, Optional
 from functools import reduce, partial
 from Bio.PDB import MMCIFIO, PDBIO, Chain, Structure
@@ -26,6 +27,8 @@ from copy import deepcopy
 from pymol import cmd
 import pymol
 import os
+# Set of amino-acid one-letter codes taken from Biopython's IUPACData.protein_letters.
+AMINO_ACIDS = set(IUPACData.protein_letters)

@dataclass
 class PDBAnalyzer:
@@ -79,6 +82,22 @@ class PDBAnalyzer:
            fid.writelines(good)
        return Path(out_file)
    
+    def sequence_similarity(self, seq1: str, seq2: str) -> float:
+        """
+        Return the global-alignment similarity of two sequences.
+
+        Args:
+        seq1 (str): First sequence.
+        seq2 (str): Second sequence.
+
+        Returns:
+        float: Alignment score over (shorter length * match score); 1 if identical.
+        """
+        global_aligner = PairwiseAligner()
+        global_aligner.mode = 'global'
+        raw_score = global_aligner.score(seq1, seq2)
+        shorter_len = min(len(seq1), len(seq2))
+        return raw_score / (shorter_len * global_aligner.match_score)

    def check_continuity(self, chain, missing_char):
        """