add sequence_similarity func

2024-01-19 16:31:30 +08:00
parent 426bbddf95
commit 3400b6a3da
1 changed files with 19 additions and 0 deletions
--- a/analysis_pdb.py
+++ b/analysis_pdb.py
@@ -13,6 +13,7 @@
 from dataclasses import dataclass, field
 from Bio.PDB import PDBParser
 from Bio.SeqUtils import seq1
 from Bio.Data import IUPACData
 from typing import List, Dict, Tuple, Optional
 from functools import reduce, partial
 from Bio.PDB import MMCIFIO, PDBIO, Chain, Structure
@@ -26,6 +27,8 @@ from copy import deepcopy
 from pymol import cmd
 import pymol
 import os
 # Set of amino-acid letters taken from Biopython's IUPACData.protein_letters
 AMINO_ACIDS = set(IUPACData.protein_letters)
@dataclass
 class PDBAnalyzer:
@@ -78,7 +81,23 @@ class PDBAnalyzer:
        with open(out_file, "w") as fid:
            fid.writelines(good)
        return Path(out_file)
    def sequence_similarity(self, seq1: str, seq2: str) -> float:
        """
        Calculate the similarity between two sequences.

        The sequences are globally aligned with a default-configured
        ``PairwiseAligner``. The alignment score is divided by the best score
        the shorter sequence could reach, i.e. its length multiplied by the
        aligner's match score.

        Note: inside this method the ``seq1`` parameter shadows the
        module-level ``seq1`` helper imported from ``Bio.SeqUtils``.

        Args:
        seq1 (str): First sequence.
        seq2 (str): Second sequence.

        Returns:
        float: Alignment score normalised by the shorter sequence; 1.0 means
        the alignment reached the best score available to the shorter
        sequence. 0.0 is returned when either sequence is empty, because
        there is nothing to align and the normaliser would be zero.
        """
        # Guard first: an empty sequence makes the normaliser zero, which
        # previously ended in a crash instead of a usable score.
        if not seq1 or not seq2:
            return 0.0

        # Imported here so the method works even if the module does not
        # import PairwiseAligner at top level.
        from Bio.Align import PairwiseAligner

        aligner = PairwiseAligner()
        aligner.mode = 'global'
        score = aligner.score(seq1, seq2)
        # Best achievable score: every residue of the shorter sequence matches.
        max_score = min(len(seq1), len(seq2)) * aligner.match_score
        return score / max_score
    def check_continuity(self, chain, missing_char):
        """