add sequence_similarity func

This commit is contained in:
root
2024-01-19 16:31:30 +08:00
parent 426bbddf95
commit 3400b6a3da

View File

@@ -13,6 +13,7 @@
from dataclasses import dataclass, field
from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1
from Bio.Data import IUPACData
from typing import List, Dict, Tuple, Optional
from functools import reduce, partial
from Bio.PDB import MMCIFIO, PDBIO, Chain, Structure
@@ -26,6 +27,8 @@ from copy import deepcopy
from pymol import cmd
import pymol
import os
# Amino acid abbreviations taken from BioPython: the set of characters in
# IUPACData.protein_letters (a set gives O(1) membership tests).
AMINO_ACIDS = set(IUPACData.protein_letters)
@dataclass
class PDBAnalyzer:
@@ -78,7 +81,23 @@ class PDBAnalyzer:
with open(out_file, "w") as fid:
fid.writelines(good)
return Path(out_file)
def sequence_similarity(self, seq1: str, seq2: str) -> float:
    """
    Calculate the similarity between two sequences.

    The two sequences are globally aligned with a default-configured
    ``PairwiseAligner``. The alignment score is then divided by the best
    score the shorter sequence could reach (its length times the aligner's
    match score).

    Note: inside this method the parameter ``seq1`` shadows the ``seq1``
    helper imported from ``Bio.SeqUtils`` at module level. The name is kept
    so that existing keyword callers continue to work.

    Args:
        seq1 (str): First sequence.
        seq2 (str): Second sequence.

    Returns:
        float: Normalised alignment score, where 1 means the shorter
            sequence is matched in full. Two empty sequences yield 1.0;
            exactly one empty sequence yields 0.0.
            NOTE(review): the score is expected to lie in [0, 1] under the
            aligner's default scoring -- confirm against the defaults of
            the installed Biopython version.
    """
    # An empty sequence would make the normalisation denominator zero
    # (min length == 0), so handle it before any alignment is attempted.
    if not seq1 or not seq2:
        return 1.0 if seq1 == seq2 else 0.0

    # Imported locally so the method does not rely on a module-level import
    # of PairwiseAligner being present elsewhere in the file.
    from Bio.Align import PairwiseAligner

    aligner = PairwiseAligner()
    aligner.mode = 'global'
    score = aligner.score(seq1, seq2)
    # Best achievable score: every residue of the shorter sequence matches.
    max_score = min(len(seq1), len(seq2)) * aligner.match_score
    return score / max_score
def check_continuity(self, chain, missing_char):
"""