add sequence_similarity func

This commit is contained in:
root
2024-01-19 16:31:30 +08:00
parent 426bbddf95
commit 3400b6a3da

View File

@@ -13,6 +13,7 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from Bio.PDB import PDBParser from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1 from Bio.SeqUtils import seq1
from Bio.Data import IUPACData
from typing import List, Dict, Tuple, Optional from typing import List, Dict, Tuple, Optional
from functools import reduce, partial from functools import reduce, partial
from Bio.PDB import MMCIFIO, PDBIO, Chain, Structure from Bio.PDB import MMCIFIO, PDBIO, Chain, Structure
@@ -26,6 +27,8 @@ from copy import deepcopy
from pymol import cmd from pymol import cmd
import pymol import pymol
import os import os
# Use BioPython to obtain the amino-acid letter codes
AMINO_ACIDS = set(IUPACData.protein_letters)
@dataclass @dataclass
class PDBAnalyzer: class PDBAnalyzer:
@@ -78,7 +81,23 @@ class PDBAnalyzer:
with open(out_file, "w") as fid: with open(out_file, "w") as fid:
fid.writelines(good) fid.writelines(good)
return Path(out_file) return Path(out_file)
def sequence_similarity(self, seq1: str, seq2: str) -> float:
    """
    Calculate the similarity between two sequences.

    The sequences are globally aligned with a Biopython ``PairwiseAligner``
    and the alignment score is divided by the best score the shorter
    sequence could reach (its length times the aligner's match score).

    Because the score is normalised by the *shorter* length, a value of 1
    means the shorter sequence is fully matched inside the longer one, not
    necessarily that both sequences are identical.
    NOTE(review): the 0..1 range assumes the aligner's default scoring does
    not penalise mismatches/gaps below zero -- confirm for the installed
    Biopython version.

    Args:
        seq1 (str): First sequence. (Inside this method the name shadows
            the module-level ``seq1`` helper imported from Bio.SeqUtils.)
        seq2 (str): Second sequence.
    Returns:
        float: Similarity score, nominally between 0 and 1. Two empty
        sequences give 1.0; exactly one empty sequence gives 0.0.
    """
    # An empty sequence would make max_score zero (division by zero), and
    # there is nothing to align anyway, so answer directly.
    if not seq1 or not seq2:
        return 1.0 if seq1 == seq2 else 0.0

    # Local import keeps this helper self-contained.
    from Bio.Align import PairwiseAligner

    aligner = PairwiseAligner()
    aligner.mode = 'global'
    score = aligner.score(seq1, seq2)
    # Best achievable score: every residue of the shorter sequence matches.
    max_score = min(len(seq1), len(seq2)) * aligner.match_score
    return score / max_score
def check_continuity(self, chain, missing_char): def check_continuity(self, chain, missing_char):
""" """