add sequencebaseclass

This commit is contained in:
2024-01-19 14:06:00 +08:00
parent ae91117634
commit 16ec685ca9

40
sequence_base.py Normal file
View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :sequence_base.py
@Description: : Base class for sequence information
@Date :2024/01/19 10:17:41
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
from Bio.Data import IUPACData
from pydantic import BaseModel, Field, FilePath, computed_field, field_validator
# One-letter amino-acid codes, taken from Biopython's IUPAC data tables.
# Kept as a set so membership/subset checks against a sequence are cheap.
AMINO_ACIDS = {*IUPACData.protein_letters}
class ProteinSequence(BaseModel):
    """Sequence of one protein chain together with its two chain identifiers.

    Attributes:
        label_asym_id: First chain identifier (presumably the mmCIF
            ``label_asym_id`` -- confirm against the producer of this data).
        auth_asym_id: Second chain identifier (presumably the author-assigned
            mmCIF ``auth_asym_id`` -- confirm against the producer).
        sequence: The chain's sequence string; it is not validated here.
    """

    label_asym_id: str
    auth_asym_id: str
    sequence: str

    # ``is_id_consistent`` is a derived value, so it is exposed as a computed
    # field instead of a stored field. Declaring a stored field and a property
    # under the same name makes the property shadow the field declaration.
    @computed_field
    @property
    def is_id_consistent(self) -> bool:
        """Return True when ``label_asym_id`` equals ``auth_asym_id``."""
        return self.label_asym_id == self.auth_asym_id

    def display_chain_id(self) -> str:
        """Return a printable chain identifier.

        Returns:
            ``label_asym_id`` alone when both identifiers match, otherwise
            ``"<label_asym_id> [auth <auth_asym_id>]"``.
        """
        if not self.is_id_consistent:
            return f"{self.label_asym_id} [auth {self.auth_asym_id}]"
        return self.label_asym_id
class BaseProteinSequence(BaseModel):
    """Base model for a protein sequence limited to the ``AMINO_ACIDS`` alphabet.

    Attributes:
        sequence: The sequence string; checked by ``validate_amino_acids``
            whenever the model is validated.
    """

    sequence: str

    # Registering the method with ``field_validator`` is what makes pydantic
    # call it; as a bare classmethod it was never run during validation.
    @field_validator('sequence')
    @classmethod
    def validate_amino_acids(cls, sequence: str) -> str:
        """Reject a sequence containing characters outside ``AMINO_ACIDS``.

        Membership is tested per exact character, so letter case matters.
        An empty string passes because it contains no characters at all.

        Args:
            sequence: Candidate sequence string.

        Returns:
            The unchanged ``sequence`` when every character is allowed.

        Raises:
            ValueError: If any character is not in ``AMINO_ACIDS``.
        """
        if not set(sequence).issubset(AMINO_ACIDS):
            raise ValueError('Sequence contains invalid amino acids, not conforming to IUPAC standards')
        return sequence