add sequencebaseclass
This commit is contained in:
40
sequence_base.py
Normal file
40
sequence_base.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
'''
|
||||||
|
@file :sequence_base.py
|
||||||
|
@Description: Base classes for sequence information
|
||||||
|
@Date :2024/01/19 10:17:41
|
||||||
|
@Author :lyzeng
|
||||||
|
@Email :pylyzeng@gmail.com
|
||||||
|
@version :1.0
|
||||||
|
'''
|
||||||
|
from Bio.Data import IUPACData
|
||||||
|
from pydantic import BaseModel, Field, FilePath, field_validator
|
||||||
|
|
||||||
|
|
||||||
|
# Use BioPython to import the amino acid abbreviations
|
||||||
|
# Allowed residue alphabet: the characters of IUPACData.protein_letters as a set.
# BaseProteinSequence.validate_amino_acids checks sequences against this set.
AMINO_ACIDS = set(IUPACData.protein_letters)
|
||||||
|
|
||||||
|
class ProteinSequence(BaseModel):
    """A protein chain sequence together with its two chain identifiers.

    ``is_id_consistent`` is a derived, read-only property rather than a stored
    field: it is always computed from ``label_asym_id`` and ``auth_asym_id``.
    """

    # Chain identifier in the "label" scheme
    # (presumably mmCIF label_asym_id -- confirm against the caller).
    label_asym_id: str
    # Chain identifier in the "auth" scheme
    # (presumably mmCIF auth_asym_id -- confirm against the caller).
    auth_asym_id: str
    # Residue sequence of the chain.
    sequence: str

    # NOTE: there used to be an ``is_id_consistent: bool = Field(default=False)``
    # declaration here. A field and a property cannot share one name: the
    # ``@property`` below rebound the same class attribute, so the declared
    # bool field never behaved as a plain bool. Only the derived property is
    # kept, which is what ``display_chain_id`` relies on.
    @property
    def is_id_consistent(self) -> bool:
        """Return True when the label and auth chain identifiers are equal."""
        return self.label_asym_id == self.auth_asym_id

    def display_chain_id(self) -> str:
        """Return the chain id for display.

        Returns:
            ``label_asym_id`` alone when both identifiers agree, otherwise
            ``"<label_asym_id> [auth <auth_asym_id>]"``.
        """
        if not self.is_id_consistent:
            return f"{self.label_asym_id} [auth {self.auth_asym_id}]"
        return self.label_asym_id
|
||||||
|
|
||||||
|
class BaseProteinSequence(BaseModel):
    """Base model holding a protein sequence limited to ``AMINO_ACIDS``."""

    # Residue sequence; every character must be a member of AMINO_ACIDS.
    sequence: str

    # Register the check as a pydantic validator for ``sequence``. Without
    # this decorator the classmethod was never invoked during model
    # construction, so invalid sequences were silently accepted.
    @field_validator('sequence')
    @classmethod
    def validate_amino_acids(cls, sequence: str) -> str:
        """Validate that ``sequence`` only uses letters from ``AMINO_ACIDS``.

        Args:
            sequence: The candidate sequence string.

        Returns:
            The unchanged ``sequence`` when every character is allowed.

        Raises:
            ValueError: If any character is not in ``AMINO_ACIDS``.
        """
        if not set(sequence).issubset(AMINO_ACIDS):
            raise ValueError('Sequence contains invalid amino acids, not conforming to IUPAC standards')
        return sequence
|
||||||
Reference in New Issue
Block a user