40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
#!/usr/bin/env python
|
|
# -*- encoding: utf-8 -*-
|
|
'''
|
|
@file :sequence_base.py
|
|
@Description :Base classes for sequence information
|
|
@Date :2024/01/19 10:17:41
|
|
@Author :lyzeng
|
|
@Email :pylyzeng@gmail.com
|
|
@version :1.0
|
|
'''
|
|
from Bio.Data import IUPACData
from pydantic import BaseModel, Field, FilePath, computed_field, field_validator
|
|
|
|
|
|
# Amino-acid one-letter codes, taken from Biopython
|
|
# Kept as a set so a sequence's characters can be checked with a subset test.
AMINO_ACIDS = set(IUPACData.protein_letters)
|
|
|
|
class ProteinSequence(BaseModel):
    """Sequence of a single chain together with its two chain identifiers.

    Attributes:
        label_asym_id: First chain identifier.
        auth_asym_id: Second chain identifier.
            NOTE(review): the names suggest the mmCIF "label" and "author"
            chain IDs -- confirm against the code that builds these objects.
        sequence: The chain's sequence string. This model does not validate
            its content.
    """

    label_asym_id: str
    auth_asym_id: str
    sequence: str

    # Exposed as a computed field instead of a stored field: the value is
    # always derived from the two IDs (it can never go stale or be set to a
    # contradictory value), and it is still included in model_dump() output.
    @computed_field
    @property
    def is_id_consistent(self) -> bool:
        """Return True when ``label_asym_id`` equals ``auth_asym_id``."""
        return self.label_asym_id == self.auth_asym_id

    def display_chain_id(self) -> str:
        """Return a printable chain identifier.

        Returns:
            ``label_asym_id`` alone when both identifiers are equal,
            otherwise ``"<label_asym_id> [auth <auth_asym_id>]"``.
        """
        if self.is_id_consistent:
            return self.label_asym_id
        return f"{self.label_asym_id} [auth {self.auth_asym_id}]"
|
|
|
|
class BaseProteinSequence(BaseModel):
    """Base model for a protein sequence whose letters must be in ``AMINO_ACIDS``.

    Attributes:
        sequence: The sequence string; checked by ``validate_amino_acids``
            whenever the model is validated.
    """

    sequence: str

    # Registered as a field validator so the check runs on model construction;
    # as a plain classmethod it would only run when called explicitly.
    @field_validator('sequence')
    @classmethod
    def validate_amino_acids(cls, sequence: str) -> str:
        """Reject sequences containing characters outside ``AMINO_ACIDS``.

        The comparison is exact, so any character that is not a member of
        ``AMINO_ACIDS`` fails. An empty string passes, because the empty set
        is a subset of every set.

        Args:
            sequence: Candidate sequence string.

        Returns:
            The unchanged ``sequence`` when every character is allowed.

        Raises:
            ValueError: If at least one character is not in ``AMINO_ACIDS``.
        """
        if not set(sequence).issubset(AMINO_ACIDS):
            raise ValueError('Sequence contains invalid amino acids, not conforming to IUPAC standards')
        return sequence