#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file        :sequence_base.py
@Description :Base classes for sequence information
@Date        :2024/01/19 10:17:41
@Author      :lyzeng
@Email       :pylyzeng@gmail.com
@version     :1.0
'''

from Bio.Data import IUPACData
from pydantic import BaseModel, Field, FilePath, computed_field, field_validator

# One-letter amino-acid codes, taken from BioPython's IUPAC protein alphabet.
AMINO_ACIDS = set(IUPACData.protein_letters)


class ProteinSequence(BaseModel):
    """A protein chain sequence together with its two chain identifiers.

    Attributes:
        label_asym_id: The ``label_asym_id`` chain identifier.
        auth_asym_id: The ``auth_asym_id`` chain identifier.
        sequence: The sequence string for this chain (not validated here).
    """

    label_asym_id: str
    auth_asym_id: str
    sequence: str

    # Exposed as a computed field rather than a stored one: the value is
    # derived from the two identifiers, so it can never disagree with them,
    # and it is still included when the model is serialized.
    @computed_field
    @property
    def is_id_consistent(self) -> bool:
        """Return True when ``label_asym_id`` equals ``auth_asym_id``."""
        return self.label_asym_id == self.auth_asym_id

    def display_chain_id(self) -> str:
        """Return a printable chain identifier.

        Returns:
            ``label_asym_id`` alone when both identifiers match, otherwise
            ``"<label_asym_id> [auth <auth_asym_id>]"``.
        """
        if not self.is_id_consistent:
            return f"{self.label_asym_id} [auth {self.auth_asym_id}]"
        return self.label_asym_id


class BaseProteinSequence(BaseModel):
    """Model holding a protein sequence restricted to ``AMINO_ACIDS``.

    Attributes:
        sequence: The sequence string; every character must be a member of
            ``AMINO_ACIDS``.
    """

    sequence: str

    @field_validator('sequence')
    @classmethod
    def validate_amino_acids(cls, sequence: str) -> str:
        """Reject sequences containing characters outside ``AMINO_ACIDS``.

        Args:
            sequence: The candidate sequence string.

        Returns:
            The sequence unchanged when every character is allowed. An
            empty string passes, since it contains no characters to reject.

        Raises:
            ValueError: If any character is not in ``AMINO_ACIDS``.
        """
        # The comparison is case-sensitive: characters are matched exactly
        # against the letters in AMINO_ACIDS.
        if not set(sequence).issubset(AMINO_ACIDS):
            raise ValueError('Sequence contains invalid amino acids, not conforming to IUPAC standards')
        return sequence