use sequence_base.py

This commit is contained in:
root
2024-01-19 16:30:55 +08:00
parent c27b484660
commit 426bbddf95

View File

@@ -12,34 +12,10 @@ from Bio.Data import IUPACData
from pydantic import BaseModel, Field, FilePath, field_validator
from typing import Optional, Dict, List, Any, Union
from pathlib import Path
from sequence_base import ProteinSequence, BaseProteinSequence
# Set of amino-acid letter codes taken from BioPython's IUPACData.protein_letters
AMINO_ACIDS = set(IUPACData.protein_letters)
class BaseProteinSequence(BaseModel):
    """Pydantic model holding a single protein sequence string.

    The ``sequence`` field is checked against ``AMINO_ACIDS`` whenever the
    model is validated.
    """

    sequence: str

    # Register the check with pydantic so it runs on model validation;
    # a bare classmethod is never invoked by the model on its own.
    @field_validator('sequence')
    @classmethod
    def validate_amino_acids(cls, sequence: str) -> str:
        """Return ``sequence`` unchanged if all its characters are in ``AMINO_ACIDS``.

        An empty string passes, since the empty set is a subset of any set.

        Raises:
            ValueError: If ``sequence`` contains a character that is not in
                ``AMINO_ACIDS``.
        """
        if not set(sequence).issubset(AMINO_ACIDS):
            raise ValueError('Sequence contains invalid amino acids, not conforming to IUPAC standards')
        return sequence
class ProteinSequence(BaseModel):
    """One protein chain: its two chain identifiers and its sequence."""

    label_asym_id: str
    auth_asym_id: str
    sequence: str

    # ``is_id_consistent`` is derived from the two IDs, so it is exposed only
    # as a read-only property. Declaring a model field with the same name as
    # well lets the property rebind that name in the class body, so the
    # declared default is lost and the stored field conflicts with the
    # computed value.
    @property
    def is_id_consistent(self) -> bool:
        """Return True when ``label_asym_id`` equals ``auth_asym_id``."""
        return self.label_asym_id == self.auth_asym_id

    def display_chain_id(self) -> str:
        """Return the chain identifier formatted for display.

        When the two IDs differ, the result is ``"<label> [auth <auth>]"``;
        otherwise it is just ``label_asym_id``.
        """
        if not self.is_id_consistent:
            return f"{self.label_asym_id} [auth {self.auth_asym_id}]"
        return self.label_asym_id
class ProteinComplex(BaseModel):
pdb_id: str
tcr_alpha: Optional[ProteinSequence] = None