From 62484f046e7ee7569a1e8c233aa70e1053fe8448 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 18 Jan 2024 14:44:07 +0800 Subject: [PATCH] add update --- tcr_pmhc_complexes.py | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tcr_pmhc_complexes.py b/tcr_pmhc_complexes.py index 4a484b9..fd88b23 100644 --- a/tcr_pmhc_complexes.py +++ b/tcr_pmhc_complexes.py @@ -95,6 +95,50 @@ class FastaHeaderInfo(BaseModel): return cls(pdb_id=pdb_id, chain_ids=chain_ids, auth_chain_ids=auth_chain_ids, description=description, is_polymeric=is_polymeric) +# 对于标准fasta的兼容尝试处理,废弃 +# class FastaHeaderInfo(BaseModel): +# info: Dict[str, Union[str, List[str], Dict[str, str]]] = {} +# is_polymeric: Optional[str] = None # 新增属性 + +# @classmethod +# def from_header_line(cls, header_line: str): +# header_info = {} +# if '|' in header_line and 'Chain' in header_line: +# # PDB FASTA格式处理 +# parts = header_line.split('|') +# header_info['identifier'] = parts[0].strip('>') +# chain_info = parts[1] if len(parts) > 1 else '' +# header_info['description'] = parts[2] if len(parts) > 2 else '' + +# chain_parts = chain_info.replace('Chain ', '').replace('Chains ', '').split(',') +# chain_ids = [] +# auth_chain_ids = {} + +# for part in chain_parts: +# part = part.strip() +# if '[' in part: +# chain_id, auth_chain_id = part.split('[') +# chain_id = chain_id.strip() +# auth_chain_id = auth_chain_id.strip(']').strip() +# chain_ids.append(chain_id) +# auth_chain_ids[chain_id] = auth_chain_id +# else: +# chain_ids.append(part) +# auth_chain_ids[part] = part + +# header_info['chain_ids'] = chain_ids +# header_info['auth_chain_ids'] = auth_chain_ids +# header_info['is_polymeric'] = "Yes" if len(chain_ids) > 1 else "No" +# else: +# # 处理非PDB或非标准FASTA头信息 +# identifier = header_line[1:].split()[0] # 取第一个空格前的文本作为标识符 +# description = ' '.join(header_line[1:].split()[1:]) # 剩余的文本作为描述 +# header_info['identifier'] = identifier +# header_info['description'] = description +# header_info['is_polymeric'] = "Unknown" + +# return cls(info=header_info) + class FastaSequence(BaseModel): header_info: FastaHeaderInfo