Files
analysis_pdb/test.py
2024-01-18 15:26:06 +08:00

57 lines
2.2 KiB
Python

from tcr_pmhc_complexes import FastaFile
from pathlib import Path
import glob
from analysis_pdb import PDBAnalyzer
def download_fasta_for_pdb(pdb_id, pdb_directory):
    """Fetch the FASTA record for ``pdb_id`` through a ``PDBAnalyzer``.

    An analyzer is built for ``<pdb_directory>/<pdb_id>.pdb`` and its
    ``download_fasta`` method is invoked with the PDB ID.

    Args:
        pdb_id: PDB identifier; also used as the stem of the ``.pdb`` file name.
        pdb_directory: Directory in which ``<pdb_id>.pdb`` is looked up.

    Returns:
        Whatever ``PDBAnalyzer.download_fasta`` returns (presumably the
        downloaded FASTA file or its path -- confirm against ``PDBAnalyzer``).
    """
    pdb_path = Path(pdb_directory).joinpath(f"{pdb_id}.pdb")
    return PDBAnalyzer(pdb_file=pdb_path).download_fasta(pdb_id)
def test_fasta_file(file_path):
    """Parse a FASTA file and print a per-sequence summary to stdout.

    Args:
        file_path: Location of the FASTA file; wrapped in ``Path`` before
            being handed to ``FastaFile``.

    Returns:
        The ``FastaFile`` instance built from ``file_path``.
    """
    parsed = FastaFile(file=Path(file_path))

    print(f"File: {file_path}")
    print(f"Number of Sequences: {parsed.sequence_num}\n")

    for record in parsed.sequences:
        info = record.header_info

        # Every header field falls back to a placeholder when it is falsy.
        pdb_id_text = info.pdb_id or 'N/A'
        print(f"PDB ID: {pdb_id_text}")

        chain_ids = info.chain_ids
        if chain_ids:
            chain_text = ', '.join(chain_ids)
        else:
            chain_text = 'N/A'
        print(f"Chain IDs: {chain_text}")

        auth_chain_ids = info.auth_chain_ids
        if auth_chain_ids:
            # Rendered as "chain (author chain)" pairs.
            auth_text = ', '.join(
                f'{chain} ({author})' for chain, author in auth_chain_ids.items()
            )
        else:
            auth_text = 'N/A'
        print(f"Author Chain IDs: {auth_text}")

        # NOTE(review): a falsy value (including False) is shown as 'Unknown'.
        polymeric_text = info.is_polymeric or 'Unknown'
        print(f"Is Polymeric: {polymeric_text}")

        print(f"Description: {info.description}")

        # Only the first 30 characters of the sequence are shown as a preview.
        residues = record.sequence.sequence
        preview = residues[:30]
        print(f"Sequence: {preview}...")
        print(f"Sequence Length: {len(residues)}\n")

    return parsed
# Discover all runner_* directories
runner_directories = glob.glob('/mnt/mydrive/analysis_pdb-dev/pdb_test1/runner_*')

# FASTA files found (or freshly downloaded) for the discovered directories.
fasta_files = []
for directory in runner_directories:
    # The last four characters of the directory path are used as the PDB ID,
    # i.e. directories are expected to be named runner_<4-char id>.
    pdb_id = directory[-4:]
    pdb_file = Path(directory).joinpath(f'{pdb_id}.pdb')
    fasta_file = Path(directory).joinpath(f'{pdb_id}.fasta')

    if not (pdb_file.exists() and fasta_file.exists()):
        print(f'File {fasta_file} does not exist, download...')
        # NOTE(review): download_fasta_for_pdb() in this file passes the PDB ID
        # to download_fasta(); this call passes nothing -- confirm which
        # signature PDBAnalyzer actually exposes.
        ins = PDBAnalyzer(pdb_file)
        ins.download_fasta()

    # Record the FASTA file whenever it is on disk, including right after a
    # download. Previously a downloaded file was never added, so the count
    # check below warned on every run that had to download something.
    # Presumably download_fasta() writes to <directory>/<id>.fasta; if it does
    # not, the exists() guard simply leaves the file out of the list.
    if fasta_file.exists():
        fasta_files.append(fasta_file)

# Check that every runner directory ended up with a FASTA file
if len(runner_directories) != len(fasta_files):
    print("Warning: Number of runner directories does not match number of PDB files.")
# Add the standard FASTA file to the list of files to be tested
# fasta_files.append(Path('/mnt/mydrive/analysis_pdb-dev/test.fasta'))
# # Test each FASTA file
# for file_path in fasta_files:
# test_fasta_file(file_path)