57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
from tcr_pmhc_complexes import FastaFile
|
|
from pathlib import Path
|
|
import glob
|
|
from analysis_pdb import PDBAnalyzer
|
|
|
|
def download_fasta_for_pdb(pdb_id, pdb_directory):
    """Fetch the FASTA record that belongs to ``pdb_id``.

    A ``PDBAnalyzer`` is built for ``<pdb_directory>/<pdb_id>.pdb`` and its
    ``download_fasta`` method is called with the same ID.

    Args:
        pdb_id: Identifier used for the ``.pdb`` file name and passed on to
            ``download_fasta``.
        pdb_directory: Directory (``str`` or ``Path``) that holds the
            ``.pdb`` file.

    Returns:
        Whatever ``PDBAnalyzer.download_fasta`` returns.
    """
    # NOTE(review): the module-level script calls ``download_fasta()`` with no
    # argument -- confirm which call form the PDBAnalyzer API expects.
    pdb_path = Path(pdb_directory).joinpath(f"{pdb_id}.pdb")
    return PDBAnalyzer(pdb_file=pdb_path).download_fasta(pdb_id)
|
|
|
|
def test_fasta_file(file_path):
    """Parse a FASTA file and print a summary of every sequence in it.

    For each sequence the PDB ID, chain IDs, author chain IDs, polymer flag,
    description, the first 30 residues and the total sequence length are
    written to stdout.

    Args:
        file_path: Location of the FASTA file (``str`` or ``Path``).

    Returns:
        The ``FastaFile`` object built from ``file_path``.
    """
    fasta_file = FastaFile(file=Path(file_path))

    print(f"File: {file_path}")
    print(f"Number of Sequences: {fasta_file.sequence_num}\n")

    for seq in fasta_file.sequences:
        header_info = seq.header_info
        residues = seq.sequence.sequence

        pdb_id = header_info.pdb_id or 'N/A'
        chain_ids = ', '.join(header_info.chain_ids) if header_info.chain_ids else 'N/A'
        if header_info.auth_chain_ids:
            auth_chain_ids = ', '.join(
                f'{cid} ({aid})' for cid, aid in header_info.auth_chain_ids.items()
            )
        else:
            auth_chain_ids = 'N/A'

        # Compare against None instead of relying on truthiness: a legitimate
        # False value must be reported as False, not as 'Unknown'.
        # NOTE(review): assumes a missing value is represented as None --
        # confirm against the header model.
        is_polymeric = header_info.is_polymeric
        if is_polymeric is None:
            is_polymeric = 'Unknown'

        print(f"PDB ID: {pdb_id}")
        print(f"Chain IDs: {chain_ids}")
        print(f"Author Chain IDs: {auth_chain_ids}")
        print(f"Is Polymeric: {is_polymeric}")
        print(f"Description: {header_info.description}")
        print(f"Sequence: {residues[:30]}...")
        print(f"Sequence Length: {len(residues)}\n")

    return fasta_file
|
|
|
|
# Discover all runner_* directories. glob.glob returns matches in arbitrary
# order, so sort them to keep the processing order reproducible.
runner_directories = sorted(glob.glob('/mnt/mydrive/analysis_pdb-dev/pdb_test1/runner_*'))
fasta_files = []

for directory in runner_directories:
    run_dir = Path(directory)
    # The last four characters of the directory name are used as the stem of
    # the PDB and FASTA files inside that directory.
    file_stem = run_dir.name[-4:]
    pdb_file = run_dir / f'{file_stem}.pdb'
    fasta_file = run_dir / f'{file_stem}.fasta'

    if not (pdb_file.exists() and fasta_file.exists()):
        print(f'File {fasta_file} does not exist, download...')
        ins = PDBAnalyzer(pdb_file)
        ins.download_fasta()

    # Re-check after the download attempt so that a freshly obtained FASTA
    # file is counted too; otherwise the mismatch warning below would fire
    # every time something had to be downloaded.
    if pdb_file.exists() and fasta_file.exists():
        fasta_files.append(fasta_file)

# Check that every runner directory ended up with a usable FASTA file
if len(runner_directories) != len(fasta_files):
    print("Warning: Number of runner directories does not match number of FASTA files.")

# Add the standard FASTA file to the list of files to be tested
# fasta_files.append(Path('/mnt/mydrive/analysis_pdb-dev/test.fasta'))

# Test each FASTA file (currently disabled)
# for file_path in fasta_files:
#     test_fasta_file(file_path)
|
|
|
|
|
|
|
|
|