"""Ad-hoc check script for FASTA files that live next to PDB files.

Running (or importing) this module scans the ``runner_*`` directories under
the hard-coded test root, downloads FASTA files that are missing, and collects
the paths of the available FASTA files in ``fasta_files``.
"""
import glob
from pathlib import Path

from analysis_pdb import PDBAnalyzer
from tcr_pmhc_complexes import FastaFile


def download_fasta_for_pdb(pdb_id, pdb_directory):
    """Download the FASTA file for a given PDB ID.

    Args:
        pdb_id: PDB identifier; also used as the stem of the ``.pdb`` file
            name inside ``pdb_directory``.
        pdb_directory: Directory that holds ``<pdb_id>.pdb``.

    Returns:
        Whatever ``PDBAnalyzer.download_fasta`` returns (presumably the path
        of the downloaded FASTA file -- TODO confirm against PDBAnalyzer).
    """
    pdb_analyzer = PDBAnalyzer(pdb_file=Path(pdb_directory) / f"{pdb_id}.pdb")
    # NOTE(review): the script section below calls ``download_fasta()``
    # without an argument; confirm which call form PDBAnalyzer expects.
    return pdb_analyzer.download_fasta(pdb_id)


def test_fasta_file(file_path):
    """Parse a FASTA file and print a summary of every sequence in it.

    Args:
        file_path: Path (``str`` or ``Path``) of the FASTA file to parse.

    Returns:
        The ``FastaFile`` instance built from ``file_path``.
    """
    fasta_file = FastaFile(file=Path(file_path))

    print(f"File: {file_path}")
    print(f"Number of Sequences: {fasta_file.sequence_num}\n")

    for seq in fasta_file.sequences:
        header_info = seq.header_info
        residues = seq.sequence.sequence

        # Falsy header fields are replaced by a placeholder.
        pdb_id = header_info.pdb_id or 'N/A'
        if header_info.chain_ids:
            chain_ids = ', '.join(header_info.chain_ids)
        else:
            chain_ids = 'N/A'
        if header_info.auth_chain_ids:
            auth_chain_ids = ', '.join(
                f'{cid} ({aid})'
                for cid, aid in header_info.auth_chain_ids.items()
            )
        else:
            auth_chain_ids = 'N/A'
        # NOTE(review): a value of False is also reported as 'Unknown' here;
        # confirm whether ``is_polymeric`` can be False and should be shown.
        is_polymeric = header_info.is_polymeric or 'Unknown'

        print(f"PDB ID: {pdb_id}")
        print(f"Chain IDs: {chain_ids}")
        print(f"Author Chain IDs: {auth_chain_ids}")
        print(f"Is Polymeric: {is_polymeric}")
        print(f"Description: {header_info.description}")
        # Only the first 30 residues are shown to keep the output short.
        print(f"Sequence: {residues[:30]}...")
        print(f"Sequence Length: {len(residues)}\n")

    return fasta_file


# Discover all runner_* directories. glob returns matches in arbitrary order,
# so sort them to make the run reproducible.
runner_directories = sorted(
    glob.glob('/mnt/mydrive/analysis_pdb-dev/pdb_test1/runner_*')
)

fasta_files = []
for directory in runner_directories:
    runner_dir = Path(directory)
    # The last four characters of the directory name are used as the PDB ID.
    pdb_id = runner_dir.name[-4:]
    pdb_file = runner_dir / f'{pdb_id}.pdb'
    fasta_file = runner_dir / f'{pdb_id}.fasta'

    if pdb_file.exists() and fasta_file.exists():
        fasta_files.append(fasta_file)
        continue

    print(f'File {fasta_file} does not exist, download...')
    ins = PDBAnalyzer(pdb_file)
    ins.download_fasta()
    # Register the file if the download produced it; otherwise the count
    # check below would warn even after a successful download.
    if pdb_file.exists() and fasta_file.exists():
        fasta_files.append(fasta_file)

# Check that every runner directory contributed a FASTA file.
if len(runner_directories) != len(fasta_files):
    print("Warning: Number of runner directories does not match number of FASTA files.")

# Add the standard FASTA file to the list of files to be tested
fasta_files.append(Path('/mnt/mydrive/analysis_pdb-dev/test.fasta'))

# Test each FASTA file (currently disabled)
# for file_path in fasta_files:
#     test_fasta_file(file_path)