#!/usr/bin/env python3
"""Download test data from BPPRC/NCBI."""

import argparse
import logging
import os
from pathlib import Path

from Bio import Entrez, SeqIO

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Placeholder contact address; main() replaces it with the required --email value.
Entrez.email = "your_email@example.com"

# Maps the output file stem to the nucleotide accession passed to Entrez.efetch.
TEST_GENOMES = {
    'Bacillus_thuringiensis_HD-73': 'NZ_CP004069.1',
    'Bacillus_thuringiensis_YBT-1520': 'NZ_CP003889.1',
    'Bacillus_thuringiensis_BMB171': 'NC_014171.1',
}


def download_genome(accession, output_file):
    """Fetch one nucleotide record as FASTA text and write it to *output_file*.

    The record is first written to a sibling ``<name>.part`` file and only
    moved onto *output_file* once the download has completed, so a failed or
    interrupted download never leaves a truncated file at the final path.

    Args:
        accession: Identifier passed as ``id`` to ``Entrez.efetch``
            (``db="nucleotide"``).
        output_file: Destination path (``str`` or ``Path``).

    Returns:
        True if the record was downloaded and written, False if any error
        occurred (the error is logged, not raised).
    """
    target = Path(output_file)
    partial = target.with_name(target.name + ".part")
    try:
        logger.info(f"Downloading {accession}...")
        handle = Entrez.efetch(
            db="nucleotide",
            id=accession,
            rettype="fasta",
            retmode="text",
        )
        try:
            with open(partial, 'w', encoding="utf-8") as f:
                f.write(handle.read())
        finally:
            # Release the connection even when read() or the write fails.
            handle.close()
        partial.replace(target)
        logger.info(f"✓ Downloaded: {output_file}")
        return True
    except Exception as e:
        # Deliberately broad: callers rely on a boolean result, not exceptions.
        logger.error(f"✗ Failed: {e}")
        try:
            partial.unlink()
        except OSError:
            # Nothing to clean up (or cleanup itself failed); best effort only.
            pass
        return False


def main():
    """Download every genome in TEST_GENOMES into the output directory.

    Command-line options:
        --output-dir: Directory for the ``<name>.fna`` files; created if
            missing (default ``tests/test_data/genomes``).
        --email: Required; assigned to ``Entrez.email`` before any request.

    Returns:
        0 if every genome was downloaded, 1 if at least one download failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--output-dir', default='tests/test_data/genomes')
    parser.add_argument('--email', required=True)
    args = parser.parse_args()

    Entrez.email = args.email

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    failed = []
    for name, accession in TEST_GENOMES.items():
        output_file = output_dir / f"{name}.fna"
        if not download_genome(accession, output_file):
            failed.append(accession)

    if failed:
        logger.error(
            "Failed to download %d of %d genomes: %s",
            len(failed),
            len(TEST_GENOMES),
            ", ".join(failed),
        )
        return 1
    return 0


if __name__ == '__main__':
    # Propagate failures to the shell so CI / setup scripts can detect them.
    raise SystemExit(main())