57 lines
1.5 KiB
Python
Executable File
57 lines
1.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Download test data from BPPRC/NCBI."""
|
|
import os
|
|
import argparse
|
|
from pathlib import Path
|
|
from Bio import Entrez, SeqIO
|
|
import logging
|
|
|
|
# Emit INFO-level (and above) messages through the root logger's default
# handler, and give this module its own named logger.
logging.basicConfig(
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
|
|
|
# Placeholder contact address for Bio.Entrez; main() replaces it with the
# value of the required --email argument before any request is made.
Entrez.email = "your_email@example.com"
|
|
|
|
# Genomes fetched by this script: strain label (used as the output file
# stem) -> NCBI nucleotide accession requested from Entrez.
TEST_GENOMES = {
    "Bacillus_thuringiensis_HD-73": "NZ_CP004069.1",
    "Bacillus_thuringiensis_YBT-1520": "NZ_CP003889.1",
    "Bacillus_thuringiensis_BMB171": "NC_014171.1",
}
|
|
|
|
def download_genome(accession, output_file):
    """Download one nucleotide record from NCBI and save it as FASTA.

    Args:
        accession: Identifier passed as ``id`` to ``Entrez.efetch`` against
            the ``nucleotide`` database.
        output_file: Path (``str`` or path-like) the FASTA text is written
            to. An existing file is overwritten, but only after the record
            has been fetched successfully.

    Returns:
        ``True`` when the record was fetched and written; ``False`` when the
        response was empty or any step raised. Errors are logged rather than
        propagated so the caller can continue with the next record.
    """
    try:
        logger.info(f"Downloading {accession}...")
        handle = Entrez.efetch(
            db="nucleotide",
            id=accession,
            rettype="fasta",
            retmode="text",
        )
        # Read the complete response before opening the output path, so a
        # failed fetch never truncates or creates the destination file, and
        # release the network handle even when read() raises.
        try:
            fasta_text = handle.read()
        finally:
            handle.close()

        # An empty body is not a usable FASTA file; report it as a failure
        # instead of writing a zero-length file and claiming success.
        if not fasta_text.strip():
            logger.error(f"✗ Failed: empty response for {accession}")
            return False

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(fasta_text)
    except Exception as e:
        # Deliberately broad: this is a best-effort batch download and any
        # network, parsing, or filesystem error should only fail this record.
        logger.error(f"✗ Failed: {e}")
        return False
    else:
        logger.info(f"✓ Downloaded: {output_file}")
        return True
|
|
|
|
def main():
    """Parse command-line options and download every genome in TEST_GENOMES.

    Options:
        --output-dir: Directory that receives the ``<name>.fna`` files
            (default ``tests/test_data/genomes``); created if missing.
        --email: Required; assigned to ``Entrez.email`` before any request.

    Every genome is attempted even if an earlier one fails.

    Raises:
        SystemExit: With status 1 when at least one download failed, so that
            calling scripts and CI jobs can detect an incomplete data set.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output-dir', default='tests/test_data/genomes')
    parser.add_argument('--email', required=True)
    args = parser.parse_args()

    Entrez.email = args.email
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    failed = []
    for name, accession in TEST_GENOMES.items():
        output_file = output_dir / f"{name}.fna"
        # download_genome() reports failure via its return value; collect the
        # failures instead of discarding them.
        if not download_genome(accession, output_file):
            failed.append(accession)

    if failed:
        logger.error(
            f"{len(failed)} of {len(TEST_GENOMES)} downloads failed: "
            f"{', '.join(failed)}"
        )
        raise SystemExit(1)
|
|
|
|
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
|