Initial commit: BtToxin Pipeline project structure
This commit is contained in:
56
scripts/download_bpprc_data.py
Executable file
56
scripts/download_bpprc_data.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
"""Download test data from BPPRC/NCBI."""
|
||||
import argparse
import logging
import os
import sys
from pathlib import Path

from Bio import Entrez, SeqIO
|
||||
|
||||
# Configure the root logger at INFO level so progress messages are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Placeholder contact address; main() overwrites it with the --email argument.
Entrez.email = "your_email@example.com"
|
||||
|
||||
# Genomes fetched as test data.
# Key: stem of the output file name; value: accession passed to Entrez.efetch
# against the "nucleotide" database.
TEST_GENOMES = {
    'Bacillus_thuringiensis_HD-73': 'NZ_CP004069.1',
    'Bacillus_thuringiensis_YBT-1520': 'NZ_CP003889.1',
    'Bacillus_thuringiensis_BMB171': 'NC_014171.1',
}
|
||||
|
||||
def download_genome(accession, output_file):
    """Download one nucleotide record as FASTA text and save it to a file.

    Args:
        accession: Identifier passed to ``Entrez.efetch`` as ``id`` against
            the ``nucleotide`` database.
        output_file: Path the FASTA text is written to (overwritten if it
            already exists).

    Returns:
        True if the record was fetched and written, False otherwise. Errors
        are logged rather than raised so the caller can continue with the
        remaining downloads.
    """
    try:
        logger.info(f"Downloading {accession}...")
        handle = Entrez.efetch(
            db="nucleotide",
            id=accession,
            rettype="fasta",
            retmode="text",
        )
        # Read the whole response before touching the output file so that a
        # failed transfer never leaves a truncated file behind, and always
        # release the connection even when the read raises.
        try:
            data = handle.read()
        finally:
            handle.close()

        # An empty body would otherwise be saved and reported as a success.
        if not data.strip():
            raise ValueError(f"empty response for {accession}")

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(data)

        logger.info(f"✓ Downloaded: {output_file}")
        return True
    except Exception as e:
        # Deliberately broad: network, HTTP and file-system errors are all
        # reported the same way and turned into a False result.
        logger.error(f"✗ Failed: {e}")
        return False
|
||||
|
||||
def main():
    """Download every genome listed in ``TEST_GENOMES``.

    Command-line options:
        --output-dir: Directory that receives one ``<name>.fna`` file per
            genome; created if missing (default ``tests/test_data/genomes``).
        --email: Address assigned to ``Entrez.email`` (required).

    All downloads are attempted even if some fail. If any of them fails, the
    failed names are logged and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Download test genomes from NCBI as FASTA files."
    )
    parser.add_argument(
        '--output-dir',
        default='tests/test_data/genomes',
        help="directory for the downloaded .fna files (default: %(default)s)",
    )
    parser.add_argument(
        '--email',
        required=True,
        help="contact e-mail address assigned to Entrez.email",
    )
    args = parser.parse_args()

    Entrez.email = args.email
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect failures instead of stopping at the first one, so a single bad
    # accession does not prevent the other genomes from being fetched.
    failed = []
    for name, accession in TEST_GENOMES.items():
        output_file = output_dir / f"{name}.fna"
        if not download_genome(accession, output_file):
            failed.append(name)

    if failed:
        logger.error(
            "%d of %d downloads failed: %s",
            len(failed),
            len(TEST_GENOMES),
            ", ".join(failed),
        )
        # Non-zero exit status lets calling scripts and CI detect the failure.
        sys.exit(1)
|
||||
|
||||
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user