Files
bttoxin-pipeline/tools/crispr_cas_analysis/scripts/detect_crispr.py
zly c75c85c53b Refactor: Unified pipeline execution, simplified UI, and fixed Docker config
- Backend: Refactored tasks.py to directly invoke run_single_fna_pipeline.py for consistency.
- Backend: Changed output format to ZIP and added auto-cleanup of intermediate files.
- Backend: Fixed language parameter passing in API and tasks.
- Frontend: Removed CRISPR Fusion UI elements from Submit and Monitor views.
- Frontend: Implemented simulated progress bar for better UX.
- Frontend: Restored One-click load button and added result file structure documentation.
- Docker: Fixed critical Restarting loop by removing incorrect image directive in docker-compose.yml.
- Docker: Optimized Dockerfile to correct .pixi environment path issues and prevent accidental deletion of frontend assets.
2026-01-20 20:25:25 +08:00

139 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""
CRISPR-Cas Detection Wrapper
Wrapper for CRISPRCasFinder or similar tools to detect CRISPR arrays and Cas genes.
"""
import argparse
import json
import logging
import shutil
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
# Root logging setup: records at INFO and above are emitted, each one carrying
# a timestamp, the logger name and the severity level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-scoped logger used by the functions in this script.
logger = logging.getLogger(__name__)
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the CRISPR-Cas detection wrapper.

    Args:
        argv: Argument strings to parse. When ``None`` (the default) argparse
            reads ``sys.argv[1:]``, which is the original behaviour; passing
            an explicit list lets callers and tests drive the parser without
            touching the process command line.

    Returns:
        The parsed namespace with ``input`` and ``output`` (``Path``),
        ``tool_path`` (``Path`` or ``None``) and ``mock`` (``bool``).
    """
    parser = argparse.ArgumentParser(description="Detect CRISPR arrays and Cas genes in genome")
    parser.add_argument("--input", "-i", type=Path, required=True, help="Input genome file (.fna)")
    parser.add_argument("--output", "-o", type=Path, required=True, help="Output JSON results file")
    parser.add_argument("--tool-path", type=Path, default=None, help="Path to CRISPRCasFinder.pl")
    parser.add_argument("--mock", action="store_true", help="Use mock data (for testing without external tools)")
    return parser.parse_args(argv)
def check_dependencies(tool_path: Optional[Path] = None) -> bool:
    """Report whether a CRISPRCasFinder script can be located.

    Args:
        tool_path: Explicit location of ``CRISPRCasFinder.pl``. ``None`` means
            only the PATH lookup is performed.

    Returns:
        True when ``tool_path`` points at an existing regular file, or when
        ``CRISPRCasFinder.pl`` is found on PATH; False otherwise.
    """
    # A directory is not a runnable script, so require a regular file rather
    # than mere existence of the path.
    if tool_path is not None and Path(tool_path).is_file():
        return True
    # A missing or unusable explicit path still falls back to the PATH lookup.
    return shutil.which("CRISPRCasFinder.pl") is not None
def generate_mock_results(genome_file: Path) -> Dict[str, Any]:
    """Build a fixed, fabricated CRISPR/Cas report for testing.

    The genome file is never read; only its name is used. The file stem
    becomes ``strain_id`` and the full name appears in the log message.

    Args:
        genome_file: Path of the genome the mock report is attributed to.

    Returns:
        A dict with the keys ``strain_id``, ``cas_systems``, ``arrays``,
        ``summary`` and ``metadata``, holding hard-coded example values.
    """
    logger.info(f"Generating mock CRISPR results for {genome_file.name}")

    shared_repeat = "GTTTTAGAGCTATGCTGTTTTGAATGGTCCCAAAAC"

    def numbered(sequences):
        # Spacer positions are 1-based and follow list order.
        return [
            {"sequence": seq, "position": idx}
            for idx, seq in enumerate(sequences, start=1)
        ]

    cas_systems = [
        {
            "type": "I-E",
            "subtype": "I-E",
            "position": "contig_1:15000-25000",
            "genes": ["cas1", "cas2", "cas3", "casA", "casB", "casC", "casD", "casE"],
        },
    ]

    first_array = {
        "id": "CRISPR_1",
        "contig": "contig_1",
        "start": 12345,
        "end": 12678,
        "consensus_repeat": shared_repeat,
        "num_spacers": 5,
        "spacers": numbered([
            "ATGCGTCGACATGCGTCGACATGCGTCGAC",
            "CGTAGCTAGCCGTAGCTAGCCGTAGCTAGC",
            "TGCATGCATGTGCATGCATGTGCATGCATG",
            "GCTAGCTAGCGCTAGCTAGCGCTAGCTAGC",
            "AAAAATTTTTAAAAATTTTTAAAAATTTTT",
        ]),
    }
    # NOTE(review): this array declares 8 spacers but lists only one sequence;
    # the summary total below counts the declared figure.
    second_array = {
        "id": "CRISPR_2",
        "contig": "contig_2",
        "start": 50000,
        "end": 50500,
        "consensus_repeat": shared_repeat,
        "num_spacers": 8,
        "spacers": numbered(["CCCGGGAAACCCGGGAAACCCGGGAAA"]),
    }
    arrays = [first_array, second_array]

    # Derived from the literals above: 2 arrays, 5 + 8 = 13 spacers, one type.
    summary = {
        "has_cas": bool(cas_systems),
        "has_crispr": bool(arrays),
        "num_arrays": len(arrays),
        "num_spacers": sum(entry["num_spacers"] for entry in arrays),
        "cas_types": [system["type"] for system in cas_systems],
    }

    metadata = {
        "tool": "CRISPRCasFinder",
        "version": "Mock-v1.0",
        "date": "2025-01-14",
    }

    return {
        "strain_id": genome_file.stem,
        "cas_systems": cas_systems,
        "arrays": arrays,
        "summary": summary,
        "metadata": metadata,
    }
def run_crisprcasfinder(input_file: Path, output_file: Path, tool_path: Path = None):
    """Placeholder for invoking the real CRISPRCasFinder tool.

    No external process is launched and none of the arguments are used yet;
    the function unconditionally raises.

    Raises:
        NotImplementedError: Always, until real tool integration exists.
    """
    # TODO: replace with the actual subprocess call to CRISPRCasFinder.pl.
    not_ready = NotImplementedError("Real tool integration not yet implemented. Use --mock flag.")
    raise not_ready
def main():
    """Command-line entry point: write a CRISPR/Cas JSON report for one genome.

    Flow:
      1. Parse the command line and require the input to be an existing file.
      2. With ``--mock``, or when ``check_dependencies`` reports that
         CRISPRCasFinder is unavailable, generate mock results and write them
         to the output path as indented JSON.
      3. Otherwise hand input, output and tool path to
         ``run_crisprcasfinder`` and return without writing anything here.

    Exits with status 1 when the input is not an existing file or when any
    step raises an exception; the error is logged first.
    """
    args = parse_args()

    # is_file() rather than exists(): a directory passed as --input is not a
    # genome file and must be rejected too.
    if not args.input.is_file():
        logger.error(f"Input file not found: {args.input}")
        sys.exit(1)

    try:
        # Done inside the try so that a failure to create the output directory
        # is reported through the same logged error path as other failures.
        args.output.parent.mkdir(parents=True, exist_ok=True)

        if args.mock:
            results = generate_mock_results(args.input)
        elif not check_dependencies(args.tool_path):
            logger.warning("CRISPRCasFinder not found. Falling back to mock data.")
            results = generate_mock_results(args.input)
        else:
            # The real-tool path receives the output location itself, so
            # nothing is written by this function afterwards.
            run_crisprcasfinder(args.input, args.output, args.tool_path)
            return

        # Explicit encoding keeps the output independent of the locale default.
        with open(args.output, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)
        logger.info(f"Results written to {args.output}")
    except Exception as e:
        # Top-level boundary of the CLI: log the failure and signal it through
        # the exit status instead of an uncaught traceback.
        logger.error(f"Error executing CRISPR detection: {e}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()