- Backend: Refactored tasks.py to directly invoke run_single_fna_pipeline.py for consistency. - Backend: Changed output format to ZIP and added auto-cleanup of intermediate files. - Backend: Fixed language parameter passing in API and tasks. - Frontend: Removed CRISPR Fusion UI elements from Submit and Monitor views. - Frontend: Implemented simulated progress bar for better UX. - Frontend: Restored One-click load button and added result file structure documentation. - Docker: Fixed critical Restarting loop by removing incorrect image directive in docker-compose.yml. - Docker: Optimized Dockerfile to correct .pixi environment path issues and prevent accidental deletion of frontend assets.
139 lines
4.6 KiB
Python
139 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CRISPR-Cas Detection Wrapper
|
|
Wrapper for CRISPRCasFinder or similar tools to detect CRISPR arrays and Cas genes.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any
|
|
|
|
# Configure logging
|
|
# Root logging setup for the script: INFO and above, with timestamp, logger
# name and level prepended to every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the CRISPR-Cas detection wrapper.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When left as ``None`` argparse reads
            ``sys.argv[1:]``, which is the behaviour existing callers get.

    Returns:
        Namespace with ``input`` and ``output`` (both ``Path``, required),
        ``tool_path`` (``Path`` or ``None``) and ``mock`` (``bool``).
    """
    parser = argparse.ArgumentParser(
        description="Detect CRISPR arrays and Cas genes in genome"
    )
    parser.add_argument(
        "--input", "-i", type=Path, required=True,
        help="Input genome file (.fna)",
    )
    parser.add_argument(
        "--output", "-o", type=Path, required=True,
        help="Output JSON results file",
    )
    parser.add_argument(
        "--tool-path", type=Path, default=None,
        help="Path to CRISPRCasFinder.pl",
    )
    parser.add_argument(
        "--mock", action="store_true",
        help="Use mock data (for testing without external tools)",
    )
    # Passing argv through lets callers and tests drive the parser without
    # touching the global sys.argv.
    return parser.parse_args(argv)
|
|
|
|
def check_dependencies(tool_path: Path = None) -> bool:
    """Report whether a CRISPRCasFinder executable can be located.

    An explicitly supplied ``tool_path`` that exists on disk is accepted
    immediately. Otherwise (no path given, or the given path is missing)
    the answer depends on whether ``CRISPRCasFinder.pl`` is found on PATH.
    """
    if not tool_path or not tool_path.exists():
        # No usable explicit location: fall back to a PATH lookup.
        return shutil.which("CRISPRCasFinder.pl") is not None
    return True
|
|
|
|
def generate_mock_results(genome_file: Path) -> Dict[str, Any]:
    """Build a canned CRISPR-Cas result set for use without the real tool.

    Only ``strain_id`` depends on the input: it is the file name of
    ``genome_file`` without its final suffix. Every other value is a fixed
    placeholder.

    Returns:
        Dict with the keys ``strain_id``, ``cas_systems``, ``arrays``,
        ``summary`` and ``metadata``.
    """
    logger.info(f"Generating mock CRISPR results for {genome_file.name}")

    # Both mock arrays share the same consensus repeat.
    shared_repeat = "GTTTTAGAGCTATGCTGTTTTGAATGGTCCCAAAAC"

    first_array_sequences = [
        "ATGCGTCGACATGCGTCGACATGCGTCGAC",
        "CGTAGCTAGCCGTAGCTAGCCGTAGCTAGC",
        "TGCATGCATGTGCATGCATGTGCATGCATG",
        "GCTAGCTAGCGCTAGCTAGCGCTAGCTAGC",
        "AAAAATTTTTAAAAATTTTTAAAAATTTTT",
    ]
    # Spacer positions are 1-based and follow the listing order.
    first_array_spacers = [
        {"sequence": seq, "position": idx}
        for idx, seq in enumerate(first_array_sequences, start=1)
    ]

    cas_system = {
        "type": "I-E",
        "subtype": "I-E",
        "position": "contig_1:15000-25000",
        "genes": ["cas1", "cas2", "cas3", "casA", "casB", "casC", "casD", "casE"],
    }

    array_one = {
        "id": "CRISPR_1",
        "contig": "contig_1",
        "start": 12345,
        "end": 12678,
        "consensus_repeat": shared_repeat,
        "num_spacers": 5,
        "spacers": first_array_spacers,
    }

    # NOTE(review): this array declares 8 spacers but lists only one, and the
    # summary total below (13) is the sum of the declared counts rather than
    # of the listed spacers. Kept exactly as in the existing fixture.
    array_two = {
        "id": "CRISPR_2",
        "contig": "contig_2",
        "start": 50000,
        "end": 50500,
        "consensus_repeat": shared_repeat,
        "num_spacers": 8,
        "spacers": [
            {"sequence": "CCCGGGAAACCCGGGAAACCCGGGAAA", "position": 1},
        ],
    }

    summary = {
        "has_cas": True,
        "has_crispr": True,
        "num_arrays": 2,
        "num_spacers": 13,
        "cas_types": ["I-E"],
    }

    # The "Mock-v1.0" version string is what marks the payload as mock data.
    metadata = {
        "tool": "CRISPRCasFinder",
        "version": "Mock-v1.0",
        "date": "2025-01-14",
    }

    return {
        "strain_id": genome_file.stem,
        "cas_systems": [cas_system],
        "arrays": [array_one, array_two],
        "summary": summary,
        "metadata": metadata,
    }
|
|
|
|
def run_crisprcasfinder(input_file: Path, output_file: Path, tool_path: Path = None):
    """Placeholder for invoking the real CRISPRCasFinder tool.

    The subprocess call to CRISPRCasFinder.pl is not written yet, so this
    function ignores its arguments and always fails.

    Raises:
        NotImplementedError: on every call.
    """
    not_ready_message = "Real tool integration not yet implemented. Use --mock flag."
    raise NotImplementedError(not_ready_message)
|
|
|
|
def main():
    """Command-line entry point for the CRISPR-Cas detection wrapper.

    Parses the arguments, checks that the input exists, creates the output
    directory, then either writes mock results as JSON or hands over to the
    real tool runner. Exits with status 1 when the input is missing or when
    any exception is raised during detection or writing.
    """
    args = parse_args()

    input_missing = not args.input.exists()
    if input_missing:
        logger.error(f"Input file not found: {args.input}")
        sys.exit(1)

    # Make sure the destination directory exists before anything is written.
    args.output.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Mock data is used when requested explicitly, or as a fallback when
        # the external tool cannot be located.
        use_mock = args.mock
        if not use_mock and not check_dependencies(args.tool_path):
            logger.warning("CRISPRCasFinder not found. Falling back to mock data.")
            use_mock = True

        if not use_mock:
            # Real-tool path: nothing is written by this function afterwards.
            run_crisprcasfinder(args.input, args.output, args.tool_path)
            return

        results = generate_mock_results(args.input)

        with open(args.output, 'w') as out_handle:
            json.dump(results, out_handle, indent=2)

        logger.info(f"Results written to {args.output}")

    except Exception as err:
        logger.error(f"Error executing CRISPR detection: {err}")
        sys.exit(1)
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()