Refactor: Unified pipeline execution, simplified UI, and fixed Docker config

- Backend: Refactored tasks.py to directly invoke run_single_fna_pipeline.py for consistency.
- Backend: Changed output format to ZIP and added auto-cleanup of intermediate files.
- Backend: Fixed language parameter passing in API and tasks.
- Frontend: Removed CRISPR Fusion UI elements from Submit and Monitor views.
- Frontend: Implemented simulated progress bar for better UX.
- Frontend: Restored the one-click load button and added documentation of the result file structure.
- Docker: Fixed a critical "Restarting" loop by removing the incorrect image directive in docker-compose.yml.
- Docker: Optimized Dockerfile to correct .pixi environment path issues and prevent accidental deletion of frontend assets.
This commit is contained in:
zly
2026-01-20 20:25:25 +08:00
parent 5067169b0b
commit c75c85c53b
134 changed files with 146457 additions and 996647 deletions

View File

@@ -0,0 +1 @@
"""Scripts for CRISPR-Cas detection and analysis"""

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
"""
CRISPR-Cas Detection Wrapper
Wrapper for CRISPRCasFinder or similar tools to detect CRISPR arrays and Cas genes.
"""
import argparse
import json
import logging
import shutil
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Any
# Configure logging once at import time: INFO level, with each record
# prefixed by timestamp, logger name and severity.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)
def parse_args():
    """Parse the command-line options of the CRISPR-Cas detection wrapper.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``output``, ``tool_path``
        and ``mock`` attributes, read from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        description="Detect CRISPR arrays and Cas genes in genome"
    )
    cli.add_argument(
        "--input", "-i",
        type=Path,
        required=True,
        help="Input genome file (.fna)",
    )
    cli.add_argument(
        "--output", "-o",
        type=Path,
        required=True,
        help="Output JSON results file",
    )
    cli.add_argument(
        "--tool-path",
        type=Path,
        default=None,
        help="Path to CRISPRCasFinder.pl",
    )
    cli.add_argument(
        "--mock",
        action="store_true",
        help="Use mock data (for testing without external tools)",
    )
    return cli.parse_args()
def check_dependencies(tool_path: Path = None) -> bool:
    """Report whether a CRISPRCasFinder executable can be located.

    Args:
        tool_path: Optional explicit location of CRISPRCasFinder.pl.

    Returns:
        True when ``tool_path`` is given and exists on disk, or when
        ``CRISPRCasFinder.pl`` is found on PATH; False otherwise.
    """
    # The explicit path is tried first; PATH is only consulted when that
    # lookup does not succeed.
    located = (tool_path and tool_path.exists()) or shutil.which("CRISPRCasFinder.pl")
    return bool(located)
def generate_mock_results(genome_file: Path) -> Dict[str, Any]:
    """Build a fixed, fake CRISPR detection result for testing.

    Args:
        genome_file: Genome path; only its name (for the log line) and its
            stem (used as ``strain_id``) are read. The file is not opened.

    Returns:
        A dict with ``strain_id``, ``cas_systems``, ``arrays``, ``summary``
        and ``metadata`` keys. All values other than ``strain_id`` are
        hard-coded; the ``num_spacers`` counts are fixed numbers and are not
        derived from the listed spacers (CRISPR_2 declares 8 but lists 1).
    """
    logger.info(f"Generating mock CRISPR results for {genome_file.name}")

    # Both mock arrays share the same consensus repeat.
    repeat = "GTTTTAGAGCTATGCTGTTTTGAATGGTCCCAAAAC"

    first_array_sequences = [
        "ATGCGTCGACATGCGTCGACATGCGTCGAC",
        "CGTAGCTAGCCGTAGCTAGCCGTAGCTAGC",
        "TGCATGCATGTGCATGCATGTGCATGCATG",
        "GCTAGCTAGCGCTAGCTAGCGCTAGCTAGC",
        "AAAAATTTTTAAAAATTTTTAAAAATTTTT",
    ]

    cas_system = {
        "type": "I-E",
        "subtype": "I-E",
        "position": "contig_1:15000-25000",
        "genes": ["cas1", "cas2", "cas3", "casA", "casB", "casC", "casD", "casE"],
    }

    array_one = {
        "id": "CRISPR_1",
        "contig": "contig_1",
        "start": 12345,
        "end": 12678,
        "consensus_repeat": repeat,
        "num_spacers": 5,
        # Spacer positions are 1-based and follow list order.
        "spacers": [
            {"sequence": seq, "position": rank}
            for rank, seq in enumerate(first_array_sequences, start=1)
        ],
    }

    array_two = {
        "id": "CRISPR_2",
        "contig": "contig_2",
        "start": 50000,
        "end": 50500,
        "consensus_repeat": repeat,
        "num_spacers": 8,
        "spacers": [
            {"sequence": "CCCGGGAAACCCGGGAAACCCGGGAAA", "position": 1},
        ],
    }

    summary = {
        "has_cas": True,
        "has_crispr": True,
        "num_arrays": 2,
        "num_spacers": 13,
        "cas_types": ["I-E"],
    }

    metadata = {
        "tool": "CRISPRCasFinder",
        "version": "Mock-v1.0",
        "date": "2025-01-14",
    }

    return {
        "strain_id": genome_file.stem,
        "cas_systems": [cas_system],
        "arrays": [array_one, array_two],
        "summary": summary,
        "metadata": metadata,
    }
def run_crisprcasfinder(input_file: Path, output_file: Path, tool_path: Path = None):
    """Placeholder for invoking the real CRISPRCasFinder tool.

    None of the arguments are used yet; the subprocess call to
    CRISPRCasFinder.pl has not been written.

    Raises:
        NotImplementedError: Always.
    """
    message = "Real tool integration not yet implemented. Use --mock flag."
    raise NotImplementedError(message)
def main():
    """Command-line entry point for CRISPR-Cas detection.

    Exits with status 1 when the input file is missing or when any step
    inside the detection/writing phase raises. Mock results are written when
    ``--mock`` is given or when CRISPRCasFinder cannot be located; otherwise
    the real-tool runner is invoked and this function returns without
    writing anything itself.
    """
    args = parse_args()

    if not args.input.exists():
        logger.error(f"Input file not found: {args.input}")
        sys.exit(1)

    # Create parent directory for output if needed
    args.output.parent.mkdir(parents=True, exist_ok=True)

    try:
        use_mock = args.mock
        # Dependencies are only probed when mock mode was not requested.
        if not use_mock and not check_dependencies(args.tool_path):
            logger.warning("CRISPRCasFinder not found. Falling back to mock data.")
            use_mock = True

        if not use_mock:
            # Real implementation would go here
            run_crisprcasfinder(args.input, args.output, args.tool_path)
            return

        results = generate_mock_results(args.input)

        # Write results
        with open(args.output, 'w') as out_handle:
            json.dump(results, out_handle, indent=2)
        logger.info(f"Results written to {args.output}")
    except Exception as e:
        logger.error(f"Error executing CRISPR detection: {e}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,166 @@
#!/usr/bin/env python3
"""
CRISPR-Toxin Fusion Analysis
Analyzes associations between CRISPR spacers and toxin genes.
"""
import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Any
# Configure logging once at import time: INFO level, with each record
# prefixed by timestamp, logger name and severity.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)
def parse_args():
    """Parse the command-line options of the CRISPR-Toxin fusion analysis.

    Returns:
        The ``argparse.Namespace`` with ``crispr_results``, ``toxin_results``,
        ``genome``, ``output`` and ``mock`` attributes, read from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(description="Analyze CRISPR-Toxin associations")
    cli.add_argument(
        "--crispr-results",
        type=Path,
        required=True,
        help="CRISPR detection results (JSON)",
    )
    cli.add_argument(
        "--toxin-results",
        type=Path,
        required=True,
        help="Toxin detection results (JSON or TXT)",
    )
    cli.add_argument(
        "--genome",
        type=Path,
        required=True,
        help="Original genome file (.fna)",
    )
    cli.add_argument(
        "--output", "-o",
        type=Path,
        required=True,
        help="Output analysis JSON",
    )
    cli.add_argument(
        "--mock",
        action="store_true",
        help="Use mock analysis logic",
    )
    return cli.parse_args()
def load_json(path: Path) -> Dict:
    """Read the file at ``path`` and return its parsed JSON content."""
    with open(path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def calculate_distance(range1: str, range2: str) -> int:
    """Return the gap between two genomic ranges on the same contig.

    Each range is written as ``'contig:start-end'``. The contig name is
    everything before the *last* colon, so contig identifiers that themselves
    contain ``:`` are parsed correctly.

    Args:
        range1: First range, e.g. ``'contig_1:100-200'``.
        range2: Second range in the same format.

    Returns:
        0 when the ranges overlap or touch at a shared coordinate, the
        positive gap between them when they are disjoint, and -1 when the
        ranges lie on different contigs or either string cannot be parsed
        (a warning is logged in the latter case).
    """
    try:
        # Split on the last ':' only; a plain split(':') fails to unpack for
        # contig names such as 'scaffold:7'.
        contig1, coords1 = range1.rsplit(':', 1)
        start1, end1 = map(int, coords1.split('-'))
        contig2, coords2 = range2.rsplit(':', 1)
        start2, end2 = map(int, coords2.split('-'))
    except (AttributeError, TypeError, ValueError) as e:
        # Malformed or non-string input: report "no distance", do not crash.
        logger.warning(f"Error calculating distance: {e}")
        return -1

    if contig1 != contig2:
        return -1  # Different contigs

    # Overlapping (or touching) intervals have zero distance.
    if max(start1, start2) <= min(end1, end2):
        return 0

    # Disjoint: measure from the end of the upstream range to the start of
    # the downstream one.
    if start1 > end2:
        return start1 - end2
    return start2 - end1
def mock_blast_spacers(spacers: List[str], toxins: List[Dict]) -> List[Dict]:
    """Fabricate a BLAST-style hit between spacers and toxins for demos.

    No sequence comparison is performed. When both inputs are non-empty, a
    single hard-coded hit pairing the first spacer with the first toxin is
    returned; otherwise the result is empty.

    Args:
        spacers: Spacer sequences; only the first one is used.
        toxins: Toxin records; only the first one's ``name`` is read
            (``"Unknown"`` when absent).

    Returns:
        A list holding zero or one hit dicts with the keys ``spacer_seq``,
        ``target_toxin``, ``identity``, ``alignment_length`` and
        ``mismatches``.
    """
    if not (spacers and toxins):
        return []

    first_spacer = spacers[0]
    first_toxin = toxins[0]
    fake_hit = {
        "spacer_seq": first_spacer,
        "target_toxin": first_toxin.get("name", "Unknown"),
        "identity": 98.5,
        "alignment_length": 32,
        "mismatches": 1,
    }
    return [fake_hit]
def perform_fusion_analysis(
    crispr_data: Dict,
    toxin_file: Path,
    mock: bool = False,
    proximity_window: int = 10000,
) -> Dict:
    """Relate CRISPR arrays to toxin genes by proximity and spacer matches.

    Steps:
    1. Collect the CRISPR arrays from ``crispr_data``.
    2. Obtain the toxin gene list (hard-coded in mock mode, empty otherwise).
    3. Record every array/toxin pair on the same contig whose distance is
       below ``proximity_window``.
    4. Record spacer matches reported by ``mock_blast_spacers``.

    Args:
        crispr_data: Parsed CRISPR detection results; the ``strain_id`` and
            ``arrays`` keys are read.
        toxin_file: Path to toxin detection results. Not read yet: real
            toxin parsing is not implemented.
        mock: When True, use two built-in toxin records instead of an empty
            toxin list.
        proximity_window: Exclusive upper bound, in bases, on the
            array-to-toxin distance that counts as an association.
            Defaults to 10000 (10 kb).

    Returns:
        A dict with ``strain_id``, an ``associations`` list (entries of type
        ``"proximity"`` or ``"spacer_match"``) and a ``summary`` holding the
        ``proximal_pairs`` and ``spacer_matches`` counts.
    """
    analysis_results = {
        "strain_id": crispr_data.get("strain_id"),
        "associations": [],
        "summary": {"proximal_pairs": 0, "spacer_matches": 0}
    }

    # Extract arrays
    arrays = crispr_data.get("arrays", [])

    # Mock Toxin Parsing (assuming simple list for now if not JSON)
    if mock:
        toxins = [
            {"name": "Cry1Ac1", "position": "contig_1:10000-12000"},
            {"name": "Vip3Aa1", "position": "contig_2:60000-62000"}
        ]
    else:
        # TODO: Implement real toxin file parsing (e.g. from All_Toxins.txt)
        toxins = []
        logger.warning("Real toxin parsing not implemented yet, using empty list")

    # Analyze Proximity
    for array in arrays:
        array_pos = f"{array.get('contig')}:{array.get('start')}-{array.get('end')}"
        for toxin in toxins:
            dist = calculate_distance(array_pos, toxin["position"])
            # -1 means "different contig or unparseable"; skip those and
            # anything at or beyond the window.
            if dist == -1 or dist >= proximity_window:
                continue
            analysis_results["associations"].append({
                "type": "proximity",
                "array_id": array.get("id"),
                "toxin": toxin["name"],
                "distance": dist,
                "array_position": array_pos,
                "toxin_position": toxin["position"]
            })
            analysis_results["summary"]["proximal_pairs"] += 1

    # Analyze Spacer Matches (Mock)
    all_spacers = [
        spacer.get("sequence")
        for array in arrays
        for spacer in array.get("spacers", [])
    ]
    for match in mock_blast_spacers(all_spacers, toxins):
        analysis_results["associations"].append({
            "type": "spacer_match",
            **match
        })
        analysis_results["summary"]["spacer_matches"] += 1

    return analysis_results
def main():
    """Command-line entry point for the CRISPR-Toxin fusion analysis.

    Loads the CRISPR detection JSON, runs the fusion analysis and writes the
    result as indented JSON to ``--output``. Exits with status 1 when the
    CRISPR results file is missing or when any step of the analysis or
    writing raises.
    """
    args = parse_args()

    if not args.crispr_results.exists():
        logger.error(f"CRISPR results file not found: {args.crispr_results}")
        sys.exit(1)

    try:
        crispr_data = load_json(args.crispr_results)
        results = perform_fusion_analysis(crispr_data, args.toxin_results, args.mock)

        # Create the output's parent directory if needed, matching the
        # detection script, so a fresh results directory does not fail.
        args.output.parent.mkdir(parents=True, exist_ok=True)

        # Write results
        with open(args.output, 'w') as f:
            json.dump(results, f, indent=2)
        logger.info(f"Fusion analysis complete. Results: {args.output}")
    except Exception as e:
        logger.error(f"Error during fusion analysis: {e}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()