Fix(pipeline): prevent nested zip packaging and update CRISPR dependencies
- Add filter to skip .zip and .tar.gz files when creating the result archive
- Update CRISPR feature with CASFinder dependencies (hmmer, blast, vmatch, etc.)
- Add install-casfinder task for macsydata installation
- Remove obsolete CRISPR test files

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
0
tools/bttoxin_digger/run_digger_pixi.sh
Executable file → Normal file
0
tools/bttoxin_digger/run_digger_pixi.sh
Executable file → Normal file
30
tools/crispr_cas_analysis/pixi.toml
Normal file
30
tools/crispr_cas_analysis/pixi.toml
Normal file
@@ -0,0 +1,30 @@
|
||||
# Pixi manifest for the CRISPR-Cas analysis tool environment.
[workspace]
authors = ["zly <644706215@qq.com>"]
# Channel order is kept as in the original manifest.
channels = [
    "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge",
    "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda",
    "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main",
]
name = "crispr_cas_analysis"
platforms = ["linux-64"]
version = "0.1.0"

[dependencies]
python = ">=3.10"
wget = "*"
curl = "*"
git = "*"
java-jdk = "*"
parallel = "*"
perl-app-cpanminus = "*"
hmmer = "*"
emboss = "*"
blast = "*"
perl-bioperl-core = "*"
perl-xml-simple = "*"
perl-digest-md5 = "*"
vmatch = "*"
muscle = "*"
prodigal = "*"
mamba = "*"
# Pinned to an exact release.
macsyfinder = "==2.1.2"

[tasks]
# Installs the pinned CASFinder model package through macsydata.
install-casfinder = "macsydata install -u CASFinder==3.1.0"
# Runs the CRISPR detection script.
detect = "python scripts/detect_crispr.py"
|
||||
@@ -1 +0,0 @@
|
||||
"""Tests for CRISPR-Cas module"""
|
||||
@@ -1,42 +0,0 @@
|
||||
import pytest
|
||||
import json
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from crispr_cas.scripts.detect_crispr import generate_mock_results
|
||||
|
||||
def test_generate_mock_results(tmp_path):
    """Check the structure of the mapping returned by mock result generation.

    An empty ``test_genome.fna`` file is created under ``tmp_path`` and handed
    to ``generate_mock_results``. The result must report the strain id
    ``test_genome``, contain both result sections, flag ``has_cas`` as true
    and list at least one array.
    """
    genome_path = tmp_path / "test_genome.fna"
    genome_path.touch()

    mock = generate_mock_results(genome_path)

    assert mock["strain_id"] == "test_genome"
    # Both top-level result sections must be present.
    for section in ("cas_systems", "arrays"):
        assert section in mock
    assert mock["summary"]["has_cas"] is True
    assert len(mock["arrays"]) > 0
|
||||
|
||||
def test_script_execution(tmp_path):
    """Run ``detect_crispr.py`` end to end in mock mode via a subprocess.

    The script is invoked with an empty genome file as ``--input`` and a path
    under ``tmp_path`` as ``--output``. The test passes when the process exits
    with status 0, the output file exists, and the JSON it contains reports
    the strain id ``genome``.
    """
    import subprocess
    import sys

    # Create dummy input
    input_file = tmp_path / "genome.fna"
    input_file.touch()
    output_file = tmp_path / "results.json"
    # Resolved against the current working directory, as before.
    script_path = Path("crispr_cas/scripts/detect_crispr.py").absolute()

    cmd = [
        # Use the interpreter running the tests rather than whatever
        # "python3" happens to resolve to on PATH, so the script sees the
        # same environment and installed packages as the test session.
        sys.executable, str(script_path),
        "--input", str(input_file),
        "--output", str(output_file),
        "--mock",
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    # Surface the script's stderr so a failure is diagnosable from the report.
    assert result.returncode == 0, result.stderr
    assert output_file.exists()

    with open(output_file, encoding="utf-8") as f:
        data = json.load(f)
    assert data["strain_id"] == "genome"
|
||||
@@ -1,93 +0,0 @@
|
||||
import pytest
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
# Add project root to path to allow importing modules
|
||||
sys.path.insert(0, str(Path(__file__).parents[2]))
|
||||
|
||||
from crispr_cas.scripts.fusion_analysis import calculate_distance, perform_fusion_analysis
|
||||
|
||||
def test_calculate_distance():
    """Verify ``calculate_distance`` on a table of position-string pairs."""
    # Each entry: (first position, second position, expected distance).
    cases = [
        # Same contig, no overlap: 100-200 vs 300-400 -> distance 100
        ("c1:100-200", "c1:300-400", 100),
        # Same contig, overlapping: 100-300 vs 200-400 -> distance 0
        ("c1:100-300", "c1:200-400", 0),
        # Different contigs -> -1
        ("c1:100-200", "c2:300-400", -1),
        # Invalid position format -> -1
        ("invalid", "c1:100-200", -1),
    ]
    # Checked in order; the first mismatch fails the test.
    for first, second, expected in cases:
        assert calculate_distance(first, second) == expected
|
||||
|
||||
def test_fusion_analysis_logic(tmp_path):
    """Exercise ``perform_fusion_analysis`` in mock mode with one CRISPR array.

    The CRISPR input holds a single array on ``contig_1`` spanning 1000-2000.
    The toxin file is an empty placeholder, since only its path is passed on.
    """
    # Mock CRISPR data: one array with one spacer.
    array_record = {
        "id": "A1",
        "contig": "contig_1",
        "start": 1000,
        "end": 2000,
        "spacers": [{"sequence": "ATGC"}],
    }
    crispr_data = {"strain_id": "test_strain", "arrays": [array_record]}

    # Mock toxin file (just a placeholder for the path).
    toxin_path = tmp_path / "toxins.txt"
    toxin_path.touch()

    # In mock mode, the script generates its own toxin list:
    #   {"name": "Cry1Ac1", "position": "contig_1:10000-12000"}
    # Distance: 10000 - 2000 = 8000 (< 10000 threshold) -> should match.
    outcome = perform_fusion_analysis(crispr_data, toxin_path, mock=True)

    assert outcome["strain_id"] == "test_strain"
    assert len(outcome["associations"]) > 0

    # Keep only the proximity-type associations and check the first one.
    near = []
    for association in outcome["associations"]:
        if association["type"] == "proximity":
            near.append(association)
    assert len(near) > 0
    assert near[0]["distance"] == 8000
|
||||
|
||||
def test_script_execution(tmp_path):
    """Run ``fusion_analysis.py`` end to end in mock mode via a subprocess.

    Three inputs are created under ``tmp_path``: a CRISPR results JSON with an
    empty ``arrays`` list, an empty toxin file and an empty genome file. The
    test passes when the process exits with status 0 and the output file
    exists.
    """
    import subprocess
    import sys

    # Create input files
    crispr_file = tmp_path / "crispr.json"
    with open(crispr_file, "w", encoding="utf-8") as f:
        json.dump({"strain_id": "test", "arrays": []}, f)

    toxin_file = tmp_path / "toxins.txt"
    toxin_file.touch()

    genome_file = tmp_path / "genome.fna"
    genome_file.touch()

    output_file = tmp_path / "output.json"

    # Resolved against the current working directory, as before.
    script_path = Path("crispr_cas/scripts/fusion_analysis.py").absolute()

    cmd = [
        # Use the interpreter running the tests rather than whatever
        # "python3" happens to resolve to on PATH, so the script sees the
        # same environment and installed packages as the test session.
        sys.executable, str(script_path),
        "--crispr-results", str(crispr_file),
        "--toxin-results", str(toxin_file),
        "--genome", str(genome_file),
        "--output", str(output_file),
        "--mock",
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    # Surface the script's stderr so a failure is diagnosable from the report.
    assert result.returncode == 0, result.stderr
    assert output_file.exists()
|
||||
Reference in New Issue
Block a user