Refactor: Unified pipeline execution, simplified UI, and fixed Docker config
- Backend: Refactored tasks.py to invoke run_single_fna_pipeline.py directly for consistency.
- Backend: Changed the output format to ZIP and added auto-cleanup of intermediate files.
- Backend: Fixed language parameter passing in the API and tasks.
- Frontend: Removed CRISPR Fusion UI elements from the Submit and Monitor views.
- Frontend: Implemented a simulated progress bar for better UX.
- Frontend: Restored the One-click load button and added result file structure documentation.
- Docker: Fixed the critical "Restarting" loop by removing an incorrect image directive in docker-compose.yml.
- Docker: Optimized the Dockerfile to correct .pixi environment path issues and prevent accidental deletion of frontend assets.
This commit is contained in:
@@ -15,6 +15,7 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import math
|
||||
from dataclasses import dataclass, asdict, field
|
||||
import os
|
||||
from pathlib import Path
|
||||
@@ -436,7 +437,58 @@ def partner_fulfilled_for_hit(hit_family: str, strain_df: pd.DataFrame, index: S
|
||||
return False
|
||||
|
||||
|
||||
def score_strain(strain: str, sdf: pd.DataFrame, index: SpecificityIndex) -> Tuple[List[ToxinHit], StrainScores, Optional[StrainSpeciesScores]]:
|
||||
def apply_logit_prior(
    score: float,
    bgc_data: Dict[str, int],
    mobilome_data: Dict[str, int],
    crispr_state: int,
    betas: Dict[str, float]
) -> float:
    """
    Apply Logit Prior adjustment: S_final = sigmoid(logit(S_tox) + Delta)

    Delta = beta_z*b_z + beta_t*b_t + beta_a*b_a + beta_m*g(m) + beta_c*h(c)

    where g(m) = ln(1 + m) and h(c) = 1 - c/2.

    Args:
        score: Toxin-derived score S_tox. Values <= 0 return 0.0 and values
            >= 1 return 1.0 without applying the prior.
        bgc_data: Mapping read for the keys "ZWA", "Thu" and "TAA"
            (missing keys count as 0).
        mobilome_data: Mapping read for the key "mobile_elements_count"
            (missing key counts as 0).
        crispr_state: CRISPR state c; the formula is written for c in
            {0, 1, 2}, giving h(c) of 1, 0.5 and 0 respectively.
        betas: Weights keyed by "z", "t", "a", "m" and "c". All five keys
            are required (a missing key raises KeyError).

    Returns:
        The adjusted score, sigmoid(logit(score) + Delta).
    """
    if score <= 0:
        return 0.0
    if score >= 1:
        return 1.0

    # Clamp away from 0/1 so the logit stays finite.
    epsilon = 1e-6
    p = max(epsilon, min(1.0 - epsilon, score))
    logit_p = math.log(p / (1.0 - p))

    # BGC indicators
    b_z = bgc_data.get("ZWA", 0)
    b_t = bgc_data.get("Thu", 0)
    b_a = bgc_data.get("TAA", 0)

    # Mobilome: g(m) = ln(1+m). A negative count is treated as 0 so that
    # malformed input cannot push the logarithm outside its domain.
    m = max(0, mobilome_data.get("mobile_elements_count", 0))
    g_m = math.log(1.0 + m)

    # CRISPR: h(c) = 1 - c/2 (0->1, 1->0.5, 2->0)
    h_c = 1.0 - (crispr_state / 2.0)

    delta = (
        (betas["z"] * b_z)
        + (betas["t"] * b_t)
        + (betas["a"] * b_a)
        + (betas["m"] * g_m)
        + (betas["c"] * h_c)
    )

    final_logit = logit_p + delta
    try:
        return 1.0 / (1.0 + math.exp(-final_logit))
    except OverflowError:
        # exp() overflows only for a very large negative logit, where the
        # sigmoid is indistinguishable from 0.
        return 0.0
|
||||
|
||||
|
||||
def score_strain(
|
||||
strain: str,
|
||||
sdf: pd.DataFrame,
|
||||
index: SpecificityIndex,
|
||||
crispr_associations: Dict[str, Any] = None,
|
||||
crispr_weight: float = 0.0,
|
||||
context_data: Dict[str, Any] = None,
|
||||
betas: Dict[str, float] = None
|
||||
) -> Tuple[List[ToxinHit], StrainScores, Optional[StrainSpeciesScores]]:
|
||||
# Include special buckets per requirements
|
||||
order_set = sorted({*index.all_orders, "other", "unknown"})
|
||||
per_hit: List[ToxinHit] = []
|
||||
@@ -477,6 +529,14 @@ def score_strain(strain: str, sdf: pd.DataFrame, index: SpecificityIndex) -> Tup
|
||||
if not fulfilled:
|
||||
w *= 0.2 # partner penalty if required but not present
|
||||
|
||||
# CRISPR Boost (Hit-level)
|
||||
if crispr_associations and crispr_weight > 0:
|
||||
# Check if this toxin is associated with CRISPR
|
||||
# keys in crispr_associations are toxin names
|
||||
assoc = crispr_associations.get(hit_id) or crispr_associations.get(name_key)
|
||||
if assoc:
|
||||
w = min(1.0, w + crispr_weight)
|
||||
|
||||
contribs: Dict[str, float] = {}
|
||||
for o in order_set:
|
||||
p = float(order_dist.get(o, 0.0))
|
||||
@@ -523,7 +583,21 @@ def score_strain(strain: str, sdf: pd.DataFrame, index: SpecificityIndex) -> Tup
|
||||
)
|
||||
)
|
||||
|
||||
# 1. Calculate S_tox (Noisy-OR)
|
||||
order_scores = {o: (1.0 - one_minus[o]) for o in order_set}
|
||||
|
||||
# 2. Apply Logit Prior if context data is present
|
||||
if context_data and betas:
|
||||
bgc = context_data.get("bgc", {})
|
||||
mobi = context_data.get("mobilome", {})
|
||||
crispr_st = context_data.get("crispr_state", 0)
|
||||
|
||||
for o in order_scores:
|
||||
s_tox = order_scores[o]
|
||||
if s_tox > 0:
|
||||
s_final = apply_logit_prior(s_tox, bgc, mobi, crispr_st, betas)
|
||||
order_scores[o] = s_final
|
||||
|
||||
# choose top order excluding unresolved buckets if possible
|
||||
preferred = [o for o in index.all_orders if o in order_scores]
|
||||
if not preferred:
|
||||
@@ -538,6 +612,20 @@ def score_strain(strain: str, sdf: pd.DataFrame, index: SpecificityIndex) -> Tup
|
||||
species_scores_obj: Optional[StrainSpeciesScores] = None
|
||||
if species_set:
|
||||
species_scores = {sp: (1.0 - sp_one_minus[sp]) for sp in species_set}
|
||||
|
||||
# Apply Logit Prior to Species scores too?
|
||||
# The math doc focuses on "Strain x Order", but presumably it applies to species too if we follow the logic.
|
||||
# "combine evidence... then prior". Let's apply it for consistency.
|
||||
if context_data and betas:
|
||||
bgc = context_data.get("bgc", {})
|
||||
mobi = context_data.get("mobilome", {})
|
||||
crispr_st = context_data.get("crispr_state", 0)
|
||||
for sp in species_scores:
|
||||
s_tox = species_scores[sp]
|
||||
if s_tox > 0:
|
||||
s_final = apply_logit_prior(s_tox, bgc, mobi, crispr_st, betas)
|
||||
species_scores[sp] = s_final
|
||||
|
||||
if species_scores:
|
||||
top_sp = max(species_set, key=lambda sp: species_scores.get(sp, 0.0))
|
||||
top_sp_score = float(species_scores.get(top_sp, 0.0))
|
||||
@@ -635,8 +723,87 @@ def main():
|
||||
help="Keep only hits that map to a known name/subfamily/family in the specificity index")
|
||||
ap.add_argument("--min_identity", type=float, default=0.0, help="Minimum identity (0-1) to keep a hit")
|
||||
ap.add_argument("--min_coverage", type=float, default=0.0, help="Minimum coverage (0-1) to keep a hit")
|
||||
|
||||
# CRISPR Integration
|
||||
ap.add_argument("--crispr_results", type=Path, default=None, help="Path to CRISPR Fusion analysis results (JSON) for hit-level boost")
|
||||
ap.add_argument("--crispr_fusion", action="store_true", help="Use fusion analysis results for stronger evidence")
|
||||
ap.add_argument("--crispr_weight", type=float, default=0.1, help="Weight boost for CRISPR-associated toxins (0-1)")
|
||||
|
||||
# Genome Context Priors (Strain-level)
|
||||
ap.add_argument("--context_bgc", type=Path, default=None, help="Path to BGC detection results (JSON)")
|
||||
ap.add_argument("--context_mobilome", type=Path, default=None, help="Path to Mobilome analysis results (JSON)")
|
||||
ap.add_argument("--context_crispr", type=Path, default=None, help="Path to CRISPR detection results (JSON) for strain-level prior")
|
||||
|
||||
# Prior Weights (Defaults from math doc)
|
||||
ap.add_argument("--beta_z", type=float, default=0.5, help="Weight for ZWA BGC")
|
||||
ap.add_argument("--beta_t", type=float, default=0.5, help="Weight for Thu BGC")
|
||||
ap.add_argument("--beta_a", type=float, default=0.5, help="Weight for TAA BGC")
|
||||
ap.add_argument("--beta_m", type=float, default=0.5, help="Weight for Mobilome")
|
||||
ap.add_argument("--beta_c", type=float, default=0.5, help="Weight for CRISPR state")
|
||||
|
||||
args = ap.parse_args()
|
||||
|
||||
# Load CRISPR data if available
|
||||
crispr_associations = {}
|
||||
if args.crispr_results and args.crispr_results.exists():
|
||||
try:
|
||||
with open(args.crispr_results) as f:
|
||||
cdata = json.load(f)
|
||||
# If fusion analysis results (has 'associations')
|
||||
if "associations" in cdata:
|
||||
for assoc in cdata["associations"]:
|
||||
toxin_name = assoc.get("toxin")
|
||||
if toxin_name:
|
||||
# Normalize name if possible or keep as is.
|
||||
# Digger outputs might have variants, but fusion usually uses specific names.
|
||||
crispr_associations[toxin_name] = assoc
|
||||
print(f"[Shoter] Loaded {len(crispr_associations)} CRISPR associations")
|
||||
except Exception as e:
|
||||
print(f"[Shoter] Failed to load CRISPR results: {e}")
|
||||
|
||||
# Load Prior Data
|
||||
context_data = {
|
||||
"bgc": {},
|
||||
"mobilome": {},
|
||||
"crispr_state": 0
|
||||
}
|
||||
|
||||
# BGC
|
||||
if args.context_bgc and args.context_bgc.exists():
|
||||
try:
|
||||
with open(args.context_bgc) as f:
|
||||
context_data["bgc"] = json.load(f)
|
||||
print(f"[Shoter] Loaded BGC context: {context_data['bgc']}")
|
||||
except Exception as e:
|
||||
print(f"[Shoter] Failed to load BGC context: {e}")
|
||||
|
||||
# Mobilome
|
||||
if args.context_mobilome and args.context_mobilome.exists():
|
||||
try:
|
||||
with open(args.context_mobilome) as f:
|
||||
context_data["mobilome"] = json.load(f)
|
||||
print(f"[Shoter] Loaded Mobilome context: {context_data['mobilome']}")
|
||||
except Exception as e:
|
||||
print(f"[Shoter] Failed to load Mobilome context: {e}")
|
||||
|
||||
# CRISPR State
|
||||
if args.context_crispr and args.context_crispr.exists():
|
||||
try:
|
||||
with open(args.context_crispr) as f:
|
||||
cdata = json.load(f)
|
||||
context_data["crispr_state"] = cdata.get("crispr_state", 0)
|
||||
print(f"[Shoter] Loaded CRISPR state: {context_data['crispr_state']}")
|
||||
except Exception as e:
|
||||
print(f"[Shoter] Failed to load CRISPR context: {e}")
|
||||
|
||||
betas = {
|
||||
"z": args.beta_z,
|
||||
"t": args.beta_t,
|
||||
"a": args.beta_a,
|
||||
"m": args.beta_m,
|
||||
"c": args.beta_c
|
||||
}
|
||||
|
||||
index = SpecificityIndex.from_csv(args.toxicity_csv)
|
||||
|
||||
df = parse_all_toxins(args.all_toxins)
|
||||
@@ -672,7 +839,13 @@ def main():
|
||||
|
||||
for strain in strains:
|
||||
sdf = df[df["Strain"].astype(str).eq(strain)].copy()
|
||||
per_hit, sscore, sspecies = score_strain(strain, sdf, index)
|
||||
per_hit, sscore, sspecies = score_strain(
|
||||
strain, sdf, index,
|
||||
crispr_associations=crispr_associations,
|
||||
crispr_weight=args.crispr_weight if args.crispr_fusion else 0.0,
|
||||
context_data=context_data,
|
||||
betas=betas
|
||||
)
|
||||
all_hits.extend(per_hit)
|
||||
all_strain_scores.append(sscore)
|
||||
if sspecies is not None:
|
||||
|
||||
Reference in New Issue
Block a user