feat: migrate from Docker to pixi for BtToxin_Digger execution
- Add pixi.toml with digger and pipeline environments
- Implement PixiRunner class replacing DockerContainerManager
- Add run_digger_stage.py for standalone digger execution
- Update run_single_fna_pipeline.py to use PixiRunner
- Remove docker dependency from pyproject.toml
- Delete docker_client.py (no longer needed)

BREAKING CHANGE: Docker is no longer required. Use 'pixi install' instead.
This commit is contained in:
@@ -3,20 +3,20 @@
|
||||
|
||||
- Input: one .fna file (nucleotide scaffold)
|
||||
- Steps:
|
||||
1) Stage this single file, run BtToxin_Digger via DockerContainerManager
|
||||
2) Run Shotter scoring on Digger's All_Toxins.txt
|
||||
3) Render heatmaps + paper-style report
|
||||
1) Stage this single file, run BtToxin_Digger via PixiRunner (pixi environment)
|
||||
2) Run Shotter scoring on Digger's All_Toxins.txt via pixi run -e pipeline
|
||||
3) Render heatmaps + paper-style report via pixi run -e pipeline
|
||||
4) Organize outputs under one root folder:
|
||||
<out_root>/
|
||||
├─ digger/ (container outputs)
|
||||
├─ digger/ (pixi digger env outputs)
|
||||
├─ shotter/ (Shotter TSV/JSON + plots + report)
|
||||
└─ pipeline_results.tar.gz (bundle)
|
||||
|
||||
Notes
|
||||
- Digger is executed in a container (root in container); files may be owned by root on host.
|
||||
We write everything into <out_root>/digger to keep permissions/locality predictable.
|
||||
- Digger is executed in the pixi 'digger' environment with bioconda dependencies.
|
||||
- Shotter and plotting are executed in the pixi 'pipeline' environment with Python dependencies.
|
||||
- This script exposes CLI flags for Shotter filters to allow strict/loose runs.
|
||||
- 默认使用 external_dbs/bt_toxin 作为外部数据库(若存在),覆盖容器内置旧库。
|
||||
- 默认使用 external_dbs/bt_toxin 作为外部数据库(若存在)。
|
||||
|
||||
Example
|
||||
python scripts/run_single_fna_pipeline.py \\
|
||||
@@ -30,6 +30,9 @@ Example
|
||||
python scripts/run_single_fna_pipeline.py \\
|
||||
--fna tests/test_data/HAN055.fna \\
|
||||
--bttoxin_db_dir /path/to/custom/bt_toxin
|
||||
|
||||
# 使用 pixi 任务运行
|
||||
pixi run pipeline --fna tests/test_data/HAN055.fna
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -40,11 +43,10 @@ import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, List
|
||||
|
||||
# import DockerContainerManager from backend
|
||||
sys.path.append(str(Path(__file__).resolve().parents[1] / "backend"))
|
||||
from app.utils.docker_client import DockerContainerManager # type: ignore
|
||||
# Import PixiRunner and command builders from scripts
|
||||
from pixi_runner import PixiRunner, build_shotter_command, build_plot_command
|
||||
|
||||
|
||||
def _shell(cmd: list[str]) -> subprocess.CompletedProcess:
|
||||
@@ -72,20 +74,28 @@ def run_single_fna_pipeline(
|
||||
fna_path: Path,
|
||||
out_root: Path,
|
||||
toxicity_csv: Path = Path("Data/toxicity-data.csv"),
|
||||
image: str = "quay.io/biocontainers/bttoxin_digger:1.0.10--hdfd78af_0",
|
||||
platform: str = "linux/amd64",
|
||||
min_identity: float = 0.0,
|
||||
min_coverage: float = 0.0,
|
||||
allow_unknown_families: bool = True,
|
||||
require_index_hit: bool = False,
|
||||
lang: str = "zh",
|
||||
bttoxin_db_dir: Path | None = None,
|
||||
threads: int = 4,
|
||||
) -> Dict[str, Any]:
|
||||
"""运行单个 fna 文件的完整 pipeline。
|
||||
"""运行单个 fna 文件的完整 pipeline(使用 pixi 环境)。
|
||||
|
||||
Args:
|
||||
fna_path: 输入 .fna 文件路径
|
||||
out_root: 输出根目录
|
||||
toxicity_csv: 毒性数据 CSV 文件路径
|
||||
min_identity: 最小 identity 阈值
|
||||
min_coverage: 最小 coverage 阈值
|
||||
allow_unknown_families: 是否允许未知家族
|
||||
require_index_hit: 是否要求索引命中
|
||||
lang: 报告语言 (zh/en)
|
||||
bttoxin_db_dir: 外部 bt_toxin 数据库目录。若为 None,则自动检测
|
||||
项目根目录下的 external_dbs/bt_toxin。
|
||||
threads: 线程数
|
||||
"""
|
||||
fna_path = fna_path.resolve()
|
||||
out_root = out_root.resolve()
|
||||
@@ -98,7 +108,7 @@ def run_single_fna_pipeline(
|
||||
bttoxin_db_dir = default_db
|
||||
print(f"[pipeline] 使用外部数据库: {bttoxin_db_dir}")
|
||||
else:
|
||||
print("[pipeline] 未找到外部数据库,将使用容器内置数据库(可能较旧)")
|
||||
print("[pipeline] 未找到外部数据库,将使用 pixi 环境内置数据库")
|
||||
|
||||
digger_dir = out_root / "digger"
|
||||
shotter_dir = out_root / "shotter"
|
||||
@@ -111,15 +121,15 @@ def run_single_fna_pipeline(
|
||||
staged_fna = stage_dir / fna_path.name
|
||||
shutil.copy2(fna_path, staged_fna)
|
||||
|
||||
# 1) Run BtToxin_Digger via DockerContainerManager
|
||||
mgr = DockerContainerManager(image=image, platform=platform)
|
||||
result = mgr.run_bttoxin_digger(
|
||||
# 1) Run BtToxin_Digger via PixiRunner (pixi digger environment)
|
||||
runner = PixiRunner(env_name="digger")
|
||||
result = runner.run_bttoxin_digger(
|
||||
input_dir=stage_dir,
|
||||
output_dir=digger_dir,
|
||||
log_dir=logs_dir,
|
||||
sequence_type="nucl",
|
||||
scaf_suffix=fna_path.suffix or ".fna",
|
||||
threads=4,
|
||||
threads=threads,
|
||||
bttoxin_db_dir=bttoxin_db_dir,
|
||||
)
|
||||
if not result.get("success"):
|
||||
@@ -135,27 +145,22 @@ def run_single_fna_pipeline(
|
||||
if not all_toxins.exists():
|
||||
return {"ok": False, "stage": "digger", "error": f"Missing All_Toxins.txt at {all_toxins}"}
|
||||
|
||||
# 2) Run Shotter scoring
|
||||
# 2) Run Shotter scoring via pixi run -e pipeline
|
||||
shotter_dir.mkdir(parents=True, exist_ok=True)
|
||||
py = sys.executable
|
||||
shoter_cmd: list[str] = [
|
||||
py,
|
||||
str(Path(__file__).resolve().parents[0] / "bttoxin_shoter.py"),
|
||||
"--toxicity_csv",
|
||||
str(toxicity_csv),
|
||||
"--all_toxins",
|
||||
str(all_toxins),
|
||||
"--output_dir",
|
||||
str(shotter_dir),
|
||||
]
|
||||
if min_identity and min_identity > 0:
|
||||
shoter_cmd += ["--min_identity", str(min_identity)]
|
||||
if min_coverage and min_coverage > 0:
|
||||
shoter_cmd += ["--min_coverage", str(min_coverage)]
|
||||
if not allow_unknown_families:
|
||||
shoter_cmd += ["--disallow_unknown_families"]
|
||||
if require_index_hit:
|
||||
shoter_cmd += ["--require_index_hit"]
|
||||
scripts_dir = Path(__file__).resolve().parents[0]
|
||||
pixi_project_dir = Path(__file__).resolve().parents[1]
|
||||
|
||||
shoter_cmd = build_shotter_command(
|
||||
pixi_project_dir=pixi_project_dir,
|
||||
script_path=scripts_dir / "bttoxin_shoter.py",
|
||||
toxicity_csv=toxicity_csv,
|
||||
all_toxins=all_toxins,
|
||||
output_dir=shotter_dir,
|
||||
min_identity=min_identity,
|
||||
min_coverage=min_coverage,
|
||||
allow_unknown_families=allow_unknown_families,
|
||||
require_index_hit=require_index_hit,
|
||||
)
|
||||
|
||||
r1 = _shell(shoter_cmd)
|
||||
if r1.returncode != 0:
|
||||
@@ -165,27 +170,20 @@ def run_single_fna_pipeline(
|
||||
toxin_support = shotter_dir / "toxin_support.tsv"
|
||||
species_scores = shotter_dir / "strain_target_species_scores.tsv"
|
||||
|
||||
# 3) Plot & report
|
||||
# 3) Plot & report via pixi run -e pipeline
|
||||
strain_for_plot = _read_first_strain(strain_scores)
|
||||
plot_cmd: list[str] = [
|
||||
py,
|
||||
str(Path(__file__).resolve().parents[0] / "plot_shotter.py"),
|
||||
"--strain_scores",
|
||||
str(strain_scores),
|
||||
"--toxin_support",
|
||||
str(toxin_support),
|
||||
"--species_scores",
|
||||
str(species_scores),
|
||||
"--out_dir",
|
||||
str(shotter_dir),
|
||||
"--merge_unresolved",
|
||||
"--report_mode",
|
||||
"paper",
|
||||
"--lang",
|
||||
lang,
|
||||
]
|
||||
if strain_for_plot:
|
||||
plot_cmd += ["--per_hit_strain", strain_for_plot]
|
||||
plot_cmd = build_plot_command(
|
||||
pixi_project_dir=pixi_project_dir,
|
||||
script_path=scripts_dir / "plot_shotter.py",
|
||||
strain_scores=strain_scores,
|
||||
toxin_support=toxin_support,
|
||||
species_scores=species_scores,
|
||||
out_dir=shotter_dir,
|
||||
merge_unresolved=True,
|
||||
report_mode="paper",
|
||||
lang=lang,
|
||||
per_hit_strain=strain_for_plot if strain_for_plot else None,
|
||||
)
|
||||
|
||||
r2 = _shell(plot_cmd)
|
||||
if r2.returncode != 0:
|
||||
@@ -209,12 +207,10 @@ def run_single_fna_pipeline(
|
||||
|
||||
|
||||
def main() -> int:
|
||||
ap = argparse.ArgumentParser(description="Run single-fna Digger -> Shotter pipeline")
|
||||
ap = argparse.ArgumentParser(description="Run single-fna Digger -> Shotter pipeline (pixi-based)")
|
||||
ap.add_argument("--fna", type=Path, required=True, help="Path to a single .fna file")
|
||||
ap.add_argument("--toxicity_csv", type=Path, default=Path("Data/toxicity-data.csv"))
|
||||
ap.add_argument("--out_root", type=Path, default=Path("runs/single_run"))
|
||||
ap.add_argument("--image", type=str, default="quay.io/biocontainers/bttoxin_digger:1.0.10--hdfd78af_0")
|
||||
ap.add_argument("--platform", type=str, default="linux/amd64")
|
||||
ap.add_argument("--min_identity", type=float, default=0.0)
|
||||
ap.add_argument("--min_coverage", type=float, default=0.0)
|
||||
ap.add_argument("--disallow_unknown_families", action="store_true", default=False)
|
||||
@@ -222,6 +218,7 @@ def main() -> int:
|
||||
ap.add_argument("--lang", type=str, choices=["zh", "en"], default="zh")
|
||||
ap.add_argument("--bttoxin_db_dir", type=Path, default=None,
|
||||
help="外部 bt_toxin 数据库目录路径(默认自动检测 external_dbs/bt_toxin)")
|
||||
ap.add_argument("--threads", type=int, default=4, help="线程数")
|
||||
args = ap.parse_args()
|
||||
|
||||
# derive per-run default out_root using file stem
|
||||
@@ -233,14 +230,13 @@ def main() -> int:
|
||||
fna_path=args.fna,
|
||||
out_root=args.out_root,
|
||||
toxicity_csv=args.toxicity_csv,
|
||||
image=args.image,
|
||||
platform=args.platform,
|
||||
min_identity=args.min_identity,
|
||||
min_coverage=args.min_coverage,
|
||||
allow_unknown_families=not args.disallow_unknown_families,
|
||||
require_index_hit=args.require_index_hit,
|
||||
lang=args.lang,
|
||||
bttoxin_db_dir=args.bttoxin_db_dir,
|
||||
threads=args.threads,
|
||||
)
|
||||
|
||||
if not res.get("ok"):
|
||||
|
||||
Reference in New Issue
Block a user