feat: 支持绑定外部 bt_toxin 数据库 (2025-11-04 更新)
- docker_client.py: run_bttoxin_digger() 新增 bttoxin_db_dir 参数,支持挂载外部数据库
- run_single_fna_pipeline.py: 新增 --bttoxin_db_dir 参数,自动检测 external_dbs/bt_toxin
- README.md: 添加 bttoxin_db 更新说明和 Docker 绑定文档
- external_dbs/bt_toxin: 添加 2025-11-04 版本数据库文件

测试验证: HAN055 样本毒素命名版本号变化 (Cry2Aa9→22, Cry2Ab35→41, Cry1Ia40→42, Vip3Aa7→79)
This commit is contained in:
@@ -16,14 +16,20 @@ Notes
|
||||
- Digger is executed in a container (root in container); files may be owned by root on host.
|
||||
We write everything into <out_root>/digger to keep permissions/locality predictable.
|
||||
- This script exposes CLI flags for Shotter filters to allow strict/loose runs.
|
||||
- 默认使用 external_dbs/bt_toxin 作为外部数据库(若存在),覆盖容器内置旧库。
|
||||
|
||||
Example
|
||||
python scripts/run_single_fna_pipeline.py \
|
||||
--fna tests/test_data/C15.fna \
|
||||
--toxicity_csv Data/toxicity-data.csv \
|
||||
--out_root runs/C15_run \
|
||||
--min_identity 0.50 --min_coverage 0.60 \
|
||||
python scripts/run_single_fna_pipeline.py \\
|
||||
--fna tests/test_data/HAN055.fna \\
|
||||
--toxicity_csv Data/toxicity-data.csv \\
|
||||
--out_root runs/HAN055_run \\
|
||||
--min_identity 0.50 --min_coverage 0.60 \\
|
||||
--disallow_unknown_families --require_index_hit --lang zh
|
||||
|
||||
# 使用自定义数据库路径
|
||||
python scripts/run_single_fna_pipeline.py \\
|
||||
--fna tests/test_data/HAN055.fna \\
|
||||
--bttoxin_db_dir /path/to/custom/bt_toxin
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -73,11 +79,27 @@ def run_single_fna_pipeline(
|
||||
allow_unknown_families: bool = True,
|
||||
require_index_hit: bool = False,
|
||||
lang: str = "zh",
|
||||
bttoxin_db_dir: Path | None = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""运行单个 fna 文件的完整 pipeline。
|
||||
|
||||
Args:
|
||||
bttoxin_db_dir: 外部 bt_toxin 数据库目录。若为 None,则自动检测
|
||||
项目根目录下的 external_dbs/bt_toxin。
|
||||
"""
|
||||
fna_path = fna_path.resolve()
|
||||
out_root = out_root.resolve()
|
||||
out_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 自动检测外部数据库
|
||||
if bttoxin_db_dir is None:
|
||||
default_db = Path(__file__).resolve().parents[1] / "external_dbs" / "bt_toxin"
|
||||
if default_db.exists() and (default_db / "db").exists():
|
||||
bttoxin_db_dir = default_db
|
||||
print(f"[pipeline] 使用外部数据库: {bttoxin_db_dir}")
|
||||
else:
|
||||
print("[pipeline] 未找到外部数据库,将使用容器内置数据库(可能较旧)")
|
||||
|
||||
digger_dir = out_root / "digger"
|
||||
shotter_dir = out_root / "shotter"
|
||||
logs_dir = out_root / "logs"
|
||||
@@ -98,6 +120,7 @@ def run_single_fna_pipeline(
|
||||
sequence_type="nucl",
|
||||
scaf_suffix=fna_path.suffix or ".fna",
|
||||
threads=4,
|
||||
bttoxin_db_dir=bttoxin_db_dir,
|
||||
)
|
||||
if not result.get("success"):
|
||||
return {
|
||||
@@ -197,6 +220,8 @@ def main() -> int:
|
||||
ap.add_argument("--disallow_unknown_families", action="store_true", default=False)
|
||||
ap.add_argument("--require_index_hit", action="store_true", default=False)
|
||||
ap.add_argument("--lang", type=str, choices=["zh", "en"], default="zh")
|
||||
ap.add_argument("--bttoxin_db_dir", type=Path, default=None,
|
||||
help="外部 bt_toxin 数据库目录路径(默认自动检测 external_dbs/bt_toxin)")
|
||||
args = ap.parse_args()
|
||||
|
||||
# derive per-run default out_root using file stem
|
||||
@@ -215,6 +240,7 @@ def main() -> int:
|
||||
allow_unknown_families=not args.disallow_unknown_families,
|
||||
require_index_hit=args.require_index_hit,
|
||||
lang=args.lang,
|
||||
bttoxin_db_dir=args.bttoxin_db_dir,
|
||||
)
|
||||
|
||||
if not res.get("ok"):
|
||||
|
||||
Reference in New Issue
Block a user