- 新增 scripts/bttoxin_shoter.py:从 BPPRC 正样本 CSV 构建 name/亚家族/家族特异性索引, 解析 BtToxin_Digger All_Toxins.txt,计算 per-hit 权重并以 noisy-OR 合成菌株×目标目/物种分数, 输出 TSV/JSON;含 HMM 加成与配对毒素规则(Vip1/Vip2,Vpa/Vpb),other/unknown 桶。 - 新增端到端工具链: - scripts/run_single_fna_pipeline.py:Digger → Shotter → Plot → 打包 - scripts/plot_shotter.py:绘制热图并生成论文式/摘要式报告 - scripts/bttoxin_api.py 与 bttoxin/api.py:纯 Python API;bttoxin/cli.py 暴露 bttoxin-run - pyproject.toml:项目打包与 CLI 入口 - docs(README): 增加输入文件格式与结果解读,补充单目录写入方案 - chore(gitignore): 忽略 runs/ 与 tests/output - ci: 移除 .woodpecker/test.yml
544 lines
22 KiB
Python
544 lines
22 KiB
Python
#!/usr/bin/env python3
"""
Plotting utilities for Bttoxin_Shoter outputs.

Generates heatmaps for:
- Strain × Target Orders scores (from strain_target_scores.tsv)
- Optional per-hit contributions for one strain (from toxin_support.tsv)
- Optional Strain × Target Species scores (from strain_target_species_scores.tsv)

Can also write a Markdown report with:
- summary mode: brief overview
- paper mode: expanded, paper-like sections (Abstract, Methods, Results, Discussion, Params, Appendix)

Dependencies:
- pandas, matplotlib; seaborn is optional (for nicer heatmaps). If seaborn is
  missing, a pure-matplotlib fallback will be used.
"""
|
||
from __future__ import annotations

import argparse
import math
import os
from pathlib import Path
from typing import List, Optional

import matplotlib.pyplot as plt
import pandas as pd

try:
    import seaborn as sns  # type: ignore
    _HAS_SNS = True
except Exception:
    _HAS_SNS = False
|
||
|
||
|
||
def _parse_figsize(s: str | None, default=(12, 6)):
|
||
if not s:
|
||
return default
|
||
try:
|
||
w, h = s.lower().split("x")
|
||
return (float(w), float(h))
|
||
except Exception:
|
||
return default
|
||
|
||
|
||
def _merge_unresolved_columns(df: pd.DataFrame, merge_unresolved: bool) -> pd.DataFrame:
|
||
if not merge_unresolved:
|
||
return df
|
||
cols = df.columns.tolist()
|
||
has_other = "other" in cols
|
||
has_unknown = "unknown" in cols
|
||
if has_other or has_unknown:
|
||
df = df.copy()
|
||
df["unresolved"] = df.get("other", 0.0) + df.get("unknown", 0.0)
|
||
drop_cols = [c for c in ("other", "unknown") if c in df.columns]
|
||
df = df.drop(columns=drop_cols)
|
||
return df
|
||
|
||
|
||
def plot_strain_scores(strain_scores_path: Path, out_path: Path, cmap: str, vmin: float, vmax: float, figsize: str | None, merge_unresolved: bool):
    """Draw the strain × target-order heatmap and save it to *out_path*.

    Args:
        strain_scores_path: Tab-separated file with a ``Strain`` column; every
            column other than ``Strain``/``TopOrder``/``TopScore`` is plotted.
        out_path: Image file to write; parent directories are created.
        cmap: Colormap name passed to the heatmap.
        vmin: Lower bound of the colour scale.
        vmax: Upper bound of the colour scale.
        figsize: Optional ``"WxH"`` string; when falsy the size is derived
            from the matrix dimensions.
        merge_unresolved: Merge ``other``/``unknown`` into ``unresolved``
            before plotting.
    """
    df = pd.read_csv(strain_scores_path, sep="\t")
    base_cols = {"Strain", "TopOrder", "TopScore"}
    orders = [c for c in df.columns if c not in base_cols]
    mat = df.set_index("Strain")[orders]
    mat = _merge_unresolved_columns(mat, merge_unresolved)
    orders = list(mat.columns)

    # Dynamic figure size if not provided: grow with the number of columns/rows.
    if figsize:
        size = _parse_figsize(figsize)
    else:
        size = (max(8, 0.6 * len(orders) + 3), max(4, 0.35 * len(mat) + 2))

    fig = plt.figure(figsize=size)
    try:
        if _HAS_SNS:
            ax = sns.heatmap(mat, cmap=cmap, vmin=vmin, vmax=vmax, cbar_kws={"label": "score"})
        else:
            # Pure-matplotlib fallback: imshow does not label ticks itself.
            ax = plt.gca()
            im = ax.imshow(mat.values, aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax)
            plt.colorbar(im, ax=ax, label="score")
            ax.set_xticks(range(len(orders)))
            ax.set_xticklabels(orders, rotation=45, ha="right")
            ax.set_yticks(range(len(mat.index)))
            ax.set_yticklabels(mat.index)
        ax.set_title("Strain vs Target Orders (Shotter)")
        plt.tight_layout()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, dpi=200)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|
||
|
||
|
||
def plot_per_hit_for_strain(toxin_support_path: Path, strain: str, out_path: Path, cmap: str, vmin: float, vmax: float, figsize: str | None, merge_unresolved: bool):
    """Draw the per-hit contribution heatmap for one strain and save it.

    Rows are the ``Hit_id`` values of the selected strain; columns are every
    column of the support table that is not one of the known metadata columns.
    When the strain has no rows, a message is printed and nothing is written.

    Args:
        toxin_support_path: Tab-separated per-hit support file.
        strain: Strain name, compared as a string against the ``Strain`` column.
        out_path: Image file to write; parent directories are created.
        cmap: Colormap name passed to the heatmap.
        vmin: Lower bound of the colour scale.
        vmax: Upper bound of the colour scale.
        figsize: Optional ``"WxH"`` string; when falsy the size is derived
            from the matrix dimensions.
        merge_unresolved: Merge ``other``/``unknown`` into ``unresolved``
            before plotting.
    """
    hits = pd.read_csv(toxin_support_path, sep="\t")
    sub = hits[hits["Strain"].astype(str).eq(strain)].copy()
    if sub.empty:
        print(f"[plot_shotter] No hits for strain: {strain}")
        return

    base_cols = {
        "Strain", "Protein_id", "Hit_id", "Identity", "Aln_length", "Hit_length", "Coverage",
        "HMM", "Family", "NameKey", "PartnerFulfilled", "Weight", "TopOrder", "TopScore",
    }
    orders = [c for c in sub.columns if c not in base_cols]
    mat = sub.set_index("Hit_id")[orders]
    mat = _merge_unresolved_columns(mat, merge_unresolved)
    orders = list(mat.columns)

    # Dynamic figure size if not provided: grow with the number of columns/rows.
    if figsize:
        size = _parse_figsize(figsize)
    else:
        size = (max(8, 0.6 * len(orders) + 3), max(4, 0.35 * len(mat) + 2))

    fig = plt.figure(figsize=size)
    try:
        if _HAS_SNS:
            ax = sns.heatmap(mat, cmap=cmap, vmin=vmin, vmax=vmax, cbar_kws={"label": "contrib"})
        else:
            # Pure-matplotlib fallback: imshow does not label ticks itself.
            ax = plt.gca()
            im = ax.imshow(mat.values, aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax)
            plt.colorbar(im, ax=ax, label="contrib")
            ax.set_xticks(range(len(orders)))
            ax.set_xticklabels(orders, rotation=45, ha="right")
            ax.set_yticks(range(len(mat.index)))
            ax.set_yticklabels(mat.index)
        ax.set_title(f"Per-hit contributions for {strain}")
        plt.tight_layout()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, dpi=200)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|
||
|
||
|
||
def plot_species_scores(species_scores_path: Path, out_path: Path, cmap: str, vmin: float, vmax: float, figsize: str | None):
    """Draw the strain × target-species heatmap and save it to *out_path*.

    When the table has no columns besides ``Strain``/``TopSpecies``/
    ``TopSpeciesScore``, a message is printed and nothing is written.

    Args:
        species_scores_path: Tab-separated species score file with a
            ``Strain`` column.
        out_path: Image file to write; parent directories are created.
        cmap: Colormap name passed to the heatmap.
        vmin: Lower bound of the colour scale.
        vmax: Upper bound of the colour scale.
        figsize: Optional ``"WxH"`` string; when falsy the size is derived
            from the matrix dimensions.
    """
    df = pd.read_csv(species_scores_path, sep="\t")
    species_cols = [c for c in df.columns if c not in ("Strain", "TopSpecies", "TopSpeciesScore")]
    if not species_cols:
        print("[plot_shotter] No species columns found; skip species heatmap")
        return
    mat = df.set_index("Strain")[species_cols]

    if figsize:
        size = _parse_figsize(figsize)
    else:
        size = (max(8, 0.6 * len(species_cols) + 3), max(4, 0.35 * len(mat) + 2))

    fig = plt.figure(figsize=size)
    try:
        if _HAS_SNS:
            ax = sns.heatmap(mat, cmap=cmap, vmin=vmin, vmax=vmax, cbar_kws={"label": "score"})
        else:
            # Pure-matplotlib fallback: imshow does not label ticks itself.
            ax = plt.gca()
            im = ax.imshow(mat.values, aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax)
            plt.colorbar(im, ax=ax, label="score")
            ax.set_xticks(range(len(species_cols)))
            ax.set_xticklabels(species_cols, rotation=45, ha="right")
            ax.set_yticks(range(len(mat.index)))
            ax.set_yticklabels(mat.index)
        ax.set_title("Strain vs Target Species (Shotter)")
        plt.tight_layout()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, dpi=200)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|
||
|
||
|
||
def _percent(x: float) -> str:
|
||
try:
|
||
return f"{100.0*float(x):.1f}%"
|
||
except Exception:
|
||
return "NA"
|
||
|
||
|
||
def _fmtf(x, nd=3):
|
||
try:
|
||
return f"{float(x):.{nd}f}"
|
||
except Exception:
|
||
return "NA"
|
||
|
||
|
||
def _labels(lang: str):
|
||
zh = {
|
||
"title": "Bttoxin Shotter 论文式报告",
|
||
"summary": "# Bttoxin Shotter Summary",
|
||
"params": "## 参数",
|
||
"abstract": "## 摘要",
|
||
"methods": "## 数据与方法",
|
||
"results": "## 结果",
|
||
"discussion": "## 讨论与局限",
|
||
"appendix": "## 附录",
|
||
"top_targets": "## 各菌株 TopOrder/TopScore",
|
||
"heat_orders": "## 热图:菌株 × 目标目",
|
||
"heat_species": "## 热图:菌株 × 目标物种",
|
||
"perhit": "## 命中 TopOrder/TopScore(前50)",
|
||
"notes": "## 说明",
|
||
"unresolved_ratio": "未解析贡献(other+unknown)整体占比",
|
||
"order_distribution": "整体目标目分布(按合成分数求和排名前5):",
|
||
"hits_stats": "命中统计",
|
||
"n_strains": "菌株数",
|
||
"n_hits": "命中条数",
|
||
"avg_weight": "平均权重",
|
||
"table_header_strain": "Strain | TopOrder | TopScore",
|
||
"table_header_hit": "Hit_id | Family | TopOrder | TopScore | Weight",
|
||
"table_sep": "--- | --- | ---",
|
||
"table_sep_hit": "--- | --- | --- | --- | ---",
|
||
"methods_text": (
|
||
"- 输入:BtToxin_Digger 的 All_Toxins.txt;BPPRC 特异性 CSV(activity=Yes)。\n"
|
||
"- 特异性索引:按 name→亚家族→家族聚合,target_order/target_species 以加权和归一构成分布。\n"
|
||
"- 相似性权重 w_hit:若 identity≥0.78 且 coverage≥0.8 则 base=1;若 0.45≤identity<0.78 则线性插值;否则 0。"
|
||
"最终 w=min(1, base×coverage + 0.1×I(HMM))). 若需配对而未满足,w×=0.2。\n"
|
||
"- 单命中贡献:c(order)=w_hit×P(order|·)。\n"
|
||
"- 菌株层合成:noisy-OR,score(order)=1-∏(1-c_i(order))。\n"
|
||
"- 物种维度:同上(若 CSV 含 target_species)。\n"
|
||
"- 分类含义:other=家族可解析但无证据;unknown=家族不可解析。\n"
|
||
),
|
||
"discussion_text": (
|
||
"- unresolved 偏高常见于:命名异常、家族稀缺、索引覆盖不足。\n"
|
||
"- 可用更严格阈值或 --require_index_hit 减少伪阳性;也可 --merge_unresolved 合并展示。\n"
|
||
"- 名称差异:Digger 与 BPPRC 均含 Cry 之外家族(Tpp/Spp/Vip 等),采用 name→亚家族→家族回退映射。\n"
|
||
),
|
||
"notes_items": [
|
||
"TopOrder/TopScore:最高目标目及其分数(命中或菌株)。",
|
||
"contrib:单命中对某目标目的贡献 w_hit×P(order|·)。",
|
||
"unresolved=other+unknown,仅为可视化合并开关,不改变计算。",
|
||
],
|
||
}
|
||
en = {
|
||
"title": "Bttoxin Shotter Paper-style Report",
|
||
"summary": "# Bttoxin Shotter Summary",
|
||
"params": "## Parameters",
|
||
"abstract": "## Abstract",
|
||
"methods": "## Data and Methods",
|
||
"results": "## Results",
|
||
"discussion": "## Discussion and Limitations",
|
||
"appendix": "## Appendix",
|
||
"top_targets": "## Top targets per strain",
|
||
"heat_orders": "## Heatmap: Strain × Orders",
|
||
"heat_species": "## Heatmap: Strain × Species",
|
||
"perhit": "## Per-hit TopOrder/TopScore (Top 50)",
|
||
"notes": "## Notes",
|
||
"unresolved_ratio": "Overall unresolved (other+unknown) fraction",
|
||
"order_distribution": "Overall order distribution (top 5 by summed score):",
|
||
"hits_stats": "Hit statistics",
|
||
"n_strains": "#Strains",
|
||
"n_hits": "#Hits",
|
||
"avg_weight": "Avg weight",
|
||
"table_header_strain": "Strain | TopOrder | TopScore",
|
||
"table_header_hit": "Hit_id | Family | TopOrder | TopScore | Weight",
|
||
"table_sep": "--- | --- | ---",
|
||
"table_sep_hit": "--- | --- | --- | --- | ---",
|
||
"methods_text": (
|
||
"- Inputs: All_Toxins.txt from BtToxin_Digger; BPPRC specificity CSV (activity=Yes).\n"
|
||
"- Specificity index: aggregate name→subfamily→family; build P(order/species|·) by weighted sums and normalization.\n"
|
||
"- Similarity weight w_hit: base=1 if identity≥0.78 & coverage≥0.8; linear if 0.45≤identity<0.78; else 0.\n"
|
||
" Final w=min(1, base×coverage + 0.1×I(HMM))). Partner missing → w×=0.2.\n"
|
||
"- Per-hit contribution: c(order)=w_hit×P(order|·).\n"
|
||
"- Strain level: noisy-OR, score(order)=1-∏(1-c_i(order)).\n"
|
||
"- Species dimension: same if target_species present.\n"
|
||
"- Buckets: other=parseable family but no evidence; unknown=unparseable family.\n"
|
||
),
|
||
"discussion_text": (
|
||
"- High unresolved is common with unusual names, rare families, or limited index coverage.\n"
|
||
"- Use stricter thresholds or --require_index_hit to reduce false positives; --merge_unresolved for visualization only.\n"
|
||
"- Naming: both Digger and BPPRC include non-Cry families (Tpp/Spp/Vip etc.), mapped via name→subfamily→family backoff.\n"
|
||
),
|
||
"notes_items": [
|
||
"TopOrder/TopScore: max target order and score (per hit or strain).",
|
||
"contrib: per-hit contribution w_hit×P(order|·).",
|
||
"unresolved=other+unknown merge is cosmetic; computation unchanged.",
|
||
],
|
||
}
|
||
return zh if lang == "zh" else en
|
||
|
||
|
||
def write_summary_md(
    out_path: Path,
    strain_scores_path: Path,
    toxin_support_path: Optional[Path],
    species_scores_path: Optional[Path],
    strain_heatmap_path: Optional[Path],
    per_hit_heatmap_path: Optional[Path],
    species_heatmap_path: Optional[Path],
    merge_unresolved: bool,
    args_namespace,
):
    """Write the brief ("summary" mode) Markdown report.

    Sections, in order: title, parameters, per-strain top-target table with
    the overall unresolved fraction, order heatmap, species heatmap, per-hit
    table (top 50 by ``TopScore``) and notes. A table that cannot be built is
    replaced by a ``[warn]`` line instead of aborting the report.

    Args:
        out_path: Markdown file to write (UTF-8); parent directories are created.
        strain_scores_path: Tab-separated strain score file.
        toxin_support_path: Optional tab-separated per-hit support file.
        species_scores_path: Optional species score file; its existence
            controls whether the species section is emitted.
        strain_heatmap_path: Optional order heatmap image to embed.
        per_hit_heatmap_path: Accepted for interface compatibility; this
            function does not embed it.
        species_heatmap_path: Optional species heatmap image to embed.
        merge_unresolved: Echoed in the parameter list.
        args_namespace: Object whose ``lang``, ``allow_unknown_families``,
            ``require_index_hit``, ``min_identity`` and ``min_coverage``
            attributes are read when present.
    """

    def _image_md(image_path: Path) -> str:
        # Link relative to the report's folder so the image still resolves
        # when the report is written somewhere other than the image folder.
        try:
            rel = os.path.relpath(image_path, out_path.parent)
        except ValueError:
            # No relative path exists (e.g. different drives on Windows).
            rel = str(image_path)
        return f"![{image_path.stem}]({Path(rel).as_posix()})"

    labels = _labels(getattr(args_namespace, "lang", "zh"))
    lines: List[str] = [labels["summary"], ""]

    # Parameters
    lines.append(labels["params"])
    for key in ("allow_unknown_families", "require_index_hit", "min_identity", "min_coverage"):
        lines.append(f"- {key}: {getattr(args_namespace, key, 'NA')}")
    lines.append(f"- merge_unresolved: {merge_unresolved}")
    lines.append("")

    # Top per strain table. Everything is computed before any line is
    # emitted so a failure never leaves a half-written table behind.
    try:
        df = pd.read_csv(strain_scores_path, sep="\t")
        cols = [c for c in df.columns if c not in ("Strain", "TopOrder", "TopScore")]
        unresolved_cols = [c for c in ("other", "unknown") if c in cols]
        unresolved_total = df[unresolved_cols].sum(axis=1).sum() if unresolved_cols else 0.0
        total_sum = df[cols].sum(axis=1).sum() if cols else 0.0
        frac_unresolved = (unresolved_total / total_sum) if total_sum > 0 else 0.0
        strain_rows = [
            f"{r['Strain']} | {r.get('TopOrder', '')} | {_fmtf(r.get('TopScore', 0.0))}"
            for _, r in df.iterrows()
        ]
    except Exception as e:
        # Report generation is best-effort; the failure is recorded in the output.
        lines.append(f"[warn] Failed to read strain scores: {e}")
    else:
        lines.append(labels["top_targets"])
        lines.append("")
        lines.append(labels["table_header_strain"])
        lines.append(labels["table_sep"])
        lines.extend(strain_rows)
        lines.append("")
        lines.append(f"{labels['unresolved_ratio']}: {_percent(frac_unresolved)}")
        lines.append("")

    if strain_heatmap_path and strain_heatmap_path.exists():
        lines.append(labels["heat_orders"])
        lines.append(_image_md(strain_heatmap_path))
        lines.append("")

    if species_scores_path and species_scores_path.exists():
        lines.append(labels["heat_species"])
        if species_heatmap_path and species_heatmap_path.exists():
            lines.append(_image_md(species_heatmap_path))
        else:
            lines.append("Species heatmap not generated.")
        lines.append("")

    # Per-hit summary
    if toxin_support_path and toxin_support_path.exists():
        try:
            hits = pd.read_csv(toxin_support_path, sep="\t")
            top_hits = hits.sort_values("TopScore", ascending=False).head(50)
            hit_rows = [
                f"{r['Hit_id']} | {r.get('Family', '')} | {r.get('TopOrder', '')} | "
                f"{_fmtf(r.get('TopScore', 0.0))} | {_fmtf(r.get('Weight', 0.0))}"
                for _, r in top_hits.iterrows()
            ]
        except Exception as e:
            # Best-effort, as for the strain table.
            lines.append(f"[warn] Failed to read toxin support: {e}")
        else:
            lines.append(labels["perhit"])
            lines.append("")
            lines.append(labels["table_header_hit"])
            lines.append(labels["table_sep_hit"])
            lines.extend(hit_rows)
            lines.append("")

    # Notes
    lines.append(labels["notes"])
    lines.extend(f"- {item}" for item in labels["notes_items"])

    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(lines), encoding="utf-8")
|
||
|
||
|
||
def write_report_md(
    out_path: Path,
    mode: str,
    lang: str,
    strain_scores_path: Path,
    toxin_support_path: Optional[Path],
    species_scores_path: Optional[Path],
    strain_heatmap_path: Optional[Path],
    per_hit_heatmap_path: Optional[Path],
    species_heatmap_path: Optional[Path],
    merge_unresolved: bool,
    args_namespace,
):
    """Write the Markdown report in the requested style.

    ``mode == "summary"`` delegates to ``write_summary_md``. Any other mode
    produces the paper-style layout: title, abstract, methods, results
    (per-strain table and heatmaps), discussion, parameters and an appendix
    with the top 50 hits by ``TopScore``. Unreadable inputs produce
    ``[warn]`` lines instead of aborting the report.

    Args:
        out_path: Markdown file to write (UTF-8); parent directories are created.
        mode: ``"summary"`` or the paper-style default.
        lang: Language code handed to ``_labels``.
        strain_scores_path: Tab-separated strain score file.
        toxin_support_path: Optional tab-separated per-hit support file.
        species_scores_path: Only forwarded to ``write_summary_md``.
        strain_heatmap_path: Optional order heatmap image to embed.
        per_hit_heatmap_path: Only forwarded to ``write_summary_md``.
        species_heatmap_path: Optional species heatmap image to embed.
        merge_unresolved: Echoed in the parameter list; when true the
            ``other``/``unknown`` buckets are left out of the top-5 ranking.
        args_namespace: Object whose ``allow_unknown_families``,
            ``require_index_hit``, ``min_identity`` and ``min_coverage``
            attributes are read when present.
    """
    if mode == "summary":
        return write_summary_md(
            out_path,
            strain_scores_path,
            toxin_support_path,
            species_scores_path,
            strain_heatmap_path,
            per_hit_heatmap_path,
            species_heatmap_path,
            merge_unresolved,
            args_namespace,
        )

    def _image_md(image_path: Path) -> str:
        # Link relative to the report's folder so the image still resolves
        # when the report is written somewhere other than the image folder.
        try:
            rel = os.path.relpath(image_path, out_path.parent)
        except ValueError:
            # No relative path exists (e.g. different drives on Windows).
            rel = str(image_path)
        return f"![{image_path.stem}]({Path(rel).as_posix()})"

    L = _labels(lang)
    lines: List[str] = [L["title"], ""]

    # Load each input once; the abstract, results and appendix share them.
    strains_df = None
    strains_error = None
    try:
        strains_df = pd.read_csv(strain_scores_path, sep="\t")
    except Exception as e:
        strains_error = e

    hits_df = None
    hits_error = None
    has_hits_file = bool(toxin_support_path and toxin_support_path.exists())
    if has_hits_file:
        try:
            hits_df = pd.read_csv(toxin_support_path, sep="\t")
        except Exception as e:
            # Surfaced as a [warn] line in the appendix below.
            hits_error = e

    # Abstract: brief auto-summary
    n_strains, frac_unres, top_global = 0, 0.0, []
    if strains_df is not None:
        n_strains = len(strains_df)
        try:
            base_cols = {"Strain", "TopOrder", "TopScore"}
            order_cols = [c for c in strains_df.columns if c not in base_cols]
            if order_cols:
                col_sums = strains_df[order_cols].sum(axis=0)
                unresolved_cols = [c for c in ("other", "unknown") if c in order_cols]
                total = col_sums.sum()
                if total > 0:
                    frac_unres = col_sums[unresolved_cols].sum() / total
                # Drop the unresolved buckets BEFORE taking the top five so
                # the list still holds five real orders when they are hidden.
                ranked = col_sums.drop(labels=unresolved_cols) if merge_unresolved else col_sums
                top_global = [
                    f"{name}:{_fmtf(value)}"
                    for name, value in ranked.sort_values(ascending=False).head(5).items()
                ]
        except Exception:
            # Statistics are best-effort (e.g. non-numeric score columns);
            # the per-strain table in Results is still attempted.
            frac_unres, top_global = 0.0, []

    n_hits, avg_w = 0, 0.0
    if hits_df is not None:
        n_hits = len(hits_df)
        if "Weight" in hits_df.columns and n_hits > 0:
            # Coerce so a stray non-numeric weight cannot break the mean.
            avg_w = float(pd.to_numeric(hits_df["Weight"], errors="coerce").mean())

    lines.append(L["abstract"])
    lines.append(
        f"- {L['n_strains']}: {n_strains}; {L['n_hits']}: {n_hits}; {L['avg_weight']}: {_fmtf(avg_w)}."
    )
    if top_global:
        lines.append(f"- {L['order_distribution']} " + ", ".join(top_global))
    lines.append(f"- {L['unresolved_ratio']}: {_percent(frac_unres)}")
    lines.append("")

    # Methods
    lines.append(L["methods"])
    lines.append(L["methods_text"])

    # Results
    lines.append(L["results"])
    if strains_df is None:
        lines.append(f"[warn] Failed to read strain scores: {strains_error}")
    else:
        try:
            strain_rows = [
                f"{r['Strain']} | {r.get('TopOrder', '')} | {_fmtf(r.get('TopScore', 0.0))}"
                for _, r in strains_df.iterrows()
            ]
        except Exception as e:
            lines.append(f"[warn] Failed to read strain scores: {e}")
        else:
            lines.append(L["top_targets"])
            lines.append("")
            lines.append(L["table_header_strain"])
            lines.append(L["table_sep"])
            lines.extend(strain_rows)
            lines.append("")

    if strain_heatmap_path and strain_heatmap_path.exists():
        lines.append(L["heat_orders"])
        lines.append(_image_md(strain_heatmap_path))
        lines.append("")
    if species_heatmap_path and species_heatmap_path.exists():
        lines.append(L["heat_species"])
        lines.append(_image_md(species_heatmap_path))
        lines.append("")

    # Discussion
    lines.append(L["discussion"])
    lines.append(L["discussion_text"])

    # Params
    lines.append(L["params"])
    lines.append(
        f"- allow_unknown_families: {getattr(args_namespace, 'allow_unknown_families', 'NA')}\n"
        f"- require_index_hit: {getattr(args_namespace, 'require_index_hit', 'NA')}\n"
        f"- min_identity: {getattr(args_namespace, 'min_identity', 'NA')}\n"
        f"- min_coverage: {getattr(args_namespace, 'min_coverage', 'NA')}\n"
        f"- merge_unresolved: {merge_unresolved}"
    )
    lines.append("")

    # Appendix
    if has_hits_file:
        if hits_df is None:
            lines.append(f"[warn] Failed to read toxin support: {hits_error}")
        else:
            try:
                top_hits = hits_df.sort_values("TopScore", ascending=False).head(50)
                hit_rows = [
                    f"{r['Hit_id']} | {r.get('Family', '')} | {r.get('TopOrder', '')} | "
                    f"{_fmtf(r.get('TopScore', 0.0))} | {_fmtf(r.get('Weight', 0.0))}"
                    for _, r in top_hits.iterrows()
                ]
            except Exception as e:
                lines.append(f"[warn] Failed to read toxin support: {e}")
            else:
                lines.append(L["appendix"])
                lines.append(L["perhit"])
                lines.append("")
                lines.append(L["table_header_hit"])
                lines.append(L["table_sep_hit"])
                lines.extend(hit_rows)
                lines.append("")

    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(lines), encoding="utf-8")
|
||
|
||
|
||
def main(argv: Optional[List[str]] = None):
    """Command-line entry point: draw the heatmaps and write the report.

    Always draws the strain × order heatmap. The per-hit heatmap is drawn
    when both ``--per_hit_strain`` and ``--toxin_support`` are given, and the
    species heatmap when ``--species_scores`` points to an existing file.
    Finally the Markdown report is written to ``--summary_md`` or to a
    mode-dependent default name inside ``--out_dir``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process command line.
    """
    ap = argparse.ArgumentParser(description="Plot heatmaps from Bttoxin_Shoter outputs")
    ap.add_argument("--strain_scores", type=Path, default=Path("shotter_outputs/strain_target_scores.tsv"))
    ap.add_argument("--toxin_support", type=Path, default=None)
    ap.add_argument("--species_scores", type=Path, default=None)
    ap.add_argument("--out_dir", type=Path, default=Path("shotter_outputs"))
    ap.add_argument("--cmap", type=str, default="viridis")
    ap.add_argument("--vmin", type=float, default=0.0)
    ap.add_argument("--vmax", type=float, default=1.0)
    ap.add_argument("--figsize", type=str, default=None, help="e.g. 12x6")
    ap.add_argument("--per_hit_strain", type=str, default=None, help="If provided, also draw per-hit heatmap for this strain (requires --toxin_support)")
    ap.add_argument("--merge_unresolved", action="store_true", default=False, help="Merge columns 'other' and 'unknown' into 'unresolved' for plots")
    ap.add_argument("--summary_md", type=Path, default=None, help="Write a Markdown report to this path")
    ap.add_argument("--report_mode", type=str, choices=["summary", "paper"], default="paper", help="Report template style")
    ap.add_argument("--lang", type=str, choices=["zh", "en"], default="zh", help="Report language")
    args = ap.parse_args(argv)

    args.out_dir.mkdir(parents=True, exist_ok=True)

    # Strain × Orders heatmap
    out1 = args.out_dir / "strain_target_scores.png"
    plot_strain_scores(args.strain_scores, out1, args.cmap, args.vmin, args.vmax, args.figsize, args.merge_unresolved)
    print(f"Saved: {out1}")

    # Optional per-hit heatmap
    per_hit_png: Optional[Path] = None
    if args.per_hit_strain and args.toxin_support:
        per_hit_png = args.out_dir / f"per_hit_{args.per_hit_strain}.png"
        plot_per_hit_for_strain(args.toxin_support, args.per_hit_strain, per_hit_png, args.cmap, args.vmin, args.vmax, args.figsize, args.merge_unresolved)
        # The plot function returns without writing when the strain has no
        # hits, so only announce a file that is actually there.
        if per_hit_png.exists():
            print(f"Saved: {per_hit_png}")

    # Optional species heatmap
    species_png: Optional[Path] = None
    if args.species_scores and args.species_scores.exists():
        species_png = args.out_dir / "strain_target_species_scores.png"
        plot_species_scores(args.species_scores, species_png, args.cmap, args.vmin, args.vmax, args.figsize)
        # Skipped when the table has no species columns.
        if species_png.exists():
            print(f"Saved: {species_png}")

    # Report
    default_name = "shotter_report_paper.md" if args.report_mode == "paper" else "shotter_summary.md"
    summary_md = args.summary_md or (args.out_dir / default_name)
    write_report_md(
        out_path=summary_md,
        mode=args.report_mode,
        lang=args.lang,
        strain_scores_path=args.strain_scores,
        toxin_support_path=args.toxin_support,
        species_scores_path=args.species_scores,
        strain_heatmap_path=out1,
        per_hit_heatmap_path=per_hit_png,
        species_heatmap_path=species_png,
        merge_unresolved=args.merge_unresolved,
        args_namespace=args,
    )
    print(f"Saved: {summary_md}")
|
||
|
||
|
||
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|