first add
This commit is contained in:
339
backend/app/utils/docker_client.py
Normal file
339
backend/app/utils/docker_client.py
Normal file
@@ -0,0 +1,339 @@
|
||||
"""Docker/Podman 容器管理(修正版,支持 arm64/macOS 与 linux/amd64)"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List
|
||||
|
||||
try:
|
||||
import docker # type: ignore
|
||||
except Exception: # 允许在无 docker SDK 环境下使用 podman fallback
|
||||
docker = None # type: ignore
|
||||
|
||||
from ..core.config import settings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _which(cmd: str) -> Optional[str]:
|
||||
from shutil import which
|
||||
return which(cmd)
|
||||
|
||||
|
||||
class DockerContainerManager:
|
||||
"""容器管理器 - 兼容 Docker 与 Podman。
|
||||
|
||||
优先尝试 docker SDK;若不可用则回落到 podman CLI(或 docker CLI)。
|
||||
在 arm64 主机上默认以 --platform linux/amd64 运行镜像。
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
image: str = settings.DOCKER_IMAGE,
|
||||
platform: str = settings.DOCKER_PLATFORM,
|
||||
) -> None:
|
||||
self.image = image
|
||||
self.platform = platform
|
||||
|
||||
self._engine: str = "docker"
|
||||
self._client = None
|
||||
|
||||
# 首选 docker-py 客户端(若可用)
|
||||
if docker is not None:
|
||||
try:
|
||||
self._client = docker.from_env()
|
||||
# 探测 daemon
|
||||
self._client.ping()
|
||||
self._engine = "docker-sdk"
|
||||
except Exception as err:
|
||||
logger.info(f"docker SDK 不可用,将尝试 CLI 回落: {err}")
|
||||
self._client = None
|
||||
|
||||
# CLI 回落:优先 podman,其次 docker
|
||||
if self._client is None:
|
||||
if _which("podman"):
|
||||
self._engine = "podman-cli"
|
||||
elif _which("docker"):
|
||||
self._engine = "docker-cli"
|
||||
else:
|
||||
raise RuntimeError("未找到可用的容器引擎(需要 podman 或 docker)")
|
||||
|
||||
self._ensure_image()
|
||||
|
||||
# ----------------------------- 公共方法 -----------------------------
|
||||
def run_command_in_container(
|
||||
self,
|
||||
command: List[str],
|
||||
volumes: Dict[str, Dict[str, str]],
|
||||
environment: Optional[Dict[str, str]] = None,
|
||||
working_dir: str = "/workspace",
|
||||
name: Optional[str] = None,
|
||||
detach: bool = False,
|
||||
remove: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""在容器中执行命令,返回执行结果。"""
|
||||
if self._engine == "docker-sdk" and self._client is not None:
|
||||
return self._run_with_docker_sdk(
|
||||
command, volumes, environment, working_dir, name, detach, remove
|
||||
)
|
||||
else:
|
||||
return self._run_with_cli(
|
||||
command, volumes, environment, working_dir, name, detach, remove
|
||||
)
|
||||
|
||||
def update_database(self, log_dir: Path) -> Dict[str, Any]:
|
||||
"""在容器中更新 BtToxin_Digger 数据库。"""
|
||||
cmd = [
|
||||
"/usr/local/env-execute",
|
||||
"BtToxin_Digger",
|
||||
"--update-db",
|
||||
]
|
||||
vols = {str(log_dir): {"bind": "/logs", "mode": "rw"}}
|
||||
result = self.run_command_in_container(
|
||||
command=cmd, volumes=vols, working_dir="/tmp", name=f"bttoxin_update_db_{int(time.time())}"
|
||||
)
|
||||
if result.get("logs"):
|
||||
(log_dir / "update_db.log").write_text(result["logs"], encoding="utf-8")
|
||||
return result
|
||||
|
||||
def validate_reads_filenames(
|
||||
self,
|
||||
input_dir: Path,
|
||||
platform: str,
|
||||
reads1_suffix: str,
|
||||
reads2_suffix: Optional[str] = None,
|
||||
suffix_len: int = 0,
|
||||
) -> Dict[str, Any]:
|
||||
files = list(input_dir.glob("*"))
|
||||
if platform == "illumina":
|
||||
r1 = [f for f in files if reads1_suffix and reads1_suffix in f.name]
|
||||
r2 = [f for f in files if reads2_suffix and reads2_suffix in f.name]
|
||||
if not r1 or not r2 or len(r1) != len(r2):
|
||||
return {"valid": False, "error": "Illumina R1/R2 配对数量不匹配或缺失"}
|
||||
for f1 in r1:
|
||||
strain = f1.name.replace(reads1_suffix, "")
|
||||
if not (input_dir / f"{strain}{reads2_suffix}").exists():
|
||||
return {"valid": False, "error": f"未找到配对文件: {strain}{reads2_suffix}"}
|
||||
return {
|
||||
"valid": True,
|
||||
"strain_count": len(r1),
|
||||
"suggested_suffix_len": suffix_len or len(reads1_suffix),
|
||||
}
|
||||
if platform in ("pacbio", "oxford"):
|
||||
r = [f for f in files if reads1_suffix and reads1_suffix in f.name]
|
||||
if not r:
|
||||
return {"valid": False, "error": f"未找到匹配 {reads1_suffix} 的 reads 文件"}
|
||||
return {
|
||||
"valid": True,
|
||||
"strain_count": len(r),
|
||||
"suggested_suffix_len": suffix_len or len(reads1_suffix),
|
||||
}
|
||||
return {"valid": True}
|
||||
|
||||
def run_bttoxin_digger(
|
||||
self,
|
||||
input_dir: Path,
|
||||
output_dir: Path,
|
||||
log_dir: Path,
|
||||
sequence_type: str = "nucl",
|
||||
scaf_suffix: str = ".fna",
|
||||
threads: int = 4,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""在容器中运行 BtToxin_Digger 主分析(工作目录挂载到 /workspace)。"""
|
||||
command: List[str] = [
|
||||
"/usr/local/env-execute",
|
||||
"BtToxin_Digger",
|
||||
"--SeqPath",
|
||||
"/data/input",
|
||||
"--SequenceType",
|
||||
sequence_type,
|
||||
"--threads",
|
||||
str(threads),
|
||||
]
|
||||
|
||||
if sequence_type == "nucl":
|
||||
command += ["--Scaf_suffix", scaf_suffix]
|
||||
elif sequence_type == "orfs":
|
||||
command += ["--orfs_suffix", kwargs.get("orfs_suffix", ".ffn")]
|
||||
elif sequence_type == "prot":
|
||||
command += ["--prot_suffix", kwargs.get("prot_suffix", ".faa")]
|
||||
elif sequence_type == "reads":
|
||||
platform = kwargs.get("platform", "illumina")
|
||||
command += ["--platform", platform]
|
||||
if platform == "illumina":
|
||||
r1 = kwargs.get("reads1_suffix", "_R1.fastq.gz")
|
||||
r2 = kwargs.get("reads2_suffix", "_R2.fastq.gz")
|
||||
sfx = kwargs.get("suffix_len") or len(r1)
|
||||
v = self.validate_reads_filenames(input_dir, platform, r1, r2, sfx)
|
||||
if not v.get("valid"):
|
||||
raise ValueError(f"Reads 文件验证失败: {v.get('error')}")
|
||||
sfx = v.get("suggested_suffix_len", sfx)
|
||||
command += ["--reads1", r1, "--reads2", r2, "--suffix_len", str(sfx)]
|
||||
elif platform in ("pacbio", "oxford"):
|
||||
r = kwargs.get("reads1_suffix", ".fastq.gz")
|
||||
gsize = kwargs.get("genome_size", "6.07m")
|
||||
sfx = kwargs.get("suffix_len") or len(r)
|
||||
v = self.validate_reads_filenames(input_dir, platform, r, None, sfx)
|
||||
if not v.get("valid"):
|
||||
raise ValueError(f"Reads 文件验证失败: {v.get('error')}")
|
||||
sfx = v.get("suggested_suffix_len", sfx)
|
||||
command += ["--reads1", r, "--genomeSize", gsize, "--suffix_len", str(sfx)]
|
||||
elif platform == "hybrid":
|
||||
short1 = kwargs.get("short1")
|
||||
short2 = kwargs.get("short2")
|
||||
long = kwargs.get("long")
|
||||
if not all([short1, short2, long]):
|
||||
raise ValueError("hybrid 需要 short1/short2/long 三个完整文件名")
|
||||
for fn in (short1, short2, long):
|
||||
if not (input_dir / fn).exists():
|
||||
raise ValueError(f"文件不存在: {fn}")
|
||||
command += [
|
||||
"--short1",
|
||||
short1,
|
||||
"--short2",
|
||||
short2,
|
||||
"--long",
|
||||
long,
|
||||
"--hout",
|
||||
"/workspace/Results/Assembles/Hybrid",
|
||||
]
|
||||
|
||||
if kwargs.get("assemble_only"):
|
||||
command.append("--assemble_only")
|
||||
|
||||
volumes = {
|
||||
str(input_dir.resolve()): {"bind": "/data/input", "mode": "ro"},
|
||||
str(output_dir.resolve()): {"bind": "/workspace", "mode": "rw"},
|
||||
str(log_dir.resolve()): {"bind": "/data/logs", "mode": "rw"},
|
||||
}
|
||||
|
||||
logger.info("开始 BtToxin_Digger 分析...")
|
||||
result = self.run_command_in_container(
|
||||
command=command,
|
||||
volumes=volumes,
|
||||
working_dir="/workspace",
|
||||
name=f"bttoxin_digger_{int(time.time())}",
|
||||
)
|
||||
|
||||
# 保存容器日志
|
||||
logs_path = log_dir / "digger_execution.log"
|
||||
if result.get("logs"):
|
||||
logs_path.write_text(result["logs"], encoding="utf-8")
|
||||
logger.info(f"容器日志已保存: {logs_path}")
|
||||
|
||||
# 验证输出
|
||||
results_dir = output_dir / "Results"
|
||||
if result.get("success") and results_dir.exists():
|
||||
files = [f for f in results_dir.rglob("*") if f.is_file()]
|
||||
result["output_files"] = len(files)
|
||||
else:
|
||||
result["output_files"] = 0
|
||||
return result
|
||||
|
||||
# ----------------------------- 内部实现 -----------------------------
|
||||
def _ensure_image(self) -> None:
|
||||
if self._engine == "docker-sdk" and self._client is not None:
|
||||
try:
|
||||
self._client.images.get(self.image)
|
||||
return
|
||||
except Exception:
|
||||
logger.info(f"拉取镜像 {self.image} (platform={self.platform}) ...")
|
||||
self._client.images.pull(self.image, platform=self.platform)
|
||||
else:
|
||||
# CLI 模式:先尝试拉取
|
||||
cli = "podman" if self._engine == "podman-cli" else "docker"
|
||||
try:
|
||||
subprocess.run(
|
||||
[cli, "pull", "--platform", self.platform, self.image],
|
||||
check=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
)
|
||||
except Exception as err:
|
||||
logger.warning(f"{cli} pull 失败: {err}")
|
||||
|
||||
def _run_with_docker_sdk(
|
||||
self,
|
||||
command: List[str],
|
||||
volumes: Dict[str, Dict[str, str]],
|
||||
environment: Optional[Dict[str, str]],
|
||||
working_dir: str,
|
||||
name: Optional[str],
|
||||
detach: bool,
|
||||
remove: bool,
|
||||
) -> Dict[str, Any]:
|
||||
assert self._client is not None
|
||||
try:
|
||||
container = self._client.containers.run(
|
||||
image=self.image,
|
||||
command=command,
|
||||
volumes=volumes,
|
||||
environment=environment or {},
|
||||
working_dir=working_dir,
|
||||
platform=self.platform,
|
||||
name=name,
|
||||
detach=detach,
|
||||
remove=False, # 等获取日志后再删
|
||||
stdout=True,
|
||||
stderr=True,
|
||||
)
|
||||
if detach:
|
||||
return {"success": True, "container_id": container.id, "status": "running"}
|
||||
exit_info = container.wait()
|
||||
code = exit_info.get("StatusCode", 1)
|
||||
logs = container.logs().decode("utf-8", errors="ignore")
|
||||
if remove:
|
||||
try:
|
||||
container.remove()
|
||||
except Exception:
|
||||
pass
|
||||
return {"success": code == 0, "exit_code": code, "logs": logs, "status": "completed" if code == 0 else "failed"}
|
||||
except Exception as e:
|
||||
logger.error(f"docker SDK 运行失败: {e}", exc_info=True)
|
||||
return {"success": False, "error": str(e), "exit_code": -1, "status": "error"}
|
||||
|
||||
def _run_with_cli(
|
||||
self,
|
||||
command: List[str],
|
||||
volumes: Dict[str, Dict[str, str]],
|
||||
environment: Optional[Dict[str, str]],
|
||||
working_dir: str,
|
||||
name: Optional[str],
|
||||
detach: bool,
|
||||
remove: bool,
|
||||
) -> Dict[str, Any]:
|
||||
cli = "podman" if self._engine == "podman-cli" else "docker"
|
||||
cmd: List[str] = [cli, "run", "--rm" if remove and not detach else ""]
|
||||
cmd = [c for c in cmd if c]
|
||||
cmd += ["--platform", self.platform]
|
||||
if name:
|
||||
cmd += ["--name", name]
|
||||
for host, spec in volumes.items():
|
||||
bind = spec.get("bind")
|
||||
mode = spec.get("mode", "rw")
|
||||
cmd += ["-v", f"{host}:{bind}:{mode}"]
|
||||
for k, v in (environment or {}).items():
|
||||
cmd += ["-e", f"{k}={v}"]
|
||||
cmd += ["-w", working_dir, self.image]
|
||||
cmd += command
|
||||
|
||||
try:
|
||||
if detach:
|
||||
# 后台运行:CLI 简化返回
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||
return {"success": True, "status": "running", "pid": p.pid}
|
||||
else:
|
||||
proc = subprocess.run(cmd, capture_output=True, text=True)
|
||||
out = (proc.stdout or "") + (proc.stderr or "")
|
||||
return {"success": proc.returncode == 0, "exit_code": proc.returncode, "logs": out, "status": "completed" if proc.returncode == 0 else "failed"}
|
||||
except Exception as e:
|
||||
logger.error(f"{cli} 运行失败: {e}", exc_info=True)
|
||||
return {"success": False, "error": str(e), "exit_code": -1, "status": "error"}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user