Fix(pipeline): optimize docker build, fix zip structure, and update UI
- Docker:
  - Explicitly install pixi environments (digger, pipeline, webbackend) during build to prevent runtime network/DNS failures.
  - Optimize pnpm config (copy method) to fix EAGAIN errors.
- Backend:
  - Refactor ZIP bundling: use flat semantic directories (1_Toxin_Mining, etc.).
  - Fix "nested zip" issue by cleaning existing archives before bundling.
  - Exclude raw 'context' directory from final download.
- Frontend:
  - Update TutorialView documentation to match new result structure.
  - Improve TaskMonitor progress bar precision (1 decimal place).
  - Update i18n (en/zh) for new file descriptions.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -126,42 +126,60 @@ def run_bttoxin_analysis(
|
||||
logger.info(f"Job {job_id}: Creating zip bundle")
|
||||
zip_path = output_path / f"pipeline_results_{job_id}.zip"
|
||||
|
||||
# 需要打包的子目录
|
||||
subdirs_to_zip = ["digger", "shoter", "logs"]
|
||||
# 在创建新 ZIP 前,删除目录下任何现有的 zip/tar.gz 文件,防止递归打包
|
||||
for existing_archive in output_path.glob("*.zip"):
|
||||
try:
|
||||
existing_archive.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
for existing_archive in output_path.glob("*.tar.gz"):
|
||||
try:
|
||||
existing_archive.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 定义映射关系:原始目录 -> 压缩包内展示名称
|
||||
dir_mapping = {
|
||||
"digger": "1_Toxin_Mining",
|
||||
"shotter": "2_Toxicity_Scoring",
|
||||
"logs": "Logs"
|
||||
}
|
||||
|
||||
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
||||
# 添加输入文件
|
||||
zipf.write(input_file, arcname=input_file.name)
|
||||
# 1. 添加输入文件 (放入 Input 目录)
|
||||
zipf.write(input_file, arcname=f"Input/{input_file.name}")
|
||||
|
||||
# 添加结果目录
|
||||
for subdir_name in subdirs_to_zip:
|
||||
subdir_path = output_path / subdir_name
|
||||
if subdir_path.exists():
|
||||
for root, dirs, files in os.walk(subdir_path):
|
||||
# 2. 添加结果目录 (重命名)
|
||||
for src_name, dest_name in dir_mapping.items():
|
||||
src_path = output_path / src_name
|
||||
if src_path.exists():
|
||||
for root, dirs, files in os.walk(src_path):
|
||||
for file in files:
|
||||
file_path = Path(root) / file
|
||||
# 保持相对路径结构
|
||||
arcname = file_path.relative_to(output_path)
|
||||
# 排除压缩包自己(如果有意外情况)
|
||||
if file_path == zip_path:
|
||||
continue
|
||||
|
||||
# 计算相对路径,例如 digger/Results/foo.txt -> Results/foo.txt
|
||||
rel_path = file_path.relative_to(src_path)
|
||||
# 构造新的归档路径 -> 1_Toxin_Mining/Results/foo.txt
|
||||
arcname = Path(dest_name) / rel_path
|
||||
zipf.write(file_path, arcname=str(arcname))
|
||||
|
||||
# 删除原始结果目录 (保留 logs 以便调试? 或者也删除)
|
||||
# 根据需求:只保留压缩包
|
||||
# 删除原始结果目录
|
||||
logger.info(f"Job {job_id}: Cleaning up intermediate files")
|
||||
for subdir_name in subdirs_to_zip:
|
||||
subdir_path = output_path / subdir_name
|
||||
if subdir_path.exists():
|
||||
shutil.rmtree(subdir_path)
|
||||
# 需要清理的原始目录名
|
||||
dirs_to_clean = ["digger", "shotter", "context", "logs", "stage"]
|
||||
for d in dirs_to_clean:
|
||||
d_path = output_path / d
|
||||
if d_path.exists():
|
||||
shutil.rmtree(d_path)
|
||||
|
||||
# 删除 tar.gz (如果脚本生成了)
|
||||
tar_gz = output_path / "pipeline_results.tar.gz"
|
||||
if tar_gz.exists():
|
||||
tar_gz.unlink()
|
||||
|
||||
# 移除 stage 目录 (run_single_fna_pipeline 生成的)
|
||||
stage_dir = output_path / "stage"
|
||||
if stage_dir.exists():
|
||||
shutil.rmtree(stage_dir)
|
||||
|
||||
# 验证 Zip 是否生成
|
||||
if not zip_path.exists():
|
||||
raise Exception("Failed to create result zip file")
|
||||
|
||||
Reference in New Issue
Block a user