#!/bin/bash
#
# Merge the CSV parts written by the parallel prediction workers into one CSV
# file, then print summary statistics about the merged result.
#
# Usage: bash merge_results.sh [TEMP_DIR] [OUTPUT_FILE] [NUM_PARTS]
#   TEMP_DIR     directory holding part_0.csv .. part_<NUM_PARTS-1>.csv
#   OUTPUT_FILE  path of the merged CSV to create
#   NUM_PARTS    number of part files to merge (default: 4)
# Called without arguments, the script uses the original hard-coded paths and
# part count.
#
# Exit status: 0 when the merge succeeded (even if the optional statistics
# step failed), 1 on any validation or merge error.

set -euo pipefail

readonly TEMP_DIR="${1:-Data/fragment/Frags-Enamine-18M_temp}"
readonly OUTPUT_FILE="${2:-Data/fragment/Frags-Enamine-18M_predicted.csv}"
readonly NUM_PARTS="${3:-4}"
readonly RULE="============================================================"

# The merge is written here first and renamed at the end, so an interrupted
# run never leaves a truncated file under the final name.
readonly TMP_OUTPUT="${OUTPUT_FILE}.tmp.$$"

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the temporary merge file; a no-op once it has been renamed.
cleanup() {
  rm -f -- "$TMP_OUTPUT"
}

# Print statistics for the merged CSV given as $1 using pandas.
# The path is handed over as an argument and the heredoc is quoted, so the
# path is never interpreted as Python source.
print_statistics() {
  python3 - "$1" << 'PY'
import sys

import pandas as pd

df = pd.read_csv(sys.argv[1])
n_broad = df['broad_spectrum'].sum()
print(f"广谱抗菌: {n_broad:,} 个 ({n_broad/len(df)*100:.2f}%)")
print(f"非广谱: {len(df)-n_broad:,} 个 ({(len(df)-n_broad)/len(df)*100:.2f}%)")
print("")
print("抑制菌株数分布:")
for threshold in [0, 5, 10, 15, 20, 30]:
    n = (df['ginhib_total'] >= threshold).sum()
    print(f" ≥{threshold:2d} 个菌株: {n:,} ({n/len(df)*100:.2f}%)")
PY
}

trap cleanup EXIT

echo "$RULE"
echo "📦 合并预测结果"
echo "$RULE"

[[ "$NUM_PARTS" =~ ^[1-9][0-9]*$ ]] \
  || die "❌ NUM_PARTS 必须是正整数: $NUM_PARTS"

# The temporary directory must exist.
if [[ ! -d "$TEMP_DIR" ]]; then
  die "❌ 临时目录不存在: $TEMP_DIR"
fi

# Every part file must be present; report all missing ones before aborting.
missing=0
for ((i = 0; i < NUM_PARTS; i++)); do
  part="${TEMP_DIR}/part_${i}.csv"
  if [[ ! -f "$part" ]]; then
    echo "❌ 缺少文件: part_${i}.csv" >&2
    missing=1
  else
    line_count=$(wc -l < "$part")
    echo "✓ part_${i}.csv: $line_count 行"
  fi
done

if (( missing )); then
  die "❌ 有文件缺失,请等待所有进程完成"
fi

echo ""
echo "合并文件..."

# Read the reference header. `read` returns non-zero when the only line has
# no trailing newline, but still fills the variable, hence `|| true`.
header=""
IFS= read -r header < "${TEMP_DIR}/part_0.csv" || true
[[ -n "$header" ]] || die "❌ part_0.csv 为空或缺少表头"

# Merge the parts: keep the header of the first part only, skip it in the rest.
: > "$TMP_OUTPUT"
for ((i = 0; i < NUM_PARTS; i++)); do
  part="${TEMP_DIR}/part_${i}.csv"

  # A different (or missing) header means the part is empty, half-written or
  # from another run; dropping its first line blindly would corrupt the data.
  part_header=""
  IFS= read -r part_header < "$part" || true
  [[ "$part_header" == "$header" ]] || die "❌ 表头不一致: part_${i}.csv"

  skip=$(( i == 0 ? 0 : 1 ))
  # awk terminates every record with a newline, so a part that lacks a
  # trailing newline cannot glue its last row onto the next part's first row.
  LC_ALL=C awk -v skip="$skip" 'NR > skip' < "$part" >> "$TMP_OUTPUT"
done

mv -f -- "$TMP_OUTPUT" "$OUTPUT_FILE"

# Summary: every line except the header is one molecule.
total_lines=$(wc -l < "$OUTPUT_FILE")
total_molecules=$(( total_lines - 1 ))

echo ""
echo "$RULE"
echo "✓ 合并完成"
echo "$RULE"
echo "输出文件: $OUTPUT_FILE"
echo "总分子数: $total_molecules"
echo ""

# Optional statistics on broad-spectrum molecules. These need python3 with
# pandas; a failure here must not turn a successful merge into an error.
if (( total_molecules > 0 )) && command -v python3 > /dev/null 2>&1; then
  if ! print_statistics "$OUTPUT_FILE"; then
    echo "⚠️ 统计失败(需要 pandas 以及 broad_spectrum / ginhib_total 列),合并结果不受影响: $OUTPUT_FILE" >&2
  fi
fi

echo "$RULE"