#!/bin/bash
# Parallel batch ligand-preparation script (with logging + GNU parallel).
# Usage: ./batch_prepare_ligands_parallel.sh input_sdf_dir output_pdbqt_dir [log_file] [max_jobs]
#
# Converts every *.sdf file directly inside INPUT_DIR to a .pdbqt file in
# OUTPUT_DIR via mk_prepare_ligand.py, running up to MAX_JOBS conversions
# concurrently with GNU parallel. Per-file outcomes are appended to LOG_FILE.

set -u  # abort on unset variables; -e is deliberately NOT set because
        # per-file conversion failures are expected and handled explicitly

die() { printf '%s\n' "$*" >&2; exit 1; }

if [ $# -lt 2 ] || [ $# -gt 4 ]; then
  echo "用法: $0 <输入SDF目录> <输出PDBQT目录> [日志文件] [并发数]"
  exit 1
fi

INPUT_DIR="$1"
OUTPUT_DIR="$2"
LOG_FILE="${3:-batch_prepare_ligands_parallel.log}"
MAX_JOBS="${4:-4}"  # default: 4 concurrent jobs

# Fail fast on missing prerequisites instead of silently producing an
# empty "0 succeeded / 0 failed" run.
[ -d "$INPUT_DIR" ] || die "错误: 输入目录不存在: $INPUT_DIR"
command -v parallel >/dev/null 2>&1 || die "错误: 未找到 GNU parallel, 请先安装"
command -v mk_prepare_ligand.py >/dev/null 2>&1 || die "错误: 未找到 mk_prepare_ligand.py"

# Create the output directory and the log file's directory.
mkdir -p "$OUTPUT_DIR" || die "错误: 无法创建输出目录: $OUTPUT_DIR"
mkdir -p "$(dirname "$LOG_FILE")" || die "错误: 无法创建日志目录"

# Initialize the log file (single grouped redirect instead of repeated >>).
{
  echo "=== 并行批量配体准备日志 ==="
  echo "开始时间: $(date)"
  echo "输入目录: $INPUT_DIR"
  echo "输出目录: $OUTPUT_DIR"
  echo "最大并发数: $MAX_JOBS"
  echo "================================"
} > "$LOG_FILE"

export OUTPUT_DIR LOG_FILE

# Per-file worker, invoked by GNU parallel. It runs in a child shell, so it
# communicates only via the shared log file (append mode) and a single
# "SUCCESS <file>" / "FAILED <file>" line on stdout for the final tally.
# NOTE(review): concurrent multi-line appends to LOG_FILE may interleave
# between jobs; each job's block is written in one redirect to minimize this.
process_ligand() {
  local sdf_file="$1"
  local stem output_file error_file
  # 'stem' (not 'basename') avoids shadowing the basename(1) command.
  stem=$(basename -- "$sdf_file" .sdf)
  output_file="$OUTPUT_DIR/${stem}.pdbqt"
  error_file=$(mktemp) || { echo "FAILED $sdf_file"; return; }

  if mk_prepare_ligand.py -i "$sdf_file" -o "$output_file" 2>"$error_file"; then
    echo "[SUCCESS] $(date): $stem -> $output_file" >> "$LOG_FILE"
    echo "SUCCESS $sdf_file"
  else
    {
      echo "[FAILED] $(date): $stem"
      echo "  输入文件: $sdf_file"
      echo "  预期输出: $output_file"
      echo "  错误信息:"
      if [ -s "$error_file" ]; then
        sed 's/^/    /' "$error_file"
      else
        echo "    未捕获到具体错误信息"
      fi
      echo ""
    } >> "$LOG_FILE"
    echo "FAILED $sdf_file"
  fi
  # Cleanup is shared by both branches (was duplicated in each before).
  rm -f "$error_file"
}
export -f process_ligand

echo "开始并行批量准备配体..."
echo "输入目录: $INPUT_DIR"
echo "输出目录: $OUTPUT_DIR"
echo "日志文件: $LOG_FILE"
echo "最大并发数: $MAX_JOBS"

# NUL-delimited dispatch (-print0 / parallel -0) so filenames containing
# spaces or newlines are passed through intact.
results=$(find "$INPUT_DIR" -maxdepth 1 -name '*.sdf' -print0 \
  | parallel -0 -j "$MAX_JOBS" process_ligand {})

# grep -c prints "0" and exits non-zero when nothing matches; that is a
# legitimate tally here, not an error.
count=$(printf '%s\n' "$results" | grep -c '^SUCCESS ') || true
failed=$(printf '%s\n' "$results" | grep -c '^FAILED ') || true

# Append the summary to the log in one grouped redirect.
{
  echo "================================"
  echo "结束时间: $(date)"
  echo "成功处理: $count 个配体"
  echo "失败: $failed 个配体"
} >> "$LOG_FILE"

echo "批量准备完成!"
echo "成功处理: $count 个配体"
echo "失败: $failed 个配体"
echo "详细日志请查看: $LOG_FILE"