add shell script
This commit is contained in:
161
scripts/batch_docking.sh
Executable file
161
scripts/batch_docking.sh
Executable file
@@ -0,0 +1,161 @@
|
||||
#!/bin/bash

# Batch docking script (with logging).
#
# Usage:
#   ./batch_docking.sh receptor.pdbqt config.txt ligands_dir output_dir \
#     [log_file] [vina_path] [exhaustiveness] [num_modes] [energy_range]
#
# Docks every regular *.pdbqt file located directly inside ligands_dir
# against the receptor in one `vina --batch` run and records the run in a
# log file.
#
# Defaults: log_file=batch_docking.log, vina_path=vina, exhaustiveness=32,
#           num_modes=20, energy_range=5.0
# Exit status: 0 when vina succeeds; 1 on a usage error, a missing input,
#              or a failed vina run.

if [ $# -lt 4 ] || [ $# -gt 9 ]; then
  echo "用法: $0 <受体PDBQT文件> <配置文件> <配体目录> <输出目录> [日志文件] [vina路径] [exhaustiveness] [num_modes] [energy_range]" >&2
  exit 1
fi

RECEPTOR="$1"
CONFIG="$2"
LIGANDS_DIR="$3"
OUTPUT_DIR="$4"
LOG_FILE="${5:-batch_docking.log}"
VINA_CMD="${6:-vina}"
EXHAUSTIVENESS="${7:-32}"
NUM_MODES="${8:-20}"
ENERGY_RANGE="${9:-5.0}"

# Append one timestamped entry to the log.
# Arguments: $1 - level tag (INFO, ERROR, ...), $2 - message
log_entry() {
  echo "[$1] $(date): $2" >> "$LOG_FILE"
}

# Report a fatal error on stderr, record it in the log, and exit 1.
# Arguments: $1 - message
fail() {
  echo "$1" >&2
  log_entry ERROR "$1"
  exit 1
}

# Create the output directory and the directory that holds the log file.
mkdir -p "$OUTPUT_DIR"
mkdir -p "$(dirname "$LOG_FILE")"

# Initialise the log file (truncates any previous log).
{
  echo "=== 批量对接日志 ==="
  echo "开始时间: $(date)"
  echo "受体文件: $RECEPTOR"
  echo "配置文件: $CONFIG"
  echo "配体目录: $LIGANDS_DIR"
  echo "输出目录: $OUTPUT_DIR"
  echo "vina路径: $VINA_CMD"
  echo "exhaustiveness: $EXHAUSTIVENESS"
  echo "num_modes: $NUM_MODES"
  echo "energy_range: $ENERGY_RANGE"
  echo "================================"
} > "$LOG_FILE"

# Validate the inputs.
[ -f "$RECEPTOR" ] || fail "错误: 受体文件不存在: $RECEPTOR"
[ -f "$CONFIG" ] || fail "错误: 配置文件不存在: $CONFIG"
[ -d "$LIGANDS_DIR" ] || fail "错误: 配体目录不存在: $LIGANDS_DIR"

# Collect the ligand files. Only regular files directly inside the directory
# are used, so the count below is exactly the set handed to vina. When the
# glob matches nothing it stays literal and is rejected by the -f test.
ligand_files=()
for ligand in "$LIGANDS_DIR"/*.pdbqt; do
  if [ -f "$ligand" ]; then
    ligand_files+=("$ligand")
  fi
done
ligand_count=${#ligand_files[@]}

if [ "$ligand_count" -eq 0 ]; then
  fail "错误: 在 $LIGANDS_DIR 中未找到任何 .pdbqt 文件"
fi

echo "开始批量对接..."
echo "受体: $RECEPTOR"
echo "配置: $CONFIG"
echo "配体目录: $LIGANDS_DIR (找到 $ligand_count 个配体文件)"
echo "输出目录: $OUTPUT_DIR"
echo "日志文件: $LOG_FILE"

log_entry INFO "找到 $ligand_count 个配体文件"

# Temporary file receiving vina's stderr; the trap removes it on every exit
# path, including interruption by a signal.
error_file=$(mktemp) || fail "错误: 无法创建临时文件"
trap 'rm -f "$error_file"' EXIT

# Run vina in batch mode. --batch expects the ligand PDBQT files themselves,
# not a directory, so the collected file list is passed explicitly.
# NOTE: a very large ligand set can exceed the OS argument-length limit; in
# that case the exec error lands in $error_file and is logged below.
log_entry INFO "开始执行 vina 命令"

if "$VINA_CMD" --receptor "$RECEPTOR" \
    --batch "${ligand_files[@]}" \
    --config "$CONFIG" \
    --dir "$OUTPUT_DIR" \
    --exhaustiveness="$EXHAUSTIVENESS" \
    --num_modes="$NUM_MODES" \
    --energy_range="$ENERGY_RANGE" 2>"$error_file"; then

  echo "批量对接完成!"
  echo "结果保存在: $OUTPUT_DIR"
  log_entry SUCCESS "批量对接成功完成"

  # Count the output files present in the output directory.
  output_count=$(find "$OUTPUT_DIR" -name "*_out.pdbqt" | wc -l)
  log_entry INFO "生成了 $output_count 个输出文件"

else
  echo "批量对接失败!" >&2
  log_entry FAILED "批量对接执行失败"

  {
    # The ligand list is abbreviated to its glob to keep the log readable.
    echo " 命令: $VINA_CMD --receptor $RECEPTOR --batch $LIGANDS_DIR/*.pdbqt --config $CONFIG --dir $OUTPUT_DIR --exhaustiveness=$EXHAUSTIVENESS --num_modes=$NUM_MODES --energy_range=$ENERGY_RANGE"
    echo " 错误信息:"

    # Copy vina's stderr into the log, indented by one space.
    if [ -s "$error_file" ]; then
      sed 's/^/ /' "$error_file"
    else
      echo " 未捕获到具体错误信息"
    fi

    # Cheap heuristics pointing at likely causes.
    echo " 可能的问题排查:"

    # Receptor file missing or empty.
    if [ ! -s "$RECEPTOR" ]; then
      echo " - 受体文件为空或不可读"
    fi

    # Config file without any center_* key.
    if ! grep -q "center_" "$CONFIG" 2>/dev/null; then
      echo " - 配置文件可能格式不正确(缺少center_参数)"
    fi

    # Zero-length ligand files.
    invalid_ligands=0
    for ligand in "${ligand_files[@]}"; do
      if [ ! -s "$ligand" ]; then
        echo " - 发现空的配体文件: $(basename "$ligand")"
        invalid_ligands=$((invalid_ligands + 1))
      fi
    done

    if [ "$invalid_ligands" -gt 0 ]; then
      echo " - 发现 $invalid_ligands 个无效的配体文件"
    fi
  } >> "$LOG_FILE"

  exit 1
fi

# Record the summary.
{
  echo "================================"
  echo "结束时间: $(date)"
} >> "$LOG_FILE"

# Final tally of output files versus input ligands.
final_output_count=$(find "$OUTPUT_DIR" -name "*_out.pdbqt" | wc -l)
echo "最终输出文件数量: $final_output_count" >> "$LOG_FILE"

if [ "$final_output_count" -lt "$ligand_count" ]; then
  missing_count=$((ligand_count - final_output_count))
  echo "警告: 有 $missing_count 个配体可能对接失败" >> "$LOG_FILE"
fi

echo "详细日志请查看: $LOG_FILE"
|
||||
|
||||
83
scripts/batch_prepare_ligands.sh
Executable file
83
scripts/batch_prepare_ligands.sh
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/bin/bash

# Parallel batch ligand preparation script (with logging + GNU parallel).
# Usage: ./batch_prepare_ligands.sh input_sdf_dir output_pdbqt_dir [log_file] [max_jobs]
#
# Defaults: log_file=batch_prepare_ligands_parallel.log, max_jobs=4
# Exit status of this section: 1 on a usage error or a missing input
# directory; otherwise execution continues.

if [ $# -lt 2 ] || [ $# -gt 4 ]; then
  echo "用法: $0 <输入SDF目录> <输出PDBQT目录> [日志文件] [并发数]" >&2
  exit 1
fi

INPUT_DIR="$1"
OUTPUT_DIR="$2"
LOG_FILE="${3:-batch_prepare_ligands_parallel.log}"
MAX_JOBS="${4:-4}" # default: 4 concurrent jobs; passed to parallel -j as-is

# Create the output directory and the directory that holds the log file.
mkdir -p "$OUTPUT_DIR"
mkdir -p "$(dirname "$LOG_FILE")"

# Initialise the log file (truncates any previous log).
{
  echo "=== 并行批量配体准备日志 ==="
  echo "开始时间: $(date)"
  echo "输入目录: $INPUT_DIR"
  echo "输出目录: $OUTPUT_DIR"
  echo "最大并发数: $MAX_JOBS"
  echo "================================"
} > "$LOG_FILE"

# Stop early when the input directory is missing; otherwise the run would
# end with a misleading "0 succeeded / 0 failed" summary.
if [ ! -d "$INPUT_DIR" ]; then
  error_msg="错误: 输入目录不存在: $INPUT_DIR"
  echo "$error_msg" >&2
  echo "[ERROR] $(date): $error_msg" >> "$LOG_FILE"
  exit 1
fi

# Worker shells started by parallel read these from the environment.
export OUTPUT_DIR LOG_FILE
|
||||
|
||||
#######################################
# Per-ligand task, written as a function so that parallel can invoke it.
# Converts one SDF file to PDBQT by running mk_prepare_ligand.py.
# Globals:
#   OUTPUT_DIR (read) - directory that receives <name>.pdbqt
#   LOG_FILE   (read) - log file that entries are appended to
# Arguments:
#   $1 - path to the input .sdf file
# Outputs:
#   Appends a [SUCCESS] or [FAILED] entry to LOG_FILE and prints
#   "SUCCESS <path>" or "FAILED <path>" on stdout; the converter's own
#   stdout is passed through unchanged.
# Returns:
#   0 in both cases; the outcome is reported through the stdout marker.
#######################################
process_ligand() {
  local sdf_file="$1"
  local ligand_name output_file error_text details
  local nl=$'\n'

  ligand_name=$(basename -- "$sdf_file" .sdf)
  output_file="$OUTPUT_DIR/${ligand_name}.pdbqt"

  # Capture only stderr in error_text: inside the substitution stderr is
  # pointed at the capture pipe, then stdout is sent back to the function's
  # real stdout through fd 3. No temporary file is needed, so nothing is
  # left behind when a job is killed.
  if { error_text=$(mk_prepare_ligand.py -i "$sdf_file" -o "$output_file" 2>&1 >&3 3>&-); } 3>&1; then
    echo "[SUCCESS] $(date): $ligand_name -> $output_file" >> "$LOG_FILE"
    echo "SUCCESS $sdf_file"
  else
    if [ -n "$error_text" ]; then
      # Indent every captured line by one space.
      details=" ${error_text//$nl/$nl }"
    else
      details=" 未捕获到具体错误信息"
    fi

    # Emit the whole record with a single printf so that records from
    # concurrently running jobs are far less likely to interleave.
    printf '%s\n' \
      "[FAILED] $(date): $ligand_name" \
      " 输入文件: $sdf_file" \
      " 预期输出: $output_file" \
      " 错误信息:" \
      "$details" \
      "" >> "$LOG_FILE"

    echo "FAILED $sdf_file"
  fi
}
|
||||
|
||||
# Make the worker function visible to the bash shells spawned by parallel.
export -f process_ligand

# Without parallel every job would be skipped and the summary would report
# "0 succeeded / 0 failed", so treat its absence as a hard error.
if ! command -v parallel >/dev/null 2>&1; then
  error_msg="错误: 未找到 parallel 命令"
  echo "$error_msg" >&2
  echo "[ERROR] $(date): $error_msg" >> "$LOG_FILE"
  exit 1
fi

echo "开始并行批量准备配体..."
echo "输入目录: $INPUT_DIR"
echo "输出目录: $OUTPUT_DIR"
echo "日志文件: $LOG_FILE"
echo "最大并发数: $MAX_JOBS"

# Dispatch one process_ligand job per .sdf file found directly in INPUT_DIR.
# Paths travel NUL-delimited so names containing spaces, quotes, or newlines
# arrive intact. Each job prints a "SUCCESS <path>" or "FAILED <path>" line.
results=$(find "$INPUT_DIR" -maxdepth 1 -name '*.sdf' -print0 \
  | parallel -0 -j "$MAX_JOBS" process_ligand {})

# Tally the per-job markers. grep -c prints 0 (and exits non-zero) when
# nothing matches, which is fine here.
count=$(grep -c '^SUCCESS ' <<<"$results")
failed=$(grep -c '^FAILED ' <<<"$results")

# Summary statistics.
{
  echo "================================"
  echo "结束时间: $(date)"
  echo "成功处理: $count 个配体"
  echo "失败: $failed 个配体"
} >> "$LOG_FILE"

echo "批量准备完成!"
echo "成功处理: $count 个配体"
echo "失败: $failed 个配体"
echo "详细日志请查看: $LOG_FILE"
|
||||
|
||||
Reference in New Issue
Block a user