Files
labweb/models/kofamscan.sh
2025-12-16 11:39:15 +08:00

92 lines
3.2 KiB
Bash

#!/bin/bash
# Script name: kofamscan.sh
# Function: Perform batch KO annotation of .faa files using kofam_scan
# Usage: ./kofamscan.sh <input_directory> <output_directory>
# Example: ./kofamscan.sh ./input_genes ./output_ko
# Validate the command line: exactly two positional arguments are required,
# the input directory followed by the output directory. Anything else prints
# the usage text and ends the script with status 1.
if (( $# != 2 )); then
  printf '%s\n' \
    "Error: Incorrect number of arguments!" \
    "Usage: $0 <input_directory> <output_directory>" \
    "Example: $0 ./prokka_annotation ./media_predict_dir"
  exit 1
fi
# Keep both arguments under descriptive names for the rest of the script.
input_dir="$1"
output_root="$2" # Output directory from parameter
# Fixed settings handed to kofam_scan's exec_annotation further down.
# Edit the values here to match the local installation.
# KOFAM_EXEC="exec_annotation" # Path to exec_annotation executable

# Database locations (-p and -k options).
PROFILES_DIR='/mnt/profiles' # Directory with the database profiles
KO_LIST='/mnt/ko_list'       # ko_list file

# Run-time options.
CPU='20'               # Thread count (--cpu)
E_VALUE='1e-5'         # E-value threshold (-E)
OUTPUT_FORMAT='mapper' # Output format (-f)
TMP_DIR='.'            # Temporary file directory (--tmp-dir)
# Make sure the main output directory exists before anything is written to it.
# With -p, mkdir also creates missing parent directories and does not fail when
# the directory is already present; any other failure aborts the script.
echo "Creating main output directory: $output_root"
if ! mkdir -p "$output_root"; then
  echo "Error: Failed to create output directory $output_root!"
  exit 1
fi
# Check if kofam_scan executable exists
# if [ ! -f "$KOFAM_EXEC" ]; then
# echo "Error: kofam_scan executable not found at $KOFAM_EXEC"
# exit 1
# fi
# Find all .faa files in input directory (including subdirectories).
# The paths are read NUL-delimited into an array, so names containing spaces,
# newlines or glob characters stay intact instead of being word-split.
echo "Searching for .faa files in $input_dir..."
FAA_FILES=()
while IFS= read -r -d '' faa_path; do
  FAA_FILES+=("$faa_path")
done < <(find "$input_dir" -type f -name "*.faa" -print0)
# Check if any .faa files were found
if [ "${#FAA_FILES[@]}" -eq 0 ]; then
  echo "Error: No .faa files found in $input_dir (including subdirectories)!"
  exit 1
fi
# Work out which files actually need copying. When the output directory lies
# inside the input tree (for example on a re-run), find also returns the files
# staged earlier; those are already in place, and asking cp to copy a file
# onto itself would fail and abort the whole run.
faa_to_copy=()
for faa_path in "${FAA_FILES[@]}"; do
  # -ef is true when both paths resolve to the same directory.
  if [[ "$(dirname -- "$faa_path")" -ef "$output_root" ]]; then
    continue
  fi
  faa_to_copy+=("$faa_path")
done
# Copy all remaining .faa files to output directory
echo "Copying .faa files to $output_root..."
if [ "${#faa_to_copy[@]}" -gt 0 ]; then
  # "--" stops option parsing so a path starting with "-" is not read as a flag.
  cp -v -- "${faa_to_copy[@]}" "$output_root/" || {
    echo "Error: Failed to copy .faa files to $output_root!"
    exit 1
  }
fi
# Annotate every .faa file staged in the output directory with kofam_scan.
# A failing sample only produces a warning; the loop moves on to the next file.
echo "Starting kofam_scan annotation..."
for faa_file in "$output_root"/*.faa; do
  # An unmatched glob stays as the literal pattern, which is not a regular
  # file, so it is skipped here together with any other non-file match.
  if [ ! -f "$faa_file" ]; then
    continue
  fi

  # Sample name: file name without its directory and without the .faa suffix.
  sample_name=$(basename "$faa_file" .faa)

  # Per-sample result and log files, both placed in the output directory.
  ko_output="${output_root}/${sample_name}_ko.txt"
  ko_log="${output_root}/${sample_name}_ko.log"

  echo "Processing $faa_file..."

  # Assemble the exec_annotation command line from the fixed settings.
  kofam_args=(
    -p "$PROFILES_DIR"
    -k "$KO_LIST"
    --cpu "$CPU"
    --tmp-dir "$TMP_DIR"
    -E "$E_VALUE"
    -f "$OUTPUT_FORMAT"
    -o "$ko_output"
    "$faa_file"
  )

  # Run kofam_scan; its stdout and stderr both go to the per-sample log.
  if ! exec_annotation "${kofam_args[@]}" > "$ko_log" 2>&1; then
    echo "Warning: kofam_scan failed for $faa_file (see log: $ko_log)"
    continue
  fi

  printf '%s\n' \
    "Completed processing $sample_name:" \
    " Results: $ko_output" \
    " Log: $ko_log"
done
echo "All kofam_scan analyses completed! Results in $output_root"