Files
labweb/public/scripts/prokka.sh
2025-12-16 11:39:15 +08:00

125 lines
4.1 KiB
Bash
Executable File

#!/bin/bash
# Prokka batch analysis script
#
# Runs Prokka on every FASTA file (.fa, .fna, .fasta) found directly inside
# the input directory. Intended to be executed inside a Docker container
# with the data directory mounted.
#
# Usage: ./script.sh [--input INPUT_DIR] [--output OUTPUT_DIR]
#                    [--threads THREADS] [--force | --no-force]

# Default configuration (each value can be overridden on the command line)
THREADS=4                        # Value passed to Prokka's --cpus
FORCE=true                       # true: re-run and overwrite existing results
TARGET_DIR="/app"                # Working directory in container
OUTPUT_DIR="prokka_annotation"   # Default output directory
# Directory scanned for FASTA files. It previously had no default, so running
# without --input made `find` search an empty path. An INPUT_DIR inherited
# from the environment is still honoured; otherwise fall back to TARGET_DIR.
INPUT_DIR="${INPUT_DIR:-$TARGET_DIR}"
# Print the help text to stdout.
show_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " -o, --output DIR Set output directory path (default: prokka_annotation)"
  echo " -t, --threads NUM Set number of threads (default: 4)"
  echo " -i, --input DIR Set target directory (to find FASTA files) (default: /app)"
  echo " -f, --force Force overwrite existing files (default: true)"
  echo " --no-force Do not overwrite existing files"
  echo " -h, --help Show this help message"
}

# Abort unless the option currently being parsed is followed by a value.
# Arguments: $1 - option name (for the message)
#            $2 - number of arguments left, including the option itself
# Without this guard `shift 2` fails when the option is the last argument,
# nothing is shifted, and the parsing loop spins forever.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    # Messages go to stdout to match the rest of this script.
    echo "Missing value for parameter: $1"
    echo "Use -h or --help for usage information"
    exit 1
  fi
}

# Parse command line arguments into OUTPUT_DIR, THREADS, FORCE and INPUT_DIR.
# Arguments: the script's command line ("$@")
# Exits:     0 after printing help; 1 on an unknown option, a missing option
#            value, or a non-numeric thread count
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --output|-o)
        require_value "$1" "$#"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      --threads|-t)
        require_value "$1" "$#"
        # Reject non-numeric values up front; Prokka's own output is
        # suppressed later, so a bad value would otherwise only show up
        # as every analysis failing.
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "Invalid thread count: $2"
          exit 1
        fi
        THREADS="$2"
        shift 2
        ;;
      --force|-f)
        FORCE=true
        shift
        ;;
      --no-force)
        FORCE=false
        shift
        ;;
      # -i is the short form promised by the help text; -d is kept as a
      # legacy alias because it was the only short form accepted before.
      --input|-i|-d)
        require_value "$1" "$#"
        INPUT_DIR="$2"
        shift 2
        ;;
      --help|-h)
        show_usage
        exit 0
        ;;
      *)
        echo "Unknown parameter: $1"
        echo "Use -h or --help for usage information"
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Resolve the directory to scan. If no input directory was supplied, fall
# back to the container working directory (TARGET_DIR, or /app when that is
# unset too) instead of handing `find` an empty path.
INPUT_DIR="${INPUT_DIR:-${TARGET_DIR:-/app}}"
# A missing input directory is a configuration error, not an "empty" run:
# report it explicitly rather than letting `find` fail and then printing the
# misleading "no FASTA files" warning. Printed on stdout like the script's
# other messages.
if [[ ! -d "$INPUT_DIR" ]]; then
  echo "Input directory not found: $INPUT_DIR"
  exit 1
fi

# Create output directory (ensure it exists)
mkdir -p "$OUTPUT_DIR" || { echo "Failed to create output directory: $OUTPUT_DIR"; exit 1; }

# Find all FASTA format files (.fa, .fna, .fasta) directly inside INPUT_DIR
# (no recursion). The result is a newline-separated list of paths.
fasta_files=$(find "$INPUT_DIR" -maxdepth 1 -type f \( -name '*.fa' -o -name '*.fna' -o -name '*.fasta' \))

# Nothing to do is not treated as an error: warn and exit successfully.
if [[ -z "$fasta_files" ]]; then
  echo "Warning: No FASTA files (.fa, .fna, .fasta) found in $INPUT_DIR"
  exit 0
fi

# Announce the batch: one path per line, so the line count is the file count.
echo "Found $(wc -l <<< "$fasta_files") FASTA files, starting processing..."
echo "Output directory: $(realpath "$OUTPUT_DIR")"
echo "----------------------------------------"
# Report whether Prokka results for one input file are already present.
# Arguments: $1 - Prokka output directory
#            $2 - output file prefix used for that input
# Returns:   0 if "<outdir>/<prefix>.gff" exists, 1 otherwise
# NOTE(review): the .gff file is used as the "already analysed" marker on the
# assumption that every completed Prokka run writes it — confirm.
has_prokka_results() {
  local outdir=$1 prefix=$2
  [[ -f "${outdir}/${prefix}.gff" ]]
}

# Run Prokka once per path listed (one per line) in $fasta_files.
# IFS= keeps leading/trailing whitespace in file names intact.
while IFS= read -r fasta_file; do
  # Skip empty lines
  [[ -z "$fasta_file" ]] && continue

  # Check if file exists and is readable
  if [[ ! -f "$fasta_file" || ! -r "$fasta_file" ]]; then
    echo "Skipping unreadable file: $fasta_file"
    continue
  fi

  # Extract filename (without path and extension)
  filename=${fasta_file##*/}
  prefix=${filename%.*}
  # NOTE(review): inputs that differ only by extension (a.fa / a.fna) map to
  # the same prefix and therefore to the same output files.

  # All inputs share one output directory; results are told apart by prefix.
  current_outdir="${OUTPUT_DIR}/prokka_output"

  # With force disabled, skip only inputs whose own results already exist.
  # Testing for the shared directory instead (as before) skipped every file
  # after the first one, because the first run creates that directory.
  if [[ "$FORCE" == false ]] && has_prokka_results "$current_outdir" "$prefix"; then
    echo "Skipping existing analysis: $filename (results already exist)"
    echo "----------------------------------------"
    continue
  fi

  echo "Starting analysis for file: $filename"
  echo "Output subdirectory: $current_outdir"
  echo "fasta_file: $fasta_file"

  # Build the command line as an array so optional flags need no unquoted
  # command substitution.
  prokka_args=(
    --outdir "$current_outdir"
    --prefix "$prefix"
    --cpus "$THREADS"
    --centre X
    --compliant
  )
  # Prokka refuses to write into an existing --outdir unless --force is
  # given, so the flag is also needed whenever the shared directory already
  # exists (i.e. for every input after the first), even with FORCE=false.
  if [[ "$FORCE" == true || -d "$current_outdir" ]]; then
    prokka_args+=(--force)
  fi

  # Execute Prokka with all of its output suppressed. stdin is redirected
  # from /dev/null so neither Prokka nor the tools it spawns can consume the
  # file list that feeds this loop.
  if /prokka-1.14.6/bin/prokka "${prokka_args[@]}" "$fasta_file" \
      < /dev/null > /dev/null 2>&1; then
    echo "✓ Analysis completed: $filename"
    # # Optional: Copy .faa file to parent directory for subsequent analysis
    # if [ -f "${current_outdir}/${prefix}.faa" ]; then
    # cp "${current_outdir}/${prefix}.faa" "${OUTPUT_DIR}/${prefix}.faa" > /dev/null 2>&1
    # echo "→ Protein sequence file copied: ${OUTPUT_DIR}/${prefix}.faa"
    # fi
  else
    echo "✗ Analysis failed: $filename"
  fi
  echo "----------------------------------------"
done <<< "$fasta_files"

# NOTE(review): the script still exits successfully even if individual
# analyses failed; callers relying on the exit status should be checked
# before changing that.
echo "All files processed!"
echo "Results directory: $(realpath "$OUTPUT_DIR")"