Files
labweb/public/scripts/prokka_annotation.sh
2025-12-16 11:39:15 +08:00

75 lines
2.6 KiB
Bash

#!/bin/bash
# --- Script Description ---
# This script automates the annotation of multiple genome assembly files using Prokka.
# It takes a single argument: a directory path.
# The script finds all .fasta and .fna files directly inside that directory and runs
# Prokka on each one, using the filename (without extension) as the output prefix.
# Results for each input are written to ./prokka_annotation/<prefix>_prokka_output,
# relative to the directory the script is run from.
#
# Usage: ./prokka_annotation.sh /path/to/your/fasta_folder
# Example: ./prokka_annotation.sh ./genome_assemblies
# --- 1. Check for Input Argument ---
# Exactly one argument (the input directory) is required. All diagnostics are
# written to stderr so they never mix with normal progress output on stdout.
if [[ $# -ne 1 ]]; then
  echo "Error: Incorrect number of arguments." >&2
  echo "Usage: $0 <directory_with_fasta_files>" >&2
  exit 1
fi

# Assign the first argument to a variable for clarity
TARGET_DIR="$1"

# --- 2. Check if the Directory Exists ---
if [[ ! -d "$TARGET_DIR" ]]; then
  echo "Error: Directory '$TARGET_DIR' not found." >&2
  exit 1
fi

# Fail fast when Prokka is not installed, instead of hitting
# "command not found" once per input file later on.
if ! command -v prokka >/dev/null 2>&1; then
  echo "Error: 'prokka' was not found in PATH." >&2
  exit 1
fi

echo "=================================================="
echo "Starting batch Prokka annotation for directory: $TARGET_DIR"
echo "=================================================="
# --- 3. Find and Loop Through Fasta/Fna Files ---
# Flag read by the final report; it stays "false" unless at least one input
# file was found.
found_files=false

#######################################
# Run Prokka on every .fasta / .fna file located directly inside a directory.
# The glob is case-sensitive (e.g. "*.FASTA" is not picked up).
# Globals:   found_files (written: set to "true" once an input file is seen)
# Arguments: $1 - directory containing the assembly files
# Outputs:   progress on stdout, failures on stderr; Prokka results go to
#            ./prokka_annotation/<prefix>_prokka_output
# Returns:   0 after the loop; exits 1 if the output root cannot be created
#######################################
annotate_directory() {
  local input_dir=$1
  local out_root="./prokka_annotation"
  local contig_file base_name prefix

  for contig_file in "$input_dir"/*.fasta "$input_dir"/*.fna; do
    # When a pattern matches nothing, bash leaves the literal pattern string
    # in the list, so the loop body still runs for it. Skip that case, and
    # anything else that is not a regular file (e.g. a directory named x.fasta).
    [[ -f "$contig_file" ]] || continue

    found_files=true
    echo "--> Processing file: $contig_file"

    # --- 4. Extract Prefix from Filename ---
    # Drop the directory part, then only the final extension (.fasta or .fna).
    # NOTE: "sample.fasta" and "sample.fna" yield the same prefix and therefore
    # share one output folder.
    base_name=${contig_file##*/}
    prefix=${base_name%.*}
    echo " Using prefix: '$prefix'"

    # Create the shared output root. (The previous "mkdir dir -p ..." also
    # created a stray directory literally named "dir".)
    if ! mkdir -p -- "$out_root"; then
      echo "Error: Could not create output directory '$out_root'." >&2
      exit 1
    fi

    # --- 5. Run the Prokka Command ---
    # Only report success when Prokka actually exited with status 0.
    if prokka --cpus 4 --prefix "$prefix" -o "$out_root/${prefix}_prokka_output" --force "$contig_file"; then
      echo " Finished Prokka for '$base_name'."
    else
      echo " Error: Prokka failed for '$base_name'." >&2
    fi
    echo "--------------------------------------------------"
  done
}

annotate_directory "$TARGET_DIR"
# --- 6. Final Report ---
# found_files is set to "true" by the processing loop as soon as one input
# file has been seen; "false" means nothing matched *.fasta or *.fna.
if [[ "$found_files" == false ]]; then
  echo "No .fasta or .fna files were found in '$TARGET_DIR'."
else
  echo "=================================================="
  echo "Prokka annotation complete."
  # Results are written below ./prokka_annotation (one
  # <prefix>_prokka_output folder per input), not into the current directory itself.
  echo "All output files are located in './prokka_annotation/'."
  echo "=================================================="
fi
exit 0