#!/bin/bash
# Script name: run_annotation_pipeline.sh
# Function: Combine kofamscan annotation and Inference prediction
# Usage: ./run_annotation_pipeline.sh <prokka_annotation_dir> <output_dir> <fasta_root_path>
# Example: ./run_annotation_pipeline.sh ./prokka_annotation ./media_predict /home/gzy/uncultured/uncultivated_genome/uncultured_protein/

# Check input parameters.
# <prokka_annotation_dir> and <output_dir> are required. <fasta_root_path> is
# optional: when it is omitted, FASTA_ROOT falls back to <output_dir>, which is
# the value this script used before the third argument was honoured.
if [ $# -lt 2 ] || [ $# -gt 3 ]; then
    echo "Error: Incorrect number of arguments!"
    echo "Usage: $0 <prokka_annotation_dir> <output_dir> [fasta_root_path]"
    echo "Example: $0 ./prokka_annotation ./media_predict ./protein_fasta"
    exit 1
fi

# Assign parameters
PROKKA_DIR="$1"        # first argument of ./scripts/kofamscan.sh
OUTPUT_DIR="$2"        # kofamscan.sh output; Inference.py --annotation_dir and --output_dir
FASTA_ROOT="${3:-$2}"  # Inference.py --fasta_path; defaults to OUTPUT_DIR when $3 is absent or empty

# Paths below are relative to the directory the script is launched from.
INFERENCE_SCRIPT="./scripts/Inference.py"
DATA_PKL="./scripts/data.pkl"
MODEL_PATH="./scripts/best_model.pt"

# File-name suffix used both to detect annotation results and as --endswith.
ANNOTATION_SUFFIX="_ko.txt"

# Create the output directory (including missing parents); stop with status 1
# when mkdir reports a failure.
echo "=== Creating output directory ==="
if ! mkdir -p "$OUTPUT_DIR"; then
    echo "Error: Failed to create output directory $OUTPUT_DIR"
    exit 1
fi

# Step 1: kofamscan annotation.
# The helper script must exist as a regular file; it is then run with the
# prokka directory and the output directory as its two arguments.
echo -e "\n=== Step 1: Running kofamscan annotation ==="
kofamscan_script="./scripts/kofamscan.sh"

if [ ! -f "$kofamscan_script" ]; then
    echo "Error: kofamscan.sh not found in current directory"
    exit 1
fi

if ! bash "$kofamscan_script" "$PROKKA_DIR" "$OUTPUT_DIR"; then
    echo "Error: kofamscan.sh execution failed"
    exit 1
fi

# Verify that step 1 produced at least one file whose name ends with
# ANNOTATION_SUFFIX somewhere under OUTPUT_DIR (searched recursively).
echo -e "\n=== Checking annotation results ==="
annotation_files=$(find "$OUTPUT_DIR" -type f -name "*$ANNOTATION_SUFFIX")

if [ -n "$annotation_files" ]; then
    # One path per line, so the line count is the number of files reported.
    annotation_count=$(printf '%s\n' "$annotation_files" | wc -l)
    echo "Found $annotation_count annotation files. Proceeding to prediction..."
else
    echo "Error: No annotation files (*$ANNOTATION_SUFFIX) found in $OUTPUT_DIR"
    exit 1
fi

# Step 2: Inference prediction.
# Bail out early when the inference script is missing; otherwise invoke it
# with the annotation directory, file-name filters, FASTA path, data/model
# files and output directory collected above.
echo -e "\n=== Step 2: Running Inference prediction ==="

if [ ! -f "$INFERENCE_SCRIPT" ]; then
    echo "Error: $INFERENCE_SCRIPT not found in current directory"
    exit 1
fi

# An array keeps each option/value pair as separate words, including the
# intentionally empty --startswith value.
inference_args=(
    --annotation_dir "$OUTPUT_DIR"
    --startswith ""
    --endswith "$ANNOTATION_SUFFIX"
    --fasta_path "$FASTA_ROOT"
    --data_pkl "$DATA_PKL"
    --model_path "$MODEL_PATH"
    --output_dir "$OUTPUT_DIR"
    --annotation_type kofamscan
)

if ! python "$INFERENCE_SCRIPT" "${inference_args[@]}"; then
    echo "Error: Inference.py execution failed"
    exit 1
fi

# Final status report: reached only when every previous step succeeded.
printf '\n%s\n' "=== All pipeline steps completed successfully ==="
printf '%s\n' "Results are saved in: $OUTPUT_DIR"