Files
labweb/public/scripts/GrowthPredict.sh
2025-12-16 11:39:15 +08:00

101 lines
2.6 KiB
Bash

#!/bin/bash
# Script name: prepare_growth_prediction.sh
# Function: Copy GFF/FNA/FFN files, extract CDS names for each GFF, and run R script
# Usage: ./prepare_growth_prediction.sh <prokka_annotation_path> <growth_predict_dir>
# Example: ./prepare_growth_prediction.sh ./prokka_annotation ./growth_predict_dir
# Validate the command line: exactly two positional arguments are expected,
# the Prokka annotation directory and the directory to populate.
[ $# -eq 2 ] || {
  printf '%s\n' \
    "Error: Incorrect number of arguments!" \
    "Usage: $0 <prokka_annotation_path> <growth_predict_dir>" \
    "Example: $0 ./prokka_annotation ./growth_predict_dir"
  exit 1
}
# Keep the arguments under descriptive names for the rest of the script.
PROKKA_PATH=$1
OUTPUT_DIR=$2
# Check Prokka directory exists.
# This is validated before anything is created so that a bad invocation does
# not leave an empty output directory behind.
if [ ! -d "$PROKKA_PATH" ]; then
  echo "Error: $PROKKA_PATH does not exist"
  exit 1
fi
# Create output directory (including missing parents). The `--` stops option
# parsing so a path that begins with a dash is still treated as a directory.
mkdir -p -- "$OUTPUT_DIR" || {
  echo "Error: Failed to create $OUTPUT_DIR"
  exit 1
}
# Find and copy GFF/FNA/FFN files
#
# copy_annotation_files <name_pattern> <label> <required|optional>
#   Recursively searches $PROKKA_PATH for regular files whose name matches
#   <name_pattern> and copies all of them directly into $OUTPUT_DIR (the
#   source subdirectory layout is not reproduced).
#   <label> is the human-readable file type used in the log messages.
#   When no file matches, a warning is printed; the script then exits with
#   status 1 if the third argument is "required", otherwise it continues.
#   A failing cp always terminates the script with status 1.
copy_annotation_files() {
  local pattern=$1 label=$2 requirement=$3
  local -a matches=()
  local path

  # Collect the results NUL-delimited into an array so that paths containing
  # spaces, newlines or glob characters reach cp as single, intact arguments.
  while IFS= read -r -d '' path; do
    matches+=("$path")
  done < <(find "$PROKKA_PATH" -type f -name "$pattern" -print0)

  if [ "${#matches[@]}" -eq 0 ]; then
    echo "Warning: No $label files found in $PROKKA_PATH"
    if [ "$requirement" = "required" ]; then
      exit 1
    fi
    return 0
  fi

  echo "Copying $label files to $OUTPUT_DIR..."
  cp -v -- "${matches[@]}" "$OUTPUT_DIR/" || {
    echo "Error: Failed to copy $label files"
    exit 1
  }
}

# GFF is required for CDS extraction, exit if none found
copy_annotation_files "*.gff" "GFF" required
# FNA and FFN files are optional: their absence only produces a warning
copy_annotation_files "*.fna" "FNA/FASTA" optional
copy_annotation_files "*.ffn" "FFN" optional
# Process each GFF file to extract CDS names
#
# extract_cds_ids <gff_file>
#   Prints, one per line, the value of the ID attribute of every feature
#   whose type (column 3) is "CDS". Reading stops at the first line that
#   contains "##FASTA", so an embedded sequence section is never scanned.
#   Columns are split on tabs, and the attribute column (column 9) is split
#   on ";"; an attribute column consisting of a lone "ID=..." entry is
#   therefore handled as well.
#   The exit status is awk's, so an unreadable input is reported as failure.
extract_cds_ids() {
  awk -F'\t' '
    /##FASTA/ { exit }
    $3 == "CDS" {
      n = split($9, attrs, ";")
      for (i = 1; i <= n; i++) {
        if (attrs[i] ~ /^ID=/) {
          # Drop the leading "ID=" (3 characters) and keep the value
          print substr(attrs[i], 4)
          break
        }
      }
    }
  ' "$1"
}

echo "Extracting CDS names from GFF files..."
for gff in "$OUTPUT_DIR"/*.gff; do
  # Skip if not a valid file (also covers the unmatched-glob case)
  [ -f "$gff" ] || continue
  # Get GFF filename without extension
  gff_basename=$(basename "$gff" .gff)
  # Define output CDS filename
  cds_output="$OUTPUT_DIR/${gff_basename}_cds_name.txt"
  # Extract CDS IDs; a single command is used so its exit status is checked
  extract_cds_ids "$gff" > "$cds_output" || {
    echo "Error: Failed to process $gff"
    exit 1
  }
  echo "Generated: $cds_output"
done
# Hand the populated output directory to the R growth-prediction script.
# The R script path is relative to the current working directory, so this
# script has to be started from the directory that contains scripts/.
echo "Running growth prediction R script..."
if ! Rscript ./scripts/GrowthPredict.R "$OUTPUT_DIR"; then
  echo "Error: R script execution failed"
  exit 1
fi
echo "All steps completed successfully!"