#!/bin/bash
# Script name: prepare_growth_prediction.sh
# Function:    Copy the GFF/FNA/FFN files found under a Prokka annotation
#              directory into an output directory, write one
#              "<name>_cds_name.txt" file per GFF listing its CDS IDs, then
#              run the growth prediction R script on the output directory.
# Usage:       ./prepare_growth_prediction.sh PROKKA_DIR OUTPUT_DIR
# Example:     ./prepare_growth_prediction.sh ./prokka_annotation ./growth_predict_dir
#
# Notes:
#   - The R script path below is relative, so the script must be started from
#     the directory that contains "scripts/".
#   - OUTPUT_DIR should not live inside PROKKA_DIR: the recursive search would
#     rediscover the copied files on a re-run.

set -euo pipefail

# R script executed on the prepared output directory (resolved against the
# current working directory).
readonly R_SCRIPT=./scripts/GrowthPredict.R

# Print an error message to stderr and abort the whole script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Recursively copy every regular file matching a name pattern.
# Arguments:
#   $1 - label used in messages (e.g. "GFF")
#   $2 - find(1) -name pattern (e.g. "*.gff")
#   $3 - directory to search
#   $4 - destination directory
# Returns:
#   0 if files were copied, 1 if nothing matched (a warning is printed).
#   Aborts the script if cp fails.
#######################################
copy_matching() {
  local label=$1 pattern=$2 src_dir=$3 dest_dir=$4
  local -a found=()
  local path

  # NUL-delimited so that paths containing spaces, newlines or glob
  # characters reach cp as single, unmodified arguments.
  while IFS= read -r -d '' path; do
    found+=("$path")
  done < <(find "$src_dir" -type f -name "$pattern" -print0)

  if ((${#found[@]} == 0)); then
    printf 'Warning: No %s files found in %s\n' "$label" "$src_dir" >&2
    return 1
  fi

  echo "Copying $label files to $dest_dir..."
  cp -v -- "${found[@]}" "$dest_dir/" || die "Failed to copy $label files"
}

#######################################
# Print the ID of every CDS feature in a GFF file, one per line.
# Reading stops at the "##FASTA" directive so embedded sequence data is
# never scanned.
# Arguments:
#   $1 - path to the GFF file
# Outputs:
#   CDS IDs on stdout.
#######################################
extract_cds_ids() {
  # GFF is tab-delimited: column 3 is the feature type, column 9 holds the
  # ";"-separated attributes. The ID attribute is looked up explicitly so a
  # feature is kept even when ID is its only attribute.
  awk -F'\t' '
    /^##FASTA/ { exit }
    $3 == "CDS" {
      n = split($9, attrs, ";")
      for (i = 1; i <= n; i++) {
        if (attrs[i] ~ /^ID=/) {
          print substr(attrs[i], 4)
          break
        }
      }
    }
  ' < "$1"
}

main() {
  # Check input parameters
  if (($# != 2)); then
    {
      echo "Error: Incorrect number of arguments!"
      echo "Usage: $0 PROKKA_DIR OUTPUT_DIR"
      echo "Example: $0 ./prokka_annotation ./growth_predict_dir"
    } >&2
    exit 1
  fi

  local prokka_path=$1
  local output_dir=$2

  # Validate the input before touching the file system, so a bad path does
  # not leave an empty output directory behind.
  [[ -d "$prokka_path" ]] || die "$prokka_path does not exist"

  # Create output directory
  mkdir -p -- "$output_dir" || die "Failed to create $output_dir"

  # GFF files are required for CDS extraction: stop if none were found.
  if ! copy_matching "GFF" "*.gff" "$prokka_path" "$output_dir"; then
    exit 1
  fi

  # FNA and FFN files are optional; copy_matching has already printed a
  # warning when none exist, so a non-zero status is deliberately ignored.
  copy_matching "FNA/FASTA" "*.fna" "$prokka_path" "$output_dir" || true
  copy_matching "FFN" "*.ffn" "$prokka_path" "$output_dir" || true

  # Process each GFF file to extract CDS names
  echo "Extracting CDS names from GFF files..."
  local gff gff_name cds_output
  for gff in "$output_dir"/*.gff; do
    # Skip the literal pattern (no match) and anything that is not a file.
    [[ -f "$gff" ]] || continue

    gff_name=${gff##*/}
    cds_output="$output_dir/${gff_name%.gff}_cds_name.txt"

    extract_cds_ids "$gff" > "$cds_output" || die "Failed to process $gff"
    echo "Generated: $cds_output"
  done

  # Run R script with the output directory as input
  echo "Running growth prediction R script..."
  Rscript "$R_SCRIPT" "$output_dir" || die "R script execution failed"

  echo "All steps completed successfully!"
}

main "$@"