#!/bin/bash
# Script name: run_interproscan.sh
# Function: Batch process .faa files with InterProScan
# Usage: ./run_interproscan.sh <input_dir> <output_dir>
# Example: ./run_interproscan.sh ./input_genes ./ph_predict
#
# Steps:
#   1. Recursively collect every *.faa file under <input_dir>.
#   2. Copy them (flattened) into <output_dir>.
#   3. Run InterProScan (Pfam, TSV output) on each copied file, writing one
#      log file per sample to <output_dir>/<sample>_ips.log.
#
# Optional environment overrides:
#   INTERPROSCAN_BIN   Path to interproscan.sh
#                      (default: /media/interproscan-5.75-106.0/interproscan.sh)
#   INTERPROSCAN_CPUS  Value passed to InterProScan's -cpu option (default: 8)
#
# Exit status: 1 on a setup error (bad arguments, missing input, copy failure);
# 0 otherwise. A failure on an individual sample is reported as a warning and
# does not stop the remaining samples from being processed.

readonly DEFAULT_INTERPROSCAN_BIN=/media/interproscan-5.75-106.0/interproscan.sh
readonly DEFAULT_INTERPROSCAN_CPUS=8

# Print an error message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Copy all .faa files into the output directory and run InterProScan on each.
# Globals:   INTERPROSCAN_BIN, INTERPROSCAN_CPUS (read, optional)
# Arguments: $1 - input directory searched recursively for *.faa
#            $2 - output directory (created if missing)
# Outputs:   progress on stdout, errors and warnings on stderr
#######################################
main() {
  # Check input parameters (must be 2: input directory and output directory)
  if [[ $# -ne 2 ]]; then
    {
      echo "Error: Incorrect number of arguments!"
      echo "Usage: $0 <input_dir> <output_dir>"
      echo "Example: $0 ./prokka_annotation ./ph_predict"
    } >&2
    exit 1
  fi

  local input_dir=$1
  local output_root=$2 # Output directory from parameter
  local ips_bin=${INTERPROSCAN_BIN:-$DEFAULT_INTERPROSCAN_BIN}
  local ips_cpus=${INTERPROSCAN_CPUS:-$DEFAULT_INTERPROSCAN_CPUS}

  # Fail early on problems that would otherwise surface as confusing
  # "no files found" or per-sample failures later on.
  [[ -d "$input_dir" ]] \
    || die "Error: Input directory $input_dir does not exist or is not a directory!"
  [[ -x "$ips_bin" ]] \
    || die "Error: InterProScan launcher not found or not executable: $ips_bin"

  # Create main output directory
  echo "Creating main output directory: $output_root"
  mkdir -p -- "$output_root" \
    || die "Error: Failed to create output directory $output_root!"

  # Find all .faa files in input directory (including subdirectories).
  # NUL-delimited so paths containing spaces or glob characters stay intact.
  echo "Searching for .faa files in $input_dir..."
  local -a faa_files=()
  local path
  while IFS= read -r -d '' path; do
    faa_files+=("$path")
  done < <(find "$input_dir" -type f -name '*.faa' -print0)

  # Check if any .faa files were found
  if ((${#faa_files[@]} == 0)); then
    die "Error: No .faa files found in $input_dir (including subdirectories)!"
  fi

  # Copy all .faa files to output directory. Files that already ARE the
  # destination file (output directory located inside the input directory,
  # e.g. on a re-run) are skipped, because cp refuses to copy a file onto
  # itself and would abort the whole run.
  echo "Copying .faa files to $output_root..."
  local -a to_copy=()
  for path in "${faa_files[@]}"; do
    if [[ "$path" -ef "${output_root}/${path##*/}" ]]; then
      continue
    fi
    to_copy+=("$path")
  done
  if ((${#to_copy[@]} > 0)); then
    # A single cp invocation is kept on purpose: it reports an error when two
    # source files from different subdirectories share the same name instead
    # of silently overwriting one with the other (GNU cp behaviour).
    cp -v -- "${to_copy[@]}" "$output_root/" \
      || die "Error: Failed to copy .faa files to $output_root!"
  fi

  # Process each .faa file with InterProScan
  echo "Starting InterProScan analysis..."
  local faa_file sample_name ips_log ips_output
  local failed=0
  for faa_file in "$output_root"/*.faa; do
    # Skip if not a valid file (e.g., empty glob)
    [[ -f "$faa_file" ]] || continue

    # Extract sample name (remove path and .faa suffix)
    sample_name=$(basename -- "$faa_file" .faa)

    # Define output files for this sample.
    # With -d, InterProScan names the result after the input file and appends
    # the format extension, hence "<sample>.faa.tsv".
    ips_log="${output_root}/${sample_name}_ips.log"
    ips_output="${output_root}/${sample_name}.faa.tsv" # InterProScan result
    # filtered_output="${output_root}/${sample_name}_filtered.tsv"  # Filtered result
    # gene_list="${output_root}/${sample_name}_gene_list.txt"       # Extracted gene names

    echo "Processing $faa_file..."

    # Run InterProScan (adjust parameters as needed; example uses common
    # options). stdout and stderr both go to the per-sample log.
    if ! "$ips_bin" \
      -i "$faa_file" \
      -d "$output_root" \
      -f tsv \
      -cpu "$ips_cpus" \
      -appl Pfam \
      >"$ips_log" 2>&1; then
      echo "Warning: InterProScan failed for $faa_file (see log: $ips_log)" >&2
      failed=$((failed + 1))
      continue
    fi

    # Filter results (example: retain rows with significance; adjust
    # columns/conditions as needed).
    # NOTE(review): InterProScan TSV output has no header row and, per its
    # documented column layout, the score/e-value appears to be column 9
    # rather than 8 - confirm before enabling these lines.
    # awk -F'\t' 'NR == 1 || $8 < 1e-5' "$ips_output" > "$filtered_output"

    # Extract gene names (column 1 in InterProScan TSV)
    # awk -F'\t' 'NR > 1 {print $1}' "$filtered_output" | sort -u > "$gene_list"

    echo "Completed processing $sample_name:"
    echo " Raw results: $ips_output"
    # echo " Filtered results: $filtered_output"
    # echo " Gene list: $gene_list"
  done

  if ((failed > 0)); then
    echo "Warning: InterProScan failed for $failed file(s); check the *_ips.log files in $output_root" >&2
  fi
  echo "All InterProScan analyses completed! Results in $output_root"
}

main "$@"