first commit

This commit is contained in:
gzy
2025-12-16 11:39:15 +08:00
commit a3bdbee7c2
118 changed files with 34631 additions and 0 deletions

View File

@@ -0,0 +1,84 @@
#!/bin/bash
# Script name: run_interproscan.sh
# Function: Batch process .faa files with InterProScan
# Usage: ./run_interproscan.sh <input_directory> <output_directory>
# Example: ./run_interproscan.sh ./input_genes ./ph_predict
#
# Arguments:
#   $1 - directory searched (recursively) for .faa files
#   $2 - directory that receives copies of the .faa files, the InterProScan
#        results and the per-sample logs; created if it does not exist
# Exit status: 1 on wrong usage, missing input directory, or when the output
#              directory cannot be created.

# Check input parameters (must be 2: input directory and output directory).
# Usage/diagnostic text goes to stderr so it never mixes with normal output.
if [ $# -ne 2 ]; then
  echo "Error: Incorrect number of arguments!" >&2
  echo "Usage: $0 <input_directory> <output_directory>" >&2
  echo "Example: $0 ./prokka_annotation ./ph_predict" >&2
  exit 1
fi

input_dir="$1"
output_root="$2" # Output directory from parameter

# Fail early on a mistyped input path; otherwise the later search would only
# report "no .faa files found", which hides the real cause.
if [ ! -d "$input_dir" ]; then
  echo "Error: Input directory $input_dir does not exist or is not a directory!" >&2
  exit 1
fi

# Create main output directory ('--' protects paths that start with a dash)
echo "Creating main output directory: $output_root"
mkdir -p -- "$output_root" || {
  echo "Error: Failed to create output directory $output_root!" >&2
  exit 1
}
# Find all .faa files in input directory (including subdirectories).
# The paths are read NUL-delimited into an array: storing them in a plain
# string and expanding it unquoted would split names containing whitespace
# and expand glob characters, copying the wrong files (or none at all).
echo "Searching for .faa files in $input_dir..."
FAA_FILES=()
while IFS= read -r -d '' faa_path; do
  FAA_FILES+=("$faa_path")
done < <(find "$input_dir" -type f -name "*.faa" -print0)

# Check if any .faa files were found
if [ "${#FAA_FILES[@]}" -eq 0 ]; then
  echo "Error: No .faa files found in $input_dir (including subdirectories)!" >&2
  exit 1
fi

# Copy all .faa files to output directory.
# Files are flattened into one directory, so two inputs with the same base
# name from different subdirectories collide; cp's failure is reported below.
echo "Copying .faa files to $output_root..."
cp -v -- "${FAA_FILES[@]}" "$output_root/" || {
  echo "Error: Failed to copy .faa files to $output_root!" >&2
  exit 1
}
# Process each .faa file with InterProScan.
# The launcher path and thread count default to the values this script has
# always used; set INTERPROSCAN_SH / INTERPROSCAN_CPU to override them without
# editing the script.
interproscan_bin="${INTERPROSCAN_SH:-/media/interproscan-5.75-106.0/interproscan.sh}"
interproscan_cpu="${INTERPROSCAN_CPU:-8}"
failed_count=0

echo "Starting InterProScan analysis..."
for faa_file in "$output_root"/*.faa; do
  # Skip if not a valid file (e.g., the glob matched nothing and stayed literal)
  [ -f "$faa_file" ] || continue

  # Extract sample name (remove path and .faa suffix)
  sample_name=${faa_file##*/}
  sample_name=${sample_name%.faa}

  # Define output files for this sample.
  # With -d, InterProScan names each result after the input file and appends
  # the format extension, so the TSV lands next to the copied .faa file.
  ips_output="${faa_file}.tsv"                   # InterProScan result
  ips_log="${output_root}/${sample_name}_ips.log" # Captured stdout + stderr
  # filtered_output="${output_root}/${sample_name}_filtered.tsv" # Filtered result
  # gene_list="${output_root}/${sample_name}_gene_list.txt" # Extracted gene names

  echo "Processing $faa_file..."

  # Run InterProScan (adjust parameters as needed; example uses common options).
  # The command is tested directly so no statement can slip in between the run
  # and the status check. A failed sample is reported and skipped, not fatal.
  if ! "$interproscan_bin" \
    -i "$faa_file" \
    -d "$output_root" \
    -f tsv \
    -cpu "$interproscan_cpu" \
    -appl Pfam \
    > "$ips_log" 2>&1; then
    echo "Warning: InterProScan failed for $faa_file (see log: $ips_log)" >&2
    failed_count=$((failed_count + 1))
    continue
  fi

  # Filter results (example: retain rows with significance; adjust columns/conditions as needed)
  # NOTE(review): the InterProScan TSV format appears to have no header row and
  # to carry the score/e-value in column 9 (column 8 is the match stop
  # position) - confirm against the format docs before enabling these lines.
  # awk -F'\t' 'NR == 1 || $8 < 1e-5' "$ips_output" > "$filtered_output"
  # Extract gene names (column 1 in InterProScan TSV)
  # awk -F'\t' 'NR > 1 {print $1}' "$filtered_output" | sort -u > "$gene_list"

  echo "Completed processing $sample_name:"
  echo " Raw results: $ips_output"
  # echo " Filtered results: $filtered_output"
  # echo " Gene list: $gene_list"
done

# Summarise the run. Per-sample failures stay non-fatal (exit status is
# unchanged), but they must not be hidden behind an "all completed" message.
if [ "$failed_count" -gt 0 ]; then
  echo "Warning: InterProScan failed for $failed_count file(s); check the *_ips.log files in $output_root" >&2
  echo "InterProScan analyses finished with failures. Results in $output_root"
else
  echo "All InterProScan analyses completed! Results in $output_root"
fi