59 lines
2.3 KiB
Python
59 lines
2.3 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
import os
|
||
import subprocess
|
||
import argparse
|
||
from tqdm import tqdm
|
||
|
||
def main():
    """Run predict.py over every ``*.faa.tsv`` file in a directory.

    Command-line arguments (parsed with argparse):
        predict_script: path to the predict.py script to run.
        input_dir: directory whose entries ending in ``.faa.tsv`` are
            processed (not searched recursively).
        output_dir: directory that receives one ``*_bacbi.txt`` file per
            input; it is created if it does not exist.

    For each input, ``python <predict_script> all <input>`` is executed and
    its standard output is written to the matching output file. A run that
    fails is reported and the batch continues with the next file.

    Raises:
        ValueError: if ``input_dir`` is not an existing directory.
        FileNotFoundError: if ``predict_script`` is not an existing file.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Batch run predict.py to generate bacbi.txt files')
    parser.add_argument('predict_script', type=str, help='Path to the predict.py script (e.g., /path/to/predict.py)')
    parser.add_argument('input_dir', type=str, help='Directory containing input TSV files (e.g., interpro_*.tsv)')
    parser.add_argument('output_dir', type=str, help='Directory to save output bacbi.txt files')
    args = parser.parse_args()

    # Validate both inputs before creating anything on disk, so a bad
    # invocation does not leave an empty output directory behind.
    if not os.path.isdir(args.input_dir):
        raise ValueError(f"Input directory does not exist: {args.input_dir}")
    if not os.path.isfile(args.predict_script):
        raise FileNotFoundError(f"predict.py not found at: {args.predict_script}")

    # Create output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    # Collect eligible inputs; sorted because os.listdir order is arbitrary.
    input_files = sorted(
        f for f in os.listdir(args.input_dir) if f.endswith('.faa.tsv')
    )

    if not input_files:
        print(f"No input files found in {args.input_dir} (looking for *.faa.tsv)")
        return

    # Batch execute commands
    print(f"Found {len(input_files)} files to process...")
    failed = []
    for filename in tqdm(input_files):
        input_path = os.path.join(args.input_dir, filename)

        # splitext strips only the final extension:
        # sample.faa.tsv -> sample.faa_bacbi.txt
        output_filename = f"{os.path.splitext(filename)[0]}_bacbi.txt"
        output_path = os.path.join(args.output_dir, output_filename)

        # Pass an argument list (no shell) so paths containing spaces or
        # shell metacharacters are handed to predict.py verbatim.
        cmd = ["python", args.predict_script, "all", input_path]

        try:
            # Equivalent of `> output_path`: the file is created/truncated
            # even if the run then fails.
            with open(output_path, "wb") as out_file:
                subprocess.run(cmd, stdout=out_file, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing interpreter or an unwritable output
            # path; keep going so one bad file does not abort the batch.
            print(f"Error processing {filename}: {e}")
            failed.append(filename)

    print("All tasks completed.")
    if failed:
        print(f"{len(failed)} of {len(input_files)} files failed: {', '.join(failed)}")
    print(f"Output files saved to: {args.output_dir}")
|
||
|
||
# Run the batch job only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()