# -*- coding: utf-8 -*-
"""Batch-run a ``predict.py`` script over every ``*.faa.tsv`` file in a directory.

For each matching input file the script runs
``predict.py all <input>`` and stores the command's standard output in
``<output_dir>/<input name without .tsv>_bacbi.txt``.
"""
import os
import subprocess
import argparse
import sys

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is cosmetic; fall back to plain iteration without it.
    def tqdm(iterable, *args, **kwargs):
        return iterable


# Only files with this suffix are picked up from the input directory.
INPUT_SUFFIX = '.faa.tsv'


def _run_predict(predict_script, input_path, output_path):
    """Run ``predict.py all <input_path>`` and write its stdout to *output_path*.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled safely.  The child is started
    with the interpreter that is running this script.

    Raises:
        subprocess.CalledProcessError: if predict.py exits with a non-zero code.
        OSError: if the output file cannot be opened or the child cannot start.
    """
    cmd = [sys.executable, predict_script, 'all', input_path]
    # Opening with "wb" creates/truncates the file, like the shell's ">" did.
    with open(output_path, 'wb') as out_handle:
        subprocess.run(cmd, stdout=out_handle, check=True)


def main() -> None:
    """Parse the command line and run predict.py on every eligible input file.

    Positional arguments (read from ``sys.argv``):
        predict_script: path to the predict.py script.
        input_dir: directory scanned (non-recursively) for ``*.faa.tsv`` files.
        output_dir: directory receiving the ``*_bacbi.txt`` files; created if
            missing.

    A failure on one file is reported and does not stop the batch.

    Raises:
        ValueError: if ``input_dir`` is not an existing directory.
        FileNotFoundError: if ``predict_script`` is not an existing file.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Batch run predict.py to generate bacbi.txt files')
    parser.add_argument(
        'predict_script', type=str,
        help='Path to the predict.py script (e.g., /path/to/predict.py)')
    parser.add_argument(
        'input_dir', type=str,
        help='Directory containing input TSV files (e.g., interpro_*.tsv)')
    parser.add_argument(
        'output_dir', type=str,
        help='Directory to save output bacbi.txt files')
    args = parser.parse_args()

    # Validate input directory
    if not os.path.isdir(args.input_dir):
        raise ValueError(f"Input directory does not exist: {args.input_dir}")

    # Create output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    # Validate predict.py script exists
    if not os.path.isfile(args.predict_script):
        raise FileNotFoundError(f"predict.py not found at: {args.predict_script}")

    # Collect eligible input files; sorted so the processing order is stable.
    input_files = sorted(
        f for f in os.listdir(args.input_dir) if f.endswith(INPUT_SUFFIX)
    )

    if not input_files:
        print(f"No input files found in {args.input_dir} "
              f"(looking for *{INPUT_SUFFIX})")
        return

    # Batch execute commands
    print(f"Found {len(input_files)} files to process...")
    failed = []
    for filename in tqdm(input_files):
        input_path = os.path.join(args.input_dir, filename)

        # splitext strips only the final ".tsv":
        # xxx.faa.tsv -> xxx.faa_bacbi.txt
        output_filename = f"{os.path.splitext(filename)[0]}_bacbi.txt"
        output_path = os.path.join(args.output_dir, output_filename)

        try:
            _run_predict(args.predict_script, input_path, output_path)
        except (subprocess.CalledProcessError, OSError) as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Error processing {filename}: {e}")
            failed.append(filename)

    print("All tasks completed.")
    if failed:
        print(f"{len(failed)} of {len(input_files)} files failed: "
              f"{', '.join(failed)}")
    print(f"Output files saved to: {args.output_dir}")


if __name__ == "__main__":
    main()