Files
labweb/public/scripts/temperatureOxygenPredict.py
2025-12-16 11:39:15 +08:00

59 lines
2.3 KiB
Python
Raw Blame History

# -*- coding: utf-8 -*-
import os
import subprocess
import argparse
from tqdm import tqdm
def main():
    """Batch-run ``predict.py all`` on every ``*.faa.tsv`` file in a directory.

    Command-line arguments (parsed with argparse from ``sys.argv``):
        predict_script: Path to the predict.py script to execute.
        input_dir: Directory scanned (non-recursively) for files whose
            names end in ``.faa.tsv``.
        output_dir: Directory that receives one ``<stem>_bacbi.txt`` file
            per input; it is created if it does not exist.

    For each input file the script is run as
    ``python <predict_script> all <input_path>`` and its standard output
    is written to the matching output file. A run that fails is reported
    on stdout and the batch continues with the next file.

    Raises:
        ValueError: If ``input_dir`` is not an existing directory.
        FileNotFoundError: If ``predict_script`` is not an existing file.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Batch run predict.py to generate bacbi.txt files')
    parser.add_argument(
        'predict_script', type=str,
        help='Path to the predict.py script (e.g., /path/to/predict.py)')
    parser.add_argument(
        'input_dir', type=str,
        help='Directory containing input TSV files (e.g., interpro_*.tsv)')
    parser.add_argument(
        'output_dir', type=str,
        help='Directory to save output bacbi.txt files')
    args = parser.parse_args()

    # Validate input directory
    if not os.path.isdir(args.input_dir):
        raise ValueError(f"Input directory does not exist: {args.input_dir}")

    # Create output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    # Validate predict.py script exists
    if not os.path.isfile(args.predict_script):
        raise FileNotFoundError(
            f"predict.py not found at: {args.predict_script}")

    # Collect eligible input files: only names ending in ".faa.tsv"
    input_files = [f for f in os.listdir(args.input_dir)
                   if f.endswith('.faa.tsv')]
    if not input_files:
        print(f"No input files found in {args.input_dir} "
              f"(looking for *.faa.tsv)")
        return

    # Batch execute commands
    print(f"Found {len(input_files)} files to process...")
    for filename in tqdm(input_files):
        input_path = os.path.join(args.input_dir, filename)

        # splitext strips only the final ".tsv", so "xxx.faa.tsv" becomes
        # "xxx.faa_bacbi.txt".
        output_filename = f"{os.path.splitext(filename)[0]}_bacbi.txt"
        output_path = os.path.join(args.output_dir, output_filename)

        # Equivalent of: python predict.py all input.tsv > output.txt
        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters from being re-parsed as shell syntax.
        cmd = ["python", args.predict_script, "all", input_path]

        try:
            with open(output_path, "wb") as out_file:
                subprocess.run(cmd, stdout=out_file, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing "python" executable or an
            # unwritable output file; report and keep going.
            print(f"Error processing {filename}: {e}")

    print("All tasks completed.")
    # Name of the last output file that was attempted.
    print(f"{output_filename}")
    print(f"Output files saved to: {args.output_dir}")
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()