#!/usr/bin/env python3
"""
Validate MacrolactoneDB 12-20 membered rings.

Usage:
    pixi run python scripts/validate_macrolactone_db.py \
        --input data/MacrolactoneDB/ring12_20/temp.csv \
        --output validation_output \
        --sample-ratio 0.1
"""

import argparse
import sys
from pathlib import Path

# Add src to path so the in-repo package can be imported without installation.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))


def _sample_ratio(text: str) -> float:
    """Convert a ``--sample-ratio`` argument to a float within [0.0, 1.0].

    Raises:
        argparse.ArgumentTypeError: if *text* is not a number or lies outside
            the closed interval [0.0, 1.0] (NaN is rejected as well, because
            every comparison against NaN is false).
    """
    try:
        ratio = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid float value: {text!r}"
        ) from None
    if not 0.0 <= ratio <= 1.0:
        raise argparse.ArgumentTypeError(
            f"sample ratio must be between 0.0 and 1.0, got {text!r}"
        )
    return ratio


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the validation script."""
    parser = argparse.ArgumentParser(
        description="Validate MacrolactoneDB 12-20 membered rings"
    )
    parser.add_argument(
        "--input",
        type=str,
        default="data/MacrolactoneDB/ring12_20/temp.csv",
        help="Input CSV file path",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="validation_output",
        help="Output directory",
    )
    parser.add_argument(
        "--sample-ratio",
        type=_sample_ratio,
        default=0.1,
        help="Sampling ratio (0.0-1.0)",
    )
    parser.add_argument(
        "--smiles-col",
        type=str,
        default="smiles",
        help="SMILES column name",
    )
    parser.add_argument(
        "--id-col",
        type=str,
        default="IDs",
        help="ID column name",
    )
    return parser


def main(argv=None) -> int:
    """Run the MacrolactoneDB validation from the command line.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, which keeps the original
            zero-argument call ``main()`` working unchanged.

    Returns:
        ``0`` once the validator has run and the summary has been printed.

    Raises:
        SystemExit: raised by argparse for ``--help`` or invalid arguments
            (including an out-of-range ``--sample-ratio``).
    """
    args = _build_parser().parse_args(argv)

    # Imported after argument parsing so that ``--help`` and argument errors
    # do not depend on the toolkit (and its dependencies) being importable.
    from macro_lactone_toolkit.validation.validator import MacrolactoneValidator

    banner = "=" * 60

    print(banner)
    print("MacrolactoneDB Validation")
    print(banner)
    print(f"Input: {args.input}")
    print(f"Output: {args.output}")
    print(f"Sample ratio: {args.sample_ratio}")
    print(banner)

    validator = MacrolactoneValidator(
        output_dir=args.output,
        sample_ratio=args.sample_ratio,
        smiles_col=args.smiles_col,
        id_col=args.id_col,
    )
    results = validator.run(args.input)

    print("\n" + banner)
    print("Validation Complete")
    print(banner)
    print(f"Total processed: {results['total']}")
    print(f"Success: {results['success']}")
    print(f"Failed: {results['failed']}")
    print(f"Skipped: {results['skipped']}")
    print(banner)

    return 0


if __name__ == "__main__":
    sys.exit(main())