#!/usr/bin/env python3
r"""
Validate MacrolactoneDB 12-20 membered rings.

Usage:
    pixi run python scripts/validate_macrolactone_db.py \
        --input data/MacrolactoneDB/ring12_20/temp.csv \
        --output validation_output \
        --sample-ratio 0.1
"""

import argparse
import sys
from pathlib import Path
from typing import Optional, Sequence

# Add the repository's ``src`` directory (sibling of ``scripts``) to the
# import path so the toolkit can be imported without being installed.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

_RULE = "=" * 60


def _sample_ratio(text: str) -> float:
    """Parse ``--sample-ratio`` and require it to lie within [0.0, 1.0].

    Raises:
        argparse.ArgumentTypeError: if *text* is not a float or is outside
            the documented range (NaN is rejected as well, because every
            comparison against NaN is false).
    """
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid float value: {text!r}"
        ) from None
    if not 0.0 <= value <= 1.0:
        raise argparse.ArgumentTypeError(
            f"must be between 0.0 and 1.0, got {text!r}"
        )
    return value


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Validate MacrolactoneDB 12-20 membered rings"
    )
    parser.add_argument(
        "--input",
        type=str,
        default="data/MacrolactoneDB/ring12_20/temp.csv",
        help="Input CSV file path",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="validation_output",
        help="Output directory",
    )
    parser.add_argument(
        "--sample-ratio",
        type=_sample_ratio,
        default=0.1,
        help="Sampling ratio (0.0-1.0)",
    )
    parser.add_argument(
        "--smiles-col",
        type=str,
        default="smiles",
        help="SMILES column name",
    )
    parser.add_argument(
        "--id-col",
        type=str,
        default="IDs",
        help="ID column name",
    )
    return parser


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the validation from the command line.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        ``0`` once the validator has run and the summary has been printed.

    Raises:
        SystemExit: with status 2 (via argparse) when an argument is invalid
            or the input file does not exist.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Fail fast with a regular usage error instead of letting a missing file
    # surface later from inside the validator.
    if not Path(args.input).is_file():
        parser.error(f"input file not found: {args.input}")

    # Imported only after the arguments are known to be valid, so ``--help``
    # and usage errors do not depend on the toolkit being importable.
    from macro_lactone_toolkit.validation.validator import (
        MacrolactoneValidator,
    )

    print(_RULE)
    print("MacrolactoneDB Validation")
    print(_RULE)
    print(f"Input: {args.input}")
    print(f"Output: {args.output}")
    print(f"Sample ratio: {args.sample_ratio}")
    print(_RULE)

    validator = MacrolactoneValidator(
        output_dir=args.output,
        sample_ratio=args.sample_ratio,
        smiles_col=args.smiles_col,
        id_col=args.id_col,
    )

    # ``results`` is read by key below: total / success / failed / skipped.
    results = validator.run(args.input)

    print("\n" + _RULE)
    print("Validation Complete")
    print(_RULE)
    print(f"Total processed: {results['total']}")
    print(f"Success: {results['success']}")
    print(f"Failed: {results['failed']}")
    print(f"Skipped: {results['skipped']}")
    print(_RULE)

    return 0


if __name__ == "__main__":
    sys.exit(main())