Files
macro_split/scripts/validate_macrolactone_db.py

90 lines
2.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Validate MacrolactoneDB 12- to 20-membered rings.

Usage:
    pixi run python scripts/validate_macrolactone_db.py \
        --input data/MacrolactoneDB/ring12_20/temp.csv \
        --output validation_output \
        --sample-ratio 0.1
"""
import argparse
import sys
from pathlib import Path
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from macro_lactone_toolkit.validation.validator import MacrolactoneValidator
def main(argv=None) -> int:
    """Run the MacrolactoneDB validation from the command line.

    Parses the command-line options, prints a banner echoing the chosen
    settings, runs ``MacrolactoneValidator`` on the input CSV and prints the
    counts found in the result mapping it returns.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes ``argparse``
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        ``0`` once the validator has run and the summary has been printed.

    Raises:
        SystemExit: Raised by ``argparse`` with status 2 for unknown or
            malformed options, and when ``--sample-ratio`` is outside the
            documented ``0.0``-``1.0`` range.
    """
    parser = argparse.ArgumentParser(
        description="Validate MacrolactoneDB 12-20 membered rings"
    )
    parser.add_argument(
        "--input",
        type=str,
        default="data/MacrolactoneDB/ring12_20/temp.csv",
        help="Input CSV file path",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="validation_output",
        help="Output directory",
    )
    parser.add_argument(
        "--sample-ratio",
        type=float,
        default=0.1,
        help="Sampling ratio (0.0-1.0)",
    )
    parser.add_argument(
        "--smiles-col",
        type=str,
        default="smiles",
        help="SMILES column name",
    )
    parser.add_argument(
        "--id-col",
        type=str,
        default="IDs",
        help="ID column name",
    )
    args = parser.parse_args(argv)

    # Enforce the range promised by the --sample-ratio help text before any
    # work starts. The chained comparison is False for NaN as well, so
    # "--sample-ratio nan" is rejected too.
    if not 0.0 <= args.sample_ratio <= 1.0:
        parser.error(
            f"--sample-ratio must be between 0.0 and 1.0, got {args.sample_ratio}"
        )

    separator = "=" * 60

    print(separator)
    print("MacrolactoneDB Validation")
    print(separator)
    print(f"Input: {args.input}")
    print(f"Output: {args.output}")
    print(f"Sample ratio: {args.sample_ratio}")
    print(separator)

    validator = MacrolactoneValidator(
        output_dir=args.output,
        sample_ratio=args.sample_ratio,
        smiles_col=args.smiles_col,
        id_col=args.id_col,
    )
    # The result is indexed below by 'total', 'success', 'failed' and
    # 'skipped'; a missing key raises KeyError.
    results = validator.run(args.input)

    print("\n" + separator)
    print("Validation Complete")
    print(separator)
    print(f"Total processed: {results['total']}")
    print(f"Success: {results['success']}")
    print(f"Failed: {results['failed']}")
    print(f"Skipped: {results['skipped']}")
    print(separator)
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())