from __future__ import annotations import argparse import json from pathlib import Path import pandas as pd from macro_lactone_toolkit.workflows import _fragment_csv_with_errors, results_to_dataframe def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description="Batch fragment macrolactones into a flat CSV workflow.") parser.add_argument("--input", required=True) parser.add_argument("--output", required=True) parser.add_argument("--errors-output", default=None) parser.add_argument("--summary-output", default=None) parser.add_argument("--smiles-column", default="smiles") parser.add_argument("--id-column", default="id") parser.add_argument("--ring-size", type=int, default=None) parser.add_argument("--max-rows", type=int, default=None) return parser def main(argv: list[str] | None = None) -> None: args = build_parser().parse_args(argv) results, errors = _fragment_csv_with_errors( input_csv=args.input, smiles_column=args.smiles_column, id_column=args.id_column, ring_size=args.ring_size, max_rows=args.max_rows, ) fragments = results_to_dataframe(results) output_path = Path(args.output) output_path.parent.mkdir(parents=True, exist_ok=True) fragments.to_csv(output_path, index=False) if args.errors_output: errors_output = Path(args.errors_output) errors_output.parent.mkdir(parents=True, exist_ok=True) pd.DataFrame( [ {key: value for key, value in error.items() if key != "exception"} for error in errors ] ).to_csv(errors_output, index=False) summary = { "processed": len(results) + len(errors), "successful": len(results), "failed": len(errors), "fragments": int(len(fragments)), "ring_size": args.ring_size, "output": str(output_path), } if args.summary_output: summary_path = Path(args.summary_output) summary_path.parent.mkdir(parents=True, exist_ok=True) summary_path.write_text(json.dumps(summary, indent=2, ensure_ascii=False) + "\n", encoding="utf-8") else: print(json.dumps(summary, indent=2, ensure_ascii=False)) if __name__ == "__main__": main()