feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
This commit is contained in:
@@ -1,10 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from macro_lactone_toolkit.cli import main
|
||||
import pandas as pd
|
||||
|
||||
from macro_lactone_toolkit.workflows import _fragment_csv_with_errors, results_to_dataframe
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description="Batch fragment macrolactones into a flat CSV workflow.")
|
||||
parser.add_argument("--input", required=True)
|
||||
parser.add_argument("--output", required=True)
|
||||
parser.add_argument("--errors-output", default=None)
|
||||
parser.add_argument("--summary-output", default=None)
|
||||
parser.add_argument("--smiles-column", default="smiles")
|
||||
parser.add_argument("--id-column", default="id")
|
||||
parser.add_argument("--ring-size", type=int, default=None)
|
||||
parser.add_argument("--max-rows", type=int, default=None)
|
||||
return parser
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> None:
    """Run the batch fragmentation workflow from the command line.

    Parses ``argv`` with ``build_parser()``, fragments the input CSV, writes
    the fragment table to ``--output``, optionally writes the collected errors
    (without their ``"exception"`` entry) to ``--errors-output``, and finally
    emits a JSON summary either to ``--summary-output`` or to stdout.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
    """
    options = build_parser().parse_args(argv)

    results, errors = _fragment_csv_with_errors(
        input_csv=options.input,
        smiles_column=options.smiles_column,
        id_column=options.id_column,
        ring_size=options.ring_size,
        max_rows=options.max_rows,
    )

    # Fragment table: always written, creating the target directory on demand.
    fragments = results_to_dataframe(results)
    output_path = Path(options.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fragments.to_csv(output_path, index=False)

    # Error table: only written when a destination was requested.
    if options.errors_output:
        errors_path = Path(options.errors_output)
        errors_path.parent.mkdir(parents=True, exist_ok=True)
        error_rows = []
        for error in errors:
            # The "exception" entry is left out of the CSV.
            row = {field: content for field, content in error.items() if field != "exception"}
            error_rows.append(row)
        pd.DataFrame(error_rows).to_csv(errors_path, index=False)

    succeeded = len(results)
    failed = len(errors)
    summary = {
        "processed": succeeded + failed,
        "successful": succeeded,
        "failed": failed,
        "fragments": len(fragments),
        "ring_size": options.ring_size,
        "output": str(output_path),
    }
    rendered = json.dumps(summary, indent=2, ensure_ascii=False)

    if not options.summary_output:
        print(rendered)
        return

    summary_path = Path(options.summary_output)
    summary_path.parent.mkdir(parents=True, exist_ok=True)
    summary_path.write_text(rendered + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Call the parser-backed main() defined in this file with the untouched
    # command line. Its parser defines no "fragment" positional or subcommand,
    # so injecting that token into sys.argv would make argparse abort with
    # "unrecognized arguments: fragment".
    main()
|
||||
|
||||
Reference in New Issue
Block a user