feat(toolkit): ship macro_lactone_toolkit package

Unify macrolactone detection, numbering, fragmentation, and
splicing under the installable macro_lactone_toolkit package.

- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
This commit is contained in:
2026-03-18 22:06:45 +08:00
parent a768d26e47
commit 5e7b236f31
45 changed files with 1302 additions and 6304 deletions

74
tests/test_cli.py Normal file
View File

@@ -0,0 +1,74 @@
from __future__ import annotations
import json
import subprocess
import sys
import pandas as pd
from .helpers import build_ambiguous_smiles, build_macrolactone
def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run the ``macro_lactone_toolkit.cli`` module in a child interpreter.

    The module is launched with ``-m`` under the interpreter that is running
    the tests (``sys.executable``), followed by ``args`` verbatim.

    Args:
        *args: Command-line tokens appended after the module name.

    Returns:
        The finished process, with ``stdout`` and ``stderr`` captured as text.
        A non-zero exit status does not raise (``check=False``); callers
        inspect ``returncode`` themselves.
    """
    command = [sys.executable, "-m", "macro_lactone_toolkit.cli"]
    command.extend(args)
    completed = subprocess.run(
        command,
        check=False,
        text=True,
        capture_output=True,
    )
    return completed
def test_cli_smoke_commands():
    """Smoke-test the ``analyze``, ``number`` and ``fragment`` subcommands.

    Each subcommand is given the same ``--smiles`` input, must exit with
    status 0, and must print JSON on stdout containing the keys checked below.
    """
    # Every assertion below expects ring size 16, so the helper presumably
    # builds a 16-membered ring (with a methyl at position 5) -- confirm
    # against tests/helpers.
    molecule = build_macrolactone(16, {5: "methyl"})
    smiles_args = ("--smiles", molecule.smiles)

    # analyze: reports exactly one valid ring size.
    analyze_result = run_cli("analyze", *smiles_args)
    assert analyze_result.returncode == 0, analyze_result.stderr
    analyze_json = json.loads(analyze_result.stdout)
    assert analyze_json["valid_ring_sizes"] == [16]

    # number: position keys arrive as strings because the payload is JSON.
    number_result = run_cli("number", *smiles_args)
    assert number_result.returncode == 0, number_result.stderr
    number_json = json.loads(number_result.stdout)
    assert number_json["ring_size"] == 16
    assert number_json["position_to_atom"]["1"] >= 0

    # fragment: the supplied parent id is echoed back and the first fragment
    # carries a non-empty labeled SMILES.
    fragment_result = run_cli("fragment", *smiles_args, "--parent-id", "cli_1")
    assert fragment_result.returncode == 0, fragment_result.stderr
    fragment_json = json.loads(fragment_result.stdout)
    assert fragment_json["parent_id"] == "cli_1"
    assert fragment_json["ring_size"] == 16
    first_fragment = fragment_json["fragments"][0]
    assert first_fragment["fragment_smiles_labeled"]
def test_cli_fragment_csv_skips_ambiguous_and_records_errors(tmp_path):
    """Check CSV-mode ``fragment`` with one valid and one ambiguous input row.

    The command must still exit with status 0. The fragments CSV may only
    reference the valid row, and the first row of the errors CSV must name the
    ambiguous row with error type ``AmbiguousMacrolactoneError``.

    Args:
        tmp_path: pytest-provided temporary directory holding all three CSVs.
    """
    good_molecule = build_macrolactone(14, {4: "methyl"})
    ambiguous_smiles = build_ambiguous_smiles()

    csv_in = tmp_path / "molecules.csv"
    csv_out = tmp_path / "fragments.csv"
    csv_errors = tmp_path / "errors.csv"

    rows = [
        {"id": "valid_1", "smiles": good_molecule.smiles},
        {"id": "ambiguous_1", "smiles": ambiguous_smiles},
    ]
    pd.DataFrame(rows).to_csv(csv_in, index=False)

    cli_args = [
        "fragment",
        "--input",
        str(csv_in),
        "--output",
        str(csv_out),
        "--errors-output",
        str(csv_errors),
    ]
    result = run_cli(*cli_args)
    assert result.returncode == 0, result.stderr

    fragment_table = pd.read_csv(csv_out)
    error_table = pd.read_csv(csv_errors)

    # Only the valid molecule may appear as a fragment parent.
    assert set(fragment_table["parent_id"]) == {"valid_1"}

    # Only the first error row is inspected, mirroring the two original checks.
    expected_first_error = {
        "parent_id": "ambiguous_1",
        "error_type": "AmbiguousMacrolactoneError",
    }
    for column, expected_value in expected_first_error.items():
        assert error_table.loc[0, column] == expected_value