feat(toolkit): ship macro_lactone_toolkit package

Unify macrolactone detection, numbering, fragmentation, and
splicing under the installable macro_lactone_toolkit package.

- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
This commit is contained in:
2026-03-18 22:06:45 +08:00
parent a768d26e47
commit 5e7b236f31
45 changed files with 1302 additions and 6304 deletions

View File

@@ -1,77 +1,51 @@
import pytest
from rdkit import Chem
from src.splicing.engine import splice_molecule
from macro_lactone_toolkit import MacrolactoneFragmenter
from macro_lactone_toolkit.splicing.engine import splice_molecule
from macro_lactone_toolkit.splicing.scaffold_prep import prepare_macrolactone_scaffold
from .helpers import build_macrolactone, canonicalize
def test_splice_benzene_methyl():
    """Splice a methyl fragment onto a phenyl scaffold and expect toluene.

    The scaffold ``c1ccccc1[1*]`` carries a dummy atom labeled with isotope 1
    and the fragment ``C*`` carries an unlabeled dummy atom. Splicing at
    position 1 should produce ``Cc1ccccc1``.
    """
    toluene = "Cc1ccccc1"

    phenyl = Chem.MolFromSmiles("c1ccccc1[1*]")
    methyl = Chem.MolFromSmiles("C*")
    assert phenyl is not None
    assert methyl is not None

    spliced = splice_molecule(phenyl, methyl, position=1)

    # First check: compare RDKit canonical SMILES directly.
    reference = Chem.MolToSmiles(Chem.MolFromSmiles(toluene), isomericSmiles=True)
    assert Chem.MolToSmiles(spliced, isomericSmiles=True) == reference

    # Second check: same comparison through the shared test helper.
    assert canonicalize(spliced) == canonicalize(toluene)
def test_splice_missing_isotope():
    """Expect a ValueError when the requested position is absent from the scaffold.

    The scaffold's only dummy atom is labeled with isotope 2, so requesting
    position 1 from ``splice_molecule`` is expected to be rejected.
    """
    # The scaffold is parsed once; the duplicated identical assignment was a dead store.
    scaffold = Chem.MolFromSmiles("c1ccccc1[2*]")  # isotope 2 only, no isotope 1
    fragment = Chem.MolFromSmiles("C*")
    with pytest.raises(ValueError, match="Scaffold dummy atom with isotope 1 not found"):
        splice_molecule(scaffold, fragment, position=1)
def test_splice_no_fragment_dummy():
    """Expect a ValueError when the fragment has no dummy atom to attach through."""
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
    # Methane: deliberately has no dummy atom. The fragment is parsed once;
    # the duplicated identical assignment was a dead store.
    fragment = Chem.MolFromSmiles("C")
    with pytest.raises(ValueError, match="Fragment does not contain a dummy atom"):
        splice_molecule(scaffold, fragment, position=1)
def test_complex_splicing():
    """Splice a cyclopropyl fragment onto a pyridine-based scaffold.

    Scaffold: pyridine derivative ``n1ccccc1CC[1*]``
    Fragment: cyclopropyl ``*C1CC1``
    Result:   ``n1ccccc1CCC1CC1``
    """
    # The ring must be six-membered: a five-membered aromatic ring with a bare
    # ``n`` (``n1cccc1``) cannot be kekulized, so RDKit would return None for it.
    scaffold = Chem.MolFromSmiles("n1ccccc1CC[1*]")
    fragment = Chem.MolFromSmiles("*C1CC1")
    expected = Chem.MolFromSmiles("n1ccccc1CCC1CC1")
    # Guard against silent parse failures so a bad SMILES fails here with a
    # clear message, not later inside splice_molecule / MolToSmiles.
    assert scaffold is not None
    assert fragment is not None
    assert expected is not None

    product = splice_molecule(scaffold, fragment, position=1)

    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)
def test_scaffold_with_multiple_different_dummies():
    """Splice at one labeled site of a scaffold that has two labeled sites.

    Scaffold ``[1*]c1ccccc1[2*]`` has dummy atoms labeled with isotopes 1
    and 2; the fragment is ``C*``. Splicing at position 1 is expected to
    give ``Cc1ccccc1[2*]``, i.e. the isotope-2 dummy is left intact.
    """
    two_site_scaffold = Chem.MolFromSmiles("[1*]c1ccccc1[2*]")
    methyl = Chem.MolFromSmiles("C*")

    spliced = splice_molecule(two_site_scaffold, methyl, position=1)

    # Only the isotope-1 site should have been consumed.
    reference = Chem.MolFromSmiles("Cc1ccccc1[2*]")
    assert Chem.MolToSmiles(spliced) == Chem.MolToSmiles(reference)
def test_prepare_scaffold_and_reassemble_fragment():
    """Round-trip check: fragment a built macrolactone, then splice it back.

    Builds a molecule via ``build_macrolactone(16, {5: "ethyl"})``, takes the
    fragment reported at cleavage position 5, prepares a scaffold for that
    position, splices the fragment back in, and expects the canonicalized
    product to match the canonicalized original molecule.
    """
    ring_size = 16
    position = 5

    built = build_macrolactone(ring_size, {position: "ethyl"})
    fragmenter = MacrolactoneFragmenter(ring_size=ring_size)
    result = fragmenter.fragment_molecule(built.smiles, parent_id="reassemble")

    # First fragment cleaved at the position of interest; raises
    # StopIteration if the fragmenter reported none.
    side_chain = next(
        candidate
        for candidate in result.fragments
        if candidate.cleavage_position == position
    )

    scaffold, dummy_map = prepare_macrolactone_scaffold(
        built.smiles,
        positions=[position],
        ring_size=ring_size,
    )
    assert position in dummy_map

    side_chain_mol = Chem.MolFromSmiles(side_chain.fragment_smiles_labeled)
    product = splice_molecule(scaffold, side_chain_mol, position=position)
    assert canonicalize(product) == canonicalize(built.mol)