feat(toolkit): ship macro_lactone_toolkit package

Unify macrolactone detection, numbering, fragmentation, and
splicing under the installable macro_lactone_toolkit package.

- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
This commit is contained in:
2026-03-18 22:06:45 +08:00
parent a768d26e47
commit 5e7b236f31
45 changed files with 1302 additions and 6304 deletions

View File

@@ -1,95 +1,42 @@
import pytest
from rdkit import Chem
from src.splicing.fragment_prep import activate_fragment
from macro_lactone_toolkit.splicing.fragment_prep import activate_fragment
def test_activate_smart_ethanol():
    """Test 'smart' activation on Ethanol (CCO). Should attach to Oxygen."""
    mol = activate_fragment("CCO", strategy="smart")
    assert mol is not None
    # C, C, O plus the dummy attachment point (*).
    assert mol.GetNumAtoms() == 4

    # The dummy atom is looked up by atomic number 0. The None default turns
    # a missing dummy into an assertion failure instead of a StopIteration.
    dummy_atom = next(
        (atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0),
        None,
    )
    assert dummy_atom is not None

    # The attachment point must have exactly one neighbor: the oxygen.
    neighbors = dummy_atom.GetNeighbors()
    assert len(neighbors) == 1
    assert neighbors[0].GetSymbol() == "O"

    # The attachment point must survive serialization to SMILES.
    assert "*" in Chem.MolToSmiles(mol)
def test_activate_smart_amine():
    """Test 'smart' activation on Ethylamine (CCN). Should attach to Nitrogen."""
    mol = activate_fragment("CCN", strategy="smart")
    assert mol is not None

    # The dummy atom is looked up by atomic number 0. The None default turns
    # a missing dummy into an assertion failure instead of a StopIteration.
    dummy_atom = next(
        (atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0),
        None,
    )
    assert dummy_atom is not None
    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "N"
def test_activate_random_pentane():
    """Test 'random' activation on Pentane (CCCCC). Should attach to a Carbon."""
    # No seed is passed to activate_fragment here, so the test only requires
    # that the attachment point lands on *some* carbon. The fragment is
    # activated once so every assertion inspects the same molecule.
    mol = activate_fragment("CCCCC", strategy="random")
    assert mol is not None
    # 5 carbons plus the dummy attachment point (*).
    assert mol.GetNumAtoms() == 6

    # The dummy atom is looked up by atomic number 0. The None default turns
    # a missing dummy into an assertion failure instead of a StopIteration.
    dummy_atom = next(
        (atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0),
        None,
    )
    assert dummy_atom is not None
    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "C"
def test_activate_smart_fallback():
    """Test 'smart' fallback when no heteroatoms are found (e.g. Propane)."""
    # Propane has no heteroatom to attach to. The test only requires that the
    # fallback picks a carbon; it does not pin which carbon is chosen.
    mol = activate_fragment("CCC", strategy="smart")
    assert mol is not None

    # The dummy atom is looked up by atomic number 0. The None default turns
    # a missing dummy into an assertion failure instead of a StopIteration.
    dummy_atom = next(
        (atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0),
        None,
    )
    assert dummy_atom is not None
    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "C"

    # Verify it's a valid molecule: sanitization must report no failed step.
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
def test_invalid_smiles():
    """Check that activate_fragment raises ValueError for an unparseable SMILES."""
    bad_smiles = "NotASmiles"
    with pytest.raises(ValueError):
        activate_fragment(bad_smiles, strategy="smart")