feat(toolkit): ship macro_lactone_toolkit package

Unify macrolactone detection, numbering, fragmentation, and splicing under the installable macro_lactone_toolkit package.

- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
2026-03-18 22:06:45 +08:00
parent a768d26e47
commit 5e7b236f31
45 changed files with 1302 additions and 6304 deletions
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# Tests package marker for helper imports.
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Mapping
+
+from rdkit import Chem
+
+
+@dataclass(frozen=True)
+class BuiltMacrolactone:  # frozen record returned by build_macrolactone
+    mol: Chem.Mol  # sanitized RDKit molecule
+    smiles: str  # isomeric SMILES written from ``mol``
+    position_to_atom: dict[int, int]  # ring position (1..ring_size) -> atom index in ``mol``
+
+
+def build_macrolactone(
+    ring_size: int,
+    side_chains: Mapping[int, str] | None = None,
+) -> BuiltMacrolactone:
+    """Build a ring_size-membered lactone: position 1 = carbonyl C, 2 = ring O, 3.. = ring carbons."""
+    if not 12 <= ring_size <= 20:
+        raise ValueError("ring_size must be between 12 and 20")
+    side_chains = dict(side_chains or {})  # private copy; None means no side chains
+    rwmol = Chem.RWMol()
+    # Ring atoms are created in position order 1..ring_size, then the exocyclic carbonyl oxygen.
+    position_to_atom: dict[int, int] = {
+        1: rwmol.AddAtom(Chem.Atom("C")),
+        2: rwmol.AddAtom(Chem.Atom("O")),
+    }
+    for position in range(3, ring_size + 1):
+        position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
+
+    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))  # not a ring position
+
+    rwmol.AddBond(position_to_atom[1], position_to_atom[2], Chem.BondType.SINGLE)
+    for position in range(2, ring_size):
+        rwmol.AddBond(
+            position_to_atom[position],
+            position_to_atom[position + 1],
+            Chem.BondType.SINGLE,
+        )
+    rwmol.AddBond(position_to_atom[ring_size], position_to_atom[1], Chem.BondType.SINGLE)  # close the ring
+    rwmol.AddBond(position_to_atom[1], carbonyl_oxygen_idx, Chem.BondType.DOUBLE)  # C=O on position 1
+
+    for position, side_chain in side_chains.items():
+        if position not in position_to_atom:
+            raise ValueError(f"Invalid ring position: {position}")
+        _add_side_chain(rwmol, position_to_atom[position], side_chain)
+
+    mol = rwmol.GetMol()
+    Chem.SanitizeMol(mol)  # raises if the assembled structure is chemically invalid
+    return BuiltMacrolactone(
+        mol=mol,
+        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
+        position_to_atom=position_to_atom,
+    )
+
+
+def build_ambiguous_smiles() -> str:
+    """Return the SMILES of a disconnected molecule that holds a 12- and a 14-membered lactone."""
+    small_ring, large_ring = (build_macrolactone(size).mol for size in (12, 14))
+    merged = Chem.CombineMols(small_ring, large_ring)
+    return Chem.MolToSmiles(merged, isomericSmiles=True)
+
+
+def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
+    """Return the isomeric SMILES RDKit writes for a SMILES string or a molecule."""
+    # Molecules pass through untouched; anything else is parsed as SMILES.
+    is_mol = isinstance(smiles_or_mol, Chem.Mol)
+    parsed = smiles_or_mol if is_mol else Chem.MolFromSmiles(smiles_or_mol)
+    if parsed is None:
+        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
+    return Chem.MolToSmiles(parsed, isomericSmiles=True)
+
+
+def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
+    """Grow a carbon side chain from ``ring_atom_idx``, editing ``rwmol`` in place.
+
+    Supported names: "methyl", "ethyl" and "exocyclic_alkene". Any other
+    value raises ``ValueError`` before the molecule is modified.
+    """
+    # (name, number of carbons, bond order between the ring atom and the first carbon)
+    layouts = (
+        ("methyl", 1, Chem.BondType.SINGLE),
+        ("ethyl", 2, Chem.BondType.SINGLE),
+        ("exocyclic_alkene", 2, Chem.BondType.DOUBLE),
+    )
+    for name, carbon_count, first_bond in layouts:
+        if side_chain != name:
+            continue
+        new_atoms = [rwmol.AddAtom(Chem.Atom("C")) for _ in range(carbon_count)]
+        rwmol.AddBond(ring_atom_idx, new_atoms[0], first_bond)
+        for left, right in zip(new_atoms, new_atoms[1:]):
+            rwmol.AddBond(left, right, Chem.BondType.SINGLE)
+        return
+    raise ValueError(f"Unsupported side chain: {side_chain}")
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+
+import pandas as pd
+
+from .helpers import build_ambiguous_smiles, build_macrolactone
+
+
+def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
+    """Run ``python -m macro_lactone_toolkit.cli`` with ``args`` and return the captured result."""
+    command = [sys.executable, "-m", "macro_lactone_toolkit.cli"]
+    command.extend(args)
+    # check=False: callers inspect returncode and stderr themselves.
+    completed = subprocess.run(command, capture_output=True, text=True, check=False)
+    return completed
+
+
+def test_cli_smoke_commands():
+    """analyze/number/fragment each exit 0 and print JSON for one 16-membered lactone."""
+    smiles = build_macrolactone(16, {5: "methyl"}).smiles
+
+    analyze_run = run_cli("analyze", "--smiles", smiles)
+    assert analyze_run.returncode == 0, analyze_run.stderr
+    assert json.loads(analyze_run.stdout)["valid_ring_sizes"] == [16]
+
+    number_run = run_cli("number", "--smiles", smiles)
+    assert number_run.returncode == 0, number_run.stderr
+    numbering = json.loads(number_run.stdout)
+    assert numbering["ring_size"] == 16
+    assert numbering["position_to_atom"]["1"] >= 0
+
+    fragment_run = run_cli("fragment", "--smiles", smiles, "--parent-id", "cli_1")
+    assert fragment_run.returncode == 0, fragment_run.stderr
+    fragmentation = json.loads(fragment_run.stdout)
+    assert (fragmentation["parent_id"], fragmentation["ring_size"]) == ("cli_1", 16)
+    first_fragment = fragmentation["fragments"][0]
+    assert first_fragment["fragment_smiles_labeled"]
+
+
+def test_cli_fragment_csv_skips_ambiguous_and_records_errors(tmp_path):
+    """Batch mode writes fragments for the valid row and records the ambiguous row as an error."""
+    input_path, output_path, errors_path = (
+        tmp_path / name for name in ("molecules.csv", "fragments.csv", "errors.csv")
+    )
+    rows = [
+        {"id": "valid_1", "smiles": build_macrolactone(14, {4: "methyl"}).smiles},
+        {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
+    ]
+    pd.DataFrame(rows).to_csv(input_path, index=False)
+
+    cli_args = [
+        "fragment",
+        "--input",
+        str(input_path),
+        "--output",
+        str(output_path),
+        "--errors-output",
+        str(errors_path),
+    ]
+    completed = run_cli(*cli_args)
+    assert completed.returncode == 0, completed.stderr
+
+    fragments = pd.read_csv(output_path)
+    errors = pd.read_csv(errors_path)
+
+    # Only the unambiguous molecule produces fragment rows.
+    assert set(fragments["parent_id"]) == {"valid_1"}
+
+    # The combined 12+14 molecule is reported in the errors file instead.
+    assert errors.loc[0, "parent_id"] == "ambiguous_1"
+    assert errors.loc[0, "error_type"] == "AmbiguousMacrolactoneError"
--- a/tests/test_detection_and_numbering.py
+++ b/tests/test_detection_and_numbering.py
@@ -0,0 +1,73 @@
+import pytest
+from rdkit import Chem
+
+from macro_lactone_toolkit import (
+    AmbiguousMacrolactoneError,
+    MacroLactoneAnalyzer,
+    MacrolactoneDetectionError,
+    MacrolactoneFragmenter,
+)
+
+from .helpers import build_ambiguous_smiles, build_macrolactone
+
+
+@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
+def test_analyzer_detects_supported_ring_sizes(ring_size: int):
+    """The analyzer reports exactly the size of the single lactone ring that was built."""
+    smiles = build_macrolactone(ring_size).smiles
+    detected = MacroLactoneAnalyzer().get_valid_ring_sizes(smiles)
+    assert detected == [ring_size]
+
+
+def test_analyzer_rejects_non_lactone_macrocycle():
+    """A plain 12-membered carbocycle has no ester, so no ring size is valid."""
+    cyclododecane = "C1CCCCCCCCCCC1"
+    assert MacroLactoneAnalyzer().get_valid_ring_sizes(cyclododecane) == []
+
+
+def test_fragmenter_auto_numbers_ring_with_expected_positions():
+    """Auto-detected numbering reproduces the builder's position map and its inverse."""
+    built = build_macrolactone(16, {5: "methyl"})
+    numbering = MacrolactoneFragmenter().number_molecule(built.mol)
+    expected_inverse = {atom_idx: position for position, atom_idx in built.position_to_atom.items()}
+
+    assert numbering.ring_size == 16
+    assert numbering.position_to_atom == built.position_to_atom
+    assert set(numbering.position_to_atom) == set(range(1, 17))
+    assert numbering.atom_to_position == expected_inverse
+
+    # Position 1 must be the carbonyl carbon: a carbon with a double bond to oxygen.
+    carbonyl_atom = built.mol.GetAtomWithIdx(numbering.position_to_atom[1])
+    assert carbonyl_atom.GetSymbol() == "C"
+    double_bond_partners = [
+        bond.GetOtherAtom(carbonyl_atom).GetSymbol()
+        for bond in carbonyl_atom.GetBonds() if bond.GetBondType() == Chem.BondType.DOUBLE
+    ]
+    assert "O" in double_bond_partners
+    assert built.mol.GetAtomWithIdx(numbering.position_to_atom[2]).GetSymbol() == "O"
+
+
+def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
+    """Two candidate rings and no ``ring_size`` hint must raise instead of guessing."""
+    smiles = build_ambiguous_smiles()
+    with pytest.raises(AmbiguousMacrolactoneError):
+        MacrolactoneFragmenter().number_molecule(smiles)
+
+
+def test_fragmenter_raises_for_missing_macrolactone():
+    with pytest.raises(MacrolactoneDetectionError):
+        MacrolactoneFragmenter().number_molecule("CCO")  # ethanol: acyclic, so there is no lactone ring
+
+
+def test_explicit_ring_size_selects_requested_ring():
+    """Asking for the ring size the molecule really has yields that ring."""
+    smiles = build_macrolactone(14).smiles
+    fragmenter = MacrolactoneFragmenter(ring_size=14)
+    assert fragmenter.number_molecule(smiles).ring_size == 14
+
+
+def test_explicit_ring_size_rejects_wrong_ring():
+    """Requesting a 16-membered ring on a 12-membered lactone is a detection error."""
+    smiles = build_macrolactone(12).smiles
+    with pytest.raises(MacrolactoneDetectionError):
+        MacrolactoneFragmenter(ring_size=16).number_molecule(smiles)
--- a/tests/test_env_integration.py
+++ b/tests/test_env_integration.py
@@ -1,39 +0,0 @@
-import sys
-import os
-from pathlib import Path
-
-# Add SIME to path
-SIME_PATH = "/home/zly/project/SIME"
-if SIME_PATH not in sys.path:
-    sys.path.append(SIME_PATH)
-
-# Add project root to path so we can import 'src'
-PROJECT_ROOT = str(Path(__file__).parent.parent)
-if PROJECT_ROOT not in sys.path:
-    sys.path.append(PROJECT_ROOT)
-
-def test_imports():
-    """Verify that we can import from both local project and SIME."""
-    print(f"sys.path: {sys.path}")
-    
-    # 1. Test local import from src
-    try:
-        # Correct function name based on file inspection
-        from src.ring_numbering import assign_ring_numbering
-        assert callable(assign_ring_numbering)
-        print("Successfully imported src.ring_numbering.assign_ring_numbering")
-    except ImportError as e:
-        print(f"Failed to import src.ring_numbering: {e}")
-        raise
-
-    # 2. Test SIME import
-    try:
-        from utils.mole_predictor import ParallelBroadSpectrumPredictor
-        assert ParallelBroadSpectrumPredictor is not None
-        print("Successfully imported ParallelBroadSpectrumPredictor from utils.mole_predictor")
-    except ImportError as e:
-        print(f"Failed to import from SIME: {e}")
-        raise
-
-if __name__ == "__main__":
-    test_imports()
--- a/tests/test_fragment_prep.py
+++ b/tests/test_fragment_prep.py
@@ -1,95 +1,42 @@
 import pytest
 from rdkit import Chem
-from src.splicing.fragment_prep import activate_fragment
+
+from macro_lactone_toolkit.splicing.fragment_prep import activate_fragment
+

 def test_activate_smart_ethanol():
-    """Test 'smart' activation on Ethanol (CCO). Should attach to Oxygen."""
-    smiles = "CCO"
-    mol = activate_fragment(smiles, strategy="smart")
-    
-    # Check if we have a dummy atom
+    mol = activate_fragment("CCO", strategy="smart")
+
    assert mol is not None
-    assert mol.GetNumAtoms() == 4  # C, C, O, *
-    
-    # Check if the dummy atom is attached to Oxygen
-    # Find the dummy atom
-    dummy_atom = None
-    for atom in mol.GetAtoms():
-        if atom.GetSymbol() == '*':
-            dummy_atom = atom
-            break
-    
-    assert dummy_atom is not None
-    
-    # Check neighbors of dummy atom
-    neighbors = dummy_atom.GetNeighbors()
-    assert len(neighbors) == 1
-    assert neighbors[0].GetSymbol() == 'O'
-    
-    # Check output SMILES format
-    out_smiles = Chem.MolToSmiles(mol)
-    assert '*' in out_smiles
+    assert mol.GetNumAtoms() == 4
+    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
+    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "O"
+    assert "*" in Chem.MolToSmiles(mol)
+

 def test_activate_smart_amine():
-    """Test 'smart' activation on Ethylamine (CCN). Should attach to Nitrogen."""
-    smiles = "CCN"
-    mol = activate_fragment(smiles, strategy="smart")
-    
-    assert mol is not None
-    
-    # Find the dummy atom
-    dummy_atom = None
-    for atom in mol.GetAtoms():
-        if atom.GetSymbol() == '*':
-            dummy_atom = atom
-            break
-            
-    assert dummy_atom is not None
-    neighbors = dummy_atom.GetNeighbors()
-    assert neighbors[0].GetSymbol() == 'N'
+    mol = activate_fragment("CCN", strategy="smart")
+
+    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
+    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "N"
+

 def test_activate_random_pentane():
-    """Test 'random' activation on Pentane (CCCCC). Should attach to a Carbon."""
-    smiles = "CCCCC"
-    # Seed is not easily passed to the function unless we add it to the signature or fix it inside.
-    # For this test, any Carbon is fine.
-    mol = activate_fragment(smiles, strategy="random")
-    
-    assert mol is not None
-    assert mol.GetNumAtoms() == 6 # 5 C + 1 *
-    
-    dummy_atom = None
-    for atom in mol.GetAtoms():
-        if atom.GetSymbol() == '*':
-            dummy_atom = atom
-            break
-            
-    assert dummy_atom is not None
-    neighbors = dummy_atom.GetNeighbors()
-    assert neighbors[0].GetSymbol() == 'C'
+    mol = activate_fragment("CCCCC", strategy="random")
+
+    assert mol.GetNumAtoms() == 6
+    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
+    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "C"
+

 def test_activate_smart_fallback():
-    """Test 'smart' fallback when no heteroatoms are found (e.g. Propane)."""
-    smiles = "CCC"
-    # Should fall back to finding a terminal carbon or random
-    # The requirement says "fall back to a terminal Carbon" or random. 
-    # Let's assume the implementation picks a terminal carbon if possible, or just behaves like random on C.
-    mol = activate_fragment(smiles, strategy="smart")
-    
-    assert mol is not None
-    dummy_atom = None
-    for atom in mol.GetAtoms():
-        if atom.GetSymbol() == '*':
-            dummy_atom = atom
-            break
-    
-    assert dummy_atom is not None
-    neighbor = dummy_atom.GetNeighbors()[0]
-    assert neighbor.GetSymbol() == 'C'
-    # Verify it's a valid molecule
+    mol = activate_fragment("CCC", strategy="smart")
+
+    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
+    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "C"
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE

+
 def test_invalid_smiles():
    with pytest.raises(ValueError):
        activate_fragment("NotASmiles", strategy="smart")
-
--- a/tests/test_fragmentation.py
+++ b/tests/test_fragmentation.py
@@ -0,0 +1,53 @@
+from rdkit import Chem
+
+from macro_lactone_toolkit import MacrolactoneFragmenter
+
+from .helpers import build_macrolactone
+
+
+def test_fragmentation_returns_empty_list_without_sidechains():
+    """An unsubstituted 12-membered lactone yields no fragments."""
+    smiles = build_macrolactone(12).smiles
+    outcome = MacrolactoneFragmenter().fragment_molecule(smiles, parent_id="plain")
+    assert outcome.fragments == []
+
+
+def test_fragmentation_emits_labeled_and_plain_smiles_round_trip():
+    """Each side chain becomes one fragment whose labeled and plain SMILES both re-parse."""
+    built = build_macrolactone(16, {5: "ethyl", 8: "methyl"})
+    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="mol_001")
+
+    assert result.parent_id == "mol_001"
+    assert result.ring_size == 16
+    assert {fragment.cleavage_position for fragment in result.fragments} == {5, 8}
+
+    def dummy_isotopes(mol):
+        # Isotope labels of the dummy (atomic number 0) atoms.
+        return [atom.GetIsotope() for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0]
+
+    for fragment in result.fragments:
+        # The labeled form tags a dummy with the cleavage position; the plain form uses isotope 0.
+        expectations = (
+            (fragment.fragment_smiles_labeled, fragment.cleavage_position),
+            (fragment.fragment_smiles_plain, 0),
+        )
+        for smiles, isotope in expectations:
+            mol = Chem.MolFromSmiles(smiles)
+            assert mol is not None
+            assert Chem.MolToSmiles(mol, isomericSmiles=True)
+            assert isotope in dummy_isotopes(mol)
+
+
+def test_fragmentation_preserves_attachment_bond_type():
+    """A side chain attached by a double bond keeps that double bond on its dummy atom."""
+    smiles = build_macrolactone(16, {6: "exocyclic_alkene"}).smiles
+    result = MacrolactoneFragmenter().fragment_molecule(smiles, parent_id="bond_type")
+    at_position_6 = next(item for item in result.fragments if item.cleavage_position == 6)
+
+    for fragment_smiles in (at_position_6.fragment_smiles_labeled, at_position_6.fragment_smiles_plain):
+        mol = Chem.MolFromSmiles(fragment_smiles)
+        dummy_idx = next(atom.GetIdx() for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
+        # Inspect the bond between the dummy atom and its first neighbor.
+        attached_idx = mol.GetAtomWithIdx(dummy_idx).GetNeighbors()[0].GetIdx()
+        attachment_bond = mol.GetBondBetweenAtoms(dummy_idx, attached_idx)
+        assert attachment_bond.GetBondType() == Chem.BondType.DOUBLE
--- a/tests/test_imports.py
+++ b/tests/test_imports.py
@@ -0,0 +1,5 @@
+import macro_lactone_toolkit
+
+
+def test_public_imports_smoke():  # importing is not enough: the public entry points must be exposed
+    assert all(hasattr(macro_lactone_toolkit, name) for name in ("MacroLactoneAnalyzer", "MacrolactoneFragmenter"))
--- a/tests/test_ring_numbering.py
+++ b/tests/test_ring_numbering.py
@@ -1,223 +0,0 @@
-"""
-测试环编号功能 - 验证原子编号是否固定
-"""
-import sys
-sys.path.insert(0, '/home/zly/project/macro_split')
-
-from rdkit import Chem
-from rdkit.Chem import Draw, AllChem
-from rdkit.Chem.Draw import rdMolDraw2D
-from src.ring_visualization import (
-    get_macrolactone_numbering,
-    get_ring_atoms_by_size
-)
-
-
-def test_ring_numbering_consistency(smiles: str, ring_size: int = 16, num_tests: int = 5):
-    """
-    测试环编号的一致性 - 多次运行确保编号固定
-    """
-    print("=" * 70)
-    print("测试环编号一致性")
-    print("=" * 70)
-    print(f"\nSMILES: {smiles[:80]}...")
-    print(f"环大小: {ring_size}")
-    print(f"测试次数: {num_tests}")
-
-    # 解析分子
-    mol = Chem.MolFromSmiles(smiles)
-    if mol is None:
-        print("❌ 无法解析SMILES")
-        return False
-
-    print(f"✓ 分子解析成功，共 {mol.GetNumAtoms()} 个原子")
-
-    # 检测环大小
-    ring_atoms = get_ring_atoms_by_size(mol, ring_size)
-    if ring_atoms is None:
-        for size in range(12, 21):
-            ring_atoms = get_ring_atoms_by_size(mol, size)
-            if ring_atoms:
-                ring_size = size
-                print(f"⚠️  使用检测到的{size}元环")
-                break
-
-    if ring_atoms is None:
-        print("❌ 未找到12-20元环")
-        return False
-
-    print(f"✓ 找到{ring_size}元环，包含 {len(ring_atoms)} 个原子")
-
-    # 多次测试编号一致性
-    all_numberings = []
-    all_carbonyl_carbons = []
-    all_ester_oxygens = []
-
-    for i in range(num_tests):
-        result = get_macrolactone_numbering(mol, ring_size)
-        ring_atoms_result, ring_numbering, ordered_atoms, carbonyl_carbon, ester_oxygen, (is_valid, reason) = result
-
-        if not is_valid:
-            print(f"❌ 第{i+1}次测试失败: {reason}")
-            return False
-
-        all_numberings.append(ring_numbering.copy())
-        all_carbonyl_carbons.append(carbonyl_carbon)
-        all_ester_oxygens.append(ester_oxygen)
-
-    # 验证一致性
-    print("\n" + "-" * 50)
-    print("编号一致性检查:")
-    print("-" * 50)
-
-    is_consistent = True
-
-    if len(set(all_carbonyl_carbons)) == 1:
-        print(f"✓ 羰基碳位置一致: 原子索引 {all_carbonyl_carbons[0]}")
-    else:
-        print(f"❌ 羰基碳位置不一致: {all_carbonyl_carbons}")
-        is_consistent = False
-
-    if len(set(all_ester_oxygens)) == 1:
-        print(f"✓ 酯氧位置一致: 原子索引 {all_ester_oxygens[0]}")
-    else:
-        print(f"❌ 酯氧位置不一致: {all_ester_oxygens}")
-        is_consistent = False
-
-    first_numbering = all_numberings[0]
-    for i, numbering in enumerate(all_numberings[1:], 2):
-        if numbering != first_numbering:
-            print(f"❌ 第{i}次编号与第1次不一致")
-            is_consistent = False
-            break
-
-    if is_consistent:
-        print(f"✓ 所有{num_tests}次测试的编号完全一致")
-
-    # 显示详细编号信息
-    print("\n" + "-" * 50)
-    print("环原子编号详情:")
-    print("-" * 50)
-
-    numbering = all_numberings[0]
-    carbonyl_carbon = all_carbonyl_carbons[0]
-    ester_oxygen = all_ester_oxygens[0]
-
-    sorted_items = sorted(numbering.items(), key=lambda x: x[1])
-
-    print(f"{'位置':<6} {'原子索引':<10} {'元素':<6} {'说明'}")
-    print("-" * 40)
-
-    for atom_idx, position in sorted_items:
-        atom = mol.GetAtomWithIdx(atom_idx)
-        symbol = atom.GetSymbol()
-        note = ""
-        if atom_idx == carbonyl_carbon:
-            note = "← 羰基碳 (C=O)"
-        elif atom_idx == ester_oxygen:
-            note = "← 酯键氧"
-        print(f"{position:<6} {atom_idx:<10} {symbol:<6} {note}")
-
-    return is_consistent
-
-
-def save_visualization(smiles: str, output_path: str, ring_size: int = 16):
-    """保存分子可视化图片"""
-    print("\n" + "=" * 70)
-    print("保存可视化图片")
-    print("=" * 70)
-
-    mol = Chem.MolFromSmiles(smiles)
-    if mol is None:
-        print("❌ 无法解析SMILES")
-        return
-
-    for size in range(12, 21):
-        ring_atoms = get_ring_atoms_by_size(mol, size)
-        if ring_atoms:
-            ring_size = size
-            break
-
-    result = get_macrolactone_numbering(mol, ring_size)
-    ring_atoms, ring_numbering, ordered_atoms, carbonyl_carbon, ester_oxygen, (is_valid, reason) = result
-
-    if not is_valid:
-        print(f"❌ 无法获取编号: {reason}")
-        return
-
-    mol_copy = Chem.Mol(mol)
-    AllChem.Compute2DCoords(mol_copy)
-
-    for atom_idx in ring_atoms:
-        if atom_idx in ring_numbering:
-            atom = mol_copy.GetAtomWithIdx(atom_idx)
-            atom.SetProp("atomNote", str(ring_numbering[atom_idx]))
-
-    atom_colors = {}
-    for atom_idx in ring_atoms:
-        atom = mol.GetAtomWithIdx(atom_idx)
-        symbol = atom.GetSymbol()
-
-        if atom_idx == carbonyl_carbon:
-            atom_colors[atom_idx] = (1.0, 0.6, 0.0)
-        elif atom_idx == ester_oxygen:
-            atom_colors[atom_idx] = (1.0, 0.4, 0.4)
-        elif symbol == 'C':
-            atom_colors[atom_idx] = (0.7, 0.85, 1.0)
-        elif symbol == 'O':
-            atom_colors[atom_idx] = (1.0, 0.7, 0.7)
-        elif symbol == 'N':
-            atom_colors[atom_idx] = (0.8, 0.7, 1.0)
-        else:
-            atom_colors[atom_idx] = (0.8, 1.0, 0.8)
-
-    drawer = rdMolDraw2D.MolDraw2DSVG(1000, 1000)
-    drawer.SetFontSize(14)
-    drawer.DrawMolecule(mol_copy, highlightAtoms=list(ring_atoms), highlightAtomColors=atom_colors)
-    drawer.FinishDrawing()
-    svg = drawer.GetDrawingText()
-
-    svg_path = output_path.replace('.png', '.svg')
-    with open(svg_path, 'w', encoding='utf-8') as f:
-        f.write(svg)
-    print(f"✓ SVG已保存到: {svg_path}")
-
-    try:
-        drawer_png = rdMolDraw2D.MolDraw2DCairo(1000, 1000)
-        drawer_png.SetFontSize(14)
-        drawer_png.DrawMolecule(mol_copy, highlightAtoms=list(ring_atoms), highlightAtomColors=atom_colors)
-        drawer_png.FinishDrawing()
-        drawer_png.WriteDrawingText(output_path)
-        print(f"✓ PNG已保存到: {output_path}")
-    except Exception as e:
-        print(f"⚠️  PNG保存失败: {e}")
-
-    print("\n颜色说明:")
-    print("  橙色: 羰基碳 (位置1)")
-    print("  红色: 酯键氧 (位置2)")
-    print("  浅蓝色: 环上碳原子")
-
-
-def main():
-    smiles = "O[C@H]1[C@H]([C@H]([C@H](OC[C@@H]2[C@@H](CC)OC(C[C@H]([C@H](C)[C@H]([C@@H](CC=O)C[C@@H](C)C(/C=C/C(/C)=C/2)=O)O[C@H]2[C@@H]([C@H]([C@@H]([C@@H](C)O2)O[C@H]2C[C@](C)([C@@H]([C@@H](C)O2)O)O)[N@](C)C)O)O)=O)O[C@@H]1C)OC)OC"
-
-    print("\n大环内酯环编号测试\n")
-    is_consistent = test_ring_numbering_consistency(smiles, ring_size=16, num_tests=5)
-
-    output_path = "/home/zly/project/macro_split/output/test_ring_numbering.png"
-    save_visualization(smiles, output_path, ring_size=16)
-
-    print("\n" + "=" * 70)
-    print("测试总结")
-    print("=" * 70)
-    if is_consistent:
-        print("✅ 所有测试通过！环原子编号是固定的。")
-    else:
-        print("❌ 测试失败：环原子编号不一致")
-
-    return is_consistent
-
-
-if __name__ == "__main__":
-    success = main()
-    sys.exit(0 if success else 1)
--- a/tests/test_scaffold_prep.py
+++ b/tests/test_scaffold_prep.py
@@ -1,84 +0,0 @@
-import pytest
-from rdkit import Chem
-from src.splicing.scaffold_prep import prepare_tylosin_scaffold
-from src.ring_numbering import assign_ring_numbering
-
-def test_prepare_tylosin_scaffold():
-    # Construct a 16-membered lactone with side chains
-    # Numbering logic (assumed based on implementation):
-    # 1: C=O
-    # 2-6: CH2
-    # 7: CH(CH3)  <- Methyl side chain
-    # 8-14: CH2
-    # 15: CH(CC)  <- Ethyl side chain
-    # 16: O
-    
-    # SMILES:
-    # O=C1 (pos 1)
-    # CCCCC (pos 2-6)
-    # C(C) (pos 7, with Methyl)
-    # CCCCCCC (pos 8-14)
-    # C(CC) (pos 15, with Ethyl)
-    # O1 (pos 16)
-    
-    smiles = "O=C1CCCCC(C)CCCCCCCCC(CC)O1"
-    
-    # Verify initial assumption about numbering
-    mol = Chem.MolFromSmiles(smiles)
-    numbering = assign_ring_numbering(mol)
-    
-    # Find atom indices for pos 7 and 15 to ensure our SMILES construction is correct for the test
-    pos_map = {v: k for k, v in numbering.items()}
-    assert 7 in pos_map, "Position 7 not found in ring"
-    assert 15 in pos_map, "Position 15 not found in ring"
-    assert 5 in pos_map, "Position 5 not found in ring"
-    
-    atom7 = mol.GetAtomWithIdx(pos_map[7])
-    atom15 = mol.GetAtomWithIdx(pos_map[15])
-    atom5 = mol.GetAtomWithIdx(pos_map[5])
-    
-    # Check side chains exist
-    # Atom 7 should have 3 neighbors (2 ring, 1 methyl)
-    assert len(atom7.GetNeighbors()) == 3
-    # Atom 15 should have 3 neighbors (2 ring, 1 ethyl)
-    assert len(atom15.GetNeighbors()) == 3
-    # Atom 5 should have 2 neighbors (2 ring, 2 implicit H)
-    assert len(atom5.GetNeighbors()) == 2
-    
-    # Execute scaffold prep
-    target_positions = [5, 7, 15]
-    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)
-    
-    assert res_mol is not None
-    assert len(dummy_map) == 3
-    
-    # Verify dummies
-    for pos in target_positions:
-        assert pos in dummy_map
-        dummy_idx = dummy_map[pos]
-        dummy_atom = res_mol.GetAtomWithIdx(dummy_idx)
-        assert dummy_atom.GetSymbol() == "*"
-        assert dummy_atom.GetIsotope() == pos
-        
-        # Check that dummy is connected to the correct ring position
-        neighbors = dummy_atom.GetNeighbors()
-        assert len(neighbors) == 1
-        
-    # Verify side chains removed
-    # New atom counts.
-    # Original: 16 (ring) + 1 (O=) + 1 (Me) + 2 (Et) = 20 heavy atoms.
-    # Removed: Me (1), Et (2). Total -3.
-    # Added: 3 dummies. Total +3.
-    # Net: 20.
-    assert res_mol.GetNumAtoms() == 20
-    
-    # Check that the specific side chains are gone.
-    # Count carbons.
-    # Original C count: 1 (C=O) + 14 (CH2/CH) + 1(Me) + 2(Et) = 18 C.
-    # New C count: 1 (C=O) + 14 (Ring C) = 15 C.
-    # Dummies are *. O are O.
-    c_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == 'C')
-    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"
-    
-    dummy_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == '*')
-    assert dummy_count == 3
--- a/tests/test_splicing_engine.py
+++ b/tests/test_splicing_engine.py
@@ -1,77 +1,51 @@
 import pytest
 from rdkit import Chem
-from src.splicing.engine import splice_molecule
+
+from macro_lactone_toolkit import MacrolactoneFragmenter
+from macro_lactone_toolkit.splicing.engine import splice_molecule
+from macro_lactone_toolkit.splicing.scaffold_prep import prepare_macrolactone_scaffold
+
+from .helpers import build_macrolactone, canonicalize
+

 def test_splice_benzene_methyl():
-    """
-    Test splicing a benzene scaffold (isotope 1) with a methyl fragment.
-    Scaffold: c1ccccc1[1*] (Phenyl radical-ish dummy)
-    Fragment: C* (Methyl radical-ish dummy)
-    Result: Cc1ccccc1 (Toluene)
-    """
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
    fragment = Chem.MolFromSmiles("C*")
-    
-    assert scaffold is not None
-    assert fragment is not None
-    
+
    product = splice_molecule(scaffold, fragment, position=1)
-    
-    # Expected result: Toluene
-    expected_smiles = "Cc1ccccc1"
-    expected_mol = Chem.MolFromSmiles(expected_smiles)
-    expected_canonical = Chem.MolToSmiles(expected_mol, isomericSmiles=True)
-    
-    product_canonical = Chem.MolToSmiles(product, isomericSmiles=True)
-    
-    assert product_canonical == expected_canonical
+
+    assert canonicalize(product) == canonicalize("Cc1ccccc1")
+

 def test_splice_missing_isotope():
-    """Test that error is raised if the requested position is not found on scaffold."""
-    scaffold = Chem.MolFromSmiles("c1ccccc1[2*]") # Isotope 2
+    scaffold = Chem.MolFromSmiles("c1ccccc1[2*]")
    fragment = Chem.MolFromSmiles("C*")
-    
+
    with pytest.raises(ValueError, match="Scaffold dummy atom with isotope 1 not found"):
        splice_molecule(scaffold, fragment, position=1)

+
 def test_splice_no_fragment_dummy():
-    """Test that error is raised if fragment has no dummy atom."""
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
-    fragment = Chem.MolFromSmiles("C") # Methane, no dummy
-    
+    fragment = Chem.MolFromSmiles("C")
+
    with pytest.raises(ValueError, match="Fragment does not contain a dummy atom"):
        splice_molecule(scaffold, fragment, position=1)

-def test_complex_splicing():
-    """
-    Test splicing with more complex structures.
-    Scaffold: Pyridine derivative n1cccc1CC[1*]
-    Fragment: Cyclopropane C1CC1*
-    Result: n1cccc1CCC1CC1
-    """
-    scaffold = Chem.MolFromSmiles("n1cccc1CC[1*]")
-    fragment = Chem.MolFromSmiles("*C1CC1")
-    
-    product = splice_molecule(scaffold, fragment, position=1)
-    
-    expected = Chem.MolFromSmiles("n1cccc1CCC1CC1")
-    
-    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)

-def test_scaffold_with_multiple_different_dummies():
-    """
-    Test splicing when scaffold has multiple dummies with different isotopes.
-    Scaffold: [1*]c1ccccc1[2*]
-    Fragment: C*
-    Target: Splicing at 1 should leave [2*] intact.
-    """
-    scaffold = Chem.MolFromSmiles("[1*]c1ccccc1[2*]")
-    fragment = Chem.MolFromSmiles("C*")
-    
-    # Splice at 1
-    product = splice_molecule(scaffold, fragment, position=1)
-    
-    # Expected: Cc1ccccc1[2*]
-    expected = Chem.MolFromSmiles("Cc1ccccc1[2*]")
-    
-    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)
+def test_prepare_scaffold_and_reassemble_fragment():
+    """Cutting the ethyl side chain at position 5 and splicing it back restores the molecule."""
+    position = 5
+    built = build_macrolactone(16, {position: "ethyl"})
+    fragmenter = MacrolactoneFragmenter(ring_size=16)
+    fragments = fragmenter.fragment_molecule(built.smiles, parent_id="reassemble").fragments
+    side_chain = next(item for item in fragments if item.cleavage_position == position)
+
+    scaffold, dummy_map = prepare_macrolactone_scaffold(built.smiles, positions=[position], ring_size=16)
+    assert position in dummy_map
+
+    # Splice the labeled fragment back in at the position it was cut from.
+    labeled_fragment = Chem.MolFromSmiles(side_chain.fragment_smiles_labeled)
+    product = splice_molecule(scaffold, labeled_fragment, position=position)
+
+    assert canonicalize(product) == canonicalize(built.mol)