Add splicing module and related tests

- Add src/splicing/ module with scaffold_prep, fragment_prep, and engine - Add tylosin_splicer.py entry script - Add unit tests for splicing components Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 17:47:00 +08:00
parent 0ced0fa816
commit 68f171ad1d
9 changed files with 594 additions and 0 deletions
--- a/scripts/tylosin_splicer.py
+++ b/scripts/tylosin_splicer.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+"""
+Tylosin Splicing System - Main Entry Point
+"""
+
+def main():
+    print("Hello from Tylosin Splicer")
+
+if __name__ == "__main__":
+    main()
--- a/src/splicing/init.py
+++ b/src/splicing/init.py
--- a/src/splicing/engine.py
+++ b/src/splicing/engine.py
@@ -0,0 +1,79 @@
+from rdkit import Chem
+
+def splice_molecule(scaffold: Chem.Mol, fragment: Chem.Mol, position: int) -> Chem.Mol:
+    """
+    Connects a scaffold and a fragment by replacing a specific dummy atom on the scaffold
+    and a dummy atom on the fragment with a single bond.
+
+    Args:
+        scaffold: The scaffold molecule containing labeled dummy atoms (e.g., [1*]).
+        fragment: The fragment molecule containing a dummy atom (*).
+        position: The isotope number of the dummy atom on the scaffold to attach to.
+
+    Returns:
+        Chem.Mol: The spliced molecule.
+
+    Raises:
+        ValueError: If the specified dummy atom is not found on the scaffold
+                    or if the fragment does not contain a dummy atom.
+    """
+    # 1. Combine molecules
+    # Note: CombineMols preserves atom indices of mol1 (scaffold), and appends mol2 (fragment).
+    # Atoms 0 to N-1 are scaffold, N to N+M-1 are fragment.
+    combined = Chem.CombineMols(scaffold, fragment)
+    rw_mol = Chem.RWMol(combined)
+    
+    scaffold_atom_count = scaffold.GetNumAtoms()
+    total_atoms = rw_mol.GetNumAtoms()
+    
+    # 2. Find Scaffold Dummy
+    scaffold_dummy_idx = -1
+    for i in range(scaffold_atom_count):
+        atom = rw_mol.GetAtomWithIdx(i)
+        if atom.GetAtomicNum() == 0 and atom.GetIsotope() == position:
+            scaffold_dummy_idx = i
+            break
+            
+    if scaffold_dummy_idx == -1:
+        raise ValueError(f"Scaffold dummy atom with isotope {position} not found")
+        
+    # 3. Find Fragment Dummy
+    # We search in the fragment part (indices >= scaffold_atom_count)
+    fragment_dummy_idx = -1
+    for i in range(scaffold_atom_count, total_atoms):
+        atom = rw_mol.GetAtomWithIdx(i)
+        # We assume any dummy atom in the fragment is the attachment point.
+        # Usually fragment has isotope 0 on its dummy.
+        if atom.GetAtomicNum() == 0:
+            fragment_dummy_idx = i
+            break
+            
+    if fragment_dummy_idx == -1:
+        raise ValueError("Fragment does not contain a dummy atom")
+        
+    # 4. Identify Neighbors (Anchors)
+    scaffold_dummy = rw_mol.GetAtomWithIdx(scaffold_dummy_idx)
+    if scaffold_dummy.GetDegree() != 1:
+        raise ValueError(f"Scaffold dummy atom at index {scaffold_dummy_idx} must have exactly one neighbor")
+    scaffold_anchor_idx = scaffold_dummy.GetNeighbors()[0].GetIdx()
+    
+    fragment_dummy = rw_mol.GetAtomWithIdx(fragment_dummy_idx)
+    if fragment_dummy.GetDegree() != 1:
+        raise ValueError(f"Fragment dummy atom at index {fragment_dummy_idx} must have exactly one neighbor")
+    fragment_anchor_idx = fragment_dummy.GetNeighbors()[0].GetIdx()
+    
+    # 5. Add Bond
+    rw_mol.AddBond(scaffold_anchor_idx, fragment_anchor_idx, Chem.BondType.SINGLE)
+    
+    # 6. Remove Dummy Atoms
+    # Remove the higher index first to preserve the lower index
+    # We know fragment_dummy_idx > scaffold_dummy_idx because fragment atoms were appended.
+    # However, just to be safe, we sort them.
+    indices_to_remove = sorted([scaffold_dummy_idx, fragment_dummy_idx], reverse=True)
+    for idx in indices_to_remove:
+        rw_mol.RemoveAtom(idx)
+        
+    # 7. Sanitize
+    Chem.SanitizeMol(rw_mol)
+    
+    return rw_mol.GetMol()
--- a/src/splicing/fragment_prep.py
+++ b/src/splicing/fragment_prep.py
@@ -0,0 +1,87 @@
+import random
+from rdkit import Chem
+
+def activate_fragment(smiles: str, strategy: str = "smart") -> Chem.Mol:
+    """
+    Convert a small molecule fragment into an attachable R-group by adding a dummy atom (*).
+
+    Args:
+        smiles: SMILES string of the fragment.
+        strategy: 'smart' (prioritize heteroatoms) or 'random' (any atom with H).
+
+    Returns:
+        Chem.Mol: The activated fragment with a dummy atom attached.
+    """
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
+        raise ValueError(f"Invalid SMILES string: {smiles}")
+
+    target_idx = -1
+
+    if strategy == "smart":
+        # Order of preference: Amine, Alcohol/Phenol, Thiol
+        # Amine: [N;!H0] - Nitrogen with at least one H
+        # Alcohol/Phenol: [O;H1] - Oxygen with 1 H (usually 2 bonds total)
+        # Thiol: [S;H1]
+        
+        smarts_patterns = [
+            "[N;!H0]", # Primary/Secondary amine
+            "[O;H1]",  # Alcohol/Phenol
+            "[S;H1]"   # Thiol
+        ]
+        
+        for smarts in smarts_patterns:
+            pattern = Chem.MolFromSmarts(smarts)
+            if pattern:
+                matches = mol.GetSubstructMatches(pattern)
+                if matches:
+                    # Pick the first match
+                    target_idx = matches[0][0]
+                    break
+        
+        if target_idx == -1:
+            # Fallback to random if no smart match found
+            strategy = "random"
+
+    if strategy == "random":
+        # Find all atoms with at least one H
+        candidates = []
+        carbon_candidates = []
+        
+        for atom in mol.GetAtoms():
+            # GetTotalNumHs includes implicit and explicit Hs
+            if atom.GetTotalNumHs() > 0:
+                candidates.append(atom.GetIdx())
+                if atom.GetSymbol() == 'C':
+                    carbon_candidates.append(atom.GetIdx())
+        
+        if not candidates:
+            raise ValueError("No suitable atoms with hydrogens found to attach to.")
+        
+        # Prefer Carbon atoms if available
+        if carbon_candidates:
+            # Pick the first one for deterministic behavior
+            target_idx = carbon_candidates[0]
+        else:
+            target_idx = candidates[0]
+
+    if target_idx == -1:
+        # Should be caught by the candidates check, but just in case
+        raise ValueError("Could not identify a target atom for activation.")
+
+    # Perform attachment
+    rwmol = Chem.RWMol(mol)
+    
+    # Add dummy atom
+    dummy_idx = rwmol.AddAtom(Chem.Atom('*'))
+    
+    # Add bond to target atom
+    rwmol.AddBond(target_idx, dummy_idx, Chem.BondType.SINGLE)
+    
+    # Sanitize to fix implicit H counts and ensure validity
+    try:
+        Chem.SanitizeMol(rwmol)
+    except Exception as e:
+        raise ValueError(f"Failed to sanitize molecule after activation: {e}")
+
+    return rwmol.GetMol()
--- a/src/splicing/scaffold_prep.py
+++ b/src/splicing/scaffold_prep.py
@@ -0,0 +1,123 @@
+from rdkit import Chem
+from typing import List, Dict, Tuple, Set, Optional
+from src.ring_numbering import assign_ring_numbering
+
+def get_subtree_indices(mol: Chem.Mol, start_atom_idx: int, forbidden_idx: int) -> Set[int]:
+    """
+    Get all atom indices in the subtree starting at start_atom_idx,
+    moving away from forbidden_idx.
+    Used to identify side chain atoms to remove.
+    """
+    subtree = set()
+    stack = [start_atom_idx]
+    
+    while stack:
+        current = stack.pop()
+        if current in subtree:
+            continue
+        subtree.add(current)
+        
+        atom = mol.GetAtomWithIdx(current)
+        for neighbor in atom.GetNeighbors():
+            n_idx = neighbor.GetIdx()
+            # Traverse neighbors except the one leading back to the ring (forbidden)
+            # and those already visited
+            if n_idx != forbidden_idx and n_idx not in subtree:
+                stack.append(n_idx)
+                
+    return subtree
+
+def prepare_tylosin_scaffold(smiles: str, positions: List[int]) -> Tuple[Chem.Mol, Dict[int, int]]:
+    """
+    Prepare the Tylosin scaffold by removing side chains at specified positions
+    and marking them with dummy atoms.
+    
+    Args:
+        smiles: SMILES string of the scaffold/molecule.
+        positions: List of ring positions (1-16) to prepare (add sockets).
+        
+    Returns:
+        Tuple of (Modified Molecule, Dict mapping position -> new_dummy_atom_idx)
+        The returned molecule has dummy atoms ('*') at the specified positions,
+        marked with Isotope = position number.
+    """
+    mol = Chem.MolFromSmiles(smiles)
+    if not mol:
+        raise ValueError(f"Invalid SMILES: {smiles}")
+        
+    # 1. Ring numbering to identify target atoms
+    ring_map = assign_ring_numbering(mol) # atom_idx -> ring_pos
+    if not ring_map:
+        raise ValueError("Could not assign ring numbering. Is this a 16-membered lactone?")
+        
+    # Reverse map for easy lookup: ring_pos -> atom_idx
+    pos_to_atom = {v: k for k, v in ring_map.items()}
+    
+    atoms_to_remove = set()
+    dummies_to_add = [] # List of (ring_atom_idx, position)
+    
+    # 2. Identify edits
+    for pos in positions:
+        if pos not in pos_to_atom:
+            raise ValueError(f"Position {pos} not found in ring numbering.")
+            
+        ring_atom_idx = pos_to_atom[pos]
+        ring_atom = mol.GetAtomWithIdx(ring_atom_idx)
+        
+        # Identify non-ring neighbors (side chains)
+        # Note: neighbors in ring have indices in ring_map
+        side_chain_neighbors = []
+        for n in ring_atom.GetNeighbors():
+            if n.GetIdx() not in ring_map:
+                side_chain_neighbors.append(n.GetIdx())
+        
+        # If side chains exist, mark them for removal
+        if side_chain_neighbors:
+            for sc_idx in side_chain_neighbors:
+                subtree = get_subtree_indices(mol, sc_idx, forbidden_idx=ring_atom_idx)
+                atoms_to_remove.update(subtree)
+        
+        # Plan to add dummy at this ring atom
+        dummies_to_add.append((ring_atom_idx, pos))
+
+    # 3. Apply edits using RWMol
+    rwmol = Chem.RWMol(mol)
+    
+    # Step A: Add dummy atoms
+    # We add them before deletion to use stable ring indices.
+    # Note: Adding atoms does not change existing indices.
+    
+    for ring_idx, pos in dummies_to_add:
+        # Create dummy atom
+        dummy = Chem.Atom('*')
+        dummy.SetIsotope(pos) # Mark with position
+        
+        new_idx = rwmol.AddAtom(dummy)
+        
+        # Add bond to ring atom
+        rwmol.AddBond(ring_idx, new_idx, Chem.BondType.SINGLE)
+        
+    # Step B: Remove side chain atoms
+    # Sort descending to preserve lower indices during deletion
+    sorted_remove = sorted(list(atoms_to_remove), reverse=True)
+    for idx in sorted_remove:
+        rwmol.RemoveAtom(idx)
+        
+    # 4. Finalize
+    mol_final = rwmol.GetMol()
+    
+    try:
+        Chem.SanitizeMol(mol_final)
+    except Exception:
+        # Sometime dummies trigger sanitization errors, but usually partial sanitization works.
+        # We'll ignore strict sanitization errors for the scaffold as it has dummies.
+        pass
+        
+    # 5. Build result map (position -> atom_index in new mol)
+    # Since indices shifted, we find dummies by their isotope markers.
+    final_dummy_map = {}
+    for atom in mol_final.GetAtoms():
+        if atom.GetSymbol() == '*' and atom.GetIsotope() in positions:
+            final_dummy_map[atom.GetIsotope()] = atom.GetIdx()
+            
+    return mol_final, final_dummy_map
--- a/tests/test_env_integration.py
+++ b/tests/test_env_integration.py
@@ -0,0 +1,39 @@
+import sys
+import os
+from pathlib import Path
+
+# Add SIME to path
+SIME_PATH = "/home/zly/project/SIME"
+if SIME_PATH not in sys.path:
+    sys.path.append(SIME_PATH)
+
+# Add project root to path so we can import 'src'
+PROJECT_ROOT = str(Path(__file__).parent.parent)
+if PROJECT_ROOT not in sys.path:
+    sys.path.append(PROJECT_ROOT)
+
+def test_imports():
+    """Verify that we can import from both local project and SIME."""
+    print(f"sys.path: {sys.path}")
+    
+    # 1. Test local import from src
+    try:
+        # Correct function name based on file inspection
+        from src.ring_numbering import assign_ring_numbering
+        assert callable(assign_ring_numbering)
+        print("Successfully imported src.ring_numbering.assign_ring_numbering")
+    except ImportError as e:
+        print(f"Failed to import src.ring_numbering: {e}")
+        raise
+
+    # 2. Test SIME import
+    try:
+        from utils.mole_predictor import ParallelBroadSpectrumPredictor
+        assert ParallelBroadSpectrumPredictor is not None
+        print("Successfully imported ParallelBroadSpectrumPredictor from utils.mole_predictor")
+    except ImportError as e:
+        print(f"Failed to import from SIME: {e}")
+        raise
+
+if __name__ == "__main__":
+    test_imports()
--- a/tests/test_fragment_prep.py
+++ b/tests/test_fragment_prep.py
@@ -0,0 +1,95 @@
+import pytest
+from rdkit import Chem
+from src.splicing.fragment_prep import activate_fragment
+
+def test_activate_smart_ethanol():
+    """Test 'smart' activation on Ethanol (CCO). Should attach to Oxygen."""
+    smiles = "CCO"
+    mol = activate_fragment(smiles, strategy="smart")
+    
+    # Check if we have a dummy atom
+    assert mol is not None
+    assert mol.GetNumAtoms() == 4  # C, C, O, *
+    
+    # Check if the dummy atom is attached to Oxygen
+    # Find the dummy atom
+    dummy_atom = None
+    for atom in mol.GetAtoms():
+        if atom.GetSymbol() == '*':
+            dummy_atom = atom
+            break
+    
+    assert dummy_atom is not None
+    
+    # Check neighbors of dummy atom
+    neighbors = dummy_atom.GetNeighbors()
+    assert len(neighbors) == 1
+    assert neighbors[0].GetSymbol() == 'O'
+    
+    # Check output SMILES format
+    out_smiles = Chem.MolToSmiles(mol)
+    assert '*' in out_smiles
+
+def test_activate_smart_amine():
+    """Test 'smart' activation on Ethylamine (CCN). Should attach to Nitrogen."""
+    smiles = "CCN"
+    mol = activate_fragment(smiles, strategy="smart")
+    
+    assert mol is not None
+    
+    # Find the dummy atom
+    dummy_atom = None
+    for atom in mol.GetAtoms():
+        if atom.GetSymbol() == '*':
+            dummy_atom = atom
+            break
+            
+    assert dummy_atom is not None
+    neighbors = dummy_atom.GetNeighbors()
+    assert neighbors[0].GetSymbol() == 'N'
+
+def test_activate_random_pentane():
+    """Test 'random' activation on Pentane (CCCCC). Should attach to a Carbon."""
+    smiles = "CCCCC"
+    # Seed is not easily passed to the function unless we add it to the signature or fix it inside.
+    # For this test, any Carbon is fine.
+    mol = activate_fragment(smiles, strategy="random")
+    
+    assert mol is not None
+    assert mol.GetNumAtoms() == 6 # 5 C + 1 *
+    
+    dummy_atom = None
+    for atom in mol.GetAtoms():
+        if atom.GetSymbol() == '*':
+            dummy_atom = atom
+            break
+            
+    assert dummy_atom is not None
+    neighbors = dummy_atom.GetNeighbors()
+    assert neighbors[0].GetSymbol() == 'C'
+
+def test_activate_smart_fallback():
+    """Test 'smart' fallback when no heteroatoms are found (e.g. Propane)."""
+    smiles = "CCC"
+    # Should fall back to finding a terminal carbon or random
+    # The requirement says "fall back to a terminal Carbon" or random. 
+    # Let's assume the implementation picks a terminal carbon if possible, or just behaves like random on C.
+    mol = activate_fragment(smiles, strategy="smart")
+    
+    assert mol is not None
+    dummy_atom = None
+    for atom in mol.GetAtoms():
+        if atom.GetSymbol() == '*':
+            dummy_atom = atom
+            break
+    
+    assert dummy_atom is not None
+    neighbor = dummy_atom.GetNeighbors()[0]
+    assert neighbor.GetSymbol() == 'C'
+    # Verify it's a valid molecule
+    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
+
+def test_invalid_smiles():
+    with pytest.raises(ValueError):
+        activate_fragment("NotASmiles", strategy="smart")
+
--- a/tests/test_scaffold_prep.py
+++ b/tests/test_scaffold_prep.py
@@ -0,0 +1,84 @@
+import pytest
+from rdkit import Chem
+from src.splicing.scaffold_prep import prepare_tylosin_scaffold
+from src.ring_numbering import assign_ring_numbering
+
+def test_prepare_tylosin_scaffold():
+    # Construct a 16-membered lactone with side chains
+    # Numbering logic (assumed based on implementation):
+    # 1: C=O
+    # 2-6: CH2
+    # 7: CH(CH3)  <- Methyl side chain
+    # 8-14: CH2
+    # 15: CH(CC)  <- Ethyl side chain
+    # 16: O
+    
+    # SMILES:
+    # O=C1 (pos 1)
+    # CCCCC (pos 2-6)
+    # C(C) (pos 7, with Methyl)
+    # CCCCCCC (pos 8-14)
+    # C(CC) (pos 15, with Ethyl)
+    # O1 (pos 16)
+    
+    smiles = "O=C1CCCCC(C)CCCCCCCCC(CC)O1"
+    
+    # Verify initial assumption about numbering
+    mol = Chem.MolFromSmiles(smiles)
+    numbering = assign_ring_numbering(mol)
+    
+    # Find atom indices for pos 7 and 15 to ensure our SMILES construction is correct for the test
+    pos_map = {v: k for k, v in numbering.items()}
+    assert 7 in pos_map, "Position 7 not found in ring"
+    assert 15 in pos_map, "Position 15 not found in ring"
+    assert 5 in pos_map, "Position 5 not found in ring"
+    
+    atom7 = mol.GetAtomWithIdx(pos_map[7])
+    atom15 = mol.GetAtomWithIdx(pos_map[15])
+    atom5 = mol.GetAtomWithIdx(pos_map[5])
+    
+    # Check side chains exist
+    # Atom 7 should have 3 neighbors (2 ring, 1 methyl)
+    assert len(atom7.GetNeighbors()) == 3
+    # Atom 15 should have 3 neighbors (2 ring, 1 ethyl)
+    assert len(atom15.GetNeighbors()) == 3
+    # Atom 5 should have 2 neighbors (2 ring, 2 implicit H)
+    assert len(atom5.GetNeighbors()) == 2
+    
+    # Execute scaffold prep
+    target_positions = [5, 7, 15]
+    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)
+    
+    assert res_mol is not None
+    assert len(dummy_map) == 3
+    
+    # Verify dummies
+    for pos in target_positions:
+        assert pos in dummy_map
+        dummy_idx = dummy_map[pos]
+        dummy_atom = res_mol.GetAtomWithIdx(dummy_idx)
+        assert dummy_atom.GetSymbol() == "*"
+        assert dummy_atom.GetIsotope() == pos
+        
+        # Check that dummy is connected to the correct ring position
+        neighbors = dummy_atom.GetNeighbors()
+        assert len(neighbors) == 1
+        
+    # Verify side chains removed
+    # New atom counts.
+    # Original: 16 (ring) + 1 (O=) + 1 (Me) + 2 (Et) = 20 heavy atoms.
+    # Removed: Me (1), Et (2). Total -3.
+    # Added: 3 dummies. Total +3.
+    # Net: 20.
+    assert res_mol.GetNumAtoms() == 20
+    
+    # Check that the specific side chains are gone.
+    # Count carbons.
+    # Original C count: 1 (C=O) + 14 (CH2/CH) + 1(Me) + 2(Et) = 18 C.
+    # New C count: 1 (C=O) + 14 (Ring C) = 15 C.
+    # Dummies are *. O are O.
+    c_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == 'C')
+    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"
+    
+    dummy_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == '*')
+    assert dummy_count == 3
--- a/tests/test_splicing_engine.py
+++ b/tests/test_splicing_engine.py
@@ -0,0 +1,77 @@
+import pytest
+from rdkit import Chem
+from src.splicing.engine import splice_molecule
+
+def test_splice_benzene_methyl():
+    """
+    Test splicing a benzene scaffold (isotope 1) with a methyl fragment.
+    Scaffold: c1ccccc1[1*] (Phenyl radical-ish dummy)
+    Fragment: C* (Methyl radical-ish dummy)
+    Result: Cc1ccccc1 (Toluene)
+    """
+    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
+    fragment = Chem.MolFromSmiles("C*")
+    
+    assert scaffold is not None
+    assert fragment is not None
+    
+    product = splice_molecule(scaffold, fragment, position=1)
+    
+    # Expected result: Toluene
+    expected_smiles = "Cc1ccccc1"
+    expected_mol = Chem.MolFromSmiles(expected_smiles)
+    expected_canonical = Chem.MolToSmiles(expected_mol, isomericSmiles=True)
+    
+    product_canonical = Chem.MolToSmiles(product, isomericSmiles=True)
+    
+    assert product_canonical == expected_canonical
+
+def test_splice_missing_isotope():
+    """Test that error is raised if the requested position is not found on scaffold."""
+    scaffold = Chem.MolFromSmiles("c1ccccc1[2*]") # Isotope 2
+    fragment = Chem.MolFromSmiles("C*")
+    
+    with pytest.raises(ValueError, match="Scaffold dummy atom with isotope 1 not found"):
+        splice_molecule(scaffold, fragment, position=1)
+
+def test_splice_no_fragment_dummy():
+    """Test that error is raised if fragment has no dummy atom."""
+    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
+    fragment = Chem.MolFromSmiles("C") # Methane, no dummy
+    
+    with pytest.raises(ValueError, match="Fragment does not contain a dummy atom"):
+        splice_molecule(scaffold, fragment, position=1)
+
+def test_complex_splicing():
+    """
+    Test splicing with more complex structures.
+    Scaffold: Pyridine derivative n1cccc1CC[1*]
+    Fragment: Cyclopropane C1CC1*
+    Result: n1cccc1CCC1CC1
+    """
+    scaffold = Chem.MolFromSmiles("n1cccc1CC[1*]")
+    fragment = Chem.MolFromSmiles("*C1CC1")
+    
+    product = splice_molecule(scaffold, fragment, position=1)
+    
+    expected = Chem.MolFromSmiles("n1cccc1CCC1CC1")
+    
+    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)
+
+def test_scaffold_with_multiple_different_dummies():
+    """
+    Test splicing when scaffold has multiple dummies with different isotopes.
+    Scaffold: [1*]c1ccccc1[2*]
+    Fragment: C*
+    Target: Splicing at 1 should leave [2*] intact.
+    """
+    scaffold = Chem.MolFromSmiles("[1*]c1ccccc1[2*]")
+    fragment = Chem.MolFromSmiles("C*")
+    
+    # Splice at 1
+    product = splice_molecule(scaffold, fragment, position=1)
+    
+    # Expected: Cc1ccccc1[2*]
+    expected = Chem.MolFromSmiles("Cc1ccccc1[2*]")
+    
+    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)