import pytest
from rdkit import Chem

from src.splicing.scaffold_prep import prepare_tylosin_scaffold
from src.ring_numbering import assign_ring_numbering


def test_prepare_tylosin_scaffold():
    """Check that scaffold prep swaps side chains for isotope-tagged dummies.

    A 16-membered lactone carrying a methyl and an ethyl side chain is passed
    to ``prepare_tylosin_scaffold`` with three target ring positions (one
    unsubstituted, two substituted).  The test expects back a molecule and a
    ``position -> atom index`` map in which every target position owns a
    single-bonded ``*`` atom whose isotope equals the position, and expects
    the methyl/ethyl carbons to be gone from the result.
    """
    # Intended ring layout.  NOTE(review): assumes assign_ring_numbering
    # labels the carbonyl carbon as 1 and walks towards the substituted
    # carbons; the precondition asserts below fail loudly if it does not.
    #
    # The SMILES is assembled from per-position segments so the substituent
    # positions cannot drift away from this documented layout (a hand-typed
    # literal is easy to get off by one carbon).
    smiles = "".join(
        (
            "O=C1",    # position 1: lactone carbonyl carbon (+ exocyclic O)
            "C" * 5,   # positions 2-6: CH2
            "C(C)",    # position 7: CH carrying the methyl side chain
            "C" * 7,   # positions 8-14: CH2
            "C(CC)",   # position 15: CH carrying the ethyl side chain
            "O1",      # position 16: ring oxygen closing the lactone
        )
    )
    # Resulting string: O=C1CCCCCC(C)CCCCCCCC(CC)O1

    # --- Preconditions: confirm the fixture matches the numbering scheme ---
    mol = Chem.MolFromSmiles(smiles)
    assert mol is not None, "Fixture SMILES failed to parse"

    # numbering is used here as {atom index: ring position}; invert it so
    # atoms can be looked up by ring position.
    numbering = assign_ring_numbering(mol)
    pos_map = {position: atom_idx for atom_idx, position in numbering.items()}

    assert 7 in pos_map, "Position 7 not found in ring"
    assert 15 in pos_map, "Position 15 not found in ring"
    assert 5 in pos_map, "Position 5 not found in ring"

    atom7 = mol.GetAtomWithIdx(pos_map[7])
    atom15 = mol.GetAtomWithIdx(pos_map[15])
    atom5 = mol.GetAtomWithIdx(pos_map[5])

    # GetNeighbors() only reports explicit (heavy) atoms here.
    # Position 7: two ring neighbours + the methyl carbon.
    assert len(atom7.GetNeighbors()) == 3
    # Position 15: two ring neighbours + the first ethyl carbon.
    assert len(atom15.GetNeighbors()) == 3
    # Position 5: two ring neighbours only (its hydrogens are implicit).
    assert len(atom5.GetNeighbors()) == 2

    # --- Execute scaffold prep ---
    target_positions = [5, 7, 15]
    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)

    assert res_mol is not None
    assert len(dummy_map) == 3

    # --- Verify the dummy atoms ---
    for pos in target_positions:
        assert pos in dummy_map
        dummy_atom = res_mol.GetAtomWithIdx(dummy_map[pos])
        assert dummy_atom.GetSymbol() == "*"
        # The isotope label carries the ring position of the attachment.
        assert dummy_atom.GetIsotope() == pos
        # A dummy must be a terminal attachment point: exactly one bond.
        # (This does not by itself prove which ring atom it is bonded to.)
        assert len(dummy_atom.GetNeighbors()) == 1

    # --- Verify the side chains were removed ---
    # Heavy atoms before: 16 (ring) + 1 (=O) + 1 (Me) + 2 (Et) = 20.
    # Removed: Me (1) + Et (2) = 3.  Added: 3 dummies.  Net: still 20.
    assert res_mol.GetNumAtoms() == 20

    # Carbons before: 15 ring C (incl. C=O) + 1 (Me) + 2 (Et) = 18.
    # Carbons after: only the 15 ring carbons remain; dummies are "*".
    c_count = sum(1 for atom in res_mol.GetAtoms() if atom.GetSymbol() == "C")
    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"

    dummy_count = sum(1 for atom in res_mol.GetAtoms() if atom.GetSymbol() == "*")
    assert dummy_count == 3