- Add src/splicing/ module with scaffold_prep, fragment_prep, and engine - Add tylosin_splicer.py entry script - Add unit tests for splicing components Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
85 lines
2.9 KiB
Python
85 lines
2.9 KiB
Python
import pytest
|
|
from rdkit import Chem
|
|
from src.splicing.scaffold_prep import prepare_tylosin_scaffold
|
|
from src.ring_numbering import assign_ring_numbering
|
|
|
|
def test_prepare_tylosin_scaffold():
    """Check scaffold preparation on a synthetic 16-membered lactone.

    The test builds a macrolactone carrying a methyl and an ethyl side
    chain, sanity-checks the ring numbering returned by
    ``assign_ring_numbering``, then calls ``prepare_tylosin_scaffold`` for
    ring positions 5, 7 and 15 and verifies that:

    * a molecule and a three-entry ``position -> atom index`` map come back;
    * every mapped atom is a dummy (``*``) whose isotope label equals the
      ring position and which has exactly one neighbour;
    * the heavy-atom, carbon and dummy counts match side chains having been
      replaced by dummies.
    """
    # Intended ring layout (numbering assumed from the implementation of
    # assign_ring_numbering, which is not shown here):
    #   pos 1      C=O          -> "O=C1"
    #   pos 2-6    CH2          -> "CCCCC"
    #   pos 7      CH(CH3)      -> "C(C)"     methyl side chain
    #   pos 8-14   CH2          -> "CCCCCCC"
    #   pos 15     CH(CH2CH3)   -> "C(CC)"    ethyl side chain
    #   pos 16     ring O       -> "O1"
    #
    # NOTE(review): joining the fragments listed above gives
    # "O=C1CCCCCC(C)CCCCCCCC(CC)O1", whereas the literal below appears to
    # carry the methyl one ring carbon earlier. Confirm which form matches
    # assign_ring_numbering; the literal is intentionally left unchanged.
    smiles = "O=C1CCCCC(C)CCCCCCCCC(CC)O1"
    target_positions = [5, 7, 15]

    # --- Sanity-check the fixture before exercising the code under test ---
    mol = Chem.MolFromSmiles(smiles)
    numbering = assign_ring_numbering(mol)

    # Invert the numbering so atoms can be looked up by ring position.
    pos_map = {}
    for atom_idx, ring_pos in numbering.items():
        pos_map[ring_pos] = atom_idx

    assert 7 in pos_map, "Position 7 not found in ring"
    assert 15 in pos_map, "Position 15 not found in ring"
    assert 5 in pos_map, "Position 5 not found in ring"

    # Expected neighbour counts in the input molecule:
    #   pos 7  -> 3 (two ring bonds + methyl)
    #   pos 15 -> 3 (two ring bonds + ethyl)
    #   pos 5  -> 2 (two ring bonds; hydrogens are implicit)
    for ring_pos, expected_degree in ((7, 3), (15, 3), (5, 2)):
        ring_atom = mol.GetAtomWithIdx(pos_map[ring_pos])
        assert len(ring_atom.GetNeighbors()) == expected_degree

    # --- Run scaffold preparation ---
    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)

    assert res_mol is not None
    assert len(dummy_map) == 3

    # --- Inspect every dummy atom reported for the requested positions ---
    for pos in target_positions:
        assert pos in dummy_map

        dummy_atom = res_mol.GetAtomWithIdx(dummy_map[pos])
        assert dummy_atom.GetSymbol() == "*"
        # The isotope label is expected to encode the ring position.
        assert dummy_atom.GetIsotope() == pos

        # A dummy must be terminal, i.e. bonded to exactly one atom.
        # NOTE(review): this does not verify *which* ring atom it is
        # bonded to, only that there is a single attachment.
        attached = dummy_atom.GetNeighbors()
        assert len(attached) == 1

    # --- Side chains must be gone ---
    # Heavy atoms before: 16 (ring) + 1 (carbonyl O) + 1 (Me) + 2 (Et) = 20.
    # Removing Me and Et drops 3 atoms, adding 3 dummies restores 3 -> 20.
    assert res_mol.GetNumAtoms() == 20

    # Carbons before: 15 ring C + 1 (Me) + 2 (Et) = 18.
    # Carbons after:  15 ring C only; dummies count as "*", oxygens as "O".
    symbols = [atom.GetSymbol() for atom in res_mol.GetAtoms()]

    c_count = symbols.count('C')
    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"

    dummy_count = symbols.count('*')
    assert dummy_count == 3
|