Add splicing module and related tests
- Add src/splicing/ module with scaffold_prep, fragment_prep, and engine
- Add tylosin_splicer.py entry script
- Add unit tests for splicing components

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
39
tests/test_env_integration.py
Normal file
39
tests/test_env_integration.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Make the external SIME checkout importable. The location can be overridden
# with the SIME_PATH environment variable so the test is not tied to a single
# machine; the fallback is the original developer path.
SIME_PATH = os.environ.get("SIME_PATH") or "/home/zly/project/SIME"
if SIME_PATH not in sys.path:
    sys.path.append(SIME_PATH)

# Make the project root importable so that 'src' resolves. The path is
# resolved to an absolute one first: with a relative __file__ the bare
# parent.parent chain can collapse to "." and point at the wrong directory.
PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
|
||||
|
||||
def test_imports():
    """Check that both the local 'src' package and the SIME package import.

    Each import failure is reported on stdout and then re-raised so the test
    fails with the original ImportError.
    """
    print(f"sys.path: {sys.path}")

    # Step 1: the project-local package.
    try:
        from src.ring_numbering import assign_ring_numbering
    except ImportError as exc:
        print(f"Failed to import src.ring_numbering: {exc}")
        raise
    else:
        assert callable(assign_ring_numbering)
        print("Successfully imported src.ring_numbering.assign_ring_numbering")

    # Step 2: the external SIME package.
    try:
        from utils.mole_predictor import ParallelBroadSpectrumPredictor
    except ImportError as exc:
        print(f"Failed to import from SIME: {exc}")
        raise
    else:
        assert ParallelBroadSpectrumPredictor is not None
        print("Successfully imported ParallelBroadSpectrumPredictor from utils.mole_predictor")
|
||||
|
||||
# Script entry point: run the import check directly, without pytest.
if __name__ == "__main__":
    test_imports()
|
||||
95
tests/test_fragment_prep.py
Normal file
95
tests/test_fragment_prep.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
from src.splicing.fragment_prep import activate_fragment
|
||||
|
||||
def test_activate_smart_ethanol():
    """'smart' activation of ethanol (CCO) must put the dummy atom on the oxygen."""
    activated = activate_fragment("CCO", strategy="smart")

    # A molecule must come back, holding C, C, O and one dummy.
    assert activated is not None
    assert activated.GetNumAtoms() == 4

    # Pick the first dummy atom ('*'), or None when there is none.
    attachment = next(
        (atom for atom in activated.GetAtoms() if atom.GetSymbol() == '*'),
        None,
    )
    assert attachment is not None

    # The dummy has exactly one neighbor and that neighbor is the oxygen.
    bonded = attachment.GetNeighbors()
    assert len(bonded) == 1
    assert bonded[0].GetSymbol() == 'O'

    # The attachment point must also show up in the serialized SMILES.
    assert '*' in Chem.MolToSmiles(activated)
|
||||
|
||||
def test_activate_smart_amine():
    """'smart' activation of ethylamine (CCN) must put the dummy atom on the nitrogen."""
    activated = activate_fragment("CCN", strategy="smart")
    assert activated is not None

    # Pick the first dummy atom ('*'), or None when there is none.
    attachment = next(
        (atom for atom in activated.GetAtoms() if atom.GetSymbol() == '*'),
        None,
    )
    assert attachment is not None

    # The atom bonded to the dummy must be the nitrogen.
    assert attachment.GetNeighbors()[0].GetSymbol() == 'N'
|
||||
|
||||
def test_activate_random_pentane():
    """'random' activation of pentane (CCCCC) must attach the dummy to some carbon."""
    # No seed is passed to activate_fragment here, so the test accepts any
    # carbon as the attachment site.
    activated = activate_fragment("CCCCC", strategy="random")

    assert activated is not None
    assert activated.GetNumAtoms() == 6  # five carbons plus the dummy

    # Pick the first dummy atom ('*'), or None when there is none.
    attachment = next(
        (atom for atom in activated.GetAtoms() if atom.GetSymbol() == '*'),
        None,
    )
    assert attachment is not None

    assert attachment.GetNeighbors()[0].GetSymbol() == 'C'
|
||||
|
||||
def test_activate_smart_fallback():
    """'smart' activation of a heteroatom-free input (propane) must still attach to a carbon."""
    # With no heteroatom available the strategy is expected to fall back to a
    # terminal carbon or to random selection; the test only requires that the
    # chosen site is a carbon.
    activated = activate_fragment("CCC", strategy="smart")
    assert activated is not None

    # Pick the first dummy atom ('*'), or None when there is none.
    attachment = next(
        (atom for atom in activated.GetAtoms() if atom.GetSymbol() == '*'),
        None,
    )
    assert attachment is not None

    first_neighbor = attachment.GetNeighbors()[0]
    assert first_neighbor.GetSymbol() == 'C'

    # The result must sanitize without reporting a failed operation.
    sanitize_status = Chem.SanitizeMol(activated)
    assert sanitize_status == Chem.SanitizeFlags.SANITIZE_NONE
|
||||
|
||||
def test_invalid_smiles():
    """An input that is not a SMILES string must make activate_fragment raise ValueError."""
    bad_input = "NotASmiles"
    with pytest.raises(ValueError):
        activate_fragment(bad_input, strategy="smart")
|
||||
|
||||
84
tests/test_scaffold_prep.py
Normal file
84
tests/test_scaffold_prep.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
from src.splicing.scaffold_prep import prepare_tylosin_scaffold
|
||||
from src.ring_numbering import assign_ring_numbering
|
||||
|
||||
def test_prepare_tylosin_scaffold():
    """Check that prepare_tylosin_scaffold swaps side chains for labelled dummies.

    The model compound is a 16-membered lactone with two side chains. The
    layout below assumes assign_ring_numbering gives the carbonyl carbon
    position 1 and the ring oxygen position 16 (the first assertions in this
    test verify the parts of that assumption the test relies on):

        1      C=O
        2-6    CH2
        7      CH(CH3)      <- methyl side chain
        8-14   CH2
        15     CH(CH2CH3)   <- ethyl side chain
        16     O
    """
    # SMILES pieces in ring order:
    #   O=C1      pos 1
    #   CCCCC     pos 2-6
    #   C(C)      pos 7, with methyl
    #   CCCCCCC   pos 8-14
    #   C(CC)     pos 15, with ethyl
    #   O1        pos 16
    # The two branched ring atoms must be exactly 8 ring bonds apart for
    # positions 7 and 15 to both carry a side chain. The earlier literal
    # "O=C1CCCCC(C)CCCCCCCCC(CC)O1" had them 9 apart (methyl on position 6),
    # which no sequential numbering of a 16-ring can map onto 7 and 15.
    smiles = "O=C1CCCCCC(C)CCCCCCCC(CC)O1"

    # Verify the numbering assumption before exercising the scaffold prep.
    mol = Chem.MolFromSmiles(smiles)
    numbering = assign_ring_numbering(mol)

    # Invert atom-index -> position into position -> atom-index.
    pos_map = {v: k for k, v in numbering.items()}
    assert 7 in pos_map, "Position 7 not found in ring"
    assert 15 in pos_map, "Position 15 not found in ring"
    assert 5 in pos_map, "Position 5 not found in ring"

    atom7 = mol.GetAtomWithIdx(pos_map[7])
    atom15 = mol.GetAtomWithIdx(pos_map[15])
    atom5 = mol.GetAtomWithIdx(pos_map[5])

    # Side chains exist where expected: positions 7 and 15 have two ring
    # neighbors plus one side-chain atom; position 5 has ring neighbors only.
    assert len(atom7.GetNeighbors()) == 3
    assert len(atom15.GetNeighbors()) == 3
    assert len(atom5.GetNeighbors()) == 2

    # Execute scaffold prep.
    target_positions = [5, 7, 15]
    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)

    assert res_mol is not None
    assert len(dummy_map) == 3

    # Every requested position must map to a dummy atom whose isotope label
    # equals the position and which has a single bond.
    for pos in target_positions:
        assert pos in dummy_map
        dummy_idx = dummy_map[pos]
        dummy_atom = res_mol.GetAtomWithIdx(dummy_idx)
        assert dummy_atom.GetSymbol() == "*"
        assert dummy_atom.GetIsotope() == pos

        neighbors = dummy_atom.GetNeighbors()
        assert len(neighbors) == 1

    # Heavy-atom bookkeeping:
    #   original: 16 (ring) + 1 (O=) + 1 (Me) + 2 (Et) = 20
    #   removed:  Me (1) + Et (2)                      = -3
    #   added:    3 dummies                            = +3
    #   net:                                             20
    assert res_mol.GetNumAtoms() == 20

    # Carbon bookkeeping: 18 carbons originally (15 ring carbons including
    # the carbonyl, plus 1 methyl and 2 ethyl carbons); only the 15 ring
    # carbons remain once the side chains are gone.
    c_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == 'C')
    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"

    dummy_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == '*')
    assert dummy_count == 3
|
||||
77
tests/test_splicing_engine.py
Normal file
77
tests/test_splicing_engine.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
from src.splicing.engine import splice_molecule
|
||||
|
||||
def test_splice_benzene_methyl():
    """
    Splice a methyl fragment onto a phenyl scaffold at the isotope-1 dummy.

    Scaffold: c1ccccc1[1*]
    Fragment: C*
    Result:   Cc1ccccc1 (toluene)
    """
    phenyl = Chem.MolFromSmiles("c1ccccc1[1*]")
    methyl = Chem.MolFromSmiles("C*")

    # Both inputs must parse before they are handed to the engine.
    assert phenyl is not None
    assert methyl is not None

    spliced = splice_molecule(phenyl, methyl, position=1)

    # Compare canonical isomeric SMILES against toluene.
    toluene = Chem.MolFromSmiles("Cc1ccccc1")
    reference_smiles = Chem.MolToSmiles(toluene, isomericSmiles=True)
    spliced_smiles = Chem.MolToSmiles(spliced, isomericSmiles=True)
    assert spliced_smiles == reference_smiles
|
||||
|
||||
def test_splice_missing_isotope():
    """Splicing at a position with no matching scaffold dummy must raise ValueError."""
    # The scaffold only carries an isotope-2 dummy; position 1 is requested.
    phenyl_iso2 = Chem.MolFromSmiles("c1ccccc1[2*]")
    methyl = Chem.MolFromSmiles("C*")

    expected_message = "Scaffold dummy atom with isotope 1 not found"
    with pytest.raises(ValueError, match=expected_message):
        splice_molecule(phenyl_iso2, methyl, position=1)
|
||||
|
||||
def test_splice_no_fragment_dummy():
    """Splicing a fragment that has no dummy atom must raise ValueError."""
    phenyl = Chem.MolFromSmiles("c1ccccc1[1*]")
    # Plain methane: there is no attachment point on the fragment.
    methane = Chem.MolFromSmiles("C")

    expected_message = "Fragment does not contain a dummy atom"
    with pytest.raises(ValueError, match=expected_message):
        splice_molecule(phenyl, methane, position=1)
|
||||
|
||||
def test_complex_splicing():
    """
    Test splicing with more complex structures.

    Scaffold: pyridine derivative n1ccccc1CC[1*]
    Fragment: cyclopropyl *C1CC1
    Result:   n1ccccc1CCC1CC1
    """
    # Pyridine is a six-membered ring. The earlier five-membered "n1cccc1"
    # is an aromatic ring with a hydrogen-free nitrogen that cannot be
    # kekulized, so MolFromSmiles returned None for it.
    scaffold = Chem.MolFromSmiles("n1ccccc1CC[1*]")
    fragment = Chem.MolFromSmiles("*C1CC1")

    # Fail here, with a clear assertion, if either input does not parse.
    assert scaffold is not None
    assert fragment is not None

    product = splice_molecule(scaffold, fragment, position=1)

    expected = Chem.MolFromSmiles("n1ccccc1CCC1CC1")
    assert expected is not None

    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)
|
||||
|
||||
def test_scaffold_with_multiple_different_dummies():
    """
    Splice at one labelled dummy of a scaffold that carries two.

    Scaffold: [1*]c1ccccc1[2*]
    Fragment: C*
    Target:   splicing at position 1 must leave [2*] untouched.
    """
    two_site_scaffold = Chem.MolFromSmiles("[1*]c1ccccc1[2*]")
    methyl = Chem.MolFromSmiles("C*")

    # Only the isotope-1 site is consumed.
    spliced = splice_molecule(two_site_scaffold, methyl, position=1)

    # Reference product: methyl in place of [1*], [2*] still present.
    reference = Chem.MolFromSmiles("Cc1ccccc1[2*]")
    spliced_smiles = Chem.MolToSmiles(spliced)
    reference_smiles = Chem.MolToSmiles(reference)
    assert spliced_smiles == reference_smiles
|
||||
Reference in New Issue
Block a user