Add splicing module and related tests
- Add src/splicing/ module with scaffold_prep, fragment_prep, and engine
- Add tylosin_splicer.py entry script
- Add unit tests for splicing components

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
10
scripts/tylosin_splicer.py
Normal file
10
scripts/tylosin_splicer.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""
|
||||||
|
Tylosin Splicing System - Main Entry Point
|
||||||
|
"""
|
||||||
|
|
||||||
|
def main() -> None:
    """Print the placeholder greeting of the Tylosin splicer entry script."""
    greeting = "Hello from Tylosin Splicer"
    print(greeting)


# Run the entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
0
src/splicing/__init__.py
Normal file
0
src/splicing/__init__.py
Normal file
79
src/splicing/engine.py
Normal file
79
src/splicing/engine.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
from rdkit import Chem
|
||||||
|
|
||||||
|
def splice_molecule(scaffold: Chem.Mol, fragment: Chem.Mol, position: int) -> Chem.Mol:
    """
    Join a scaffold and a fragment through their dummy-atom attachment points.

    The scaffold dummy labelled with isotope ``position`` and the first dummy
    atom found in the fragment are both removed, and their respective
    neighbors (the "anchors") are connected with a single bond.

    Args:
        scaffold: The scaffold molecule containing labeled dummy atoms (e.g., [1*]).
        fragment: The fragment molecule containing a dummy atom (*).
        position: The isotope number of the dummy atom on the scaffold to attach to.

    Returns:
        Chem.Mol: The spliced, sanitized molecule.

    Raises:
        ValueError: If either molecule is None, if the requested dummy atom is
            not found on the scaffold, if the fragment has no dummy atom, or
            if either dummy atom does not have exactly one neighbor.
        Exceptions raised by ``Chem.SanitizeMol`` are not caught here.
    """
    # MolFromSmiles returns None on a parse failure; reject that up front with
    # the documented exception type instead of failing inside CombineMols.
    if scaffold is None or fragment is None:
        raise ValueError("Scaffold and fragment must both be valid molecules (got None)")

    # 1. Combine molecules
    # CombineMols keeps the scaffold atoms at indices 0..N-1 and appends the
    # fragment atoms at N..N+M-1, so the two halves can be searched separately.
    scaffold_atom_count = scaffold.GetNumAtoms()
    rw_mol = Chem.RWMol(Chem.CombineMols(scaffold, fragment))
    total_atoms = rw_mol.GetNumAtoms()

    # 2. Find the scaffold dummy carrying the requested isotope label
    scaffold_dummy_idx = next(
        (
            i
            for i in range(scaffold_atom_count)
            if rw_mol.GetAtomWithIdx(i).GetAtomicNum() == 0
            and rw_mol.GetAtomWithIdx(i).GetIsotope() == position
        ),
        None,
    )
    if scaffold_dummy_idx is None:
        raise ValueError(f"Scaffold dummy atom with isotope {position} not found")

    # 3. Find the fragment dummy: the first dummy atom in the fragment half is
    # taken as the attachment point, regardless of its isotope label.
    fragment_dummy_idx = next(
        (
            i
            for i in range(scaffold_atom_count, total_atoms)
            if rw_mol.GetAtomWithIdx(i).GetAtomicNum() == 0
        ),
        None,
    )
    if fragment_dummy_idx is None:
        raise ValueError("Fragment does not contain a dummy atom")

    # 4. Identify the anchors (the single neighbor of each dummy)
    scaffold_dummy = rw_mol.GetAtomWithIdx(scaffold_dummy_idx)
    if scaffold_dummy.GetDegree() != 1:
        raise ValueError(f"Scaffold dummy atom at index {scaffold_dummy_idx} must have exactly one neighbor")
    scaffold_anchor_idx = scaffold_dummy.GetNeighbors()[0].GetIdx()

    fragment_dummy = rw_mol.GetAtomWithIdx(fragment_dummy_idx)
    if fragment_dummy.GetDegree() != 1:
        raise ValueError(f"Fragment dummy atom at index {fragment_dummy_idx} must have exactly one neighbor")
    fragment_anchor_idx = fragment_dummy.GetNeighbors()[0].GetIdx()

    # 5. Connect the two anchors
    # NOTE(review): the new bond is added before the dummy bonds disappear;
    # confirm that stereocentres on the anchor atoms keep their intended
    # configuration for chiral scaffolds.
    rw_mol.AddBond(scaffold_anchor_idx, fragment_anchor_idx, Chem.BondType.SINGLE)

    # 6. Remove the dummy atoms, highest index first so that deleting one does
    # not shift the index of the other.
    for idx in sorted((scaffold_dummy_idx, fragment_dummy_idx), reverse=True):
        rw_mol.RemoveAtom(idx)

    # 7. Sanitize and return an immutable molecule
    Chem.SanitizeMol(rw_mol)
    return rw_mol.GetMol()
|
||||||
87
src/splicing/fragment_prep.py
Normal file
87
src/splicing/fragment_prep.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
import random
|
||||||
|
from rdkit import Chem
|
||||||
|
|
||||||
|
def activate_fragment(smiles: str, strategy: str = "smart") -> Chem.Mol:
    """
    Convert a small molecule fragment into an attachable R-group by adding a dummy atom (*).

    Args:
        smiles: SMILES string of the fragment.
        strategy: 'smart' attaches to the first match of, in order of
            preference, an aliphatic N bearing H, an O with one H, or an S
            with one H, and falls back to the 'random' rule when none match.
            'random' attaches to the first hydrogen-bearing carbon, or to the
            first hydrogen-bearing atom of any element when there is no such
            carbon. Despite the name the choice is deterministic.

    Returns:
        Chem.Mol: The activated fragment with a dummy atom attached.

    Raises:
        ValueError: If the SMILES cannot be parsed, the strategy is unknown,
            no atom with a hydrogen is available, or the activated molecule
            fails sanitization.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    if strategy not in ("smart", "random"):
        raise ValueError(f"Unknown strategy: {strategy!r} (expected 'smart' or 'random')")

    target_idx = None

    if strategy == "smart":
        # Order of preference: amine-like N, alcohol/phenol-like O, thiol-like S.
        preferred_sites = (
            "[N;!H0]",  # Nitrogen with at least one H
            "[O;H1]",   # Oxygen with exactly one H
            "[S;H1]",   # Sulfur with exactly one H
        )
        for smarts in preferred_sites:
            matches = mol.GetSubstructMatches(Chem.MolFromSmarts(smarts))
            if matches:
                # Pick the first match
                target_idx = matches[0][0]
                break

    if target_idx is None:
        # Either 'random' was requested or 'smart' found no heteroatom site.
        # GetTotalNumHs includes implicit and explicit Hs.
        candidates = [atom for atom in mol.GetAtoms() if atom.GetTotalNumHs() > 0]
        if not candidates:
            raise ValueError("No suitable atoms with hydrogens found to attach to.")

        # Prefer carbon atoms if available; take the first one so the result
        # is reproducible.
        carbon_candidates = [atom for atom in candidates if atom.GetSymbol() == 'C']
        target_idx = (carbon_candidates or candidates)[0].GetIdx()

    # Perform attachment on an editable copy
    rwmol = Chem.RWMol(mol)

    # The new bond replaces one hydrogen. Implicit Hs are recomputed during
    # sanitization, but an explicit H count (e.g. from "[NH2]") is kept as
    # written and would leave the atom over-valent, so lower it by one here.
    target_atom = rwmol.GetAtomWithIdx(target_idx)
    explicit_hs = target_atom.GetNumExplicitHs()
    if explicit_hs > 0:
        target_atom.SetNumExplicitHs(explicit_hs - 1)

    # Add the dummy atom and bond it to the target atom
    dummy_idx = rwmol.AddAtom(Chem.Atom('*'))
    rwmol.AddBond(target_idx, dummy_idx, Chem.BondType.SINGLE)

    # Sanitize to fix implicit H counts and ensure validity
    try:
        Chem.SanitizeMol(rwmol)
    except Exception as e:
        raise ValueError(f"Failed to sanitize molecule after activation: {e}") from e

    return rwmol.GetMol()
|
||||||
123
src/splicing/scaffold_prep.py
Normal file
123
src/splicing/scaffold_prep.py
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
from rdkit import Chem
|
||||||
|
from typing import List, Dict, Tuple, Set, Optional
|
||||||
|
from src.ring_numbering import assign_ring_numbering
|
||||||
|
|
||||||
|
def get_subtree_indices(mol: Chem.Mol, start_atom_idx: int, forbidden_idx: int) -> Set[int]:
    """
    Collect the indices of every atom reachable from start_atom_idx without
    stepping onto forbidden_idx.

    Used to identify the side chain atoms to remove: the caller passes the
    first side chain atom as the start and the ring atom it hangs from as the
    forbidden atom.

    Returns:
        The set of visited atom indices, always including start_atom_idx.
    """
    visited: Set[int] = set()
    pending = [start_atom_idx]

    while pending:
        idx = pending.pop()
        if idx in visited:
            continue
        visited.add(idx)

        # Queue every neighbor that is neither the blocked atom nor already seen.
        neighbor_indices = (nbr.GetIdx() for nbr in mol.GetAtomWithIdx(idx).GetNeighbors())
        pending.extend(
            nbr_idx
            for nbr_idx in neighbor_indices
            if nbr_idx != forbidden_idx and nbr_idx not in visited
        )

    return visited
|
||||||
|
|
||||||
|
def prepare_tylosin_scaffold(smiles: str, positions: List[int]) -> Tuple[Chem.Mol, Dict[int, int]]:
    """
    Prepare the Tylosin scaffold by removing side chains at specified positions
    and marking them with dummy atoms.

    Args:
        smiles: SMILES string of the scaffold/molecule.
        positions: List of ring positions (1-16) to prepare (add sockets).
            Duplicate entries are processed once.

    Returns:
        Tuple of (Modified Molecule, Dict mapping position -> new_dummy_atom_idx)
        The returned molecule has dummy atoms ('*') at the specified positions,
        marked with Isotope = position number.

    Raises:
        ValueError: If the SMILES is invalid, ring numbering cannot be
            assigned, a requested position is not part of the numbering, or a
            side chain at a requested position connects back to the ring.
    """
    import logging

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES: {smiles}")

    # 1. Ring numbering to identify target atoms
    ring_map = assign_ring_numbering(mol)  # atom_idx -> ring_pos
    if not ring_map:
        raise ValueError("Could not assign ring numbering. Is this a 16-membered lactone?")

    # Reverse map for easy lookup: ring_pos -> atom_idx
    pos_to_atom = {ring_pos: atom_idx for atom_idx, ring_pos in ring_map.items()}

    # A repeated position would otherwise attach two identically labelled
    # dummies to the same ring atom; keep the first occurrence of each.
    unique_positions = list(dict.fromkeys(positions))

    atoms_to_remove = set()
    dummies_to_add = []  # List of (ring_atom_idx, position)

    # 2. Identify edits
    for pos in unique_positions:
        if pos not in pos_to_atom:
            raise ValueError(f"Position {pos} not found in ring numbering.")

        ring_atom_idx = pos_to_atom[pos]
        ring_atom = mol.GetAtomWithIdx(ring_atom_idx)

        # Neighbors that are not numbered ring atoms are side chain roots.
        for neighbor in ring_atom.GetNeighbors():
            sc_idx = neighbor.GetIdx()
            if sc_idx in ring_map:
                continue

            subtree = get_subtree_indices(mol, sc_idx, forbidden_idx=ring_atom_idx)

            # A substituent that closes back onto the ring (e.g. a fused or
            # bridging group) would drag ring atoms into the deletion set.
            if any(idx in ring_map for idx in subtree):
                raise ValueError(
                    f"Side chain at position {pos} is connected back to the ring "
                    "and cannot be removed without deleting ring atoms."
                )
            atoms_to_remove.update(subtree)

        # Plan to add dummy at this ring atom
        dummies_to_add.append((ring_atom_idx, pos))

    # 3. Apply edits using RWMol
    rwmol = Chem.RWMol(mol)

    # Step A: Add dummy atoms
    # Done before deletion so the ring atom indices collected above are still
    # valid; appending atoms does not change existing indices.
    for ring_idx, pos in dummies_to_add:
        dummy = Chem.Atom('*')
        dummy.SetIsotope(pos)  # Mark with position
        new_idx = rwmol.AddAtom(dummy)
        rwmol.AddBond(ring_idx, new_idx, Chem.BondType.SINGLE)

    # Step B: Remove side chain atoms
    # Descending order keeps the not-yet-removed lower indices valid.
    for idx in sorted(atoms_to_remove, reverse=True):
        rwmol.RemoveAtom(idx)

    # 4. Finalize
    mol_final = rwmol.GetMol()

    try:
        Chem.SanitizeMol(mol_final)
    except Exception as exc:
        # Best effort: the scaffold carries dummy atoms and strict
        # sanitization may reject it. Keep returning the molecule, but leave a
        # trace instead of hiding the failure completely.
        logging.getLogger(__name__).warning(
            "Scaffold sanitization failed; returning unsanitized scaffold: %s", exc
        )

    # 5. Build result map (position -> atom_index in new mol)
    # Indices shifted during deletion, so dummies are located by isotope label.
    wanted = set(unique_positions)
    final_dummy_map = {}
    for atom in mol_final.GetAtoms():
        if atom.GetSymbol() == '*' and atom.GetIsotope() in wanted:
            final_dummy_map[atom.GetIsotope()] = atom.GetIdx()

    return mol_final, final_dummy_map
|
||||||
39
tests/test_env_integration.py
Normal file
39
tests/test_env_integration.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add SIME to path.
# The location of the external SIME checkout is machine-specific; set the
# SIME_PATH environment variable to override the default below.
SIME_PATH = os.environ.get("SIME_PATH", "/home/zly/project/SIME")
if SIME_PATH not in sys.path:
    sys.path.append(SIME_PATH)

# Add project root to path so we can import 'src'
PROJECT_ROOT = str(Path(__file__).parent.parent)
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
|
||||||
|
|
||||||
|
def test_imports():
    """Check that both the local project package and SIME are importable."""
    print(f"sys.path: {sys.path}")

    # 1. Local import from src
    try:
        from src.ring_numbering import assign_ring_numbering
    except ImportError as e:
        print(f"Failed to import src.ring_numbering: {e}")
        raise
    else:
        assert callable(assign_ring_numbering)
        print("Successfully imported src.ring_numbering.assign_ring_numbering")

    # 2. SIME import
    try:
        from utils.mole_predictor import ParallelBroadSpectrumPredictor
    except ImportError as e:
        print(f"Failed to import from SIME: {e}")
        raise
    else:
        assert ParallelBroadSpectrumPredictor is not None
        print("Successfully imported ParallelBroadSpectrumPredictor from utils.mole_predictor")


# Allow running this check directly as a script.
if __name__ == "__main__":
    test_imports()
|
||||||
95
tests/test_fragment_prep.py
Normal file
95
tests/test_fragment_prep.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import pytest
|
||||||
|
from rdkit import Chem
|
||||||
|
from src.splicing.fragment_prep import activate_fragment
|
||||||
|
|
||||||
|
def test_activate_smart_ethanol():
    """'smart' activation of ethanol (CCO) must place the dummy on the oxygen."""
    activated = activate_fragment("CCO", strategy="smart")

    # The fragment gains exactly one atom: C, C, O, *
    assert activated is not None
    assert activated.GetNumAtoms() == 4

    # Locate the first dummy atom, if any
    dummy_atom = next((a for a in activated.GetAtoms() if a.GetSymbol() == '*'), None)
    assert dummy_atom is not None

    # Its only neighbor has to be the oxygen
    attached_to = dummy_atom.GetNeighbors()
    assert len(attached_to) == 1
    assert attached_to[0].GetSymbol() == 'O'

    # The attachment point must show up in the SMILES output
    assert '*' in Chem.MolToSmiles(activated)
|
||||||
|
|
||||||
|
def test_activate_smart_amine():
    """'smart' activation of ethylamine (CCN) must place the dummy on the nitrogen."""
    activated = activate_fragment("CCN", strategy="smart")
    assert activated is not None

    # Locate the first dummy atom, if any
    dummy_atom = next((a for a in activated.GetAtoms() if a.GetSymbol() == '*'), None)
    assert dummy_atom is not None

    first_neighbor = dummy_atom.GetNeighbors()[0]
    assert first_neighbor.GetSymbol() == 'N'
|
||||||
|
|
||||||
|
def test_activate_random_pentane():
    """'random' activation of pentane (CCCCC) must attach the dummy to a carbon."""
    # Which carbon is chosen does not matter for this test; any carbon is fine.
    activated = activate_fragment("CCCCC", strategy="random")

    assert activated is not None
    assert activated.GetNumAtoms() == 6  # 5 C + 1 *

    # Locate the first dummy atom, if any
    dummy_atom = next((a for a in activated.GetAtoms() if a.GetSymbol() == '*'), None)
    assert dummy_atom is not None

    first_neighbor = dummy_atom.GetNeighbors()[0]
    assert first_neighbor.GetSymbol() == 'C'
|
||||||
|
|
||||||
|
def test_activate_smart_fallback():
    """'smart' on a fragment without heteroatoms (propane) must fall back to a carbon."""
    # With no N/O/S site available the dummy is expected on some carbon;
    # which carbon is not pinned down by this test.
    activated = activate_fragment("CCC", strategy="smart")
    assert activated is not None

    # Locate the first dummy atom, if any
    dummy_atom = next((a for a in activated.GetAtoms() if a.GetSymbol() == '*'), None)
    assert dummy_atom is not None

    anchor = dummy_atom.GetNeighbors()[0]
    assert anchor.GetSymbol() == 'C'

    # Verify it's a valid molecule: re-sanitizing reports no failed operation
    assert Chem.SanitizeMol(activated) == Chem.SanitizeFlags.SANITIZE_NONE
|
||||||
|
|
||||||
|
def test_invalid_smiles():
    """An unparsable SMILES string must be rejected with ValueError."""
    bad_smiles = "NotASmiles"
    with pytest.raises(ValueError):
        activate_fragment(bad_smiles, strategy="smart")
|
||||||
|
|
||||||
84
tests/test_scaffold_prep.py
Normal file
84
tests/test_scaffold_prep.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
import pytest
|
||||||
|
from rdkit import Chem
|
||||||
|
from src.splicing.scaffold_prep import prepare_tylosin_scaffold
|
||||||
|
from src.ring_numbering import assign_ring_numbering
|
||||||
|
|
||||||
|
def test_prepare_tylosin_scaffold():
    """Sockets at positions 5, 7 and 15 replace the side chains of a model 16-membered lactone."""
    # Construct a 16-membered lactone with side chains.
    # Numbering logic (assumed based on implementation):
    #   1: C=O, 2-6: CH2, 7: CH(CH3), 8-14: CH2, 15: CH(CH2CH3), 16: O
    #
    # The SMILES is assembled from those pieces so the branch positions are
    # visible in the code; a hand-typed literal is easy to miscount.
    smiles = "".join([
        "O=C1",     # pos 1 (carbonyl carbon, opens the ring)
        "CCCCC",    # pos 2-6
        "C(C)",     # pos 7, with Methyl
        "CCCCCCC",  # pos 8-14
        "C(CC)",    # pos 15, with Ethyl
        "O1",       # pos 16 (ring oxygen, closes the ring)
    ])

    # Verify initial assumption about numbering
    mol = Chem.MolFromSmiles(smiles)
    assert mol is not None
    numbering = assign_ring_numbering(mol)

    # Find atom indices for pos 5, 7 and 15 to ensure the SMILES construction
    # is correct for the test
    pos_map = {v: k for k, v in numbering.items()}
    assert 7 in pos_map, "Position 7 not found in ring"
    assert 15 in pos_map, "Position 15 not found in ring"
    assert 5 in pos_map, "Position 5 not found in ring"

    atom7 = mol.GetAtomWithIdx(pos_map[7])
    atom15 = mol.GetAtomWithIdx(pos_map[15])
    atom5 = mol.GetAtomWithIdx(pos_map[5])

    # Check side chains exist
    # Atom 7 should have 3 neighbors (2 ring, 1 methyl)
    assert len(atom7.GetNeighbors()) == 3
    # Atom 15 should have 3 neighbors (2 ring, 1 ethyl)
    assert len(atom15.GetNeighbors()) == 3
    # Atom 5 should have 2 neighbors (2 ring; its hydrogens are implicit)
    assert len(atom5.GetNeighbors()) == 2

    # Execute scaffold prep
    target_positions = [5, 7, 15]
    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)

    assert res_mol is not None
    assert len(dummy_map) == 3

    # Verify dummies
    for pos in target_positions:
        assert pos in dummy_map
        dummy_atom = res_mol.GetAtomWithIdx(dummy_map[pos])
        assert dummy_atom.GetSymbol() == "*"
        assert dummy_atom.GetIsotope() == pos

        # Each dummy hangs off exactly one atom
        assert len(dummy_atom.GetNeighbors()) == 1

    # Verify side chains removed via atom counts.
    # Original: 16 (ring) + 1 (O=) + 1 (Me) + 2 (Et) = 20 heavy atoms.
    # Removed: Me (1), Et (2). Total -3.
    # Added: 3 dummies. Total +3.
    # Net: 20.
    assert res_mol.GetNumAtoms() == 20

    # Check that the specific side chains are gone by counting carbons.
    # Original C count: 1 (C=O) + 14 (CH2/CH) + 1 (Me) + 2 (Et) = 18 C.
    # New C count: 1 (C=O) + 14 (ring C) = 15 C.
    c_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == 'C')
    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"

    dummy_count = sum(1 for a in res_mol.GetAtoms() if a.GetSymbol() == '*')
    assert dummy_count == 3
|
||||||
77
tests/test_splicing_engine.py
Normal file
77
tests/test_splicing_engine.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import pytest
|
||||||
|
from rdkit import Chem
|
||||||
|
from src.splicing.engine import splice_molecule
|
||||||
|
|
||||||
|
def test_splice_benzene_methyl():
    """
    Splicing a methyl fragment onto a phenyl scaffold at isotope 1 gives toluene.

    Scaffold: c1ccccc1[1*]
    Fragment: C*
    Result:   Cc1ccccc1 (Toluene)
    """
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
    fragment = Chem.MolFromSmiles("C*")
    assert scaffold is not None
    assert fragment is not None

    product = splice_molecule(scaffold, fragment, position=1)

    # Compare canonical SMILES of the product against toluene
    toluene = Chem.MolFromSmiles("Cc1ccccc1")
    expected_canonical = Chem.MolToSmiles(toluene, isomericSmiles=True)
    product_canonical = Chem.MolToSmiles(product, isomericSmiles=True)

    assert product_canonical == expected_canonical
|
||||||
|
|
||||||
|
def test_splice_missing_isotope():
    """Requesting a position whose isotope label is absent from the scaffold must raise."""
    # The scaffold only carries a dummy labelled 2; position 1 is requested.
    scaffold = Chem.MolFromSmiles("c1ccccc1[2*]")
    fragment = Chem.MolFromSmiles("C*")

    expected_message = "Scaffold dummy atom with isotope 1 not found"
    with pytest.raises(ValueError, match=expected_message):
        splice_molecule(scaffold, fragment, position=1)
|
||||||
|
|
||||||
|
def test_splice_no_fragment_dummy():
    """A fragment without any dummy atom must be rejected."""
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
    fragment = Chem.MolFromSmiles("C")  # Methane, no dummy

    expected_message = "Fragment does not contain a dummy atom"
    with pytest.raises(ValueError, match=expected_message):
        splice_molecule(scaffold, fragment, position=1)
|
||||||
|
|
||||||
|
def test_complex_splicing():
    """
    Test splicing with more complex structures.

    Scaffold: Pyridine derivative n1ccccc1CC[1*]
    Fragment: Cyclopropane *C1CC1
    Result:   n1ccccc1CCC1CC1
    """
    # The ring must be six-membered: a five-membered aromatic ring with an
    # H-less nitrogen ("n1cccc1...") cannot be kekulized, so MolFromSmiles
    # would return None for it.
    scaffold = Chem.MolFromSmiles("n1ccccc1CC[1*]")
    fragment = Chem.MolFromSmiles("*C1CC1")
    assert scaffold is not None
    assert fragment is not None

    product = splice_molecule(scaffold, fragment, position=1)

    expected = Chem.MolFromSmiles("n1ccccc1CCC1CC1")
    assert expected is not None

    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)
|
||||||
|
|
||||||
|
def test_scaffold_with_multiple_different_dummies():
    """
    Splicing at one labelled socket must leave the other socket untouched.

    Scaffold: [1*]c1ccccc1[2*]
    Fragment: C*
    Target:   Splicing at 1 should leave [2*] intact.
    """
    two_socket_scaffold = Chem.MolFromSmiles("[1*]c1ccccc1[2*]")
    methyl = Chem.MolFromSmiles("C*")

    # Splice at 1
    product = splice_molecule(two_socket_scaffold, methyl, position=1)

    # Expected: Cc1ccccc1[2*]
    expected_smiles = Chem.MolToSmiles(Chem.MolFromSmiles("Cc1ccccc1[2*]"))
    product_smiles = Chem.MolToSmiles(product)

    assert product_smiles == expected_smiles
|
||||||
Reference in New Issue
Block a user