feat(validation): add isotope tagging utilities
This commit is contained in:
70
src/macro_lactone_toolkit/validation/isotope_utils.py
Normal file
70
src/macro_lactone_toolkit/validation/isotope_utils.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from rdkit import Chem
|
||||
|
||||
|
||||
def build_fragment_with_isotope(
    mol: Chem.Mol,
    side_chain_atoms: list[int],
    side_chain_start_idx: int,
    ring_atom_idx: int,
    cleavage_position: int,
) -> tuple[str, str, str]:
    """
    Build fragment SMILES with isotope tagging.

    Works on a copy of ``mol``: a dummy atom (atomic number 0) whose isotope
    is set to ``cleavage_position`` is bonded to ``side_chain_start_idx``,
    then every original atom that is neither ``side_chain_start_idx`` nor
    listed in ``side_chain_atoms`` is removed. The input molecule itself is
    not edited.

    Args:
        mol: Molecule containing both the ring atom and the side chain.
        side_chain_atoms: Atom indices (in ``mol``) to keep in the fragment.
        side_chain_start_idx: Index of the side-chain atom the dummy atom is
            attached to; always kept, even if absent from
            ``side_chain_atoms``.
        ring_atom_idx: Index of the atom on the other side of the cleaved
            bond; only used to look up the original bond type.
        cleavage_position: Value stored as the dummy atom's isotope.

    Returns:
        Tuple of (labeled_smiles, plain_smiles, bond_type), where
        ``bond_type`` is the name of the bond type between ``ring_atom_idx``
        and ``side_chain_start_idx``, or "SINGLE" when no such bond exists.
        ``plain_smiles`` has all isotope labels reset to 0; the dummy atom
        itself is still part of the fragment.

    Raises:
        Whatever ``Chem.SanitizeMol`` raises if the resulting fragment
        cannot be sanitized.
    """
    # Resolve the bond type once so the reported name and the bond given to
    # the dummy atom always agree. When the two atoms are not bonded, fall
    # back to a single bond instead of dereferencing a missing bond.
    bond = mol.GetBondBetweenAtoms(ring_atom_idx, side_chain_start_idx)
    attachment_bond_type = (
        bond.GetBondType() if bond is not None else Chem.BondType.SINGLE
    )
    bond_type = attachment_bond_type.name

    # Create editable molecule from a copy of the input
    emol = Chem.EditableMol(Chem.Mol(mol))

    # Add dummy atom with isotope = cleavage position
    dummy_atom = Chem.Atom(0)
    dummy_atom.SetIsotope(cleavage_position)
    dummy_idx = emol.AddAtom(dummy_atom)

    # Add bond between dummy and side chain start
    emol.AddBond(dummy_idx, side_chain_start_idx, attachment_bond_type)

    # Determine atoms to keep
    atoms_to_keep = {dummy_idx, side_chain_start_idx, *side_chain_atoms}

    # Remove atoms not in the keep set, walking from the highest original
    # index down so the indices still to be visited stay valid. Only the
    # original atoms are visited; the appended dummy atom is never a
    # candidate for removal.
    for atom_idx in range(mol.GetNumAtoms() - 1, -1, -1):
        if atom_idx not in atoms_to_keep:
            emol.RemoveAtom(atom_idx)

    fragment = emol.GetMol()
    Chem.SanitizeMol(fragment)

    # Get labeled SMILES (with isotope)
    labeled_smiles = Chem.MolToSmiles(fragment)

    # Get plain SMILES (without isotope); edit a copy so the labeled
    # fragment is left untouched
    plain_fragment = Chem.Mol(fragment)
    for atom in plain_fragment.GetAtoms():
        if atom.GetIsotope() > 0:
            atom.SetIsotope(0)
    plain_smiles = Chem.MolToSmiles(plain_fragment)

    return labeled_smiles, plain_smiles, bond_type
|
||||
|
||||
|
||||
def extract_isotope_position(fragment_smiles: str) -> int:
    """Return the isotope label of the first tagged dummy atom in a fragment.

    The SMILES is parsed and its atoms are scanned in order; the isotope of
    the first atom with atomic number 0 and a positive isotope is returned.
    Returns 0 when the SMILES cannot be parsed or no such atom exists.
    """
    parsed = Chem.MolFromSmiles(fragment_smiles)
    if parsed is None:
        return 0

    tagged_isotopes = (
        candidate.GetIsotope()
        for candidate in parsed.GetAtoms()
        if candidate.GetAtomicNum() == 0 and candidate.GetIsotope() > 0
    )
    # The default covers fragments that carry no tagged dummy atom.
    return next(tagged_isotopes, 0)
|
||||
0
tests/validation/__init__.py
Normal file
0
tests/validation/__init__.py
Normal file
34
tests/validation/test_isotope_utils.py
Normal file
34
tests/validation/test_isotope_utils.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
|
||||
from macro_lactone_toolkit.validation.isotope_utils import (
|
||||
build_fragment_with_isotope,
|
||||
extract_isotope_position,
|
||||
)
|
||||
|
||||
|
||||
def test_build_fragment_with_isotope():
    """Round-trip a cleavage position through the labeled fragment SMILES."""
    # In this SMILES, atom 3 is the branch point and atoms 4 and 5 form the
    # ethyl substituent attached to it (all indices 0-based).
    mol = Chem.MolFromSmiles("CCCC(CC)CCC")
    assert mol is not None

    cleavage_pos = 5
    labeled, plain, bond_type = build_fragment_with_isotope(
        mol,
        side_chain_atoms=[4, 5],
        side_chain_start_idx=4,
        ring_atom_idx=3,
        cleavage_position=cleavage_pos,
    )

    assert labeled is not None
    assert plain is not None
    assert bond_type == "SINGLE"

    # The labeled fragment must carry the cleavage position as its isotope tag
    assert extract_isotope_position(labeled) == cleavage_pos

    # The plain fragment must carry no isotope tag at all
    assert extract_isotope_position(plain) == 0
|
||||
Reference in New Issue
Block a user