feat(validation): add isotope tagging utilities
This commit is contained in:
70
src/macro_lactone_toolkit/validation/isotope_utils.py
Normal file
70
src/macro_lactone_toolkit/validation/isotope_utils.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from rdkit import Chem
|
||||||
|
|
||||||
|
|
||||||
|
def build_fragment_with_isotope(
    mol: Chem.Mol,
    side_chain_atoms: list[int],
    side_chain_start_idx: int,
    ring_atom_idx: int,
    cleavage_position: int,
) -> tuple[str, str, str]:
    """
    Build fragment SMILES with isotope tagging.

    A dummy atom (atomic number 0) whose isotope is ``cleavage_position`` is
    bonded to ``side_chain_start_idx``; every atom of ``mol`` that is neither
    that start atom nor listed in ``side_chain_atoms`` is then removed. The
    input molecule is not modified (a copy is edited).

    Args:
        mol: Molecule the fragment is cut from.
        side_chain_atoms: Atom indices (in ``mol``) to keep in the fragment.
        side_chain_start_idx: Index of the side-chain atom the dummy atom is
            attached to; always kept.
        ring_atom_idx: Index of the atom on the other side of the cut bond.
            Only used to look up the bond to ``side_chain_start_idx``.
        cleavage_position: Value stored as the dummy atom's isotope.

    Returns:
        Tuple of (labeled_smiles, plain_smiles, bond_type).
        ``plain_smiles`` is the same fragment with every non-zero isotope
        reset to 0 (the dummy atom itself is still present). ``bond_type`` is
        the name of the bond type between ``ring_atom_idx`` and
        ``side_chain_start_idx``, or ``"SINGLE"`` when no such bond exists.

    Note:
        ``Chem.SanitizeMol`` is called on the fragment; any error it raises
        propagates to the caller.
    """
    # Resolve the bond order once. GetBondBetweenAtoms returns None when the
    # two atoms are not bonded; fall back to a single bond in that case so the
    # reported bond_type and the dummy bond actually created stay consistent
    # (previously the None case crashed when adding the dummy bond).
    bond = mol.GetBondBetweenAtoms(ring_atom_idx, side_chain_start_idx)
    bond_order = bond.GetBondType() if bond is not None else Chem.BondType.SINGLE
    bond_type = bond_order.name

    # Work on a copy so the caller's molecule is left untouched.
    emol = Chem.EditableMol(Chem.Mol(mol))

    # Add dummy atom with isotope = cleavage position.
    dummy_atom = Chem.Atom(0)
    dummy_atom.SetIsotope(cleavage_position)
    dummy_idx = emol.AddAtom(dummy_atom)

    # Attach the dummy atom where the rest of the molecule used to be.
    emol.AddBond(dummy_idx, side_chain_start_idx, bond_order)

    atoms_to_keep = {dummy_idx, side_chain_start_idx, *side_chain_atoms}

    # Only the original atoms are candidates for removal (the dummy atom was
    # appended after them). Remove from the highest index down so the indices
    # still to be processed remain valid.
    for atom_idx in range(mol.GetNumAtoms() - 1, -1, -1):
        if atom_idx not in atoms_to_keep:
            emol.RemoveAtom(atom_idx)

    fragment = emol.GetMol()
    Chem.SanitizeMol(fragment)

    # Labeled SMILES (with isotope).
    labeled_smiles = Chem.MolToSmiles(fragment)

    # Plain SMILES: same fragment with all isotope labels cleared.
    plain_fragment = Chem.Mol(fragment)
    for atom in plain_fragment.GetAtoms():
        if atom.GetIsotope() > 0:
            atom.SetIsotope(0)
    plain_smiles = Chem.MolToSmiles(plain_fragment)

    return labeled_smiles, plain_smiles, bond_type
|
||||||
|
|
||||||
|
|
||||||
|
def extract_isotope_position(fragment_smiles: str) -> int:
    """Return the isotope of the first labelled dummy atom in a fragment SMILES.

    Returns 0 when the SMILES cannot be parsed or when no dummy atom
    (atomic number 0) carries a non-zero isotope.
    """
    parsed = Chem.MolFromSmiles(fragment_smiles)
    if parsed is None:
        return 0

    # Lazily scan atoms in order; the first isotope-tagged dummy atom wins.
    tagged_isotopes = (
        candidate.GetIsotope()
        for candidate in parsed.GetAtoms()
        if candidate.GetAtomicNum() == 0 and candidate.GetIsotope() > 0
    )
    return next(tagged_isotopes, 0)
|
||||||
0
tests/validation/__init__.py
Normal file
0
tests/validation/__init__.py
Normal file
34
tests/validation/test_isotope_utils.py
Normal file
34
tests/validation/test_isotope_utils.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import pytest
|
||||||
|
from rdkit import Chem
|
||||||
|
|
||||||
|
from macro_lactone_toolkit.validation.isotope_utils import (
|
||||||
|
build_fragment_with_isotope,
|
||||||
|
extract_isotope_position,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_fragment_with_isotope():
    """Round-trip: tag a branch fragment, then read the tag back from its SMILES."""
    # Branched alkane: atom 3 is the branch point, atoms 4 and 5 form the branch.
    parent = Chem.MolFromSmiles("CCCC(CC)CCC")
    assert parent is not None

    expected_position = 5

    result = build_fragment_with_isotope(
        mol=parent,
        side_chain_atoms=[4, 5],
        side_chain_start_idx=4,
        ring_atom_idx=3,
        cleavage_position=expected_position,
    )
    labeled, plain, bond_type = result

    assert labeled is not None
    assert plain is not None
    assert bond_type == "SINGLE"

    # The labeled SMILES must carry the cleavage position as the dummy isotope.
    assert extract_isotope_position(labeled) == expected_position

    # The plain SMILES must carry no isotope label at all.
    assert extract_isotope_position(plain) == 0
|
||||||
Reference in New Issue
Block a user