# Reconstructed from a `git format-patch` email whose line breaks were lost.
#   Commit:  2e3b52d04944d43b02215a8b804ff436e7f53c0c
#   Author:  lingyuzeng
#   Date:    Thu, 19 Mar 2026 10:28:03 +0800
#   Subject: feat(validation): add isotope tagging utilities
#   Target:  src/macro_lactone_toolkit/validation/isotope_utils.py
# The same patch also creates an empty tests/validation/__init__.py and
# tests/validation/test_isotope_utils.py.
from __future__ import annotations

from rdkit import Chem


def build_fragment_with_isotope(
    mol: Chem.Mol,
    side_chain_atoms: list[int],
    side_chain_start_idx: int,
    ring_atom_idx: int,
    cleavage_position: int,
) -> tuple[str, str, str]:
    """Build side-chain fragment SMILES tagged with the cleavage position.

    A copy of ``mol`` is edited: a dummy atom (atomic number 0) whose isotope
    is ``cleavage_position`` is bonded to ``side_chain_start_idx``, and every
    original atom that is neither ``side_chain_start_idx`` nor listed in
    ``side_chain_atoms`` is removed. ``mol`` itself is not modified.

    Args:
        mol: Molecule containing both the ring atom and the side chain.
        side_chain_atoms: Indices (in ``mol``) of the atoms to keep.
        side_chain_start_idx: Index of the side-chain atom that receives the
            dummy atom; it is always kept.
        ring_atom_idx: Index of the atom on the other side of the cleaved
            bond; only used to look up the original bond type.
        cleavage_position: Value stored as the dummy atom's isotope.

    Returns:
        Tuple of ``(labeled_smiles, plain_smiles, bond_type)``:
        ``labeled_smiles`` is the fragment SMILES including the isotope tag,
        ``plain_smiles`` is the same fragment with every isotope label reset
        to 0 (on the dummy atom and on any other atom), and ``bond_type`` is
        the name of the ring/side-chain bond type, or ``"SINGLE"`` when
        ``mol`` has no bond between the two atoms.

    Raises:
        Whatever ``Chem.SanitizeMol`` raises when the resulting fragment
        cannot be sanitized.
    """
    # Resolve the bond type once so the returned name and the dummy bond
    # always agree. Previously a missing bond fell back to "SINGLE" for the
    # name but then crashed on ``bond.GetBondType()`` when adding the bond.
    bond = mol.GetBondBetweenAtoms(ring_atom_idx, side_chain_start_idx)
    rd_bond_type = bond.GetBondType() if bond is not None else Chem.BondType.SINGLE
    bond_type = rd_bond_type.name

    # Work on a copy so the caller's molecule is left untouched.
    editable = Chem.EditableMol(Chem.Mol(mol))

    # The dummy atom stands in for the ring; its isotope carries the position.
    dummy_atom = Chem.Atom(0)
    dummy_atom.SetIsotope(cleavage_position)
    dummy_idx = editable.AddAtom(dummy_atom)
    editable.AddBond(dummy_idx, side_chain_start_idx, rd_bond_type)

    atoms_to_keep = {dummy_idx, side_chain_start_idx, *side_chain_atoms}

    # Walk the original atoms from the highest index down so each removal
    # leaves the indices that are still to be visited unchanged. The dummy
    # atom sits past the original range and is therefore never visited.
    for atom_idx in range(mol.GetNumAtoms() - 1, -1, -1):
        if atom_idx not in atoms_to_keep:
            editable.RemoveAtom(atom_idx)

    fragment = editable.GetMol()
    Chem.SanitizeMol(fragment)

    labeled_smiles = Chem.MolToSmiles(fragment)

    # Strip isotope labels on a copy so ``fragment`` keeps its tag.
    plain_fragment = Chem.Mol(fragment)
    for atom in plain_fragment.GetAtoms():
        if atom.GetIsotope() > 0:
            atom.SetIsotope(0)
    plain_smiles = Chem.MolToSmiles(plain_fragment)

    return labeled_smiles, plain_smiles, bond_type


def extract_isotope_position(fragment_smiles: str) -> int:
    """Extract the cleavage position from a fragment SMILES.

    Args:
        fragment_smiles: SMILES string to inspect.

    Returns:
        The isotope of the first dummy atom (atomic number 0) that carries a
        positive isotope, or 0 when the SMILES cannot be parsed or contains
        no such atom.
    """
    mol = Chem.MolFromSmiles(fragment_smiles)
    if mol is None:
        return 0

    tagged_isotopes = (
        atom.GetIsotope()
        for atom in mol.GetAtoms()
        if atom.GetAtomicNum() == 0 and atom.GetIsotope() > 0
    )
    return next(tagged_isotopes, 0)
# Reconstructed from a `git format-patch` email whose line breaks were lost.
#   Target: tests/validation/test_isotope_utils.py
import pytest
from rdkit import Chem

from macro_lactone_toolkit.validation.isotope_utils import (
    build_fragment_with_isotope,
    extract_isotope_position,
)


def test_build_fragment_with_isotope():
    """Fragment an ethyl branch and check both the tagged and plain SMILES."""
    # In "CCCC(CC)CCC" atom 3 (0-indexed) is the branch point and atoms 4-5
    # are the ethyl branch hanging off it.
    mol = Chem.MolFromSmiles("CCCC(CC)CCC")
    assert mol is not None

    side_chain_atoms = [4, 5]  # The ethyl group atoms
    side_chain_start = 4
    ring_atom = 3
    cleavage_pos = 5

    labeled, plain, bond_type = build_fragment_with_isotope(
        mol, side_chain_atoms, side_chain_start, ring_atom, cleavage_pos
    )

    assert bond_type == "SINGLE"

    # Both results must be parseable SMILES describing exactly the dummy atom
    # plus the two ethyl carbons; a bare "is not None" check could never fail
    # because the function always returns strings.
    for smiles in (labeled, plain):
        assert isinstance(smiles, str)
        fragment = Chem.MolFromSmiles(smiles)
        assert fragment is not None
        assert sorted(atom.GetAtomicNum() for atom in fragment.GetAtoms()) == [0, 6, 6]

    # The labeled form carries the cleavage position as the dummy isotope.
    assert extract_isotope_position(labeled) == cleavage_pos

    # The plain form must carry no isotope tag at all.
    assert extract_isotope_position(plain) == 0
    plain_fragment = Chem.MolFromSmiles(plain)
    assert all(atom.GetIsotope() == 0 for atom in plain_fragment.GetAtoms())