feat(validation): add isotope tagging utilities

This commit is contained in:
2026-03-19 10:28:03 +08:00
parent d18133ce16
commit 2e3b52d049
3 changed files with 104 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
from __future__ import annotations
from rdkit import Chem
def build_fragment_with_isotope(
    mol: Chem.Mol,
    side_chain_atoms: list[int],
    side_chain_start_idx: int,
    ring_atom_idx: int,
    cleavage_position: int,
) -> tuple[str, str, str]:
    """
    Build fragment SMILES with isotope tagging.

    A dummy atom (atomic number 0) whose isotope is set to
    ``cleavage_position`` is attached to ``side_chain_start_idx``; every
    atom of ``mol`` that is neither ``side_chain_start_idx`` nor listed in
    ``side_chain_atoms`` is then removed. ``mol`` itself is not modified:
    all edits are made on a copy.

    Args:
        mol: Source molecule.
        side_chain_atoms: Indices (in ``mol``) of the atoms to keep.
        side_chain_start_idx: Index of the kept atom that the dummy atom
            is bonded to.
        ring_atom_idx: Index of the atom whose bond to
            ``side_chain_start_idx`` supplies the bond type.
        cleavage_position: Value stored as the dummy atom's isotope.

    Returns:
        Tuple of (labeled_smiles, plain_smiles, bond_type), where
        ``plain_smiles`` has the isotope cleared on *every* atom of the
        fragment and ``bond_type`` is the name of the bond type used for
        the dummy bond (e.g. ``"SINGLE"``).

    Errors raised by ``Chem.SanitizeMol`` propagate to the caller.
    """
    # Resolve the bond type once. When the two atoms are not bonded,
    # fall back to a single bond for BOTH the reported name and the
    # dummy bond (previously the dummy bond dereferenced a None bond).
    bond = mol.GetBondBetweenAtoms(ring_atom_idx, side_chain_start_idx)
    bond_order = bond.GetBondType() if bond is not None else Chem.BondType.SINGLE
    bond_type = bond_order.name

    # Work on a copy so the caller's molecule is left untouched.
    emol = Chem.EditableMol(Chem.Mol(mol))

    # Dummy atom carries the cleavage position as its isotope label.
    dummy_atom = Chem.Atom(0)
    dummy_atom.SetIsotope(cleavage_position)
    dummy_idx = emol.AddAtom(dummy_atom)
    emol.AddBond(dummy_idx, side_chain_start_idx, bond_order)

    atoms_to_keep = {dummy_idx, side_chain_start_idx} | set(side_chain_atoms)

    # Only the original atoms are candidates for removal; the dummy atom
    # was appended after them. Remove from the highest index down so the
    # indices still to be removed stay valid.
    atoms_to_remove = [
        idx for idx in range(mol.GetNumAtoms()) if idx not in atoms_to_keep
    ]
    for atom_idx in reversed(atoms_to_remove):
        emol.RemoveAtom(atom_idx)

    fragment = emol.GetMol()
    Chem.SanitizeMol(fragment)

    # Labeled SMILES keeps the isotope on the dummy atom.
    labeled_smiles = Chem.MolToSmiles(fragment)

    # Plain SMILES: clear isotopes on a copy so `fragment` is unaffected.
    plain_fragment = Chem.Mol(fragment)
    for atom in plain_fragment.GetAtoms():
        if atom.GetIsotope() > 0:
            atom.SetIsotope(0)
    plain_smiles = Chem.MolToSmiles(plain_fragment)

    return labeled_smiles, plain_smiles, bond_type
def extract_isotope_position(fragment_smiles: str) -> int:
    """Return the isotope of the first labeled dummy atom in a fragment SMILES.

    Yields 0 when the SMILES cannot be parsed or when no atom with atomic
    number 0 carries a positive isotope.
    """
    parsed = Chem.MolFromSmiles(fragment_smiles)
    if parsed is None:
        return 0

    # Lazily scan atoms in order; only dummy atoms with a positive isotope
    # count as a cleavage label.
    labels = (
        candidate.GetIsotope()
        for candidate in parsed.GetAtoms()
        if candidate.GetAtomicNum() == 0 and candidate.GetIsotope() > 0
    )
    return next(labels, 0)

View File

View File

@@ -0,0 +1,34 @@
import pytest
from rdkit import Chem
from macro_lactone_toolkit.validation.isotope_utils import (
build_fragment_with_isotope,
extract_isotope_position,
)
def test_build_fragment_with_isotope():
    """Round-trip: tag an ethyl fragment, then read the position back."""
    # Heptane chain with an ethyl branch: atom 3 (0-indexed) is the branch
    # point, atoms 4 and 5 are the ethyl group hanging off it.
    parent = Chem.MolFromSmiles("CCCC(CC)CCC")
    assert parent is not None

    ethyl_atoms = [4, 5]
    attachment_atom = 4
    branch_point = 3
    expected_position = 5

    result = build_fragment_with_isotope(
        parent, ethyl_atoms, attachment_atom, branch_point, expected_position
    )
    labeled, plain, bond_type = result

    assert labeled is not None
    assert plain is not None
    assert bond_type == "SINGLE"

    # The labeled SMILES must carry the cleavage position on its dummy atom.
    assert extract_isotope_position(labeled) == expected_position

    # The plain SMILES must carry no isotope label at all.
    assert extract_isotope_position(plain) == 0