feat(validation): add isotope tagging utilities

This commit is contained in:
2026-03-19 10:28:03 +08:00
parent d18133ce16
commit 2e3b52d049
3 changed files with 104 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
from __future__ import annotations
from rdkit import Chem
def build_fragment_with_isotope(
    mol: Chem.Mol,
    side_chain_atoms: list[int],
    side_chain_start_idx: int,
    ring_atom_idx: int,
    cleavage_position: int,
) -> tuple[str, str, str]:
    """
    Build fragment SMILES with isotope tagging.

    A dummy atom (atomic number 0) whose isotope is set to
    ``cleavage_position`` is bonded to ``side_chain_start_idx``. Every
    original atom that is neither ``side_chain_start_idx`` nor listed in
    ``side_chain_atoms`` is then removed, and the remainder is sanitized
    and written out as SMILES.

    Args:
        mol: Source molecule. It is copied before editing, so the caller's
            object is not modified.
        side_chain_atoms: Atom indices (in ``mol``) to keep in the fragment.
        side_chain_start_idx: Index of the kept atom that receives the
            dummy atom.
        ring_atom_idx: Index of the atom on the other side of the bond;
            used only to look up the original bond type.
        cleavage_position: Value stored as the dummy atom's isotope.

    Returns:
        Tuple of (labeled_smiles, plain_smiles, bond_type).
        ``bond_type`` is the name of the bond type between
        ``ring_atom_idx`` and ``side_chain_start_idx``, or ``"SINGLE"``
        when those atoms are not bonded. ``plain_smiles`` has the isotope
        cleared on every atom that carried one, not only on the dummy.

    Note:
        Errors raised by ``Chem.SanitizeMol`` on the fragment are not
        caught here and propagate to the caller.
    """
    # Resolve the bond order once. When the two atoms are not bonded, fall
    # back to a single bond for BOTH the reported name and the dummy bond,
    # instead of dereferencing a missing bond object.
    bond = mol.GetBondBetweenAtoms(ring_atom_idx, side_chain_start_idx)
    bond_order = bond.GetBondType() if bond is not None else Chem.BondType.SINGLE
    bond_type = bond_order.name

    # Edit a copy so the input molecule stays untouched.
    emol = Chem.EditableMol(Chem.Mol(mol))

    # Add dummy atom with isotope = cleavage position.
    dummy_atom = Chem.Atom(0)
    dummy_atom.SetIsotope(cleavage_position)
    dummy_idx = emol.AddAtom(dummy_atom)

    # The dummy takes the place of the ring atom on the side-chain start.
    emol.AddBond(dummy_idx, side_chain_start_idx, bond_order)

    atoms_to_keep = {dummy_idx, side_chain_start_idx, *side_chain_atoms}

    # Walk the original atoms from the highest index down so that each
    # removal leaves the lower, not-yet-visited indices valid.
    for atom_idx in reversed(range(mol.GetNumAtoms())):
        if atom_idx not in atoms_to_keep:
            emol.RemoveAtom(atom_idx)

    fragment = emol.GetMol()
    Chem.SanitizeMol(fragment)

    # Labeled SMILES keeps the isotope tag on the dummy atom.
    labeled_smiles = Chem.MolToSmiles(fragment)

    # Plain SMILES: same fragment with all isotope labels cleared.
    plain_fragment = Chem.Mol(fragment)
    for atom in plain_fragment.GetAtoms():
        if atom.GetIsotope() > 0:
            atom.SetIsotope(0)
    plain_smiles = Chem.MolToSmiles(plain_fragment)

    return labeled_smiles, plain_smiles, bond_type
def extract_isotope_position(fragment_smiles: str) -> int:
    """Extract cleavage position from fragment SMILES.

    Parses ``fragment_smiles`` and returns the isotope value of the first
    dummy atom (atomic number 0) that has a non-zero isotope.

    Args:
        fragment_smiles: SMILES string of a fragment.

    Returns:
        The isotope of the first tagged dummy atom, or 0 when the SMILES
        cannot be parsed or contains no such atom.
    """
    parsed = Chem.MolFromSmiles(fragment_smiles)
    if parsed is None:
        return 0

    # Lazily yield isotope labels of tagged dummy atoms in atom order;
    # only the first one (if any) is consumed.
    tagged_isotopes = (
        candidate.GetIsotope()
        for candidate in parsed.GetAtoms()
        if candidate.GetAtomicNum() == 0 and candidate.GetIsotope() > 0
    )
    return next(tagged_isotopes, 0)