feat(toolkit): ship macro_lactone_toolkit package

Unify macrolactone detection, numbering, fragmentation, and
splicing under the installable macro_lactone_toolkit package.

- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
This commit is contained in:
2026-03-18 22:06:45 +08:00
parent a768d26e47
commit 5e7b236f31
45 changed files with 1302 additions and 6304 deletions

View File

@@ -0,0 +1,25 @@
from .analyzer import MacroLactoneAnalyzer
from .errors import (
AmbiguousMacrolactoneError,
FragmentationError,
MacrolactoneDetectionError,
MacrolactoneError,
RingNumberingError,
)
from .fragmenter import MacrolactoneFragmenter
from .models import FragmentationResult, RingNumberingResult, SideChainFragment
__all__ = [
"AmbiguousMacrolactoneError",
"FragmentationError",
"FragmentationResult",
"MacroLactoneAnalyzer",
"MacrolactoneDetectionError",
"MacrolactoneError",
"MacrolactoneFragmenter",
"RingNumberingError",
"RingNumberingResult",
"SideChainFragment",
]
__version__ = "0.1.0"

View File

@@ -0,0 +1,243 @@
from __future__ import annotations
from collections import deque
from dataclasses import dataclass
from typing import Iterable
from rdkit import Chem
from .errors import MacrolactoneDetectionError, RingNumberingError
from .models import RingNumberingResult
VALID_RING_SIZES = tuple(range(12, 21))
@dataclass(frozen=True)
class DetectedMacrolactone:
ring_size: int
ring_atoms: tuple[int, ...]
carbonyl_carbon_idx: int
ester_oxygen_idx: int
def ensure_mol(mol_input: str | Chem.Mol) -> tuple[Chem.Mol, str]:
if isinstance(mol_input, Chem.Mol):
mol = Chem.Mol(mol_input)
else:
mol = Chem.MolFromSmiles(mol_input)
if mol is None:
raise MacrolactoneDetectionError(f"Unable to parse SMILES: {mol_input}")
return mol, Chem.MolToSmiles(mol, isomericSmiles=True)
def find_macrolactone_candidates(
mol: Chem.Mol,
ring_size: int | None = None,
) -> list[DetectedMacrolactone]:
candidates: list[DetectedMacrolactone] = []
seen: set[tuple[tuple[int, ...], int, int]] = set()
for ring in mol.GetRingInfo().AtomRings():
if len(ring) not in VALID_RING_SIZES:
continue
if ring_size is not None and len(ring) != ring_size:
continue
ester_match = _find_ring_ester_atoms(mol, ring)
if ester_match is None:
continue
carbonyl_carbon_idx, ester_oxygen_idx = ester_match
key = (tuple(sorted(ring)), carbonyl_carbon_idx, ester_oxygen_idx)
if key in seen:
continue
seen.add(key)
candidates.append(
DetectedMacrolactone(
ring_size=len(ring),
ring_atoms=tuple(ring),
carbonyl_carbon_idx=carbonyl_carbon_idx,
ester_oxygen_idx=ester_oxygen_idx,
)
)
return sorted(
candidates,
key=lambda candidate: (
candidate.ring_size,
tuple(sorted(candidate.ring_atoms)),
candidate.carbonyl_carbon_idx,
candidate.ester_oxygen_idx,
),
)
def build_numbering_result(mol: Chem.Mol, candidate: DetectedMacrolactone) -> RingNumberingResult:
ring_atoms = list(candidate.ring_atoms)
ring_atom_set = set(ring_atoms)
adjacency = {
atom_idx: sorted(
neighbor.GetIdx()
for neighbor in mol.GetAtomWithIdx(atom_idx).GetNeighbors()
if neighbor.GetIdx() in ring_atom_set
)
for atom_idx in ring_atoms
}
ordered_atoms = [candidate.carbonyl_carbon_idx, candidate.ester_oxygen_idx]
visited = set(ordered_atoms)
previous_atom = candidate.carbonyl_carbon_idx
current_atom = candidate.ester_oxygen_idx
while len(ordered_atoms) < candidate.ring_size:
next_atoms = [
atom_idx
for atom_idx in adjacency[current_atom]
if atom_idx != previous_atom and atom_idx not in visited
]
if len(next_atoms) != 1:
raise RingNumberingError("Unable to determine a unique ring traversal order.")
next_atom = next_atoms[0]
ordered_atoms.append(next_atom)
visited.add(next_atom)
previous_atom, current_atom = current_atom, next_atom
if candidate.carbonyl_carbon_idx not in adjacency[current_atom]:
raise RingNumberingError("Ring traversal did not close on the lactone carbonyl carbon.")
position_to_atom = {position: atom_idx for position, atom_idx in enumerate(ordered_atoms, start=1)}
atom_to_position = {atom_idx: position for position, atom_idx in position_to_atom.items()}
return RingNumberingResult(
ring_size=candidate.ring_size,
ring_atoms=sorted(ring_atoms),
ordered_atoms=ordered_atoms,
carbonyl_carbon_idx=candidate.carbonyl_carbon_idx,
ester_oxygen_idx=candidate.ester_oxygen_idx,
atom_to_position=atom_to_position,
position_to_atom=position_to_atom,
)
def collect_side_chain_atoms(
mol: Chem.Mol,
start_atom_idx: int,
ring_atom_indices: Iterable[int],
) -> list[int]:
ring_atom_set = set(ring_atom_indices)
queue: deque[int] = deque([start_atom_idx])
visited: set[int] = set()
side_chain_atoms: list[int] = []
while queue:
atom_idx = queue.popleft()
if atom_idx in visited or atom_idx in ring_atom_set:
continue
visited.add(atom_idx)
side_chain_atoms.append(atom_idx)
atom = mol.GetAtomWithIdx(atom_idx)
for neighbor in atom.GetNeighbors():
neighbor_idx = neighbor.GetIdx()
if neighbor_idx not in visited and neighbor_idx not in ring_atom_set:
queue.append(neighbor_idx)
return side_chain_atoms
def is_intrinsic_lactone_neighbor(
mol: Chem.Mol,
candidate: DetectedMacrolactone,
ring_atom_idx: int,
neighbor_idx: int,
) -> bool:
if ring_atom_idx != candidate.carbonyl_carbon_idx:
return False
bond = mol.GetBondBetweenAtoms(ring_atom_idx, neighbor_idx)
neighbor = mol.GetAtomWithIdx(neighbor_idx)
return bond is not None and bond.GetBondType() == Chem.BondType.DOUBLE and neighbor.GetAtomicNum() == 8
def build_fragment_smiles(
mol: Chem.Mol,
side_chain_atoms: list[int],
side_chain_start_idx: int,
ring_atom_idx: int,
dummy_isotope: int,
) -> str:
fragment_mol = Chem.RWMol()
old_to_new: dict[int, int] = {}
for old_idx in side_chain_atoms:
old_atom = mol.GetAtomWithIdx(old_idx)
new_atom = Chem.Atom(old_atom)
new_idx = fragment_mol.AddAtom(new_atom)
old_to_new[old_idx] = new_idx
for old_idx in side_chain_atoms:
atom = mol.GetAtomWithIdx(old_idx)
for neighbor in atom.GetNeighbors():
neighbor_idx = neighbor.GetIdx()
if neighbor_idx in old_to_new and old_idx < neighbor_idx:
bond = mol.GetBondBetweenAtoms(old_idx, neighbor_idx)
fragment_mol.AddBond(
old_to_new[old_idx],
old_to_new[neighbor_idx],
bond.GetBondType(),
)
bond_to_ring = mol.GetBondBetweenAtoms(ring_atom_idx, side_chain_start_idx)
if bond_to_ring is None:
raise RingNumberingError("Missing ring-to-side-chain bond during fragmentation.")
dummy_atom = Chem.Atom(0)
dummy_atom.SetIsotope(dummy_isotope)
dummy_idx = fragment_mol.AddAtom(dummy_atom)
fragment_mol.AddBond(
old_to_new[side_chain_start_idx],
dummy_idx,
bond_to_ring.GetBondType(),
)
fragment = fragment_mol.GetMol()
Chem.SanitizeMol(fragment)
return Chem.MolToSmiles(fragment, isomericSmiles=True)
def _find_ring_ester_atoms(mol: Chem.Mol, ring_atoms: Iterable[int]) -> tuple[int, int] | None:
ring_atom_set = set(ring_atoms)
matches: list[tuple[int, int]] = []
for atom_idx in ring_atom_set:
atom = mol.GetAtomWithIdx(atom_idx)
if atom.GetAtomicNum() != 6:
continue
has_carbonyl_oxygen = any(
bond.GetBondType() == Chem.BondType.DOUBLE and bond.GetOtherAtom(atom).GetAtomicNum() == 8
for bond in atom.GetBonds()
)
if not has_carbonyl_oxygen:
continue
for neighbor in atom.GetNeighbors():
neighbor_idx = neighbor.GetIdx()
bond = mol.GetBondBetweenAtoms(atom_idx, neighbor_idx)
if (
neighbor_idx in ring_atom_set
and neighbor.GetAtomicNum() == 8
and bond is not None
and bond.GetBondType() == Chem.BondType.SINGLE
):
ring_neighbors = [
ring_neighbor.GetIdx()
for ring_neighbor in neighbor.GetNeighbors()
if ring_neighbor.GetIdx() in ring_atom_set
]
if len(ring_neighbors) == 2:
matches.append((atom_idx, neighbor_idx))
if not matches:
return None
return sorted(matches)[0]

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from rdkit import Chem
from ._core import ensure_mol, find_macrolactone_candidates
class MacroLactoneAnalyzer:
"""Identify valid 12-20 membered macrolactones."""
def get_valid_ring_sizes(self, mol_input: str | Chem.Mol) -> list[int]:
mol, _ = ensure_mol(mol_input)
candidates = find_macrolactone_candidates(mol)
return sorted({candidate.ring_size for candidate in candidates})
def analyze_molecule(self, mol_input: str | Chem.Mol) -> dict:
mol, smiles = ensure_mol(mol_input)
candidates = find_macrolactone_candidates(mol)
valid_ring_sizes = sorted({candidate.ring_size for candidate in candidates})
is_ambiguous = len(valid_ring_sizes) > 1 or len(candidates) > 1
return {
"smiles": smiles,
"valid_ring_sizes": valid_ring_sizes,
"candidate_count": len(candidates),
"is_ambiguous": is_ambiguous,
"selected_ring_size": valid_ring_sizes[0] if len(valid_ring_sizes) == 1 and len(candidates) == 1 else None,
}

View File

@@ -0,0 +1,160 @@
from __future__ import annotations
import argparse
import json
from dataclasses import asdict
from pathlib import Path
import pandas as pd
from .analyzer import MacroLactoneAnalyzer
from .errors import MacrolactoneError
from .fragmenter import MacrolactoneFragmenter
def main() -> None:
parser = build_parser()
args = parser.parse_args()
if not hasattr(args, "func"):
parser.print_help()
raise SystemExit(1)
args.func(args)
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(prog="macro-lactone-toolkit")
subparsers = parser.add_subparsers(dest="command")
analyze = subparsers.add_parser("analyze")
_add_common_input_arguments(analyze)
analyze.add_argument("--ring-size", type=int, default=None)
analyze.set_defaults(func=run_analyze)
number = subparsers.add_parser("number")
number.add_argument("--smiles", required=True)
number.add_argument("--ring-size", type=int, default=None)
number.set_defaults(func=run_number)
fragment = subparsers.add_parser("fragment")
_add_common_input_arguments(fragment)
fragment.add_argument("--ring-size", type=int, default=None)
fragment.add_argument("--parent-id", default=None)
fragment.add_argument("--errors-output", default=None)
fragment.set_defaults(func=run_fragment)
return parser
def run_analyze(args: argparse.Namespace) -> None:
analyzer = MacroLactoneAnalyzer()
if args.smiles:
payload = analyzer.analyze_molecule(args.smiles)
_write_output(payload, args.output)
return
rows = _read_csv_rows(args.input, args.smiles_column, args.id_column)
payload = []
for row in rows:
analysis = analyzer.analyze_molecule(row["smiles"])
analysis["parent_id"] = row["parent_id"]
payload.append(analysis)
_write_output(payload, args.output)
def run_number(args: argparse.Namespace) -> None:
fragmenter = MacrolactoneFragmenter(ring_size=args.ring_size)
payload = fragmenter.number_molecule(args.smiles).to_dict()
_write_json(payload, None)
def run_fragment(args: argparse.Namespace) -> None:
fragmenter = MacrolactoneFragmenter(ring_size=args.ring_size)
if args.smiles:
result = fragmenter.fragment_molecule(args.smiles, parent_id=args.parent_id)
_write_output(result.to_dict(), args.output)
return
rows = _read_csv_rows(args.input, args.smiles_column, args.id_column)
fragment_rows: list[dict] = []
error_rows: list[dict] = []
for row in rows:
try:
result = fragmenter.fragment_molecule(row["smiles"], parent_id=row["parent_id"])
except MacrolactoneError as exc:
error_rows.append(
{
"parent_id": row["parent_id"],
"smiles": row["smiles"],
"error_type": type(exc).__name__,
"error_message": str(exc),
}
)
continue
for fragment in result.fragments:
fragment_rows.append(
{
"parent_id": result.parent_id,
"ring_size": result.ring_size,
**fragment.to_dict(),
}
)
if args.output:
_write_output(fragment_rows, args.output)
else:
_write_json({"fragments": fragment_rows, "errors": error_rows}, None)
if args.errors_output:
_write_output(error_rows, args.errors_output)
def _add_common_input_arguments(parser: argparse.ArgumentParser) -> None:
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--smiles")
group.add_argument("--input")
parser.add_argument("--smiles-column", default="smiles")
parser.add_argument("--id-column", default="id")
parser.add_argument("--output", default=None)
def _read_csv_rows(input_path: str, smiles_column: str, id_column: str) -> list[dict]:
dataframe = pd.read_csv(input_path)
rows = []
for index, row in dataframe.iterrows():
parent_id = row[id_column] if id_column in dataframe.columns else f"row_{index}"
rows.append(
{
"parent_id": str(parent_id),
"smiles": row[smiles_column],
}
)
return rows
def _write_output(payload: list[dict] | dict, output_path: str | None) -> None:
if output_path is None:
_write_json(payload, None)
return
path = Path(output_path)
if path.suffix.lower() == ".csv":
dataframe = pd.DataFrame(payload)
dataframe.to_csv(path, index=False)
return
_write_json(payload, path)
def _write_json(payload: list[dict] | dict, output_path: Path | None) -> None:
text = json.dumps(payload, indent=2, ensure_ascii=False)
if output_path is None:
print(text)
else:
output_path.write_text(text + "\n", encoding="utf-8")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,18 @@
class MacrolactoneError(Exception):
"""Base class for macrolactone toolkit errors."""
class MacrolactoneDetectionError(MacrolactoneError):
"""Raised when a valid macrolactone cannot be detected."""
class AmbiguousMacrolactoneError(MacrolactoneDetectionError):
"""Raised when multiple valid macrolactone candidates are detected."""
class RingNumberingError(MacrolactoneError):
"""Raised when ring numbering cannot be assigned."""
class FragmentationError(MacrolactoneError):
"""Raised when side-chain fragmentation fails."""

View File

@@ -0,0 +1,114 @@
from __future__ import annotations
from rdkit import Chem
from rdkit.Chem import Descriptors
from ._core import (
build_fragment_smiles,
build_numbering_result,
collect_side_chain_atoms,
ensure_mol,
find_macrolactone_candidates,
is_intrinsic_lactone_neighbor,
)
from .analyzer import MacroLactoneAnalyzer
from .errors import AmbiguousMacrolactoneError, FragmentationError, MacrolactoneDetectionError
from .models import FragmentationResult, RingNumberingResult, SideChainFragment
class MacrolactoneFragmenter:
"""Number and fragment 12-20 membered macrolactones."""
def __init__(self, ring_size: int | None = None):
if ring_size is not None and not 12 <= ring_size <= 20:
raise ValueError("ring_size must be between 12 and 20")
self.ring_size = ring_size
self.analyzer = MacroLactoneAnalyzer()
def number_molecule(self, mol_input: str | Chem.Mol) -> RingNumberingResult:
mol, _ = ensure_mol(mol_input)
candidate = self._select_candidate(mol)
return build_numbering_result(mol, candidate)
def fragment_molecule(
self,
mol_input: str | Chem.Mol,
parent_id: str | None = None,
) -> FragmentationResult:
mol, parent_smiles = ensure_mol(mol_input)
candidate = self._select_candidate(mol)
numbering = build_numbering_result(mol, candidate)
parent_id = parent_id or "molecule_0"
ring_atom_set = set(candidate.ring_atoms)
fragments: list[SideChainFragment] = []
for position, ring_atom_idx in numbering.position_to_atom.items():
ring_atom = mol.GetAtomWithIdx(ring_atom_idx)
for neighbor in ring_atom.GetNeighbors():
neighbor_idx = neighbor.GetIdx()
if neighbor_idx in ring_atom_set:
continue
if is_intrinsic_lactone_neighbor(mol, candidate, ring_atom_idx, neighbor_idx):
continue
side_chain_atoms = collect_side_chain_atoms(mol, neighbor_idx, ring_atom_set)
if not side_chain_atoms:
continue
try:
fragment_smiles_labeled = build_fragment_smiles(
mol=mol,
side_chain_atoms=side_chain_atoms,
side_chain_start_idx=neighbor_idx,
ring_atom_idx=ring_atom_idx,
dummy_isotope=position,
)
fragment_smiles_plain = build_fragment_smiles(
mol=mol,
side_chain_atoms=side_chain_atoms,
side_chain_start_idx=neighbor_idx,
ring_atom_idx=ring_atom_idx,
dummy_isotope=0,
)
except Exception as exc: # pragma: no cover - sanitized by tests
raise FragmentationError(str(exc)) from exc
plain_fragment = Chem.MolFromSmiles(fragment_smiles_plain)
if plain_fragment is None:
raise FragmentationError("Generated fragment_smiles_plain could not be read by RDKit.")
fragment_id = f"{parent_id}_frag_{len(fragments)}"
fragments.append(
SideChainFragment(
parent_id=parent_id,
fragment_id=fragment_id,
cleavage_position=position,
attachment_atom_idx=ring_atom_idx,
fragment_smiles_labeled=fragment_smiles_labeled,
fragment_smiles_plain=fragment_smiles_plain,
atom_count=sum(1 for atom in plain_fragment.GetAtoms() if atom.GetAtomicNum() != 0),
molecular_weight=round(Descriptors.MolWt(plain_fragment), 4),
)
)
return FragmentationResult(
parent_id=parent_id,
parent_smiles=parent_smiles,
ring_size=numbering.ring_size,
numbering=numbering,
fragments=fragments,
)
def _select_candidate(self, mol: Chem.Mol):
candidates = find_macrolactone_candidates(mol, ring_size=self.ring_size)
if not candidates:
requested = f"{self.ring_size}-membered " if self.ring_size is not None else ""
raise MacrolactoneDetectionError(f"No valid {requested}macrolactone was detected.")
valid_ring_sizes = sorted({candidate.ring_size for candidate in candidates})
if len(candidates) > 1 or len(valid_ring_sizes) > 1:
raise AmbiguousMacrolactoneError(
"Multiple valid macrolactone candidates were detected. Pass ring_size explicitly."
)
return candidates[0]

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
from dataclasses import asdict, dataclass, field
@dataclass(frozen=True)
class RingNumberingResult:
ring_size: int
ring_atoms: list[int]
ordered_atoms: list[int]
carbonyl_carbon_idx: int
ester_oxygen_idx: int
atom_to_position: dict[int, int]
position_to_atom: dict[int, int]
def to_dict(self) -> dict:
return asdict(self)
@dataclass(frozen=True)
class SideChainFragment:
parent_id: str
fragment_id: str
cleavage_position: int
attachment_atom_idx: int
fragment_smiles_labeled: str
fragment_smiles_plain: str
atom_count: int
molecular_weight: float
def to_dict(self) -> dict:
return asdict(self)
@dataclass(frozen=True)
class FragmentationResult:
parent_id: str
parent_smiles: str
ring_size: int
numbering: RingNumberingResult
fragments: list[SideChainFragment] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
def to_dict(self) -> dict:
return {
"parent_id": self.parent_id,
"parent_smiles": self.parent_smiles,
"ring_size": self.ring_size,
"numbering": self.numbering.to_dict(),
"fragments": [fragment.to_dict() for fragment in self.fragments],
"warnings": list(self.warnings),
}

View File

@@ -0,0 +1,9 @@
from .engine import splice_molecule
from .fragment_prep import activate_fragment
from .scaffold_prep import prepare_macrolactone_scaffold
__all__ = [
"activate_fragment",
"prepare_macrolactone_scaffold",
"splice_molecule",
]

View File

@@ -0,0 +1,51 @@
from __future__ import annotations
from rdkit import Chem
def splice_molecule(scaffold: Chem.Mol, fragment: Chem.Mol, position: int) -> Chem.Mol:
combined = Chem.CombineMols(scaffold, fragment)
rwmol = Chem.RWMol(combined)
scaffold_atom_count = scaffold.GetNumAtoms()
total_atoms = rwmol.GetNumAtoms()
scaffold_dummy_idx = -1
for atom_idx in range(scaffold_atom_count):
atom = rwmol.GetAtomWithIdx(atom_idx)
if atom.GetAtomicNum() == 0 and atom.GetIsotope() == position:
scaffold_dummy_idx = atom_idx
break
if scaffold_dummy_idx == -1:
raise ValueError(f"Scaffold dummy atom with isotope {position} not found")
fragment_dummy_idx = -1
for atom_idx in range(scaffold_atom_count, total_atoms):
atom = rwmol.GetAtomWithIdx(atom_idx)
if atom.GetAtomicNum() == 0:
fragment_dummy_idx = atom_idx
break
if fragment_dummy_idx == -1:
raise ValueError("Fragment does not contain a dummy atom")
scaffold_dummy = rwmol.GetAtomWithIdx(scaffold_dummy_idx)
fragment_dummy = rwmol.GetAtomWithIdx(fragment_dummy_idx)
if scaffold_dummy.GetDegree() != 1:
raise ValueError("Scaffold dummy atom must have exactly one neighbor")
if fragment_dummy.GetDegree() != 1:
raise ValueError("Fragment dummy atom must have exactly one neighbor")
scaffold_anchor_idx = scaffold_dummy.GetNeighbors()[0].GetIdx()
fragment_anchor_idx = fragment_dummy.GetNeighbors()[0].GetIdx()
fragment_bond = rwmol.GetBondBetweenAtoms(fragment_dummy_idx, fragment_anchor_idx)
if fragment_bond is None:
raise ValueError("Fragment dummy atom is not bonded to an anchor atom")
rwmol.AddBond(scaffold_anchor_idx, fragment_anchor_idx, fragment_bond.GetBondType())
for atom_idx in sorted((scaffold_dummy_idx, fragment_dummy_idx), reverse=True):
rwmol.RemoveAtom(atom_idx)
product = rwmol.GetMol()
Chem.SanitizeMol(product)
return product

View File

@@ -0,0 +1,49 @@
from __future__ import annotations
from rdkit import Chem
def activate_fragment(smiles: str, strategy: str = "smart") -> Chem.Mol:
mol = Chem.MolFromSmiles(smiles)
if mol is None:
raise ValueError(f"Invalid SMILES string: {smiles}")
target_idx = -1
if strategy == "smart":
for smarts in ("[N;!H0]", "[O;H1]", "[S;H1]"):
pattern = Chem.MolFromSmarts(smarts)
if pattern is None:
continue
matches = mol.GetSubstructMatches(pattern)
if matches:
target_idx = matches[0][0]
break
if target_idx == -1:
strategy = "random"
if strategy == "random":
carbon_candidates: list[int] = []
other_candidates: list[int] = []
for atom in mol.GetAtoms():
if atom.GetTotalNumHs() == 0:
continue
if atom.GetAtomicNum() == 6:
carbon_candidates.append(atom.GetIdx())
else:
other_candidates.append(atom.GetIdx())
candidates = carbon_candidates or other_candidates
if not candidates:
raise ValueError("No suitable atoms with hydrogens found to attach to.")
target_idx = candidates[0]
if target_idx == -1:
raise ValueError("Could not identify a target atom for activation.")
rwmol = Chem.RWMol(mol)
dummy_idx = rwmol.AddAtom(Chem.Atom(0))
rwmol.AddBond(target_idx, dummy_idx, Chem.BondType.SINGLE)
result = rwmol.GetMol()
Chem.SanitizeMol(result)
return result

View File

@@ -0,0 +1,63 @@
from __future__ import annotations
from typing import Iterable
from rdkit import Chem
from .._core import collect_side_chain_atoms, ensure_mol, find_macrolactone_candidates, is_intrinsic_lactone_neighbor
from ..fragmenter import MacrolactoneFragmenter
def prepare_macrolactone_scaffold(
mol_input: str | Chem.Mol,
positions: Iterable[int],
ring_size: int | None = None,
) -> tuple[Chem.Mol, dict[int, int]]:
positions = list(positions)
mol, _ = ensure_mol(mol_input)
fragmenter = MacrolactoneFragmenter(ring_size=ring_size)
numbering = fragmenter.number_molecule(mol)
candidate = find_macrolactone_candidates(mol, ring_size=numbering.ring_size)[0]
ring_atom_set = set(numbering.ordered_atoms)
atoms_to_remove: set[int] = set()
dummy_specs: list[tuple[int, int, Chem.BondType]] = []
for position in positions:
if position not in numbering.position_to_atom:
raise ValueError(f"Position {position} not found in ring numbering.")
ring_atom_idx = numbering.position_to_atom[position]
ring_atom = mol.GetAtomWithIdx(ring_atom_idx)
for neighbor in ring_atom.GetNeighbors():
neighbor_idx = neighbor.GetIdx()
if neighbor_idx in ring_atom_set:
continue
if is_intrinsic_lactone_neighbor(mol, candidate, ring_atom_idx, neighbor_idx):
continue
side_chain_atoms = collect_side_chain_atoms(mol, neighbor_idx, ring_atom_set)
atoms_to_remove.update(side_chain_atoms)
bond = mol.GetBondBetweenAtoms(ring_atom_idx, neighbor_idx)
if bond is not None:
dummy_specs.append((ring_atom_idx, position, bond.GetBondType()))
if not any(spec_position == position for _, spec_position, _ in dummy_specs):
dummy_specs.append((ring_atom_idx, position, Chem.BondType.SINGLE))
rwmol = Chem.RWMol(mol)
for ring_atom_idx, position, bond_type in dummy_specs:
dummy_atom = Chem.Atom(0)
dummy_atom.SetIsotope(position)
dummy_idx = rwmol.AddAtom(dummy_atom)
rwmol.AddBond(ring_atom_idx, dummy_idx, bond_type)
for atom_idx in sorted(atoms_to_remove, reverse=True):
rwmol.RemoveAtom(atom_idx)
scaffold = rwmol.GetMol()
Chem.SanitizeMol(scaffold)
dummy_map = {
atom.GetIsotope(): atom.GetIdx()
for atom in scaffold.GetAtoms()
if atom.GetAtomicNum() == 0 and atom.GetIsotope() in positions
}
return scaffold, dummy_map