feat(validation): enforce single-anchor fragments

- skip fused/shared/multi-anchor side systems during extraction
- add fragment library schema and fragment_library.csv export
- make scaffold prep strict for non-spliceable positions
This commit is contained in:
2026-03-19 14:20:32 +08:00
parent 07ba27be2b
commit 46a438dd36
10 changed files with 383 additions and 21 deletions

View File

@@ -78,6 +78,108 @@ def build_non_standard_ring_atom_macrolactone(
)
def build_macrolactone_with_fused_side_ring(
    ring_size: int = 16,
    fused_positions: tuple[int, int] = (5, 6),
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone and close an extra ring across two ring positions.

    A base molecule is obtained from ``build_macrolactone``; two new carbon
    atoms are appended and single-bonded into the chain
    ``atom(position_a) - C - C - atom(position_b)``.

    Args:
        ring_size: Forwarded unchanged to ``build_macrolactone``.
        fused_positions: The two position keys (looked up in the base
            ``position_to_atom`` mapping) that the new two-carbon link joins.
        side_chains: Forwarded unchanged to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its isomeric
        SMILES, and the base molecule's ``position_to_atom`` mapping.
    """
    scaffold = build_macrolactone(ring_size=ring_size, side_chains=side_chains)
    first_position, second_position = fused_positions

    # Edit a copy so the base molecule object is left untouched.
    editable = Chem.RWMol(Chem.Mol(scaffold.mol))
    linker_a, linker_b = (editable.AddAtom(Chem.Atom("C")) for _ in range(2))

    bond_path = (
        (scaffold.position_to_atom[first_position], linker_a),
        (linker_a, linker_b),
        (linker_b, scaffold.position_to_atom[second_position]),
    )
    for begin_idx, end_idx in bond_path:
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    fused_mol = editable.GetMol()
    Chem.SanitizeMol(fused_mol)
    fused_smiles = Chem.MolToSmiles(fused_mol, isomericSmiles=True)
    # The appended atoms receive new indices, so the original position
    # mapping is reused as-is.
    return BuiltMacrolactone(
        mol=fused_mol,
        smiles=fused_smiles,
        position_to_atom=scaffold.position_to_atom,
    )
def build_macrolactone_with_bridge_side_chain(
    ring_size: int = 16,
    bridge_positions: tuple[int, int] = (5, 8),
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone with a two-carbon bridge between two positions.

    Starting from the molecule returned by ``build_macrolactone``, two carbon
    atoms are added and connected by single bonds so that they link the atom
    at the first bridge position to the atom at the second one.

    Args:
        ring_size: Forwarded unchanged to ``build_macrolactone``.
        bridge_positions: Pair of position keys (resolved through the base
            ``position_to_atom`` mapping) that the bridge connects.
        side_chains: Forwarded unchanged to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` with the sanitized bridged molecule, its
        isomeric SMILES, and the base molecule's ``position_to_atom`` mapping.
    """
    scaffold = build_macrolactone(ring_size=ring_size, side_chains=side_chains)
    start_position, end_position = bridge_positions

    # Work on a copy of the base molecule rather than the object it owns.
    editable = Chem.RWMol(Chem.Mol(scaffold.mol))
    bridge_head = editable.AddAtom(Chem.Atom("C"))
    bridge_tail = editable.AddAtom(Chem.Atom("C"))

    single = Chem.BondType.SINGLE
    start_anchor = scaffold.position_to_atom[start_position]
    editable.AddBond(start_anchor, bridge_head, single)
    editable.AddBond(bridge_head, bridge_tail, single)
    end_anchor = scaffold.position_to_atom[end_position]
    editable.AddBond(bridge_tail, end_anchor, single)

    bridged_mol = editable.GetMol()
    Chem.SanitizeMol(bridged_mol)
    return BuiltMacrolactone(
        mol=bridged_mol,
        smiles=Chem.MolToSmiles(bridged_mol, isomericSmiles=True),
        # New atoms are appended, so the base mapping is reused unchanged.
        position_to_atom=scaffold.position_to_atom,
    )
def build_macrolactone_with_shared_atom_side_ring(
    ring_size: int = 16,
    position: int = 5,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone with a side ring that contains one base atom.

    Three carbon atoms are appended to the molecule returned by
    ``build_macrolactone`` and bonded into a closed path that both starts and
    ends on the atom mapped to ``position``. That atom is therefore a member
    of the new four-membered cycle (itself plus the three added carbons).

    Args:
        ring_size: Forwarded unchanged to ``build_macrolactone``.
        position: Position key, resolved through the base
            ``position_to_atom`` mapping, of the atom the side ring shares.
        side_chains: Forwarded unchanged to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` with the sanitized molecule, its isomeric
        SMILES, and the base molecule's ``position_to_atom`` mapping.
    """
    scaffold = build_macrolactone(ring_size=ring_size, side_chains=side_chains)

    # Edit a copy so the base molecule object is left untouched.
    editable = Chem.RWMol(Chem.Mol(scaffold.mol))
    new_carbons = [editable.AddAtom(Chem.Atom("C")) for _ in range(3)]
    shared_idx = scaffold.position_to_atom[position]

    # Closed walk: shared atom -> C -> C -> C -> shared atom.
    cycle_path = [shared_idx, *new_carbons, shared_idx]
    for begin_idx, end_idx in zip(cycle_path, cycle_path[1:]):
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    spiro_mol = editable.GetMol()
    Chem.SanitizeMol(spiro_mol)
    spiro_smiles = Chem.MolToSmiles(spiro_mol, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=spiro_mol,
        smiles=spiro_smiles,
        position_to_atom=scaffold.position_to_atom,
    )
def build_macrolactone_with_single_anchor_side_ring(
    ring_size: int = 16,
    position: int = 5,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone carrying a three-carbon ring on a single bond.

    Three carbon atoms are appended to the molecule returned by
    ``build_macrolactone`` and bonded into a three-membered cycle. Only the
    first of them is bonded to the atom mapped to ``position``, so the side
    ring touches the base molecule through exactly one bond.

    Args:
        ring_size: Forwarded unchanged to ``build_macrolactone``.
        position: Position key, resolved through the base
            ``position_to_atom`` mapping, of the atom the side ring hangs from.
        side_chains: Forwarded unchanged to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` with the sanitized molecule, its isomeric
        SMILES, and the base molecule's ``position_to_atom`` mapping.
    """
    scaffold = build_macrolactone(ring_size=ring_size, side_chains=side_chains)

    # Edit a copy so the base molecule object is left untouched.
    editable = Chem.RWMol(Chem.Mol(scaffold.mol))
    attach_c = editable.AddAtom(Chem.Atom("C"))
    middle_c = editable.AddAtom(Chem.Atom("C"))
    far_c = editable.AddAtom(Chem.Atom("C"))
    anchor_idx = scaffold.position_to_atom[position]

    new_bonds = [
        (anchor_idx, attach_c),  # the only link to the base molecule
        (attach_c, middle_c),
        (middle_c, far_c),
        (far_c, attach_c),  # closes the three-carbon ring on itself
    ]
    for begin_idx, end_idx in new_bonds:
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    decorated_mol = editable.GetMol()
    Chem.SanitizeMol(decorated_mol)
    return BuiltMacrolactone(
        mol=decorated_mol,
        smiles=Chem.MolToSmiles(decorated_mol, isomericSmiles=True),
        position_to_atom=scaffold.position_to_atom,
    )
def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
rwmol = Chem.RWMol()