feat(toolkit): ship macro_lactone_toolkit package
Unify macrolactone detection, numbering, fragmentation, and splicing under the installable macro_lactone_toolkit package:
- Replace the legacy src.* modules with the new package layout.
- Add analyze/number/fragment CLI entry points and pixi tasks.
- Migrate tests, README, and scripts to the new package API.
This commit is contained in:
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Tests package marker for helper imports.
|
||||
96
tests/helpers.py
Normal file
96
tests/helpers.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Mapping
|
||||
|
||||
from rdkit import Chem
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BuiltMacrolactone:
    """Immutable record describing a macrolactone assembled by ``build_macrolactone``.

    Bundles the molecule object, its SMILES text, and the mapping from ring
    position number to atom index so tests can compare against a known
    numbering.
    """

    # Molecule object for the constructed structure.
    mol: Chem.Mol
    # SMILES string generated from ``mol``.
    smiles: str
    # Ring position number -> atom index inside ``mol``.
    position_to_atom: dict[int, int]
|
||||
|
||||
|
||||
def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a synthetic macrolactone ring with optional carbon side chains.

    Ring positions are numbered so that position 1 is the carbonyl carbon,
    position 2 is the ring oxygen bonded to it, and positions 3..ring_size
    are carbons. An extra oxygen is double-bonded to position 1.

    Args:
        ring_size: Number of ring atoms; must be between 12 and 20 inclusive.
        side_chains: Optional mapping of ring position to a side-chain name
            understood by ``_add_side_chain``.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its SMILES,
        and the position-to-atom-index mapping.

    Raises:
        ValueError: If ``ring_size`` is out of range, a position is not on
            the ring, a position is 1 or 2 (the lactone atoms cannot take a
            further substituent), or a side-chain name is unsupported.
    """
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")

    side_chains = dict(side_chains or {})

    # Validate every requested position before any atom is created, so bad
    # input fails fast with a clear message instead of surfacing later as a
    # valence error during sanitization.
    for position in side_chains:
        if position not in range(1, ring_size + 1):
            raise ValueError(f"Invalid ring position: {position}")
        if position in (1, 2):
            raise ValueError(
                f"Ring position {position} cannot carry a side chain"
            )

    rwmol = Chem.RWMol()

    # Atoms are added in position order so atom indices follow the numbering.
    position_to_atom: dict[int, int] = {}
    for position in range(1, ring_size + 1):
        symbol = "O" if position == 2 else "C"
        position_to_atom[position] = rwmol.AddAtom(Chem.Atom(symbol))
    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))

    # Chain 1-2-3-...-ring_size, then close the ring back onto position 1.
    for position in range(1, ring_size):
        rwmol.AddBond(
            position_to_atom[position],
            position_to_atom[position + 1],
            Chem.BondType.SINGLE,
        )
    rwmol.AddBond(position_to_atom[ring_size], position_to_atom[1], Chem.BondType.SINGLE)
    rwmol.AddBond(position_to_atom[1], carbonyl_oxygen_idx, Chem.BondType.DOUBLE)

    for position, side_chain in side_chains.items():
        _add_side_chain(rwmol, position_to_atom[position], side_chain)

    mol = rwmol.GetMol()
    Chem.SanitizeMol(mol)
    return BuiltMacrolactone(
        mol=mol,
        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
        position_to_atom=position_to_atom,
    )
|
||||
|
||||
|
||||
def build_ambiguous_smiles() -> str:
    """Return a SMILES string containing both a 12- and a 14-membered macrolactone.

    The two rings are built independently and combined into one molecule
    object, which the tests use as input with more than one candidate ring.
    """
    small_ring, large_ring = (build_macrolactone(size).mol for size in (12, 14))
    return Chem.MolToSmiles(
        Chem.CombineMols(small_ring, large_ring),
        isomericSmiles=True,
    )
|
||||
|
||||
|
||||
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    """Return the isomeric SMILES RDKit writes for a molecule or SMILES string.

    Args:
        smiles_or_mol: An existing ``Chem.Mol`` or a SMILES string to parse.

    Raises:
        ValueError: If the SMILES string cannot be parsed.
    """
    already_parsed = isinstance(smiles_or_mol, Chem.Mol)
    mol = smiles_or_mol if already_parsed else Chem.MolFromSmiles(smiles_or_mol)
    # MolFromSmiles signals a parse failure by returning None.
    if mol is None:
        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
    return Chem.MolToSmiles(mol, isomericSmiles=True)
|
||||
|
||||
|
||||
def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
    """Attach a named carbon side chain to ``ring_atom_idx`` in ``rwmol``.

    Supported names are ``"methyl"`` (one carbon), ``"ethyl"`` (two carbons)
    and ``"exocyclic_alkene"`` (two carbons, the first double-bonded to the
    ring atom).

    Raises:
        ValueError: If ``side_chain`` is not one of the supported names.
    """
    # Bond orders walking outward from the ring atom; one new carbon per bond.
    if side_chain == "methyl":
        bond_orders = (Chem.BondType.SINGLE,)
    elif side_chain == "ethyl":
        bond_orders = (Chem.BondType.SINGLE, Chem.BondType.SINGLE)
    elif side_chain == "exocyclic_alkene":
        bond_orders = (Chem.BondType.DOUBLE, Chem.BondType.SINGLE)
    else:
        raise ValueError(f"Unsupported side chain: {side_chain}")

    # Create all new atoms first, then bond consecutive atoms along the chain.
    chain = [ring_atom_idx]
    chain.extend(rwmol.AddAtom(Chem.Atom("C")) for _ in bond_orders)
    for begin_idx, end_idx, order in zip(chain, chain[1:], bond_orders):
        rwmol.AddBond(begin_idx, end_idx, order)
|
||||
74
tests/test_cli.py
Normal file
74
tests/test_cli.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .helpers import build_ambiguous_smiles, build_macrolactone
|
||||
|
||||
|
||||
def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run ``python -m macro_lactone_toolkit.cli`` with ``args`` in a child process.

    Output is captured as text and a non-zero exit status does not raise, so
    callers inspect ``returncode``, ``stdout`` and ``stderr`` themselves.
    """
    command = [sys.executable, "-m", "macro_lactone_toolkit.cli"]
    command.extend(args)
    return subprocess.run(command, check=False, text=True, capture_output=True)
|
||||
|
||||
|
||||
def test_cli_smoke_commands():
    """Smoke-test the analyze, number and fragment subcommands on one molecule."""
    smiles = build_macrolactone(16, {5: "methyl"}).smiles

    # analyze: reports the valid ring sizes as JSON on stdout.
    analyze_run = run_cli("analyze", "--smiles", smiles)
    assert analyze_run.returncode == 0, analyze_run.stderr
    assert json.loads(analyze_run.stdout)["valid_ring_sizes"] == [16]

    # number: JSON object keys are strings, hence the "1" lookup.
    number_run = run_cli("number", "--smiles", smiles)
    assert number_run.returncode == 0, number_run.stderr
    numbering = json.loads(number_run.stdout)
    assert numbering["ring_size"] == 16
    assert numbering["position_to_atom"]["1"] >= 0

    # fragment: echoes the parent id and emits at least one labeled fragment.
    fragment_run = run_cli("fragment", "--smiles", smiles, "--parent-id", "cli_1")
    assert fragment_run.returncode == 0, fragment_run.stderr
    fragmentation = json.loads(fragment_run.stdout)
    assert fragmentation["parent_id"] == "cli_1"
    assert fragmentation["ring_size"] == 16
    assert fragmentation["fragments"][0]["fragment_smiles_labeled"]
|
||||
|
||||
|
||||
def test_cli_fragment_csv_skips_ambiguous_and_records_errors(tmp_path):
    """Batch fragment a CSV: the valid row yields fragments, the ambiguous row an error record."""
    valid_smiles = build_macrolactone(14, {4: "methyl"}).smiles
    ambiguous_smiles = build_ambiguous_smiles()

    input_path = tmp_path / "molecules.csv"
    output_path = tmp_path / "fragments.csv"
    errors_path = tmp_path / "errors.csv"

    rows = [
        {"id": "valid_1", "smiles": valid_smiles},
        {"id": "ambiguous_1", "smiles": ambiguous_smiles},
    ]
    pd.DataFrame(rows).to_csv(input_path, index=False)

    cli_args = [
        "fragment",
        "--input", str(input_path),
        "--output", str(output_path),
        "--errors-output", str(errors_path),
    ]
    completed = run_cli(*cli_args)

    # The batch run as a whole succeeds even though one row is rejected.
    assert completed.returncode == 0, completed.stderr

    fragments = pd.read_csv(output_path)
    errors = pd.read_csv(errors_path)

    assert set(fragments["parent_id"]) == {"valid_1"}
    assert errors.loc[0, "parent_id"] == "ambiguous_1"
    assert errors.loc[0, "error_type"] == "AmbiguousMacrolactoneError"
|
||||
73
tests/test_detection_and_numbering.py
Normal file
73
tests/test_detection_and_numbering.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
|
||||
from macro_lactone_toolkit import (
|
||||
AmbiguousMacrolactoneError,
|
||||
MacroLactoneAnalyzer,
|
||||
MacrolactoneDetectionError,
|
||||
MacrolactoneFragmenter,
|
||||
)
|
||||
|
||||
from .helpers import build_ambiguous_smiles, build_macrolactone
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_detects_supported_ring_sizes(ring_size: int):
    """The analyzer reports exactly the ring size of each synthetic macrolactone."""
    smiles = build_macrolactone(ring_size).smiles
    detected = MacroLactoneAnalyzer().get_valid_ring_sizes(smiles)

    assert detected == [ring_size]
|
||||
|
||||
|
||||
def test_analyzer_rejects_non_lactone_macrocycle():
    """A 12-membered all-carbon ring has no valid macrolactone ring size."""
    carbocycle = "C1CCCCCCCCCCC1"

    assert MacroLactoneAnalyzer().get_valid_ring_sizes(carbocycle) == []
|
||||
|
||||
|
||||
def test_fragmenter_auto_numbers_ring_with_expected_positions():
    """Automatic numbering reproduces the builder's position map for a 16-membered ring."""
    built = build_macrolactone(16, {5: "methyl"})
    numbering = MacrolactoneFragmenter().number_molecule(built.mol)

    assert numbering.ring_size == 16
    assert numbering.position_to_atom == built.position_to_atom
    assert set(numbering.position_to_atom) == set(range(1, 17))

    # atom_to_position must be the exact inverse of the builder's map.
    expected_inverse = {}
    for position, atom_idx in built.position_to_atom.items():
        expected_inverse[atom_idx] = position
    assert numbering.atom_to_position == expected_inverse

    # Position 1 is a carbon carrying a double bond to an oxygen.
    carbonyl_atom = built.mol.GetAtomWithIdx(numbering.position_to_atom[1])
    assert carbonyl_atom.GetSymbol() == "C"
    carbonyl_bonds = [
        bond
        for bond in carbonyl_atom.GetBonds()
        if bond.GetBondType() == Chem.BondType.DOUBLE
        and bond.GetOtherAtom(carbonyl_atom).GetSymbol() == "O"
    ]
    assert carbonyl_bonds

    # Position 2 is the ring oxygen.
    ester_oxygen = built.mol.GetAtomWithIdx(numbering.position_to_atom[2])
    assert ester_oxygen.GetSymbol() == "O"
|
||||
|
||||
|
||||
def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
    """Numbering an input with two candidate rings and no ring_size raises AmbiguousMacrolactoneError."""
    two_ring_smiles = build_ambiguous_smiles()

    with pytest.raises(AmbiguousMacrolactoneError):
        MacrolactoneFragmenter().number_molecule(two_ring_smiles)
|
||||
|
||||
|
||||
def test_fragmenter_raises_for_missing_macrolactone():
    """Numbering a molecule without any macrolactone ring (ethanol) raises MacrolactoneDetectionError."""
    ethanol = "CCO"

    with pytest.raises(MacrolactoneDetectionError):
        MacrolactoneFragmenter().number_molecule(ethanol)
|
||||
|
||||
|
||||
def test_explicit_ring_size_selects_requested_ring():
    """An explicit ring_size matching the molecule is honoured in the numbering result."""
    smiles = build_macrolactone(14).smiles
    fragmenter = MacrolactoneFragmenter(ring_size=14)

    assert fragmenter.number_molecule(smiles).ring_size == 14
|
||||
|
||||
|
||||
def test_explicit_ring_size_rejects_wrong_ring():
    """Requesting a 16-membered ring on a 12-membered macrolactone raises MacrolactoneDetectionError."""
    smiles = build_macrolactone(12).smiles

    with pytest.raises(MacrolactoneDetectionError):
        MacrolactoneFragmenter(ring_size=16).number_molecule(smiles)
|
||||
@@ -1,39 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Add SIME to path
|
||||
SIME_PATH = "/home/zly/project/SIME"
|
||||
if SIME_PATH not in sys.path:
|
||||
sys.path.append(SIME_PATH)
|
||||
|
||||
# Add project root to path so we can import 'src'
|
||||
PROJECT_ROOT = str(Path(__file__).parent.parent)
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.append(PROJECT_ROOT)
|
||||
|
||||
def test_imports():
    """Verify that we can import from both local project and SIME."""
    print(f"sys.path: {sys.path}")

    # 1. Local import from the project's ``src`` package.
    try:
        from src.ring_numbering import assign_ring_numbering
    except ImportError as exc:
        print(f"Failed to import src.ring_numbering: {exc}")
        raise
    else:
        assert callable(assign_ring_numbering)
        print("Successfully imported src.ring_numbering.assign_ring_numbering")

    # 2. Import from the external SIME checkout.
    try:
        from utils.mole_predictor import ParallelBroadSpectrumPredictor
    except ImportError as exc:
        print(f"Failed to import from SIME: {exc}")
        raise
    else:
        assert ParallelBroadSpectrumPredictor is not None
        print("Successfully imported ParallelBroadSpectrumPredictor from utils.mole_predictor")
|
||||
|
||||
# Allow running the import check directly as a script, outside pytest.
if __name__ == "__main__":
    test_imports()
|
||||
@@ -1,95 +1,42 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
from src.splicing.fragment_prep import activate_fragment
|
||||
|
||||
from macro_lactone_toolkit.splicing.fragment_prep import activate_fragment
|
||||
|
||||
|
||||
def test_activate_smart_ethanol():
    """Check 'smart' activation of ethanol (CCO): the dummy atom must be bonded to the oxygen."""
    mol = activate_fragment("CCO", strategy="smart")

    assert mol is not None
    assert mol.GetNumAtoms() == 4  # C, C, O, *

    # Dummy atoms have atomic number 0; next() fails the test if none exists.
    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
    neighbors = dummy_atom.GetNeighbors()
    assert len(neighbors) == 1
    assert neighbors[0].GetSymbol() == "O"

    # The attachment point must survive in the SMILES output.
    assert "*" in Chem.MolToSmiles(mol)
|
||||
|
||||
|
||||
def test_activate_smart_amine():
    """Check 'smart' activation of ethylamine (CCN): the dummy atom must be bonded to the nitrogen."""
    mol = activate_fragment("CCN", strategy="smart")

    assert mol is not None

    # Dummy atoms have atomic number 0; next() fails the test if none exists.
    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "N"
|
||||
|
||||
|
||||
def test_activate_random_pentane():
    """Check 'random' activation of pentane (CCCCC): the dummy atom must be bonded to a carbon."""
    # No seed is passed to the call, so the test accepts any carbon.
    mol = activate_fragment("CCCCC", strategy="random")

    assert mol is not None
    assert mol.GetNumAtoms() == 6  # 5 C + 1 *

    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "C"
|
||||
|
||||
|
||||
def test_activate_smart_fallback():
    """Check 'smart' activation when no heteroatom exists (propane): the dummy lands on a carbon."""
    mol = activate_fragment("CCC", strategy="smart")

    assert mol is not None

    dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
    assert dummy_atom.GetNeighbors()[0].GetSymbol() == "C"

    # Verify it's a valid molecule: sanitization must report no failed step.
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
|
||||
|
||||
|
||||
def test_invalid_smiles():
    """Activating a string that is not valid SMILES raises ValueError."""
    not_smiles = "NotASmiles"

    with pytest.raises(ValueError):
        activate_fragment(not_smiles, strategy="smart")
|
||||
|
||||
|
||||
53
tests/test_fragmentation.py
Normal file
53
tests/test_fragmentation.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from rdkit import Chem
|
||||
|
||||
from macro_lactone_toolkit import MacrolactoneFragmenter
|
||||
|
||||
from .helpers import build_macrolactone
|
||||
|
||||
|
||||
def test_fragmentation_returns_empty_list_without_sidechains():
    """An unsubstituted 12-membered macrolactone yields no fragments."""
    smiles = build_macrolactone(12).smiles
    fragmenter = MacrolactoneFragmenter()

    assert fragmenter.fragment_molecule(smiles, parent_id="plain").fragments == []
|
||||
|
||||
|
||||
def test_fragmentation_emits_labeled_and_plain_smiles_round_trip():
    """Each fragment's labeled and plain SMILES parse, re-serialize and carry the expected dummy atom."""
    built = build_macrolactone(16, {5: "ethyl", 8: "methyl"})
    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="mol_001")

    assert result.parent_id == "mol_001"
    assert result.ring_size == 16
    cleavage_positions = {fragment.cleavage_position for fragment in result.fragments}
    assert cleavage_positions == {5, 8}

    for fragment in result.fragments:
        labeled = Chem.MolFromSmiles(fragment.fragment_smiles_labeled)
        plain = Chem.MolFromSmiles(fragment.fragment_smiles_plain)

        assert labeled is not None
        assert plain is not None
        assert Chem.MolToSmiles(labeled, isomericSmiles=True)
        assert Chem.MolToSmiles(plain, isomericSmiles=True)

        # Labeled form: a dummy atom whose isotope equals the cleavage position.
        labeled_dummy_isotopes = [
            atom.GetIsotope() for atom in labeled.GetAtoms() if atom.GetAtomicNum() == 0
        ]
        assert fragment.cleavage_position in labeled_dummy_isotopes

        # Plain form: a dummy atom with no isotope label.
        plain_dummy_isotopes = [
            atom.GetIsotope() for atom in plain.GetAtoms() if atom.GetAtomicNum() == 0
        ]
        assert 0 in plain_dummy_isotopes
|
||||
|
||||
|
||||
def test_fragmentation_preserves_attachment_bond_type():
    """A side chain double-bonded to the ring keeps a double bond to its dummy atom in both SMILES forms."""
    built = build_macrolactone(16, {6: "exocyclic_alkene"})
    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="bond_type")

    fragment = next(item for item in result.fragments if item.cleavage_position == 6)
    parsed = [
        Chem.MolFromSmiles(fragment.fragment_smiles_labeled),
        Chem.MolFromSmiles(fragment.fragment_smiles_plain),
    ]

    for mol in parsed:
        dummy_atom = next(atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0)
        attached_idx = dummy_atom.GetNeighbors()[0].GetIdx()
        attachment_bond = mol.GetBondBetweenAtoms(dummy_atom.GetIdx(), attached_idx)
        assert attachment_bond.GetBondType() == Chem.BondType.DOUBLE
|
||||
5
tests/test_imports.py
Normal file
5
tests/test_imports.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import macro_lactone_toolkit
|
||||
|
||||
|
||||
def test_public_imports_smoke():
    """Smoke check: reaching this test means the module-level package import succeeded."""
    assert macro_lactone_toolkit is not None
|
||||
@@ -1,223 +0,0 @@
|
||||
"""
|
||||
测试环编号功能 - 验证原子编号是否固定
|
||||
"""
|
||||
import sys
|
||||
sys.path.insert(0, '/home/zly/project/macro_split')
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Draw, AllChem
|
||||
from rdkit.Chem.Draw import rdMolDraw2D
|
||||
from src.ring_visualization import (
|
||||
get_macrolactone_numbering,
|
||||
get_ring_atoms_by_size
|
||||
)
|
||||
|
||||
|
||||
def test_ring_numbering_consistency(smiles: str, ring_size: int = 16, num_tests: int = 5):
    """
    Check that ring numbering is consistent: run it several times and make sure it stays fixed.

    Prints a progress report and returns True when every run gives the same
    carbonyl carbon, ester oxygen and numbering; returns False when the
    SMILES cannot be parsed, no 12-20 membered ring is found, a run is
    reported invalid, or the runs disagree.
    """
    print("=" * 70)
    print("测试环编号一致性")
    print("=" * 70)
    print(f"\nSMILES: {smiles[:80]}...")
    print(f"环大小: {ring_size}")
    print(f"测试次数: {num_tests}")

    # Parse the molecule.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("❌ 无法解析SMILES")
        return False

    print(f"✓ 分子解析成功,共 {mol.GetNumAtoms()} 个原子")

    # Locate the ring: try the requested size first, then scan sizes 12-20.
    ring_atoms = get_ring_atoms_by_size(mol, ring_size)
    if ring_atoms is None:
        for size in range(12, 21):
            ring_atoms = get_ring_atoms_by_size(mol, size)
            if ring_atoms:
                ring_size = size
                print(f"⚠️ 使用检测到的{size}元环")
                break

    if ring_atoms is None:
        print("❌ 未找到12-20元环")
        return False

    print(f"✓ 找到{ring_size}元环,包含 {len(ring_atoms)} 个原子")

    # Repeat the numbering and record each run's outcome.
    all_numberings = []
    all_carbonyl_carbons = []
    all_ester_oxygens = []

    for attempt in range(num_tests):
        outcome = get_macrolactone_numbering(mol, ring_size)
        _, ring_numbering, _, carbonyl_carbon, ester_oxygen, (is_valid, reason) = outcome

        if not is_valid:
            print(f"❌ 第{attempt + 1}次测试失败: {reason}")
            return False

        all_numberings.append(ring_numbering.copy())
        all_carbonyl_carbons.append(carbonyl_carbon)
        all_ester_oxygens.append(ester_oxygen)

    # Verify consistency across the runs.
    print("\n" + "-" * 50)
    print("编号一致性检查:")
    print("-" * 50)

    is_consistent = True

    if len(set(all_carbonyl_carbons)) == 1:
        print(f"✓ 羰基碳位置一致: 原子索引 {all_carbonyl_carbons[0]}")
    else:
        print(f"❌ 羰基碳位置不一致: {all_carbonyl_carbons}")
        is_consistent = False

    if len(set(all_ester_oxygens)) == 1:
        print(f"✓ 酯氧位置一致: 原子索引 {all_ester_oxygens[0]}")
    else:
        print(f"❌ 酯氧位置不一致: {all_ester_oxygens}")
        is_consistent = False

    # Compare every later numbering with the first; stop at the first mismatch.
    first_numbering = all_numberings[0]
    for run_number, later_numbering in enumerate(all_numberings[1:], 2):
        if later_numbering != first_numbering:
            print(f"❌ 第{run_number}次编号与第1次不一致")
            is_consistent = False
            break

    if is_consistent:
        print(f"✓ 所有{num_tests}次测试的编号完全一致")

    # Show the detailed numbering table, ordered by ring position.
    print("\n" + "-" * 50)
    print("环原子编号详情:")
    print("-" * 50)

    numbering = all_numberings[0]
    carbonyl_carbon = all_carbonyl_carbons[0]
    ester_oxygen = all_ester_oxygens[0]

    print(f"{'位置':<6} {'原子索引':<10} {'元素':<6} {'说明'}")
    print("-" * 40)

    for atom_idx, position in sorted(numbering.items(), key=lambda item: item[1]):
        symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol()
        if atom_idx == carbonyl_carbon:
            note = "← 羰基碳 (C=O)"
        elif atom_idx == ester_oxygen:
            note = "← 酯键氧"
        else:
            note = ""
        print(f"{position:<6} {atom_idx:<10} {symbol:<6} {note}")

    return is_consistent
|
||||
|
||||
|
||||
def save_visualization(smiles: str, output_path: str, ring_size: int = 16):
    """Save a picture of the molecule with its ring numbering as SVG and, if possible, PNG.

    The SVG path is ``output_path`` with ``.png`` replaced by ``.svg``. A
    failure while writing the PNG is reported on stdout and not re-raised.
    """
    print("\n" + "=" * 70)
    print("保存可视化图片")
    print("=" * 70)

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("❌ 无法解析SMILES")
        return

    # NOTE(review): the first ring size found in 12-20 replaces the requested
    # ring_size, even when the requested size is present — confirm intended.
    for size in range(12, 21):
        ring_atoms = get_ring_atoms_by_size(mol, size)
        if ring_atoms:
            ring_size = size
            break

    outcome = get_macrolactone_numbering(mol, ring_size)
    ring_atoms, ring_numbering, _, carbonyl_carbon, ester_oxygen, (is_valid, reason) = outcome

    if not is_valid:
        print(f"❌ 无法获取编号: {reason}")
        return

    # Draw from a copy so coordinates and notes do not touch the parsed molecule.
    mol_copy = Chem.Mol(mol)
    AllChem.Compute2DCoords(mol_copy)

    for atom_idx in ring_atoms:
        if atom_idx in ring_numbering:
            mol_copy.GetAtomWithIdx(atom_idx).SetProp("atomNote", str(ring_numbering[atom_idx]))

    # Highlight colours: the two lactone atoms first, then a colour per element.
    element_colors = {
        'C': (0.7, 0.85, 1.0),
        'O': (1.0, 0.7, 0.7),
        'N': (0.8, 0.7, 1.0),
    }
    atom_colors = {}
    for atom_idx in ring_atoms:
        symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol()
        if atom_idx == carbonyl_carbon:
            atom_colors[atom_idx] = (1.0, 0.6, 0.0)
        elif atom_idx == ester_oxygen:
            atom_colors[atom_idx] = (1.0, 0.4, 0.4)
        else:
            atom_colors[atom_idx] = element_colors.get(symbol, (0.8, 1.0, 0.8))

    def render(canvas):
        # Shared drawing steps for the SVG and PNG canvases.
        canvas.SetFontSize(14)
        canvas.DrawMolecule(mol_copy, highlightAtoms=list(ring_atoms), highlightAtomColors=atom_colors)
        canvas.FinishDrawing()

    drawer = rdMolDraw2D.MolDraw2DSVG(1000, 1000)
    render(drawer)
    svg = drawer.GetDrawingText()

    svg_path = output_path.replace('.png', '.svg')
    with open(svg_path, 'w', encoding='utf-8') as svg_file:
        svg_file.write(svg)
    print(f"✓ SVG已保存到: {svg_path}")

    # PNG output is best-effort: any failure is only reported.
    try:
        drawer_png = rdMolDraw2D.MolDraw2DCairo(1000, 1000)
        render(drawer_png)
        drawer_png.WriteDrawingText(output_path)
        print(f"✓ PNG已保存到: {output_path}")
    except Exception as e:
        print(f"⚠️ PNG保存失败: {e}")

    print("\n颜色说明:")
    print(" 橙色: 羰基碳 (位置1)")
    print(" 红色: 酯键氧 (位置2)")
    print(" 浅蓝色: 环上碳原子")
|
||||
|
||||
|
||||
def main():
    """Run the consistency check and save the visualization for one built-in example SMILES.

    Returns the consistency flag from ``test_ring_numbering_consistency``.
    """
    smiles = "O[C@H]1[C@H]([C@H]([C@H](OC[C@@H]2[C@@H](CC)OC(C[C@H]([C@H](C)[C@H]([C@@H](CC=O)C[C@@H](C)C(/C=C/C(/C)=C/2)=O)O[C@H]2[C@@H]([C@H]([C@@H]([C@@H](C)O2)O[C@H]2C[C@](C)([C@@H]([C@@H](C)O2)O)O)[N@](C)C)O)O)=O)O[C@@H]1C)OC)OC"

    print("\n大环内酯环编号测试\n")
    is_consistent = test_ring_numbering_consistency(smiles, ring_size=16, num_tests=5)

    output_path = "/home/zly/project/macro_split/output/test_ring_numbering.png"
    save_visualization(smiles, output_path, ring_size=16)

    # Final summary banner.
    print("\n" + "=" * 70)
    print("测试总结")
    print("=" * 70)
    verdict = "✅ 所有测试通过!环原子编号是固定的。" if is_consistent else "❌ 测试失败:环原子编号不一致"
    print(verdict)

    return is_consistent
|
||||
|
||||
|
||||
# Script entry point: the exit status is 0 when the numbering was consistent, 1 otherwise.
if __name__ == "__main__":
    success = main()
    sys.exit(1 if not success else 0)
|
||||
@@ -1,84 +0,0 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
from src.splicing.scaffold_prep import prepare_tylosin_scaffold
|
||||
from src.ring_numbering import assign_ring_numbering
|
||||
|
||||
def test_prepare_tylosin_scaffold():
    """Scaffold preparation swaps the side chains at the target positions for isotope-labeled dummies."""
    # 16-membered lactone with side chains. Assumed numbering (based on the
    # implementation):
    #   1      C=O
    #   2-6    CH2
    #   7      CH(CH3)  <- methyl side chain
    #   8-14   CH2
    #   15     CH(CC)   <- ethyl side chain
    #   16     O
    smiles = "O=C1CCCCC(C)CCCCCCCCC(CC)O1"

    # Verify the numbering assumption before exercising the scaffold code.
    mol = Chem.MolFromSmiles(smiles)
    numbering = assign_ring_numbering(mol)

    # Invert atom index -> position into position -> atom index.
    pos_map = {position: atom_idx for atom_idx, position in numbering.items()}
    for position in (7, 15, 5):
        assert position in pos_map, f"Position {position} not found in ring"

    # Heavy-atom neighbour counts: 7 and 15 carry a side chain (2 ring + 1),
    # while 5 has only its 2 ring neighbours (hydrogens are implicit).
    expected_neighbor_counts = {7: 3, 15: 3, 5: 2}
    for position, expected in expected_neighbor_counts.items():
        ring_atom = mol.GetAtomWithIdx(pos_map[position])
        assert len(ring_atom.GetNeighbors()) == expected

    # Execute scaffold prep.
    target_positions = [5, 7, 15]
    res_mol, dummy_map = prepare_tylosin_scaffold(smiles, target_positions)

    assert res_mol is not None
    assert len(dummy_map) == 3

    # Every target position has a dummy labeled with that position and bonded
    # to exactly one atom.
    for pos in target_positions:
        assert pos in dummy_map
        dummy_atom = res_mol.GetAtomWithIdx(dummy_map[pos])
        assert dummy_atom.GetSymbol() == "*"
        assert dummy_atom.GetIsotope() == pos
        assert len(dummy_atom.GetNeighbors()) == 1

    # Atom bookkeeping: 16 ring + 1 carbonyl O + 1 Me + 2 Et = 20 heavy atoms;
    # removing Me and Et (-3) and adding 3 dummies (+3) leaves 20.
    assert res_mol.GetNumAtoms() == 20

    # Carbons: 18 originally (15 in the ring, 1 Me, 2 Et); only the 15 ring
    # carbons remain once the side chains are gone.
    symbols = [atom.GetSymbol() for atom in res_mol.GetAtoms()]
    c_count = symbols.count('C')
    assert c_count == 15, f"Expected 15 Carbons, found {c_count}"

    dummy_count = symbols.count('*')
    assert dummy_count == 3
|
||||
@@ -1,77 +1,51 @@
|
||||
import pytest
|
||||
from rdkit import Chem
|
||||
from src.splicing.engine import splice_molecule
|
||||
|
||||
from macro_lactone_toolkit import MacrolactoneFragmenter
|
||||
from macro_lactone_toolkit.splicing.engine import splice_molecule
|
||||
from macro_lactone_toolkit.splicing.scaffold_prep import prepare_macrolactone_scaffold
|
||||
|
||||
from .helpers import build_macrolactone, canonicalize
|
||||
|
||||
|
||||
def test_splice_benzene_methyl():
    """
    Test splicing a benzene scaffold (isotope 1) with a methyl fragment.

    Scaffold: c1ccccc1[1*] (phenyl with a labeled dummy atom)
    Fragment: C* (methyl with a dummy atom)
    Result: Cc1ccccc1 (toluene)
    """
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
    fragment = Chem.MolFromSmiles("C*")

    assert scaffold is not None
    assert fragment is not None

    product = splice_molecule(scaffold, fragment, position=1)

    # Compare canonical SMILES so atom ordering does not matter.
    assert canonicalize(product) == canonicalize("Cc1ccccc1")
|
||||
|
||||
|
||||
def test_splice_missing_isotope():
    """Test that error is raised if the requested position is not found on scaffold."""
    scaffold = Chem.MolFromSmiles("c1ccccc1[2*]")  # only isotope 2 is present
    fragment = Chem.MolFromSmiles("C*")

    with pytest.raises(ValueError, match="Scaffold dummy atom with isotope 1 not found"):
        splice_molecule(scaffold, fragment, position=1)
|
||||
|
||||
|
||||
def test_splice_no_fragment_dummy():
    """Test that error is raised if fragment has no dummy atom."""
    scaffold = Chem.MolFromSmiles("c1ccccc1[1*]")
    fragment = Chem.MolFromSmiles("C")  # methane, no dummy atom

    with pytest.raises(ValueError, match="Fragment does not contain a dummy atom"):
        splice_molecule(scaffold, fragment, position=1)
|
||||
|
||||
def test_complex_splicing():
    """
    Test splicing with more complex structures.

    Scaffold: pyridine derivative n1ccccc1CC[1*]
    Fragment: cyclopropane *C1CC1
    Result: n1ccccc1CCC1CC1
    """
    # Pyridine is a six-membered ring. The earlier five-membered "n1cccc1"
    # has an aromatic nitrogen without a hydrogen, which RDKit cannot
    # kekulize, so MolFromSmiles returned None for scaffold and expected.
    scaffold = Chem.MolFromSmiles("n1ccccc1CC[1*]")
    fragment = Chem.MolFromSmiles("*C1CC1")

    assert scaffold is not None
    assert fragment is not None

    product = splice_molecule(scaffold, fragment, position=1)

    expected = Chem.MolFromSmiles("n1ccccc1CCC1CC1")
    assert expected is not None

    assert Chem.MolToSmiles(product) == Chem.MolToSmiles(expected)
|
||||
|
||||
def test_scaffold_with_multiple_different_dummies():
    """
    Splice at one of two differently labeled attachment points.

    Scaffold: [1*]c1ccccc1[2*]
    Fragment: C*
    Expected: Cc1ccccc1[2*] (the isotope-2 dummy is left intact)
    """
    two_site_scaffold = Chem.MolFromSmiles("[1*]c1ccccc1[2*]")
    methyl = Chem.MolFromSmiles("C*")

    # Only the isotope-1 site is filled.
    product_smiles = Chem.MolToSmiles(splice_molecule(two_site_scaffold, methyl, position=1))
    expected_smiles = Chem.MolToSmiles(Chem.MolFromSmiles("Cc1ccccc1[2*]"))

    assert product_smiles == expected_smiles
|
||||
def test_prepare_scaffold_and_reassemble_fragment():
    """Round trip: fragment a macrolactone, prepare its scaffold, and splice the fragment back on."""
    built = build_macrolactone(16, {5: "ethyl"})

    fragmenter = MacrolactoneFragmenter(ring_size=16)
    fragments = fragmenter.fragment_molecule(built.smiles, parent_id="reassemble").fragments
    side_chain = next(item for item in fragments if item.cleavage_position == 5)

    scaffold, dummy_map = prepare_macrolactone_scaffold(built.smiles, positions=[5], ring_size=16)
    assert 5 in dummy_map

    # Splicing the labeled fragment back at position 5 must give the original molecule.
    side_chain_mol = Chem.MolFromSmiles(side_chain.fragment_smiles_labeled)
    product = splice_molecule(scaffold, side_chain_mol, position=5)

    assert canonicalize(product) == canonicalize(built.mol)
|
||||
|
||||
Reference in New Issue
Block a user