{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 16环大环内酯侧链分析演示\n", "\n", "本notebook演示如何对一个16环大环内酯分子进行:\n", "1. 固定编号\n", "2. 定义碎片dataclass (JSON导入导出)\n", "3. 侧链断裂并保存碎片信息\n", "\n", "我们将从temp.csv中选择一个分子进行演示。\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. 导入必要的库\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "所有模块导入成功!\n" ] } ], "source": [ "# 配置matplotlib在Jupyter中内联显示\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "import sys\n", "sys.path.append('../')\n", "\n", "from rdkit import Chem\n", "from rdkit.Chem import Draw\n", "from rdkit.Chem.Draw import IPythonConsole\n", "import pandas as pd\n", "import json\n", "from pathlib import Path\n", "import numpy as np\n", "\n", "# 导入我们的模块\n", "from src.ring_numbering import (\n", " assign_ring_numbering,\n", " find_lactone_carbon,\n", " get_ring_atoms,\n", " validate_numbering\n", ")\n", "from src.fragment_dataclass import Fragment, MoleculeFragments\n", "from src.fragment_cleaver import (\n", " identify_side_chains,\n", " extract_side_chain_fragment,\n", " cleave_side_chains,\n", " process_molecule\n", ")\n", "from src.visualizer import draw_molecule_with_numbering\n", "\n", "print(\"所有模块导入成功!\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. 读取数据并选择一个分子\n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "共有 2022 个16环大环内酯分子\n", "\n", "前5个分子:\n" ] }, { "data": { "text/html": [ "
| \n", " | IDs | \n", "molecule_pref_name | \n", "smiles | \n", "
|---|---|---|---|
| 0 | \n", "CHEMBL94657 | \n", "PATUPILONE | \n", "C/C(=C\\c1csc(C)n1)[C@@H]1C[C@@H]2O[C@]2(C)CCC[... | \n", "
| 1 | \n", "CHEMBL94657 | \n", "PATUPILONE | \n", "C/C(=C\\c1csc(C)n1)[C@@H]1C[C@@H]2O[C@]2(C)CCC[... | \n", "
| 2 | \n", "CHEMBL1554 | \n", "DACTINOMYCIN | \n", "Cc1c2oc3c(C)ccc(C(=O)N[C@@H]4C(=O)N[C@H](C(C)C... | \n", "
| 3 | \n", "CHEMBL1173445 | \n", "LARGAZOLE | \n", "CCCCCCCC(=O)SCC/C=C/[C@@H]1CC(=O)NCc2nc(cs2)C2... | \n", "
| 4 | \n", "CHEMBL3902498 | \n", "NaN | \n", "Cc1cc2ccc1[C@@H](C)COC(=O)Nc1ccc(S(=O)(=O)C3CC... | \n", "