{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'B': 3, 'B+1': 2, 'B-1': 4, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 6, 'S+1': 7, 'S-1': 5, '?': 8}\n" ] } ], "source": [ "import selfies as sf\n", "\n", "# 获取默认的语义约束字典\n", "constraints = sf.get_preset_constraints(\"default\")\n", "print(constraints)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import selfies as sf\n", "new_constraints = sf.get_preset_constraints(\"default\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "sf.set_semantic_constraints(new_constraints)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "smiles_dataset = [\"COC\", \"FCF\", \"O=O\", \"O=Cc1ccccc1\"]\n", "selfies_dataset = list(map(sf.encoder, smiles_dataset))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "alphabet = sf.get_alphabet_from_selfies(selfies_dataset)\n", "alphabet.add(\"[nop]\")\n", "\n", "alphabet = list(sorted(alphabet))\n", "alphabet" ] } ], "metadata": { "kernelspec": { "display_name": "frage", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 2 }