808 lines
39 KiB
Plaintext
808 lines
39 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# install unimol https://unimol.readthedocs.io/en/latest/installation.html#install\n",
|
||
"import pandas as pd\n",
|
||
"df = pd.read_csv('/mnt/c/project/qsar/data/A_85.csv',sep=';')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Molecule ChEMBL ID</th>\n",
|
||
" <th>Molecule Name</th>\n",
|
||
" <th>Molecule Max Phase</th>\n",
|
||
" <th>Molecular Weight</th>\n",
|
||
" <th>#RO5 Violations</th>\n",
|
||
" <th>AlogP</th>\n",
|
||
" <th>Compound Key</th>\n",
|
||
" <th>Smiles</th>\n",
|
||
" <th>Standard Type</th>\n",
|
||
" <th>Standard Relation</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Target Type</th>\n",
|
||
" <th>Document ChEMBL ID</th>\n",
|
||
" <th>Source ID</th>\n",
|
||
" <th>Source Description</th>\n",
|
||
" <th>Document Journal</th>\n",
|
||
" <th>Document Year</th>\n",
|
||
" <th>Cell ChEMBL ID</th>\n",
|
||
" <th>Properties</th>\n",
|
||
" <th>Action Type</th>\n",
|
||
" <th>Standard Text Value</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>CHEMBL5184894</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>916.22</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>5.74</td>\n",
|
||
" <td>C-3</td>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>MIC</td>\n",
|
||
" <td>'='</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>ORGANISM</td>\n",
|
||
" <td>CHEMBL5126592</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Scientific Literature</td>\n",
|
||
" <td>Eur J Med Chem</td>\n",
|
||
" <td>2022</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>CHEMBL5198466</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>902.19</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>5.35</td>\n",
|
||
" <td>C-2</td>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>MIC</td>\n",
|
||
" <td>'='</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>ORGANISM</td>\n",
|
||
" <td>CHEMBL5126592</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Scientific Literature</td>\n",
|
||
" <td>Eur J Med Chem</td>\n",
|
||
" <td>2022</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>CHEMBL5179714</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>976.02</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>5.80</td>\n",
|
||
" <td>A-7</td>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>MIC</td>\n",
|
||
" <td>'='</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>ORGANISM</td>\n",
|
||
" <td>CHEMBL5126592</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Scientific Literature</td>\n",
|
||
" <td>Eur J Med Chem</td>\n",
|
||
" <td>2022</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>CHEMBL5190735</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>904.16</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.93</td>\n",
|
||
" <td>C-4</td>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>MIC</td>\n",
|
||
" <td>'='</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>ORGANISM</td>\n",
|
||
" <td>CHEMBL5126592</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Scientific Literature</td>\n",
|
||
" <td>Eur J Med Chem</td>\n",
|
||
" <td>2022</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>CHEMBL5199514</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>897.13</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>5.03</td>\n",
|
||
" <td>1-23(A-2)</td>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>MIC</td>\n",
|
||
" <td>'='</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>ORGANISM</td>\n",
|
||
" <td>CHEMBL5126592</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Scientific Literature</td>\n",
|
||
" <td>Eur J Med Chem</td>\n",
|
||
" <td>2022</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 47 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Molecule ChEMBL ID Molecule Name Molecule Max Phase Molecular Weight \\\n",
|
||
"0 CHEMBL5184894 NaN None 916.22 \n",
|
||
"1 CHEMBL5198466 NaN None 902.19 \n",
|
||
"2 CHEMBL5179714 NaN None 976.02 \n",
|
||
"3 CHEMBL5190735 NaN None 904.16 \n",
|
||
"4 CHEMBL5199514 NaN None 897.13 \n",
|
||
"\n",
|
||
" #RO5 Violations AlogP Compound Key \\\n",
|
||
"0 3 5.74 C-3 \n",
|
||
"1 3 5.35 C-2 \n",
|
||
"2 3 5.80 A-7 \n",
|
||
"3 2 3.93 C-4 \n",
|
||
"4 3 5.03 1-23(A-2) \n",
|
||
"\n",
|
||
" Smiles Standard Type \\\n",
|
||
"0 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... MIC \n",
|
||
"1 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... MIC \n",
|
||
"2 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... MIC \n",
|
||
"3 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... MIC \n",
|
||
"4 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... MIC \n",
|
||
"\n",
|
||
" Standard Relation ... Target Type Document ChEMBL ID Source ID \\\n",
|
||
"0 '=' ... ORGANISM CHEMBL5126592 1 \n",
|
||
"1 '=' ... ORGANISM CHEMBL5126592 1 \n",
|
||
"2 '=' ... ORGANISM CHEMBL5126592 1 \n",
|
||
"3 '=' ... ORGANISM CHEMBL5126592 1 \n",
|
||
"4 '=' ... ORGANISM CHEMBL5126592 1 \n",
|
||
"\n",
|
||
" Source Description Document Journal Document Year Cell ChEMBL ID \\\n",
|
||
"0 Scientific Literature Eur J Med Chem 2022 None \n",
|
||
"1 Scientific Literature Eur J Med Chem 2022 None \n",
|
||
"2 Scientific Literature Eur J Med Chem 2022 None \n",
|
||
"3 Scientific Literature Eur J Med Chem 2022 None \n",
|
||
"4 Scientific Literature Eur J Med Chem 2022 None \n",
|
||
"\n",
|
||
" Properties Action Type \\\n",
|
||
"0 Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs NaN \n",
|
||
"1 Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs NaN \n",
|
||
"2 Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs NaN \n",
|
||
"3 Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs NaN \n",
|
||
"4 Time_Lower = 16.0 hrs | Time_Upper = 20.0 hrs NaN \n",
|
||
"\n",
|
||
" Standard Text Value \n",
|
||
"0 NaN \n",
|
||
"1 NaN \n",
|
||
"2 NaN \n",
|
||
"3 NaN \n",
|
||
"4 NaN \n",
|
||
"\n",
|
||
"[5 rows x 47 columns]"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['Molecule ChEMBL ID', 'Molecule Name', 'Molecule Max Phase',\n",
|
||
" 'Molecular Weight', '#RO5 Violations', 'AlogP', 'Compound Key',\n",
|
||
" 'Smiles', 'Standard Type', 'Standard Relation', 'Standard Value',\n",
|
||
" 'Standard Units', 'pChEMBL Value', 'Data Validity Comment', 'Comment',\n",
|
||
" 'Uo Units', 'Ligand Efficiency BEI', 'Ligand Efficiency LE',\n",
|
||
" 'Ligand Efficiency LLE', 'Ligand Efficiency SEI', 'Potential Duplicate',\n",
|
||
" 'Assay ChEMBL ID', 'Assay Description', 'Assay Type', 'BAO Format ID',\n",
|
||
" 'BAO Label', 'Assay Organism', 'Assay Tissue ChEMBL ID',\n",
|
||
" 'Assay Tissue Name', 'Assay Cell Type', 'Assay Subcellular Fraction',\n",
|
||
" 'Assay Parameters', 'Assay Variant Accession', 'Assay Variant Mutation',\n",
|
||
" 'Target ChEMBL ID', 'Target Name', 'Target Organism', 'Target Type',\n",
|
||
" 'Document ChEMBL ID', 'Source ID', 'Source Description',\n",
|
||
" 'Document Journal', 'Document Year', 'Cell ChEMBL ID', 'Properties',\n",
|
||
" 'Action Type', 'Standard Text Value'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'Activity', 'Inhibition', 'MBC', 'MIC', 'Ratio'}"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"set(df['Standard Type'].to_list())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# select MIC\n",
|
||
"df_with_MIC = df[df['Standard Type'] == 'MIC']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 选择关键的活性和结构数据\n",
|
||
"qsar_df = df_with_MIC[['Molecule ChEMBL ID', 'Smiles', 'Standard Value', 'Standard Units', \n",
|
||
" 'AlogP', 'Molecular Weight', '#RO5 Violations', 'Ligand Efficiency LE',\n",
|
||
" 'Target Name', 'Target ChEMBL ID']]\n",
|
||
"\n",
|
||
"# 如果需要,可以进一步处理或清洗数据\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" SMILES TARGET\n",
|
||
"0 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 0.5\n",
|
||
"1 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 0.5\n",
|
||
"2 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 32.0\n",
|
||
"3 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 16.0\n",
|
||
"4 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 4.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"# Assuming qsar_df is your original DataFrame\n",
|
||
"qsar_df_formatted = qsar_df[['Smiles', 'Standard Value']].copy()\n",
|
||
"\n",
|
||
"# Rename the 'Standard Value' column to 'TARGET'\n",
|
||
"qsar_df_formatted.rename(columns={'Smiles': 'SMILES', 'Standard Value': 'TARGET'}, inplace=True)\n",
|
||
"\n",
|
||
"# Now qsar_df_formatted is ready for training\n",
|
||
"print(qsar_df_formatted.head())\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"qsar_df_formatted.to_csv('qsar_training_data.csv', index=False)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"```shell\n",
|
||
"sudo apt-get update\n",
|
||
"sudo apt-get install --only-upgrade libstdc++6\n",
|
||
"```\n",
|
||
"https://unimol.readthedocs.io/en/latest/tutorial.html\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Training set size: 151 samples\n",
|
||
"Test set size: 38 samples\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"# Load the dataset\n",
|
||
"qsar_df = pd.read_csv('qsar_training_data.csv')\n",
|
||
"\n",
|
||
"# Split the data: 90% for training, 10% for testing\n",
|
||
"train_df, test_df = train_test_split(qsar_df, test_size=0.2, random_state=42)\n",
|
||
"\n",
|
||
"# Save the resulting datasets to CSV files\n",
|
||
"train_df.to_csv('qsar_training_data.csv', index=False)\n",
|
||
"test_df.to_csv('qsar_test_data.csv', index=False)\n",
|
||
"\n",
|
||
"# Output the size of the datasets to verify\n",
|
||
"print(f\"Training set size: {train_df.shape[0]} samples\")\n",
|
||
"print(f\"Test set size: {test_df.shape[0]} samples\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:45:27 | unimol_tools/weights/weighthub.py | 17 | INFO | Uni-Mol Tools | Weights will be downloaded to default directory: /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from unimol_tools import MolTrain, MolPredict\n",
|
||
"\n",
|
||
"# Assuming you saved the DataFrame to 'qsar_training_data.csv'\n",
|
||
"train_data = 'qsar_training_data.csv'\n",
|
||
"\n",
|
||
"# Train the model\n",
|
||
"clf = MolTrain(task='regression', # or 'classification' based on your needs\n",
|
||
" data_type='molecule', \n",
|
||
" epochs=10, \n",
|
||
" batch_size=16, \n",
|
||
" metrics='mse', # Use 'mse' for regression\n",
|
||
" save_path='./exp'\n",
|
||
" )\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:45:27 | unimol_tools/data/datareader.py | 188 | INFO | Uni-Mol Tools | Anomaly clean with 3 sigma threshold: 151 -> 151\n",
|
||
"2024-09-27 22:45:28 | unimol_tools/data/conformer.py | 89 | INFO | Uni-Mol Tools | Start generating conformers...\n",
|
||
"151it [00:11, 13.52it/s]\n",
|
||
"2024-09-27 22:45:39 | unimol_tools/data/conformer.py | 93 | INFO | Uni-Mol Tools | Succeed to generate conformers for 100.00% of molecules.\n",
|
||
"2024-09-27 22:45:39 | unimol_tools/data/conformer.py | 95 | INFO | Uni-Mol Tools | Succeed to generate 3d conformers for 69.54% of molecules.\n",
|
||
"2024-09-27 22:45:39 | unimol_tools/train.py | 172 | INFO | Uni-Mol Tools | Output directory already exists: ./exp\n",
|
||
"2024-09-27 22:45:39 | unimol_tools/train.py | 173 | INFO | Uni-Mol Tools | Warning: Overwrite output directory: ./exp\n",
|
||
"2024-09-27 22:45:39 | unimol_tools/models/unimol.py | 124 | INFO | Uni-Mol Tools | Loading pretrained weights from /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"WEIGHT_DIR /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n",
|
||
"MODEL_CONFIG {'weight': {'protein': 'poc_pre_220816.pt', 'molecule_no_h': 'mol_pre_no_h_220816.pt', 'molecule_all_h': 'mol_pre_all_h_220816.pt', 'crystal': 'mp_all_h_230313.pt', 'oled': 'oled_pre_no_h_230101.pt'}, 'dict': {'protein': 'poc.dict.txt', 'molecule_no_h': 'mol.dict.txt', 'molecule_all_h': 'mol.dict.txt', 'crystal': 'mp.dict.txt', 'oled': 'oled.dict.txt'}}\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol.dict.txt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:45:40 | unimol_tools/models/nnmodel.py | 142 | INFO | Uni-Mol Tools | start training Uni-Mol:unimolv1\n",
|
||
"2024-09-27 22:45:44 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [1/10] train_loss: 1.3586, val_loss: 2.4455, val_mse: 280.7126, lr: 0.000093, 3.3s\n",
|
||
"2024-09-27 22:45:45 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [2/10] train_loss: 1.0766, val_loss: 1.1453, val_mse: 131.8109, lr: 0.000082, 0.8s\n",
|
||
"2024-09-27 22:45:47 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [3/10] train_loss: 0.9960, val_loss: 1.0161, val_mse: 116.7266, lr: 0.000072, 0.6s\n",
|
||
"2024-09-27 22:45:48 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [4/10] train_loss: 1.0533, val_loss: 1.1678, val_mse: 133.8159, lr: 0.000062, 0.7s\n",
|
||
"2024-09-27 22:45:49 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [5/10] train_loss: 0.8922, val_loss: 0.9024, val_mse: 103.2530, lr: 0.000051, 0.7s\n",
|
||
"2024-09-27 22:45:51 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [6/10] train_loss: 0.8443, val_loss: 1.2440, val_mse: 142.2061, lr: 0.000041, 0.7s\n",
|
||
"2024-09-27 22:45:51 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [7/10] train_loss: 0.9452, val_loss: 0.8510, val_mse: 97.0552, lr: 0.000031, 0.7s\n",
|
||
"2024-09-27 22:45:53 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [8/10] train_loss: 0.9270, val_loss: 1.0135, val_mse: 115.5859, lr: 0.000021, 0.7s\n",
|
||
"2024-09-27 22:45:53 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [9/10] train_loss: 0.8749, val_loss: 0.9838, val_mse: 112.1781, lr: 0.000010, 0.7s\n",
|
||
"2024-09-27 22:45:54 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [10/10] train_loss: 0.9159, val_loss: 1.0383, val_mse: 118.4027, lr: 0.000000, 0.7s\n",
|
||
"2024-09-27 22:45:55 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:45:55 | unimol_tools/models/nnmodel.py | 168 | INFO | Uni-Mol Tools | fold 0, result {'mse': 97.05515, 'mae': 7.331252, 'pearsonr': 0.5909511971112806, 'spearmanr': 0.5808793020991461, 'r2': 0.2890600562095642}\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"WEIGHT_DIR /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n",
|
||
"MODEL_CONFIG {'weight': {'protein': 'poc_pre_220816.pt', 'molecule_no_h': 'mol_pre_no_h_220816.pt', 'molecule_all_h': 'mol_pre_all_h_220816.pt', 'crystal': 'mp_all_h_230313.pt', 'oled': 'oled_pre_no_h_230101.pt'}, 'dict': {'protein': 'poc.dict.txt', 'molecule_no_h': 'mol.dict.txt', 'molecule_all_h': 'mol.dict.txt', 'crystal': 'mp.dict.txt', 'oled': 'oled.dict.txt'}}\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol.dict.txt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:45:55 | unimol_tools/models/unimol.py | 124 | INFO | Uni-Mol Tools | Loading pretrained weights from /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"2024-09-27 22:45:56 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [1/10] train_loss: 1.3883, val_loss: 0.6170, val_mse: 73.4399, lr: 0.000093, 0.7s\n",
|
||
"2024-09-27 22:45:58 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [2/10] train_loss: 1.3161, val_loss: 0.9960, val_mse: 114.1673, lr: 0.000082, 0.8s\n",
|
||
"2024-09-27 22:45:58 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [3/10] train_loss: 1.1456, val_loss: 0.5966, val_mse: 70.8534, lr: 0.000072, 0.6s\n",
|
||
"2024-09-27 22:46:00 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [4/10] train_loss: 1.1155, val_loss: 0.6260, val_mse: 74.4525, lr: 0.000062, 0.8s\n",
|
||
"2024-09-27 22:46:01 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [5/10] train_loss: 0.9599, val_loss: 0.7855, val_mse: 90.7978, lr: 0.000051, 0.7s\n",
|
||
"2024-09-27 22:46:01 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [6/10] train_loss: 1.0833, val_loss: 0.7717, val_mse: 89.3328, lr: 0.000041, 0.7s\n",
|
||
"2024-09-27 22:46:02 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [7/10] train_loss: 1.0203, val_loss: 1.0910, val_mse: 124.0563, lr: 0.000031, 0.7s\n",
|
||
"2024-09-27 22:46:03 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [8/10] train_loss: 0.9478, val_loss: 0.7585, val_mse: 89.1448, lr: 0.000021, 0.7s\n",
|
||
"2024-09-27 22:46:03 | unimol_tools/utils/metrics.py | 234 | WARNING | Uni-Mol Tools | Early stopping at epoch: 8\n",
|
||
"2024-09-27 22:46:03 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:04 | unimol_tools/models/nnmodel.py | 168 | INFO | Uni-Mol Tools | fold 1, result {'mse': 70.85342, 'mae': 5.747976, 'pearsonr': 0.20143281994387507, 'spearmanr': 0.2609365195530784, 'r2': 0.035624027252197266}\n",
|
||
"2024-09-27 22:46:04 | unimol_tools/models/unimol.py | 124 | INFO | Uni-Mol Tools | Loading pretrained weights from /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"WEIGHT_DIR /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n",
|
||
"MODEL_CONFIG {'weight': {'protein': 'poc_pre_220816.pt', 'molecule_no_h': 'mol_pre_no_h_220816.pt', 'molecule_all_h': 'mol_pre_all_h_220816.pt', 'crystal': 'mp_all_h_230313.pt', 'oled': 'oled_pre_no_h_230101.pt'}, 'dict': {'protein': 'poc.dict.txt', 'molecule_no_h': 'mol.dict.txt', 'molecule_all_h': 'mol.dict.txt', 'crystal': 'mp.dict.txt', 'oled': 'oled.dict.txt'}}\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol.dict.txt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:46:05 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [1/10] train_loss: 1.4606, val_loss: 3.3206, val_mse: 378.6044, lr: 0.000093, 0.7s\n",
|
||
"2024-09-27 22:46:06 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [2/10] train_loss: 1.4898, val_loss: 0.9078, val_mse: 105.1634, lr: 0.000082, 0.6s\n",
|
||
"2024-09-27 22:46:08 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [3/10] train_loss: 1.0155, val_loss: 1.0172, val_mse: 117.1504, lr: 0.000072, 0.7s\n",
|
||
"2024-09-27 22:46:09 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [4/10] train_loss: 0.9078, val_loss: 1.0485, val_mse: 120.6293, lr: 0.000062, 0.7s\n",
|
||
"2024-09-27 22:46:09 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [5/10] train_loss: 0.9419, val_loss: 1.0336, val_mse: 119.9488, lr: 0.000051, 0.6s\n",
|
||
"2024-09-27 22:46:10 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [6/10] train_loss: 0.9781, val_loss: 0.9392, val_mse: 108.4770, lr: 0.000041, 0.6s\n",
|
||
"2024-09-27 22:46:10 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [7/10] train_loss: 0.8445, val_loss: 0.8251, val_mse: 95.0762, lr: 0.000031, 0.7s\n",
|
||
"2024-09-27 22:46:12 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [8/10] train_loss: 0.9550, val_loss: 0.8084, val_mse: 93.2420, lr: 0.000021, 0.6s\n",
|
||
"2024-09-27 22:46:13 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [9/10] train_loss: 0.7777, val_loss: 0.8005, val_mse: 92.2463, lr: 0.000010, 0.7s\n",
|
||
"2024-09-27 22:46:15 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [10/10] train_loss: 0.7272, val_loss: 0.8880, val_mse: 101.9108, lr: 0.000000, 0.7s\n",
|
||
"2024-09-27 22:46:16 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:16 | unimol_tools/models/nnmodel.py | 168 | INFO | Uni-Mol Tools | fold 2, result {'mse': 92.246346, 'mae': 6.6947527, 'pearsonr': 0.2854272922850214, 'spearmanr': 0.3806770274531352, 'r2': 0.010852813720703125}\n",
|
||
"2024-09-27 22:46:16 | unimol_tools/models/unimol.py | 124 | INFO | Uni-Mol Tools | Loading pretrained weights from /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"WEIGHT_DIR /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n",
|
||
"MODEL_CONFIG {'weight': {'protein': 'poc_pre_220816.pt', 'molecule_no_h': 'mol_pre_no_h_220816.pt', 'molecule_all_h': 'mol_pre_all_h_220816.pt', 'crystal': 'mp_all_h_230313.pt', 'oled': 'oled_pre_no_h_230101.pt'}, 'dict': {'protein': 'poc.dict.txt', 'molecule_no_h': 'mol.dict.txt', 'molecule_all_h': 'mol.dict.txt', 'crystal': 'mp.dict.txt', 'oled': 'oled.dict.txt'}}\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol.dict.txt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:46:17 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [1/10] train_loss: 1.2635, val_loss: 1.7796, val_mse: 199.5377, lr: 0.000093, 0.6s\n",
|
||
"2024-09-27 22:46:18 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [2/10] train_loss: 1.1977, val_loss: 0.8583, val_mse: 97.1572, lr: 0.000082, 0.6s\n",
|
||
"2024-09-27 22:46:19 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [3/10] train_loss: 1.1185, val_loss: 0.8756, val_mse: 100.4343, lr: 0.000072, 0.7s\n",
|
||
"2024-09-27 22:46:20 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [4/10] train_loss: 0.9613, val_loss: 0.9776, val_mse: 109.5843, lr: 0.000062, 0.7s\n",
|
||
"2024-09-27 22:46:21 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [5/10] train_loss: 1.1364, val_loss: 0.7891, val_mse: 90.3483, lr: 0.000051, 0.7s\n",
|
||
"2024-09-27 22:46:22 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [6/10] train_loss: 0.9706, val_loss: 0.8194, val_mse: 92.2830, lr: 0.000041, 0.7s\n",
|
||
"2024-09-27 22:46:23 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [7/10] train_loss: 0.9130, val_loss: 0.6754, val_mse: 75.7993, lr: 0.000031, 0.6s\n",
|
||
"2024-09-27 22:46:24 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [8/10] train_loss: 0.9331, val_loss: 0.6883, val_mse: 77.6490, lr: 0.000021, 0.6s\n",
|
||
"2024-09-27 22:46:25 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [9/10] train_loss: 0.8835, val_loss: 0.6737, val_mse: 75.2476, lr: 0.000010, 0.7s\n",
|
||
"2024-09-27 22:46:26 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [10/10] train_loss: 0.8302, val_loss: 0.6861, val_mse: 76.5509, lr: 0.000000, 0.6s\n",
|
||
"2024-09-27 22:46:27 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:27 | unimol_tools/models/nnmodel.py | 168 | INFO | Uni-Mol Tools | fold 3, result {'mse': 75.2476, 'mae': 6.477419, 'pearsonr': 0.525078779024625, 'spearmanr': 0.41594596740381523, 'r2': 0.2580321431159973}\n",
|
||
"2024-09-27 22:46:27 | unimol_tools/models/unimol.py | 124 | INFO | Uni-Mol Tools | Loading pretrained weights from /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"WEIGHT_DIR /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n",
|
||
"MODEL_CONFIG {'weight': {'protein': 'poc_pre_220816.pt', 'molecule_no_h': 'mol_pre_no_h_220816.pt', 'molecule_all_h': 'mol_pre_all_h_220816.pt', 'crystal': 'mp_all_h_230313.pt', 'oled': 'oled_pre_no_h_230101.pt'}, 'dict': {'protein': 'poc.dict.txt', 'molecule_no_h': 'mol.dict.txt', 'molecule_all_h': 'mol.dict.txt', 'crystal': 'mp.dict.txt', 'oled': 'oled.dict.txt'}}\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol.dict.txt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:46:28 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [1/10] train_loss: 1.2777, val_loss: 1.9819, val_mse: 227.5000, lr: 0.000093, 0.7s\n",
|
||
"2024-09-27 22:46:30 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [2/10] train_loss: 1.4327, val_loss: 3.0447, val_mse: 346.6302, lr: 0.000082, 0.7s\n",
|
||
"2024-09-27 22:46:30 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [3/10] train_loss: 1.2044, val_loss: 1.2728, val_mse: 145.1859, lr: 0.000072, 0.6s\n",
|
||
"2024-09-27 22:46:32 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [4/10] train_loss: 0.9925, val_loss: 1.7034, val_mse: 194.0388, lr: 0.000062, 0.6s\n",
|
||
"2024-09-27 22:46:32 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [5/10] train_loss: 0.8893, val_loss: 1.0293, val_mse: 117.8387, lr: 0.000051, 0.7s\n",
|
||
"2024-09-27 22:46:34 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [6/10] train_loss: 0.9734, val_loss: 1.1283, val_mse: 129.1722, lr: 0.000041, 0.6s\n",
|
||
"2024-09-27 22:46:35 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [7/10] train_loss: 0.9078, val_loss: 1.1153, val_mse: 127.7389, lr: 0.000031, 0.7s\n",
|
||
"2024-09-27 22:46:35 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [8/10] train_loss: 1.0021, val_loss: 0.8698, val_mse: 99.5525, lr: 0.000021, 0.7s\n",
|
||
"2024-09-27 22:46:37 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [9/10] train_loss: 0.8148, val_loss: 1.0937, val_mse: 125.1105, lr: 0.000010, 0.7s\n",
|
||
"2024-09-27 22:46:37 | unimol_tools/tasks/trainer.py | 210 | INFO | Uni-Mol Tools | Epoch [10/10] train_loss: 0.9359, val_loss: 1.1535, val_mse: 131.9864, lr: 0.000000, 0.7s\n",
|
||
"2024-09-27 22:46:38 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:38 | unimol_tools/models/nnmodel.py | 168 | INFO | Uni-Mol Tools | fold 4, result {'mse': 99.55246, 'mae': 7.1317773, 'pearsonr': 0.6565703302621841, 'spearmanr': 0.7090074325220541, 'r2': 0.328890860080719}\n",
|
||
"2024-09-27 22:46:38 | unimol_tools/models/nnmodel.py | 183 | INFO | Uni-Mol Tools | Uni-Mol metrics score: \n",
|
||
"{'mse': 87.05764177737245, 'mae': 6.680970421546713, 'pearsonr': 0.4982811863609151, 'spearmanr': 0.49812542784550135, 'r2': 0.23865339883639336}\n",
|
||
"2024-09-27 22:46:38 | unimol_tools/models/nnmodel.py | 184 | INFO | Uni-Mol Tools | Uni-Mol & Metric result saved!\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"pred = clf.fit(data=train_data)\n",
|
||
"\n",
|
||
"## download mol.dict.txt\n",
|
||
"## python script :/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:46:39 | unimol_tools/data/conformer.py | 89 | INFO | Uni-Mol Tools | Start generating conformers...\n",
|
||
"38it [00:05, 6.67it/s]\n",
|
||
"2024-09-27 22:46:44 | unimol_tools/data/conformer.py | 93 | INFO | Uni-Mol Tools | Succeed to generate conformers for 100.00% of molecules.\n",
|
||
"2024-09-27 22:46:44 | unimol_tools/data/conformer.py | 95 | INFO | Uni-Mol Tools | Succeed to generate 3d conformers for 73.68% of molecules.\n",
|
||
"2024-09-27 22:46:45 | unimol_tools/models/unimol.py | 124 | INFO | Uni-Mol Tools | Loading pretrained weights from /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"WEIGHT_DIR /opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights\n",
|
||
"MODEL_CONFIG {'weight': {'protein': 'poc_pre_220816.pt', 'molecule_no_h': 'mol_pre_no_h_220816.pt', 'molecule_all_h': 'mol_pre_all_h_220816.pt', 'crystal': 'mp_all_h_230313.pt', 'oled': 'oled_pre_no_h_230101.pt'}, 'dict': {'protein': 'poc.dict.txt', 'molecule_no_h': 'mol.dict.txt', 'molecule_all_h': 'mol.dict.txt', 'crystal': 'mp.dict.txt', 'oled': 'oled.dict.txt'}}\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt\n",
|
||
"/opt/conda/envs/analyse/lib/python3.11/site-packages/unimol_tools/weights/mol.dict.txt\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-09-27 22:46:45 | unimol_tools/models/nnmodel.py | 206 | INFO | Uni-Mol Tools | start predict NNModel:unimolv1\n",
|
||
"2024-09-27 22:46:47 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:48 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:50 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:51 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:53 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!\n",
|
||
"2024-09-27 22:46:53 | unimol_tools/predict.py | 92 | INFO | Uni-Mol Tools | final predict metrics score: \n",
|
||
"{'mse': 59.72037918598548, 'mae': 5.179289798987539, 'pearsonr': 0.638764928149331, 'spearmanr': 0.6006870492749102, 'r2': 0.35928715315601223}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Making predictions\n",
|
||
"test_data = 'qsar_test_data.csv' # Replace with your actual test data file\n",
|
||
"clf = MolPredict(load_model='./exp')\n",
|
||
"res = clf.predict(data=test_data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>SMILES</th>\n",
|
||
" <th>TARGET</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>0.5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>0.5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>32.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]...</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" SMILES TARGET\n",
|
||
"0 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 0.5\n",
|
||
"1 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 0.5\n",
|
||
"2 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 32.0\n",
|
||
"3 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 16.0\n",
|
||
"4 CC[C@H]1OC(=O)C[C@@H](O)[C@H](C)[C@@H](O[C@@H]... 4.0"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"qsar_df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"189"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(qsar_df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# QSAR https://bohrium.dp.tech/notebooks/1032"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "analyse",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|