update

2024-09-28 13:14:52 +08:00
parent 2f139ec93d
commit af43b65d34
18 changed files with 432026 additions and 79 deletions
--- a/MIC/qsar_3D_predict.py
+++ b/MIC/qsar_3D_predict.py
@@ -0,0 +1,118 @@
+import joblib
+import numpy as np
+from rdkit import Chem
+from rdkit.Chem import AllChem
+from rdkit.Chem import rdPartialCharges
+from pathlib import Path
+from pprint import pprint
+
+# Function to calculate 3D-QSAR descriptors using Coulomb Matrix
+def calculate_3dqsar_repr(SMILES, max_atoms=100, three_d=False):
+    mol = Chem.MolFromSmiles(SMILES)
+    mol = Chem.AddHs(mol)
+    if three_d:
+        AllChem.EmbedMolecule(mol, AllChem.ETKDG())
+    else:
+        AllChem.Compute2DCoords(mol)
+    natoms = mol.GetNumAtoms()
+    rdPartialCharges.ComputeGasteigerCharges(mol)
+    charges = np.array([float(atom.GetProp("_GasteigerCharge")) for atom in mol.GetAtoms()])
+    coords = mol.GetConformer().GetPositions()
+    coulomb_matrix = np.zeros((max_atoms, max_atoms))
+    n = min(max_atoms, natoms)
+    for i in range(n):
+        for j in range(i, n):
+            if i == j:
+                coulomb_matrix[i, j] = 0.5 * charges[i] ** 2
+            if i != j:
+                delta = np.linalg.norm(coords[i] - coords[j])
+                if delta != 0:
+                    coulomb_matrix[i, j] = charges[i] * charges[j] / delta
+                    coulomb_matrix[j, i] = coulomb_matrix[i, j]
+    coulomb_matrix = np.where(np.isinf(coulomb_matrix), 0, coulomb_matrix)
+    coulomb_matrix = np.where(np.isnan(coulomb_matrix), 0, coulomb_matrix)
+    return coulomb_matrix.reshape(max_atoms*max_atoms).tolist()
+
+# # Load the SDF file and convert to SMILES
+# sdf_file = '/mnt/c/project/qsar/predict_data/chem1.sdf'
+# supplier = Chem.SDMolSupplier(sdf_file)
+# new_mol = [mol for mol in supplier][0]  # Assuming only one molecule in SDF
+# smiles = Chem.MolToSmiles(new_mol)
+
+# # Calculate the 3D-QSAR descriptors
+# descriptor = calculate_3dqsar_repr(smiles)
+# descriptor_array = np.array(descriptor).reshape(1, -1)
+
+# # Load the saved model (use the model that performed best in training)
+# model_filename = "3d_qsar_random_forest_model.pkl"
+# model = joblib.load(model_filename)
+
+# # Predict the MIC value
+# predicted_mic = model.predict(descriptor_array)
+# print(f"Predicted MIC value: {predicted_mic[0]}")
+
+# Load the SDF file and convert to SMILES
+sdf_file_list = [i for i in Path('../predict_data').glob('*.sdf')]
+# sdf_file = '/mnt/c/project/qsar/predict_data/chem1.sdf'
+sdf_results = {}
+for sdf_file in sdf_file_list:
+    supplier = Chem.SDMolSupplier(sdf_file)
+    new_mol = [mol for mol in supplier][0]  # Assuming only one molecule in SDF
+    smiles = Chem.MolToSmiles(new_mol)
+
+    # Calculate the 3D-QSAR descriptors
+    descriptor = calculate_3dqsar_repr(smiles)
+    descriptor_array = np.array(descriptor).reshape(1, -1)
+
+    # Load the saved model (use the model that performed best in training)
+    model_file_list = [i for i in Path().cwd().glob('3d_qsar_*.pkl')]
+
+    results = {}
+
+    for model_file in model_file_list:
+        model = joblib.load(model_file)
+        # Predict the MIC value
+        predicted_mic = model.predict(descriptor_array)
+        # print(f"Predicted MIC value: {predicted_mic[0]}")
+        results[model_file.stem] = predicted_mic[0]
+
+    sdf_results[sdf_file.stem] = results
+    
+pprint(sdf_results)
+
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Filter out negative MIC values from sdf_results
+filtered_sdf_results = {}
+for sdf_name, model_results in sdf_results.items():
+    filtered_results = {model_name: mic_value for model_name, mic_value in model_results.items() if mic_value >= 0}
+    filtered_sdf_results[sdf_name] = filtered_results
+
+pprint(filtered_sdf_results)
+
+# Convert the filtered results to a DataFrame for easier plotting
+filtered_data = []
+for sdf_name, model_results in filtered_sdf_results.items():
+    for model_name, mic_value in model_results.items():
+        filtered_data.append({'SDF': sdf_name, 'Model': model_name, 'MIC': mic_value})
+
+df = pd.DataFrame(filtered_data)
+
+# Set up the matplotlib figure
+plt.figure(figsize=(12, 8))
+
+# Create a seaborn barplot with the filtered data
+sns.barplot(x='SDF', y='MIC', hue='Model', data=df, palette='tab20')
+
+# Customize the plot
+plt.title('Predicted MIC values by Model for Each SDF (Filtered)')
+plt.xlabel('SDF Files')
+plt.ylabel('Predicted MIC Values')
+plt.xticks(rotation=45)
+plt.legend(title='Model')
+
+# Show the plot
+plt.tight_layout()
+plt.show()