"""Predict MIC values for SDF molecules with saved 1D-QSAR models and plot them.

For every ``*.sdf`` file in ``../predict_data`` the first molecule is read,
converted to SMILES, and described by a fixed list of RDKit descriptors.
Every ``1d_qsar_*.pkl`` model found in the current working directory is then
asked for a prediction.  The raw predictions are pretty-printed, and the
non-negative ones are shown as a grouped bar plot.
"""
import sys
from pathlib import Path
from pprint import pprint

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from rdkit import Chem
from rdkit.Chem import Descriptors


def calculate_1dqsar_repr(smiles):
    """Return the 1D-QSAR descriptor vector for a SMILES string.

    Args:
        smiles: SMILES representation of the molecule.

    Returns:
        A list of 13 descriptor values in a fixed order (MolWt, MolLogP,
        NumHDonors, NumHAcceptors, TPSA, NumRotatableBonds,
        NumAromaticRings, NumAliphaticRings, NumSaturatedRings,
        NumHeteroatoms, NumValenceElectrons, NumRadicalElectrons, qed),
        or ``None`` when RDKit cannot parse ``smiles``.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    # NOTE(review): the order presumably has to match the feature order the
    # saved models were trained with -- do not reorder without retraining.
    return [
        Descriptors.MolWt(mol),
        Descriptors.MolLogP(mol),
        Descriptors.NumHDonors(mol),
        Descriptors.NumHAcceptors(mol),
        Descriptors.TPSA(mol),
        Descriptors.NumRotatableBonds(mol),
        Descriptors.NumAromaticRings(mol),
        Descriptors.NumAliphaticRings(mol),
        Descriptors.NumSaturatedRings(mol),
        Descriptors.NumHeteroatoms(mol),
        Descriptors.NumValenceElectrons(mol),
        Descriptors.NumRadicalElectrons(mol),
        Descriptors.qed(mol),
    ]


# Collect the input structures; sorted so output and plot order are
# reproducible (glob order is otherwise filesystem-dependent).
sdf_file_list = sorted(Path('../predict_data').glob('*.sdf'))

# Load every saved model once, up front, instead of once per SDF file.
# SECURITY: joblib.load unpickles arbitrary Python objects -- only point this
# script at model files you trust.
model_file_list = sorted(Path.cwd().glob('1d_qsar_*.pkl'))
models = {model_file.stem: joblib.load(model_file) for model_file in model_file_list}

# Maps SDF stem -> {model stem -> predicted MIC}.
sdf_results = {}
for sdf_file in sdf_file_list:
    # SDMolSupplier is documented to take a filename string, so convert the Path.
    supplier = Chem.SDMolSupplier(str(sdf_file))

    # Only the first record is used (one molecule per SDF is assumed).  The
    # supplier yields None for records it cannot parse, and an empty file
    # yields nothing at all; both cases end up as None here.
    new_mol = next(iter(supplier), None)
    if new_mol is None:
        print(f"Skipping {sdf_file.name}: no readable molecule found", file=sys.stderr)
        continue

    # Descriptors are computed from the SMILES round-trip rather than from the
    # SDF molecule directly, exactly as the descriptor helper expects.
    smiles = Chem.MolToSmiles(new_mol)
    descriptor = calculate_1dqsar_repr(smiles)
    if descriptor is None:
        print(f"Skipping {sdf_file.name}: could not parse SMILES {smiles!r}", file=sys.stderr)
        continue

    # The models are called with a single-row 2D array: (1, n_descriptors).
    descriptor_array = np.array(descriptor).reshape(1, -1)

    sdf_results[sdf_file.stem] = {
        model_name: model.predict(descriptor_array)[0]
        for model_name, model in models.items()
    }

pprint(sdf_results)

# Filter out negative MIC values from sdf_results
filtered_sdf_results = {
    sdf_name: {
        model_name: mic_value
        for model_name, mic_value in model_results.items()
        if mic_value >= 0
    }
    for sdf_name, model_results in sdf_results.items()
}

# Convert the filtered results to a long-format DataFrame for easier plotting
filtered_data = [
    {'SDF': sdf_name, 'Model': model_name, 'MIC': mic_value}
    for sdf_name, model_results in filtered_sdf_results.items()
    for model_name, mic_value in model_results.items()
]
# Explicit columns keep the frame well-formed even when there are no rows.
df = pd.DataFrame(filtered_data, columns=['SDF', 'Model', 'MIC'])

if df.empty:
    # seaborn cannot draw a grouped bar plot from an empty frame.
    print("No non-negative MIC predictions to plot.")
else:
    # Set up the matplotlib figure
    plt.figure(figsize=(12, 8))

    # Create a seaborn barplot with the filtered data
    sns.barplot(x='SDF', y='MIC', hue='Model', data=df, palette='tab20')

    # Customize the plot
    plt.title('Predicted MIC values by Model for Each SDF (Filtered)')
    plt.xlabel('SDF Files')
    plt.ylabel('Predicted MIC Values')
    plt.xticks(rotation=45)
    plt.legend(title='Model')

    # Show the plot
    plt.tight_layout()
    plt.show()