Use Balloon-generated 3D molecules (load precomputed SDF conformers instead of RDKit embedding + MMFF optimization)
This commit is contained in:
@@ -130,36 +130,38 @@ def select_2d_features(X_df, y, k=10):
|
|||||||
return X_selected, selected_features
|
return X_selected, selected_features
|
||||||
|
|
||||||
# ------------------------ 3D-QSAR特征计算(从 CSV 中 SMILES 生成3D构象) ------------------------
|
# ------------------------ 3D-QSAR特征计算(从 CSV 中 SMILES 生成3D构象) ------------------------
|
||||||
def generate_3d_mols_from_csv(csv_file, mmffVariant='MMFF94'):
|
def generate_3d_mols_from_csv(csv_file):
|
||||||
"""
|
"""
|
||||||
从 CSV 文件中读取 SMILES,生成分子的3D构象(添加氢原子、嵌入构象并进行MMFF能量最小化)。
|
从 CSV 文件中读取 Compound 字段,拼接 ".sdf" 后缀,
|
||||||
针对大环分子,启用了宏环扭转角优化并增大了嵌入尝试次数。
|
在固定目录 balloon_output(路径在函数内硬编码,而非通过参数传入)中加载已生成3D构象的分子文件。
|
||||||
|
|
||||||
mmffVariant:MMFF94或MMFF94S,默认为 MMFF94
|
参数:
|
||||||
|
csv_file: 包含 Compound 字段的 CSV 文件路径。
|
||||||
|
|
||||||
返回值:分子列表,每个元素为 (mol, True)
|
返回:
|
||||||
|
分子列表,每个元素为 (mol, True)。
|
||||||
"""
|
"""
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
from rdkit import Chem
|
from rdkit import Chem
|
||||||
from rdkit.Chem import AllChem
|
|
||||||
|
|
||||||
data = pd.read_csv(csv_file)
|
data = pd.read_csv(csv_file)
|
||||||
mols = []
|
mols = []
|
||||||
# 距离几何+ETKDG生成3D构象
|
sdf_dir = Path("/root/project/qsar/pycomsia/src/balloon_output")
|
||||||
for smi in data["SMILES"]:
|
|
||||||
mol = Chem.MolFromSmiles(smi)
|
|
||||||
if mol is None:
|
|
||||||
print(f"Warning: 无法从 SMILES {smi} 生成分子。")
|
|
||||||
continue
|
|
||||||
m3d = Chem.AddHs(mol)
|
|
||||||
AllChem.EmbedMolecule(m3d, randomSeed=10, useMacrocycleTorsions=True)
|
|
||||||
# MMFF生成3D构象 优化
|
|
||||||
if m3d.GetNumConformers() > 0:
|
|
||||||
AllChem.MMFFOptimizeMolecule(m3d)
|
|
||||||
mols.append(m3d)
|
|
||||||
else:
|
|
||||||
print(f"Warning: 分子 {smi} 未生成构象。")
|
|
||||||
|
|
||||||
|
for compound in data["Compound"]:
|
||||||
|
sdf_file = sdf_dir / f"{compound}.sdf"
|
||||||
|
if not sdf_file.exists():
|
||||||
|
print(f"Warning: 文件 {sdf_file} 不存在。")
|
||||||
|
continue
|
||||||
|
# 读取 SDF 文件(假设每个 SDF 文件只包含一个分子)
|
||||||
|
mol = Chem.MolFromMolFile(str(sdf_file))
|
||||||
|
if mol is None:
|
||||||
|
print(f"Warning: 无法从文件 {sdf_file} 读取分子。")
|
||||||
|
continue
|
||||||
|
mols.append(mol)
|
||||||
|
|
||||||
|
# 只返回包含至少一个构象的分子
|
||||||
aligned_results = [(mol, True) for mol in mols if mol.GetNumConformers() > 0]
|
aligned_results = [(mol, True) for mol in mols if mol.GetNumConformers() > 0]
|
||||||
return aligned_results
|
return aligned_results
|
||||||
|
|
||||||
@@ -336,4 +338,4 @@ def cli(data_smi, target):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli()
|
cli()
|
||||||
# python main.py --data-smi /root/project/qsar/1d-qsar/data_smi.csv --target MIC_LOG_ATCC25923
|
# python train_qsar_model.py --data-smi /root/project/qsar/1d-qsar/data_smi.csv --target MIC_LOG_ATCC25923
|
||||||
|
|||||||
Reference in New Issue
Block a user