use balloon 3d mol
This commit is contained in:
@@ -130,36 +130,38 @@ def select_2d_features(X_df, y, k=10):
|
||||
return X_selected, selected_features
|
||||
|
||||
# ------------------------ 3D-QSAR特征计算(从 CSV 中 SMILES 生成3D构象) ------------------------
|
||||
def generate_3d_mols_from_csv(csv_file, mmffVariant='MMFF94'):
|
||||
def generate_3d_mols_from_csv(csv_file):
|
||||
"""
|
||||
从 CSV 文件中读取 SMILES,生成分子的3D构象(添加氢原子、嵌入构象并进行MMFF能量最小化)。
|
||||
针对大环分子,启用了宏环扭转角优化并增大了嵌入尝试次数。
|
||||
从 CSV 文件中读取 Compound 字段,拼接 ".sdf" 后缀,
|
||||
在指定目录中加载已生成3D构象的分子文件。
|
||||
|
||||
mmffVariant:MMFF94或MMFF94S,默认为 MMFF94
|
||||
|
||||
返回值:分子列表,每个元素为 (mol, True)
|
||||
参数:
|
||||
csv_file: 包含 Compound 字段的 CSV 文件路径。
|
||||
|
||||
返回:
|
||||
分子列表,每个元素为 (mol, True)。
|
||||
"""
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem
|
||||
|
||||
|
||||
data = pd.read_csv(csv_file)
|
||||
mols = []
|
||||
# 距离几何+ETKDG生成3D构象
|
||||
for smi in data["SMILES"]:
|
||||
mol = Chem.MolFromSmiles(smi)
|
||||
if mol is None:
|
||||
print(f"Warning: 无法从 SMILES {smi} 生成分子。")
|
||||
sdf_dir = Path("/root/project/qsar/pycomsia/src/balloon_output")
|
||||
|
||||
for compound in data["Compound"]:
|
||||
sdf_file = sdf_dir / f"{compound}.sdf"
|
||||
if not sdf_file.exists():
|
||||
print(f"Warning: 文件 {sdf_file} 不存在。")
|
||||
continue
|
||||
m3d = Chem.AddHs(mol)
|
||||
AllChem.EmbedMolecule(m3d, randomSeed=10, useMacrocycleTorsions=True)
|
||||
# MMFF生成3D构象 优化
|
||||
if m3d.GetNumConformers() > 0:
|
||||
AllChem.MMFFOptimizeMolecule(m3d)
|
||||
mols.append(m3d)
|
||||
else:
|
||||
print(f"Warning: 分子 {smi} 未生成构象。")
|
||||
|
||||
# 读取 SDF 文件(假设每个 SDF 文件只包含一个分子)
|
||||
mol = Chem.MolFromMolFile(str(sdf_file))
|
||||
if mol is None:
|
||||
print(f"Warning: 无法从文件 {sdf_file} 读取分子。")
|
||||
continue
|
||||
mols.append(mol)
|
||||
|
||||
# 只返回包含至少一个构象的分子
|
||||
aligned_results = [(mol, True) for mol in mols if mol.GetNumConformers() > 0]
|
||||
return aligned_results
|
||||
|
||||
@@ -336,4 +338,4 @@ def cli(data_smi, target):
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
# python main.py --data-smi /root/project/qsar/1d-qsar/data_smi.csv --target MIC_LOG_ATCC25923
|
||||
# python train_qsar_model.py --data-smi /root/project/qsar/1d-qsar/data_smi.csv --target MIC_LOG_ATCC25923
|
||||
|
||||
Reference in New Issue
Block a user