#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
|
|
import csv
|
|
from rdkit import Chem
|
|
from pathlib import Path
|
|
|
|
|
|
# Absolute path of the directory that contains this script.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Input SDF file, addressed relative to the script directory.
sdf_file = Path(current_dir).joinpath("../data/fgbar_vina_SP_1_pv.sdf")

# Output CSV file, addressed relative to the script directory.
csv_file = Path(current_dir).joinpath("../data/molecules.csv")
|
|
|
|
|
|
def sdf_to_csv(sdf_path, output_path):
    """Convert the molecules of an SDF file into a two-column CSV file.

    The CSV starts with the header row ``smiles,name``. Every entry the
    RDKit supplier yields as a molecule is written as one row holding its
    SMILES string and its ``_Name`` property (an empty string when the
    property is absent). Entries the supplier yields as ``None`` are
    skipped and not counted.

    Args:
        sdf_path: Location of the SDF file to read, as a ``str`` or any
            ``os.PathLike`` object.
        output_path: Location of the CSV file to write (UTF-8). An existing
            file is overwritten.

    Returns:
        The number of molecule rows written (the header is not counted).

    Raises:
        FileNotFoundError: If ``sdf_path`` does not exist.
    """
    # Normalise path-like objects to a plain file name before handing it to
    # RDKit. NOTE(review): SDMolSupplier is documented as taking a string
    # file name -- confirm whether the installed RDKit also accepts Path.
    sdf_path = os.fspath(sdf_path)

    # Fail early with an explicit message instead of relying on the error
    # raised from inside the RDKit supplier. FileNotFoundError is an OSError
    # subclass, so callers that catch OSError are unaffected.
    if not os.path.exists(sdf_path):
        raise FileNotFoundError(f"SDF file not found: {sdf_path}")

    suppl = Chem.SDMolSupplier(sdf_path)
    valid_mol_count = 0

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Header row.
        writer.writerow(["smiles", "name"])

        for mol in suppl:
            if mol is None:
                # Nothing usable was produced for this entry; skip it.
                continue
            smiles = Chem.MolToSmiles(mol)
            name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
            writer.writerow([smiles, name])
            valid_mol_count += 1

    return valid_mol_count
|
|
|
|
def main():
    """Convert the configured SDF file to CSV and report progress on stdout.

    Reads the module-level ``sdf_file`` path, writes to the module-level
    ``csv_file`` path, and prints the number of molecules processed.
    """
    print(f"正在读取SDF文件: {sdf_file}")

    # The converter receives the input location as a POSIX-style string.
    input_as_text = sdf_file.as_posix()
    written = sdf_to_csv(input_as_text, csv_file)

    print(f"成功处理了 {written} 个分子")
    print(f"结果已保存到: {csv_file}")
|
|
|
|
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|