#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert the molecules of an SDF file into a CSV of SMILES strings and names."""

import csv
import os
from pathlib import Path

from rdkit import Chem

# Directory that contains this script; the data paths below are relative to it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Input SDF file.
sdf_file = Path(current_dir) / "../data/fgbar_vina_SP_1_pv.sdf"

# Output CSV file.
csv_file = Path(current_dir) / "../data/molecules.csv"


def sdf_to_csv(sdf_path, output_path) -> int:
    """Write one CSV row (SMILES, name) per molecule read from an SDF file.

    The CSV starts with the header row ``smiles,name``. Entries that the
    supplier yields as ``None`` are skipped and are not counted.

    Args:
        sdf_path: Location of the input SDF file, as a ``str`` or any
            ``os.PathLike`` object (for example ``pathlib.Path``).
        output_path: Location of the CSV file to create or overwrite; it is
            passed unchanged to ``open``.

    Returns:
        The number of molecules written to the CSV file.
    """
    # Normalise to a plain string so callers may hand in a Path directly
    # instead of converting it themselves before the call.
    suppl = Chem.SDMolSupplier(os.fspath(sdf_path))

    valid_mol_count = 0
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        # Header row.
        writer.writerow(["smiles", "name"])

        for mol in suppl:
            if mol is None:
                continue
            smiles = Chem.MolToSmiles(mol)
            # "_Name" is optional; fall back to an empty name when it is absent.
            name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
            writer.writerow([smiles, name])
            valid_mol_count += 1

    return valid_mol_count


def main() -> None:
    """Convert the configured SDF file to CSV and print a short summary."""
    print(f"正在读取SDF文件: {sdf_file}")
    mol_count = sdf_to_csv(sdf_file, csv_file)
    print(f"成功处理了 {mol_count} 个分子")
    print(f"结果已保存到: {csv_file}")


if __name__ == "__main__":
    main()