Files
admet-ai/scripts/sdf_to_smiles.py
2025-08-28 20:22:47 +08:00

47 lines
1.2 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import csv
from rdkit import Chem
from pathlib import Path
# Absolute path of the directory that contains this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
# Input SDF file, located in the sibling "data" directory of the script folder.
sdf_file = Path(current_dir, "..", "data", "fgbar_vina_SP_1_pv.sdf")
# Output CSV file, written next to the input SDF file.
csv_file = Path(current_dir, "..", "data", "molecules.csv")
def sdf_to_csv(sdf_path, output_path):
    """Convert the molecules of an SDF file into a two-column CSV file.

    A header row ``smiles,name`` is written first. Every record the RDKit
    supplier yields as a molecule is then written as one row holding its
    SMILES string and its ``_Name`` property (an empty string when the
    property is absent). Records the supplier yields as ``None`` are skipped.

    Args:
        sdf_path: Location of the input SDF file, as ``str`` or ``os.PathLike``.
        output_path: Location of the CSV file to create or overwrite.

    Returns:
        The number of molecules written to the CSV file.
    """
    # Normalise path-like inputs to a plain string before handing them to
    # RDKit, so callers may pass either ``str`` or ``pathlib.Path``.
    suppl = Chem.SDMolSupplier(os.fspath(sdf_path))
    valid_mol_count = 0
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Header row.
        writer.writerow(["smiles", "name"])
        for mol in suppl:
            if mol is None:
                # Nothing to write for this record; it is not counted.
                continue
            name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
            writer.writerow([Chem.MolToSmiles(mol), name])
            valid_mol_count += 1
    return valid_mol_count
def main():
    """Convert the configured SDF file to CSV and report progress on stdout."""
    print(f"正在读取SDF文件: {sdf_file}")
    # The input path is handed over as a POSIX-style string.
    sdf_path_text = sdf_file.as_posix()
    mol_count = sdf_to_csv(sdf_path_text, csv_file)
    # One call, two lines of output: the count, then the output location.
    print(
        f"成功处理了 {mol_count} 个分子",
        f"结果已保存到: {csv_file}",
        sep="\n",
    )


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()