add convert name property

This commit is contained in:
mm644706215
2025-08-28 20:22:47 +08:00
parent b2988ef77d
commit 9ebb98635e
2 changed files with 79 additions and 76 deletions

File diff suppressed because one or more lines are too long

View File

@@ -2,43 +2,45 @@
# -*- coding: utf-8 -*-
import os
import csv
from rdkit import Chem
from pathlib import Path
# NOTE(review): this span is a rendered diff hunk; removed and added lines
# appear together without +/- markers, so `sdf_file` is assigned twice below.
# Directory containing this script
current_dir = os.path.dirname(os.path.abspath(__file__))
# Input SDF file path
sdf_file = os.path.join(current_dir, 'fgbar_vina_SP_1_pv.sdf')
sdf_file = Path(current_dir) / "../data/fgbar_vina_SP_1_pv.sdf"
# Output SMILES file path
smiles_file = os.path.join(current_dir, 'molecules.txt')
# Output CSV file path
csv_file = Path(current_dir) / "../data/molecules.csv"
# NOTE(review): this span is a rendered diff hunk with indentation stripped;
# lines of `sdf_to_smiles` and `sdf_to_csv` are interleaved without +/- markers.
def sdf_to_smiles(sdf_path, output_path):
"""Read the molecules in an SDF file, convert them to SMILES, and save them to a text file."""
# Read the SDF file
def sdf_to_csv(sdf_path, output_path):
"""Read the molecules in an SDF file, convert them to SMILES and IDENTIFIER, and save them to a CSV file."""
suppl = Chem.SDMolSupplier(sdf_path)
# Count of valid molecules
valid_mol_count = 0
# Open the output file
with open(output_path, 'w') as f:
# Iterate over all molecules
for i, mol in enumerate(suppl):
if mol is not None: # Make sure the molecule is valid
# Get the SMILES
with open(output_path, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
# Write the header row
writer.writerow(["smiles", "name"])
for mol in suppl:
if mol is not None:
smiles = Chem.MolToSmiles(mol)
# Write to the file
f.write(f"{smiles}\n")
# Use the molecule's "_Name" property when it has one, otherwise an empty string
name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
writer.writerow([smiles, name])
valid_mol_count += 1
return valid_mol_count
# NOTE(review): this span is a rendered diff hunk with indentation stripped;
# the sdf_to_smiles and sdf_to_csv call/print lines are interleaved without +/- markers.
def main():
print(f"正在读取SDF文件: {sdf_file}")
mol_count = sdf_to_smiles(sdf_file, smiles_file)
# The input path is passed as a string via as_posix(); the output path is passed as-is
mol_count = sdf_to_csv(sdf_file.as_posix(), csv_file)
print(f"成功处理了 {mol_count} 个分子")
print(f"SMILES已保存到: {smiles_file}")
print(f"结果已保存到: {csv_file}")
# Run main() only when the file is executed as a script
if __name__ == "__main__":
main()