""" ------------------------------------------------------------- 批量2D SDF文件 → 3D SDF文件 并行转换脚本 (joblib版) ------------------------------------------------------------- 功能: - 从指定目录读取2D SDF文件(每个分子一个SDF文件),并行批量生成3D SDF文件,保留原有属性 - 支持失败分子详细记录和统计 - 源文件与目标3D SDF目录分离,防止覆盖 依赖安装: conda install -y -c conda-forge rdkit joblib 用法示例: python sdf2to3d.py --src_dir ./2d_sdf_dir --out_dir ./3d_sdf_dir --n_jobs 8 作者:chatgpt ------------------------------------------------------------- """ from pathlib import Path from rdkit import Chem from rdkit.Chem import AllChem from joblib import Parallel, delayed import argparse def convert_2d_to_3d_sdf(sdf_path, out_dir, max_attempts=10): identifier = sdf_path.stem out_sdf = out_dir / f"{identifier}.sdf" try: suppl = Chem.SDMolSupplier(str(sdf_path), sanitize=True) mols = [mol for mol in suppl if mol is not None] if not mols: return identifier, False, "SDF读取失败" mol = mols[0] mol = Chem.AddHs(mol) params = AllChem.ETKDGv3() last_error = "" for attempt in range(max_attempts): try: status = AllChem.EmbedMolecule(mol, params) if status == 0: AllChem.UFFOptimizeMolecule(mol) writer = Chem.SDWriter(str(out_sdf)) writer.write(mol) writer.close() return identifier, True, f"成功(第{attempt+1}次)" else: last_error = f"Embed失败: status={status}" except Exception as e: last_error = f"3D生成异常: {e}" return identifier, False, last_error if last_error else f"3D构象生成失败(已重试{max_attempts}次)" except Exception as e: return identifier, False, f"异常: {e}" def main(): parser = argparse.ArgumentParser() parser.add_argument('--src_dir', type=str, required=True, help='2D SDF文件夹路径') parser.add_argument('--out_dir', type=str, required=True, help='3D SDF输出文件夹路径') parser.add_argument('--n_jobs', type=int, default=4, help='并行进程数') parser.add_argument('--max_attempts', type=int, default=10, help='最大Embed尝试次数') args = parser.parse_args() src_dir = Path(args.src_dir) out_dir = Path(args.out_dir) out_dir.mkdir(parents=True, exist_ok=True) sdf_files = list(src_dir.glob("*.sdf")) print(f"共检测到2D SDF文件 {len(sdf_files)} 个,开始并行3D生成...") results = Parallel(n_jobs=args.n_jobs)( delayed(convert_2d_to_3d_sdf)(sdf_file, out_dir, args.max_attempts) for sdf_file in sdf_files ) # 分类统计 success = [r for r in results if r[1]] failed = [r for r in results if not r[1]] print("="*60) print(f"总2D SDF: {len(sdf_files)}") print(f"成功生成3D SDF: {len(success)}") print(f"失败: {len(failed)}") if failed: print("失败分子列表:") for ident, _, msg in failed: print(f" {ident}: {msg}") # 保存失败到文件 failfile = out_dir / "failed_2dto3d.txt" with open(failfile, "w", encoding="utf-8") as f: for ident, _, msg in failed: f.write(f"{ident}\t{msg}\n") print(f"失败分子已保存到: {failfile.resolve()}") print("="*60) if __name__ == '__main__': main()