# SIME/utils/smiles_svg_show.py

import argparse
import json
import re
from datetime import datetime
from pathlib import Path
from rdkit import Chem
from rdkit.Chem.Draw import rdMolDraw2D
import boto3

# Object-storage configuration (can be edited at any time).
# The "{Your_...}" values below are placeholders.
BUCKET_NAME = "{Your_Bucket_Name}"
ACCESS_KEY = "{Your_Access_Key}"
SECRET_KEY = "{Your_Secret_Key}"
ENDPOINT_URL = "{Your_Endpoint_Url}"
# Prefix prepended to the SVG filename to form the uploaded object name.
S3_SVG_PREFIX = "svg_outputs/"

# Render the molecule as an SVG image with highlighted atoms
def mol_to_svg(mol, highlight_atoms=None, size=(400, 400)):
    """Draw ``mol`` to SVG text, marking the requested atoms.

    Args:
        mol: Molecule object handed to the RDKit SVG drawer.
        highlight_atoms: Optional collection of atom indices to highlight.
            ``None`` or an empty collection draws without highlights.
        size: Two-item sequence ``(width, height)`` used to create the drawer.

    Returns:
        The drawing text returned by the drawer after the drawing is finished.
    """
    canvas = rdMolDraw2D.MolDraw2DSVG(size[0], size[1])
    canvas.SetFontSize(6)
    # Show each atom's index next to it so highlighted positions can be read off.
    canvas.drawOptions().addAtomIndices = True

    selected = highlight_atoms or []
    # Every highlighted atom gets the same colour tuple, (1, 0, 0).
    colour_by_atom = {atom_idx: (1, 0, 0) for atom_idx in selected}

    canvas.DrawMolecule(
        mol,
        highlightAtoms=selected,
        highlightAtomColors=colour_by_atom,
    )
    canvas.FinishDrawing()
    return canvas.GetDrawingText()

# Upload to object storage (S3-compatible)
# Replaces the return value of the original upload_svg_to_s3
def upload_svg_to_s3(
    svg_content,
    object_name,
    public_base_url="https://pub-389f446a01134875b8c7ced0572758de.r2.dev",
):
    """Upload SVG content to the configured bucket and return its public URL.

    Args:
        svg_content: SVG text (or bytes) used as the object body.
        object_name: Object key inside ``BUCKET_NAME``.
        public_base_url: Base URL under which uploaded objects are publicly
            reachable. Defaults to the R2.dev public URL that was previously
            hard-coded, so existing callers get the same result; pass another
            value when the bucket is served from a different public domain.

    Returns:
        ``"<public_base_url>/<object_name>"`` as a string.
    """
    session = boto3.session.Session(
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )
    s3 = session.resource('s3', endpoint_url=ENDPOINT_URL)
    # Set the content type explicitly so the stored object is served as SVG.
    s3.Object(BUCKET_NAME, object_name).put(
        Body=svg_content,
        ContentType='image/svg+xml',
    )

    # Tolerate a trailing slash on the base URL to avoid producing "//".
    return f"{public_base_url.rstrip('/')}/{object_name}"

# Detect atom valence errors
def find_valence_error_atom(mol):
    """Sanitize ``mol`` and report which atom, if any, has a valence error.

    ``Chem.SanitizeMol`` is called on ``mol`` directly. Only
    ``Chem.AtomValenceException`` is handled here; any other exception
    propagates to the caller.

    Args:
        mol: Molecule object to sanitize.

    Returns:
        The atom index parsed from the exception message, or ``None`` when
        sanitization succeeds or the message contains no ``atom # <n>`` part.
    """
    try:
        Chem.SanitizeMol(mol)
    except Chem.AtomValenceException as exc:
        # The offending atom index is extracted from the exception text.
        found = re.search(r'atom # (\d+)', str(exc))
        return int(found.group(1)) if found else None
    else:
        return None

# Helpers for saving and loading JSON
def save_json(data, filename):
    """Write ``data`` to ``filename`` as UTF-8 JSON indented by two spaces.

    Non-ASCII characters are written as-is rather than escaped.

    Args:
        data: JSON-serializable object.
        filename: Destination path (string or ``Path``).
    """
    # Serialize before opening so a serialization failure leaves the file untouched.
    text = json.dumps(data, ensure_ascii=False, indent=2)
    with Path(filename).open('w', encoding='utf-8') as handle:
        handle.write(text)

def load_json(filename):
    """Read the UTF-8 JSON file at ``filename`` and return the parsed object.

    Args:
        filename: Source path (string or ``Path``).

    Returns:
        The Python object decoded from the file's JSON content.
    """
    with Path(filename).open(encoding='utf-8') as handle:
        return json.load(handle)

# Get detailed status information for an atom
def get_atom_status(mol, atom_idx):
    """Collect connectivity and charge details for one atom of ``mol``.

    The molecule's property cache is refreshed with ``strict=False`` before
    any values are read.

    Args:
        mol: Molecule object containing the atom.
        atom_idx: Index of the atom to describe.

    Returns:
        A dict with the atom's degree, formal charge, radical electron count,
        implicit and explicit hydrogen counts, and a ``connections_detail``
        list holding one entry (neighbour label and bond type) per bond.
    """
    target = mol.GetAtomWithIdx(atom_idx)
    mol.UpdatePropertyCache(strict=False)

    def describe(bond):
        # Label the atom at the other end of the bond as "#<index> (<symbol>)".
        other = bond.GetOtherAtomIdx(atom_idx)
        symbol = mol.GetAtomWithIdx(other).GetSymbol()
        return {
            "connected_to": f"#{other} ({symbol})",
            "bond_type": str(bond.GetBondType()),
        }

    details = [describe(bond) for bond in target.GetBonds()]

    status = {}
    status["explicit_connections"] = target.GetDegree()
    status["formal_charge"] = target.GetFormalCharge()
    status["radical_electrons"] = target.GetNumRadicalElectrons()
    status["implicit_hydrogens"] = target.GetNumImplicitHs()
    status["explicit_hydrogens"] = target.GetNumExplicitHs()
    status["connections_detail"] = details
    return status

# Main program
def main():
    """Command-line entry point.

    Parses a SMILES string, checks it for an atom valence error, renders an
    SVG with the requested atoms highlighted (by index and/or SMARTS match),
    stores the SVG locally or in object storage, and writes a JSON summary.

    Invalid input (unparseable SMILES or SMARTS, out-of-range ``--atom_idx``)
    is reported through ``parser.error``, which prints a usage message and
    exits, instead of failing later inside RDKit.
    """
    parser = argparse.ArgumentParser(description="Process SMILES and optionally highlight atoms using atom index or SMARTS pattern.")
    parser.add_argument('--smiles', type=str, required=True, help='SMILES string of molecule')
    parser.add_argument('--atom_idx', type=int, help='Atom index to highlight')
    parser.add_argument('--smarts', type=str, help='SMARTS pattern to highlight matched atoms')
    parser.add_argument('--output', type=str, default="output.json", help='Output JSON filename')
    parser.add_argument('--no_s3', action='store_true', help='Save SVG locally instead of S3')

    args = parser.parse_args()

    # Parse without sanitization so molecules with valence problems can still
    # be loaded; sanitization is attempted in find_valence_error_atom below.
    mol = Chem.MolFromSmiles(args.smiles, sanitize=False)
    if mol is None:
        parser.error(f"Could not parse SMILES: {args.smiles!r}")

    if args.atom_idx is not None and not 0 <= args.atom_idx < mol.GetNumAtoms():
        parser.error(
            f"--atom_idx {args.atom_idx} is out of range for a molecule "
            f"with {mol.GetNumAtoms()} atoms"
        )

    error_atom_idx = find_valence_error_atom(mol)
    atom_state_info = "OK" if error_atom_idx is None else f"Valence error at atom #{error_atom_idx}"

    highlight_atoms = set()

    if args.atom_idx is not None:
        highlight_atoms.add(args.atom_idx)

    if args.smarts:
        patt = Chem.MolFromSmarts(args.smarts)
        if patt is None:
            parser.error(f"Could not parse SMARTS: {args.smarts!r}")
        for match in mol.GetSubstructMatches(patt):
            highlight_atoms.update(match)

    # Sort so the highlight list is in a stable order from run to run.
    svg_str = mol_to_svg(mol, highlight_atoms=sorted(highlight_atoms))

    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    svg_filename = f"molecule_{timestamp}.svg"

    output_path = Path(args.output)
    if not output_path.is_absolute():
        output_path = Path.cwd() / output_path
    # Make sure the target directory exists before anything is written to it.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if args.no_s3:
        # Local mode: the SVG is written next to the JSON output file.
        svg_path = output_path.parent / svg_filename
        svg_path.write_text(svg_str, encoding='utf-8')
        svg_location = str(svg_path)
    else:
        object_name = f"{S3_SVG_PREFIX}{svg_filename}"
        svg_location = upload_svg_to_s3(svg_str, object_name)

    output_data = {
        "atom_state": atom_state_info,
        "svg_url": svg_location,
        "svg_filename": svg_filename
    }

    if args.atom_idx is not None:
        output_data["atom_status_detail"] = get_atom_status(mol, args.atom_idx)

    save_json(output_data, output_path)

    print(f"Results saved to {output_path}")

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()

r"""
# Automatically fix bond valence errors
python smiles_svg_show.py --smiles "O=C1C[C@@H](O)C[C@H](O[C@H]9C[C@@](C)(OC)[C@@H](O)[C@H](C)O9)[C@@H](C)C[C@@H](C)C(=O)/C=C/[C@@H](CC)=C/[C@H](O[C@@H]9O[C@H](C)C[C@@H]([C@H]9O)N(C)C)[N@@](C)O1" --atom_idx 30

python smiles_svg_show.py --smiles "CCC1=C\[C@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@@H](CC=O)OC(=O)C[C@@H](O)C[C@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@@H](C)C[C@@H](C)C(=O)\C=C\1" --atom_idx 30
# SMARTS matching; requires a valid SMILES
python smiles_svg_show.py --smiles "CCC1=C\[C@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@@H](CC=O)OC(=O)C[C@@H](O)C[C@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@@H](C)C[C@@H](C)C(=O)\C=C\1" --smarts "[r16]([#8][#6](=[#8]))"
"""