first add
This commit is contained in:
60
utils/test_smart.py
Executable file
60
utils/test_smart.py
Executable file
@@ -0,0 +1,60 @@
|
||||
from rdkit import Chem
|
||||
from joblib import Parallel, delayed
|
||||
import logging
|
||||
|
||||
# Configure logging: records go to a file, stamped with time and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    filename="rgroup_matching.log",
)

# SMARTS query: an atom whose smallest ring has 12 to 20 members, bonded to
# an oxygen that is bonded to a carbon carrying a double-bonded oxygen.
macro = Chem.MolFromSmarts("[r12,r13,r14,r15,r16,r17,r18,r19,r20]([#8][#6](=[#8]))")

# Read the SMI file, keeping each non-blank line with surrounding
# whitespace removed.
smi_file = "/home/mambauser/LillyMol/test/1M_stratsampled_V1B.smi"
with open(smi_file, "r") as f:
    SMILES_list = [entry for entry in map(str.strip, f) if entry]

logging.info(f"Loaded {len(SMILES_list)} molecules from {smi_file}.")
|
||||
|
||||
# Matching function
def match_smarts(smiles):
    """Match one SMILES string against the module-level ``macro`` query.

    Returns a ``(smiles, result)`` pair. ``result`` is ``None`` when the
    SMILES cannot be parsed into a molecule; otherwise it is whatever
    ``GetSubstructMatches`` returns for ``macro`` on the parsed molecule.
    """
    parsed = Chem.MolFromSmiles(smiles)
    # An unparsable SMILES is reported as None rather than raising.
    hits = None if parsed is None else parsed.GetSubstructMatches(macro)
    return smiles, hits
|
||||
|
||||
# Run the matching in parallel with joblib (n_jobs=-1).
logging.info("Starting SMARTS matching...")
pool = Parallel(n_jobs=-1)
results = pool(delayed(match_smarts)(s) for s in SMILES_list)

# Split the molecules into successes and failures. A falsy result (None for
# an unparsable SMILES, or no matches) counts as a failure.
success = []
fail = []
for smiles, result in results:
    (success if result else fail).append(smiles)

# Summary statistics; the rate is 0 when there were no molecules at all.
total = len(success) + len(fail)
if total > 0:
    success_rate = len(success) / total * 100
else:
    success_rate = 0

# Build the report once, then send it to the log and to stdout.
summary = (
    f"Total molecules: {total}",
    f"Success: {len(success)}",
    f"Fail: {len(fail)}",
    f"Success rate: {success_rate:.2f}%",
)
for report_line in summary:
    logging.info(report_line)

for report_line in summary:
    print(report_line)
|
||||
|
||||
# Write the failed molecules to an SMI file, one SMILES per line.
fail_smi_file = "fail_molecules.smi"
with open(fail_smi_file, "w") as ff:
    ff.writelines(smiles + "\n" for smiles in fail)

# Report completion both in the log and on stdout.
done_message = f"Failed molecules written to {fail_smi_file}."
logging.info(done_message)
print(done_message)
|
||||
Reference in New Issue
Block a user