feat(validation): enforce single-anchor fragments
- skip fused/shared/multi-anchor side systems during extraction
- add fragment library schema and fragment_library.csv export
- make scaffold prep strict for non-spliceable positions
This commit is contained in:
13
README.md
13
README.md
@@ -96,6 +96,7 @@ pixi run python scripts/validate_macrolactone_db.py \
|
||||
validation_output/
|
||||
├── README.md # 目录说明
|
||||
├── fragments.db # SQLite 数据库
|
||||
├── fragment_library.csv # 最终片段库导出(含 has_dummy_atom / splice_ready)
|
||||
├── summary.csv # 汇总表(含 ml_id, chembl_id)
|
||||
├── summary_statistics.json # 统计信息
|
||||
├── ring_size_12/ # 按环大小组织
|
||||
@@ -124,11 +125,15 @@ sqlite3 validation_output/fragments.db \
|
||||
FROM parent_molecules \
|
||||
WHERE classification='standard_macrolactone' LIMIT 5;"
|
||||
|
||||
# 查询最终片段库
|
||||
sqlite3 validation_output/fragments.db \
|
||||
"SELECT source_type, source_parent_ml_id, cleavage_position, has_dummy_atom, splice_ready \
|
||||
FROM fragment_library_entries LIMIT 10;"
|
||||
|
||||
# 查询片段
|
||||
sqlite3 validation_output/fragments.db \
|
||||
"SELECT fragment_id, cleavage_position, dummy_isotope \
|
||||
FROM side_chain_fragments \
|
||||
WHERE ml_id='ML00000001';"
|
||||
"SELECT fragment_id, cleavage_position, dummy_isotope, has_dummy_atom, dummy_atom_count \
|
||||
FROM side_chain_fragments LIMIT 10;"
|
||||
|
||||
# 按环大小统计
|
||||
sqlite3 validation_output/fragments.db \
|
||||
@@ -144,6 +149,8 @@ sqlite3 validation_output/fragments.db \
|
||||
| `classification` | standard_macrolactone / non_standard_macrocycle / not_macrolactone |
|
||||
| `dummy_isotope` | 裂解位置编号,用于片段重建 |
|
||||
| `cleavage_position` | 环上的断裂位置 |
|
||||
| `has_dummy_atom` | 该片段是否带 dummy 原子,可用于区分可直接拼接片段 |
|
||||
| `splice_ready` | 是否与当前单锚点拼接流程直接兼容 |
|
||||
|
||||
## Legacy Scripts
|
||||
|
||||
|
||||
Reference in New Issue
Block a user