feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
This commit is contained in:
@@ -1,11 +1,56 @@
|
||||
from macro_lactone_toolkit import FragmentationResult
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem
|
||||
|
||||
|
||||
def main() -> None:
|
||||
raise SystemExit(
|
||||
"Legacy helper retired. Use 'macro-lactone-toolkit fragment' to export fragments, "
|
||||
"then generate SDF/statistics in downstream analysis code."
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the statistics/SDF export script.

    Returns:
        A parser with two required string options: ``--input`` (stored as
        ``input``) and ``--output-dir`` (stored as ``output_dir``).
    """
    parser = argparse.ArgumentParser(
        description="Generate cleavage statistics and ETKDGv3 parent-molecule SDF files from flat fragment CSV."
    )
    # The columns named in the help text are the ones main() reads.
    parser.add_argument(
        "--input",
        required=True,
        help="Flat fragment CSV with cleavage_position, parent_id and parent_smiles columns.",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="Directory for cleavage_position_statistics.json and the sdf/ subdirectory (created if missing).",
    )
    return parser
|
||||
|
||||
|
||||
def _write_cleavage_statistics(dataframe: pd.DataFrame, output_dir: Path) -> None:
    """Write ``cleavage_position_statistics.json`` for *dataframe* into *output_dir*.

    The JSON object contains the row count per integer cleavage position
    (sorted by position), the total number of rows, and the number of
    distinct ``parent_id`` values.
    """
    position_counts = Counter(int(position) for position in dataframe["cleavage_position"])
    stats = {
        "position_counts": dict(sorted(position_counts.items())),
        "total_fragments": int(len(dataframe)),
        "total_parent_molecules": int(dataframe["parent_id"].nunique()),
    }
    (output_dir / "cleavage_position_statistics.json").write_text(
        json.dumps(stats, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )


def _write_parent_sdf(parent_id: object, parent_smiles: object, sdf_dir: Path) -> str | None:
    """Embed one parent molecule in 3D and write it to ``<parent_id>_3d.sdf``.

    Returns:
        ``None`` when the file was written, otherwise a short reason why the
        parent was skipped.
    """
    # pandas reads an empty CSV cell as NaN (a float); handing that to RDKit
    # would raise and abort the whole run, so treat it as a skippable row.
    if not isinstance(parent_smiles, str) or not parent_smiles.strip():
        return "missing SMILES"
    mol = Chem.MolFromSmiles(parent_smiles)
    if mol is None:
        return "unparsable SMILES"
    mol = Chem.AddHs(mol)
    params = AllChem.ETKDGv3()
    if AllChem.EmbedMolecule(mol, params) != 0:
        return "3D embedding failed"
    AllChem.UFFOptimizeMolecule(mol)
    writer = Chem.SDWriter(str(sdf_dir / f"{parent_id}_3d.sdf"))
    try:
        writer.write(mol)
    finally:
        # Close the writer even if write() raises so the file handle is released.
        writer.close()
    return None


def main(argv: list[str] | None = None) -> None:
    """Export cleavage statistics and one 3D SDF file per parent molecule.

    Reads the CSV given by ``--input``, creates ``--output-dir`` and its
    ``sdf`` subdirectory, writes ``cleavage_position_statistics.json``, then
    writes ``sdf/<parent_id>_3d.sdf`` for every distinct
    ``(parent_id, parent_smiles)`` pair. Parents that cannot be processed are
    skipped and reported on stderr.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.
    """
    import sys  # local import: only needed for the skip warnings below

    args = build_parser().parse_args(argv)
    dataframe = pd.read_csv(args.input)
    output_dir = Path(args.output_dir)
    sdf_dir = output_dir / "sdf"
    output_dir.mkdir(parents=True, exist_ok=True)
    sdf_dir.mkdir(parents=True, exist_ok=True)

    _write_cleavage_statistics(dataframe, output_dir)

    parent_rows = dataframe[["parent_id", "parent_smiles"]].drop_duplicates()
    for parent in parent_rows.itertuples(index=False):
        reason = _write_parent_sdf(parent.parent_id, parent.parent_smiles, sdf_dir)
        if reason is not None:
            # Best-effort export: keep going, but make the gap visible.
            print(f"warning: skipped parent {parent.parent_id!r}: {reason}", file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()
|
||||
|
||||
Reference in New Issue
Block a user