From 5c5838605c4f9bfe053105ff0010836cfcec03c8 Mon Sep 17 00:00:00 2001
From: hotwa
Date: Wed, 8 Oct 2025 19:59:25 +0800
Subject: [PATCH] Add check scripts for weight conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/bin_header_dump.py | 55 +++++++++++++++++++++++++++++++++++
 tests/peek_scales.py | 52 +++++++++++++++++++++++++++++++++
 tests/peek_scales_v2.py | 48 +++++++++++++++++++++++++++++++
 tests/smoke_metal.py | 16 +++++++++++
 tests/token_uuid_slot.py | 62 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 233 insertions(+)
 create mode 100644 tests/bin_header_dump.py
 create mode 100644 tests/peek_scales.py
 create mode 100644 tests/peek_scales_v2.py
 create mode 100644 tests/smoke_metal.py
 create mode 100644 tests/token_uuid_slot.py

diff --git a/tests/bin_header_dump.py b/tests/bin_header_dump.py
new file mode 100644
index 0000000..f066706
--- /dev/null
+++ b/tests/bin_header_dump.py
@@ -0,0 +1,55 @@
+# tests/bin_header_dump.py
+import struct, uuid, sys, tiktoken
+from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+
+# Model-header struct of the metal model.bin; layout assumed from the
+# gpt-oss metal converter (6 uint32, 1 float, 4 uint32, 6 floats) --
+# adjust if your converter writes a different header.
+FMT_MODEL = "<6If4I6f"
+
+SPECIAL = {
+    "<|startoftext|>": 199998,
+    "<|endoftext|>": 199999,
+    "<|untrusted|>": 200000,
+    "<|endofuntrusted|>": 200001,
+    "<|return|>": 200002,
+    "<|constrain|>": 200003,
+    "<|reserved200004|>": 200004,
+    "<|channel|>": 200005,
+    "<|start|>": 200006,
+    "<|end|>": 200007,
+    "<|message|>": 200008,
+    "<|reserved200009|>": 200009,
+    "<|reserved200010|>": 200010,
+    "<|reserved200011|>": 200011,
+    "<|call|>": 200012,
+    "<|refusal|>": 200013,
+}
+
+def table_start_offset(f):
+    f.seek(0)
+    f.read(16)                              # magic
+    f.read(16)                              # model uuid
+    f.read(struct.calcsize(FMT_MODEL))      # model header
+    f.read(16)                              # apple uuid
+    tok_uuid = uuid.UUID(bytes=f.read(16))  # tokenizer uuid
+    ns, nt, rs, ts = struct.unpack("<IIII", f.read(16))  # num specials, num text tokens, regex size, tokens size
+    f.read(rs)  # regex blob; the special-token uuid table is assumed to start right after it
+    return f.tell(), ns, nt
+
+def show(path, token):
+    with open(path, "rb") as f:
+        off, ns, nt = table_start_offset(f)
+        tid = SPECIAL[token]
+        slot = tid - nt  # index into the special-token uuid table
+        f.seek(off + slot * 16)
+        u = uuid.UUID(bytes=f.read(16))
+        print(f"{token} (id={tid}) -> slot={slot}, uuid={u}\n")
+
+if __name__ == "__main__":
+    show(sys.argv[1], sys.argv[2])
diff --git a/tests/peek_scales.py b/tests/peek_scales.py
new file mode 100644
index 0000000..b428967
--- /dev/null
+++ b/tests/peek_scales.py
@@ -0,0 +1,52 @@
+# peek_scales.py -- works with sharded safetensors too
+import json, os, sys
+from safetensors import safe_open
+
+def iter_keys(srcdir):
+    single = os.path.join(srcdir, "model.safetensors")
+    index = os.path.join(srcdir, "model.safetensors.index.json")
+    if os.path.exists(single):
+        with safe_open(single, framework="pt", device="cpu") as f:
+            yield f, list(f.keys())
+    else:
+        idx = json.load(open(index))
+        wm = idx["weight_map"]
+        opened = {}
+        from contextlib import ExitStack
+        with ExitStack() as stack:
+            for rel in set(wm.values()):
+                opened[rel] = stack.enter_context(safe_open(os.path.join(srcdir, rel), framework="pt", device="cpu"))
+            yield opened, list(wm.keys())
+
+def peek(srcdir):
+    gen = iter_keys(srcdir)  # keep the generator alive, or the ExitStack closes the shards before we read them
+    opened, keys = next(gen)
+    def get(k):
+        if isinstance(opened, dict):
+            # sharded mode: find the file holding this key; for simplicity just scan all shards
+            for f in opened.values():
+                if k in f.keys():
+                    return f.get_tensor(k)
+            raise KeyError(k)
+        else:
+            return opened.get_tensor(k)
+
+    import torch
+    mx = []
+    for n in range(0, 1000):  # rough scan of the naming over at most the first 1000 blocks
+        for which in ("mlp1_weight.scales", "mlp2_weight.scales"):
+            k = f"block.{n}.mlp.{which}"
+            try:
+                t = get(k)
+                mx.append(t.max().item())
+                print(k, "max=", float(mx[-1]))
+            except Exception:
+                pass
+    if mx:
+        m = max(mx)
+        print("\nGLOBAL MAX SCALE:", m, " (m + 14 =", m+14, ")")
+        if m >= 241:
+            print("⚠️ Warning: m+14 may overflow uint8! Write back as int16 with a clamp.")
+
+if __name__ == "__main__":
+    peek(sys.argv[1])
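Both peek scripts (this one and peek_scales_v2.py below) only warn about the overflow; the int16 + clamp write-back they refer to is not part of this patch. A minimal sketch of that path, assuming the converter adds a +14 bias to the scale exponents before storing them as uint8 (the function name is illustrative):

import torch

def pack_scales_u8(scales: torch.Tensor, bias: int = 14) -> torch.Tensor:
    # Widen to int16 first so adding the bias cannot wrap around,
    # then clamp into the representable uint8 range and narrow.
    s = scales.to(torch.int16) + bias
    return s.clamp(0, 255).to(torch.uint8)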
diff --git a/tests/peek_scales_v2.py b/tests/peek_scales_v2.py
new file mode 100644
index 0000000..ef500ea
--- /dev/null
+++ b/tests/peek_scales_v2.py
@@ -0,0 +1,48 @@
+# tests/peek_scales_v2.py
+import os, json, sys
+from safetensors import safe_open
+
+def open_any(srcdir):
+    single = os.path.join(srcdir, "model.safetensors")
+    index = os.path.join(srcdir, "model.safetensors.index.json")
+    if os.path.exists(single):
+        f = safe_open(single, framework="pt", device="cpu")
+        return [f], lambda k: f.get_tensor(k), list(f.keys())
+    wm = json.load(open(index))["weight_map"]
+    files = sorted(set(wm.values()))
+    opened = [safe_open(os.path.join(srcdir, fp), framework="pt", device="cpu") for fp in files]
+    keys = list(wm.keys())
+    def get(k):
+        for f in opened:
+            if k in f.keys(): return f.get_tensor(k)
+        raise KeyError(k)
+    return opened, get, keys
+
+def main(srcdir):
+    opened, get, keys = open_any(srcdir)
+    patterns = [
+        "block.{n}.mlp.mlp1_weight.scales",  # native openharmony-mlx naming
+        "block.{n}.mlp.mlp2_weight.scales",
+        "model.layers.{n}.mlp.experts.gate_up_proj_scales",  # Jinx naming
+        "model.layers.{n}.mlp.experts.down_proj_scales",
+    ]
+    import torch
+    mx = []
+    for n in range(0, 64):  # plenty for these models
+        for pat in patterns:
+            k = pat.format(n=n)
+            try:
+                t = get(k)
+            except Exception:
+                continue
+            v = float(t.max().item())
+            mx.append(v)
+            print(k, "max=", v)
+    if mx:
+        m = max(mx)
+        print("\nGLOBAL MAX SCALE:", m, " (m + 14 =", m+14, ")")
+        if m + 14 >= 256:
+            print("⚠️ uint8 overflow will occur; the int16 + clamp write path is required.")
+
+if __name__ == "__main__":
+    main(sys.argv[1])
diff --git a/tests/smoke_metal.py b/tests/smoke_metal.py
new file mode 100644
index 0000000..ecde37a
--- /dev/null
+++ b/tests/smoke_metal.py
@@ -0,0 +1,16 @@
+# smoke_metal.py -- generate 8 new tokens directly on the metal backend
+from gpt_oss.responses_api.inference.metal import setup_model
+from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+
+enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+
+infer_next_token = setup_model("/Volumes/long990max/project/openharmony-mlx/model.bin")  # change to your path
+ids = enc.encode("你好,给我一句话的回答:(英文)")[:128]  # input ids ("Hello, give me a one-sentence answer: (in English)")
+
+new = []
+for _ in range(8):
+    tid = infer_next_token(ids + new, temperature=0.7, new_request=False)
+    new.append(tid)
+
+print("new token ids:", new)
+print("decoded:", enc.decode(ids + new))
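Note that smoke_metal.py passes new_request=False on every step. If the flag resets the backend's cached context when true (an assumption here; check the metal backend's semantics), a standalone run would more likely want it set on the first iteration only:

new = []
for i in range(8):
    # assumption: new_request=True clears any context cached by a previous request
    tid = infer_next_token(ids + new, temperature=0.7, new_request=(i == 0))
    new.append(tid)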
diff --git a/tests/token_uuid_slot.py b/tests/token_uuid_slot.py
new file mode 100644
index 0000000..8523928
--- /dev/null
+++ b/tests/token_uuid_slot.py
@@ -0,0 +1,62 @@
+# token_uuid_slot.py
+import struct, uuid, sys, pathlib, tiktoken
+from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+
+# same assumed model-header layout as tests/bin_header_dump.py
+FMT_MODEL = "<6If4I6f"
+
+SPECIAL = {
+    "<|startoftext|>": 199998,
+    "<|endoftext|>": 199999,
+    "<|untrusted|>": 200000,
+    "<|endofuntrusted|>": 200001,
+    "<|return|>": 200002,
+    "<|constrain|>": 200003,
+    "<|reserved200004|>": 200004,
+    "<|channel|>": 200005,
+    "<|start|>": 200006,
+    "<|end|>": 200007,
+    "<|message|>": 200008,
+    "<|reserved200009|>": 200009,
+    "<|reserved200010|>": 200010,
+    "<|reserved200011|>": 200011,
+    "<|call|>": 200012,
+    "<|refusal|>": 200013,
+}
+
+def header_and_table_off(f):
+    f.read(16)                              # magic
+    f.read(16)                              # model uuid
+    f.read(struct.calcsize(FMT_MODEL))      # model header
+    f.read(16)                              # apple uuid
+    tok_uuid = uuid.UUID(bytes=f.read(16))  # tokenizer uuid
+    ns, nt, rs, ts = struct.unpack("<IIII", f.read(16))  # num specials, num text tokens, regex size, tokens size
+    f.read(rs)  # regex blob; the special-token uuid table is assumed to start right after it
+    return f.tell(), ns, nt
+
+def show(path, token):
+    with open(path, "rb") as f:
+        off, ns, nt = header_and_table_off(f)
+        tid = SPECIAL[token]
+        slot = tid - nt  # index into the special-token uuid table
+        f.seek(off + slot * 16)
+        u = uuid.UUID(bytes=f.read(16))
+        print(f"{token} (id={tid}) -> slot={slot}, uuid={u}\n")
+
+if __name__ == "__main__":
+    # usage: python token_uuid_slot.py /path/to/model.bin "<|channel|>"
+    show(sys.argv[1], sys.argv[2])
+
+# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|channel|>"
+# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|channel|>"
+
+# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|message|>"
+# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|message|>"
+
+# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|return|>"
+# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|return|>"
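Both header scripts import tiktoken, but the lookup is driven entirely by the hard-coded SPECIAL table. A quick cross-check of those ids against the tokenizer itself, as a sketch assuming the installed tiktoken ships the o200k_harmony encoding (swap in whichever encoding your checkpoint actually uses):

import tiktoken
from token_uuid_slot import SPECIAL  # the table defined above

enc = tiktoken.get_encoding("o200k_harmony")  # assumption: available in this tiktoken version
for tok, tid in SPECIAL.items():
    try:
        got = enc.encode_single_token(tok)
    except KeyError:
        print(f"{tok}: not present in the encoding")
        continue
    print(f"{tok}: table={tid}, encoding={got}", "OK" if got == tid else "MISMATCH")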