增加权重转化时候的检查脚本

This commit is contained in:
2025-10-08 19:59:25 +08:00
parent 9974fc7a00
commit 5c5838605c
5 changed files with 233 additions and 0 deletions

55
tests/bin_header_dump.py Normal file
View File

@@ -0,0 +1,55 @@
# tests/bin_header_dump.py — dump the UUID stored for one special token in a
# metal model.bin header.  (The original first-line comment said
# "tests/token_uuid_slot.py", which is the sibling script's name.)
import struct, uuid, sys, tiktoken
from openai_harmony import load_harmony_encoding, HarmonyEncodingName
# struct format of the model-config section of the bin header.
# NOTE(review): the sibling script tests/token_uuid_slot.py declares the same
# constant with one FEWER trailing "f" — the two scripts compute different
# header sizes; confirm which one matches the bin writer.
FMT_MODEL = "<IIIIII f IIII f f f f f f"
# Special-token literal -> o200k_harmony token id (used for CLI lookup only).
# NOTE(review): "reversed" is presumably a typo for "reserved", and
# "<|reversed200008|>" maps to 200008, the same id as "<|message|>" —
# looks like a copy/paste slip; verify against the harmony tokenizer.
SPECIAL = {
    "<|reversed199998|>": 199998,
    "": 199999,
    "<|untrusted|>": 200000,
    "<|endofuntrusted|>": 200001,
    "<|return|>": 200002,
    "<|constrain|>": 200003,
    "<|reversed200004|>": 200004,
    "<|channel|>": 200005,
    "<|start|>": 200006,
    "<|end|>": 200007,
    "<|message|>": 200008,
    "<|reversed200008|>": 200008,
    "<|reversed200009|>": 200009,
    "<|reversed200010|>": 200010,
    "<|reversed200011|>": 200011,
    "<|call|>": 200012,
    "<|refusal|>": 200013,
}
def table_start_offset(f):
    """Skip the fixed model.bin header and return tokenizer metadata.

    Returns a 6-tuple ``(tokenizer_uuid, ns, nt, rs, ts, table_off)`` where
    ``table_off`` is the file offset of the special-token UUID table that
    immediately follows the header.
    NOTE(review): callers print ns/nt as num_special/num_text — confirm that
    field order against the bin writer.
    """
    # Fixed-size prefix: magic (16) + model uuid (16) + model config + apple uuid (16).
    prefix = 16 + 16 + struct.calcsize(FMT_MODEL) + 16
    f.seek(prefix)
    tokenizer_uuid = uuid.UUID(bytes=f.read(16))
    counts = struct.unpack("<IIII", f.read(16))
    return (tokenizer_uuid,) + counts + (f.tell(),)
def show(path, token):
    """Print the UUID recorded for *token* in the bin's special-token table.

    Args:
        path: path to a metal model.bin.
        token: special-token literal, e.g. "<|channel|>" (a SPECIAL key).
    """
    token_id = SPECIAL[token]
    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    o200k = tiktoken.get_encoding("o200k_base")
    # Count "text" tokens with harmony's own is_special_token() so the
    # text/special split matches the one used when the bin was written.
    num_text = 0
    for t in range(o200k.n_vocab):
        if not enc.is_special_token(t):
            num_text += 1
    # Slot index inside the 16-byte-per-entry UUID table.
    slot = token_id - num_text
    with open(path, "rb") as f:
        tok_uuid, ns, nt, rs, ts, table_off = table_start_offset(f)
        f.seek(table_off + 16 * slot)
        entry_uuid = uuid.UUID(bytes=f.read(16))
    print(f"{path}\n tokenizer_uuid: {tok_uuid}\n header_nt={nt}, header_ns={ns}\n"
          f" token={token} (id={token_id}) -> slot={slot}, uuid={entry_uuid}\n")
if __name__ == "__main__":
    show(sys.argv[1], sys.argv[2])

52
tests/peek_scales.py Normal file
View File

@@ -0,0 +1,52 @@
# peek_scales.py —— 兼容分片 safetensors
import json, os, sys
from safetensors import safe_open
def iter_keys(srcdir):
    """Yield one (handle_or_handles, keys) pair for a safetensors checkpoint.

    Single-file checkpoint: yields ``(safe_open_handle, keys)``.
    Sharded checkpoint: yields ``({shard_filename: handle}, all index keys)``.

    BUG FIX: the original opened the handles inside ``with`` blocks
    (single-file case) / an ``ExitStack`` (sharded case) and yielded from
    within them.  The caller does ``next(iter_keys(...))`` and immediately
    drops the generator, so CPython finalizes it, the ``with`` blocks unwind,
    and every handle is closed before any tensor is read.  The handles are
    now opened without context managers; for this one-shot diagnostic script
    they are released at process exit.  The index file is read via ``with``
    instead of a leaked ``open()`` handle.
    """
    single = os.path.join(srcdir, "model.safetensors")
    index = os.path.join(srcdir, "model.safetensors.index.json")
    if os.path.exists(single):
        f = safe_open(single, framework="pt", device="cpu")
        yield f, list(f.keys())
    else:
        with open(index) as fh:
            wm = json.load(fh)["weight_map"]
        opened = {
            rel: safe_open(os.path.join(srcdir, rel), framework="pt", device="cpu")
            for rel in set(wm.values())
        }
        yield opened, list(wm.keys())
def peek(srcdir):
    """Print the max of each MXFP4 scale tensor and the global max.

    The converter writes ``scale + 14`` back as uint8, so a global max scale
    of 241 or more risks overflow — hence the warning threshold below.
    NOTE(review): the v2 script warns at ``m + 14 >= 256`` while this one
    fires one step earlier (m == 241 gives 255, which still fits) — confirm
    the intended margin.

    Args:
        srcdir: checkpoint directory (single or sharded safetensors).

    Fixes vs. original: removed the unused ``import torch``; the blanket
    ``except Exception: pass`` is narrowed to ``KeyError`` (the only
    exception ``get`` raises for an absent key), so real I/O errors surface
    instead of being silently swallowed.
    """
    opened, keys = next(iter_keys(srcdir))

    def get(k):
        if isinstance(opened, dict):
            # Sharded mode: scan every shard handle for the key.
            for f in opened.values():
                if k in f.keys():
                    return f.get_tensor(k)
            raise KeyError(k)
        # Single-file mode: one handle.
        return opened.get_tensor(k)

    mx = []
    for n in range(0, 1000):  # rough scan over the first 1000 block indices
        for which in ("mlp1_weight.scales", "mlp2_weight.scales"):
            k = f"block.{n}.mlp.{which}"
            try:
                t = get(k)
            except KeyError:
                # Absent key just means this block/tensor doesn't exist.
                continue
            mx.append(t.max().item())
            print(k, "max=", float(mx[-1]))
    if mx:
        m = max(mx)
        print("\nGLOBAL MAX SCALE:", m, " (m + 14 =", m+14, ")")
        if m >= 241:
            print("⚠️ 警告m+14 可能溢出 uint8请用 int16+clamp 写回。")
if __name__ == "__main__":
    peek(sys.argv[1])

48
tests/peek_scales_v2.py Normal file
View File

@@ -0,0 +1,48 @@
# tests/peek_scales_v2.py
import os, json, sys
from safetensors import safe_open
def open_any(srcdir):
    """Open a single-file or sharded safetensors checkpoint.

    Returns ``(handles, get, keys)``: the open safe_open handles (kept
    referenced so they stay alive), a ``get(key) -> tensor`` lookup that
    raises KeyError for unknown keys, and the list of all tensor keys.

    Fix vs. original: the index json is read inside a ``with`` block instead
    of the leaked ``json.load(open(index))`` file handle.
    """
    single = os.path.join(srcdir, "model.safetensors")
    index = os.path.join(srcdir, "model.safetensors.index.json")
    if os.path.exists(single):
        f = safe_open(single, framework="pt", device="cpu")
        return [f], lambda k: f.get_tensor(k), list(f.keys())
    with open(index) as fh:
        wm = json.load(fh)["weight_map"]
    files = sorted(set(wm.values()))
    opened = [safe_open(os.path.join(srcdir, fp), framework="pt", device="cpu") for fp in files]
    keys = list(wm.keys())
    def get(k):
        # Linear scan over shards — fine for a diagnostic script.
        for f in opened:
            if k in f.keys():
                return f.get_tensor(k)
        raise KeyError(k)
    return opened, get, keys
def main(srcdir):
    """Report the max of every known scale tensor and the global max.

    Probes both openharmony-mlx native names and Jinx-style names, and warns
    when ``m + 14`` would overflow a uint8 in the bin converter.

    Fixes vs. original: removed the unused ``import torch``; the broad
    ``except Exception: continue`` is narrowed to ``KeyError`` (the only
    exception ``get`` raises for an absent key), so genuine read errors are
    no longer hidden.
    """
    opened, get, keys = open_any(srcdir)
    patterns = [
        "block.{n}.mlp.mlp1_weight.scales",                  # openharmony-mlx native
        "block.{n}.mlp.mlp2_weight.scales",
        "model.layers.{n}.mlp.experts.gate_up_proj_scales",  # Jinx naming
        "model.layers.{n}.mlp.experts.down_proj_scales",
    ]
    mx = []
    for n in range(0, 64):  # enough layers for these models
        for pat in patterns:
            k = pat.format(n=n)
            try:
                t = get(k)
            except KeyError:
                # This pattern/layer combination doesn't exist here.
                continue
            # float() matters if the tensor is integer-typed.
            v = float(t.max().item())
            mx.append(v)
            print(k, "max=", v)
    if mx:
        m = max(mx)
        print("\nGLOBAL MAX SCALE:", m, " (m + 14 =", m+14, ")")
        if m + 14 >= 256:
            print("⚠️ 会发生 uint8 溢出,必须用 int16+clamp 的写法。")
if __name__ == "__main__":
    main(sys.argv[1])

16
tests/smoke_metal.py Normal file
View File

@@ -0,0 +1,16 @@
# smoke_metal.py — smoke test: generate 8 new tokens directly through the
# metal backend, then decode and print them.
from gpt_oss.responses_api.inference.metal import setup_model
from openai_harmony import load_harmony_encoding, HarmonyEncodingName
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
infer_next_token = setup_model("/Volumes/long990max/project/openharmony-mlx/model.bin")  # change to your model path
ids = enc.encode("你好,给我一句话的回答:(英文)")[:128]  # prompt token ids, capped at 128
new = []
for _ in range(8):
    # NOTE(review): new_request=False even on the very first call — confirm
    # the metal backend does not need new_request=True to reset its state/KV
    # cache for a fresh prompt.
    tid = infer_next_token(ids + new, temperature=0.7, new_request=False)
    new.append(tid)
print("new token ids:", new)
print("decoded:", enc.decode(ids + new))

62
tests/token_uuid_slot.py Normal file
View File

@@ -0,0 +1,62 @@
# token_uuid_slot.py — look up the UUID stored for one special token in a
# metal model.bin's special-token table.
import struct, uuid, sys, pathlib, tiktoken  # NOTE(review): pathlib is unused
from openai_harmony import load_harmony_encoding, HarmonyEncodingName
# struct format of the model-config section of the bin header.
# NOTE(review): tests/bin_header_dump.py declares the same constant with one
# MORE trailing "f" — the two scripts compute different header sizes;
# confirm which one matches the bin writer.
FMT_MODEL = "<IIIIII f IIII f f f f f"
# Special-token literal -> o200k_harmony token id (used for CLI lookup only).
# NOTE(review): "reversed" is presumably a typo for "reserved", and
# "<|reversed200008|>" maps to 200008, the same id as "<|message|>" —
# looks like a copy/paste slip; verify against the harmony tokenizer.
SPECIAL = {
    "<|reversed199998|>": 199998,
    "": 199999,
    "<|untrusted|>": 200000,
    "<|endofuntrusted|>": 200001,
    "<|return|>": 200002,
    "<|constrain|>": 200003,
    "<|reversed200004|>": 200004,
    "<|channel|>": 200005,
    "<|start|>": 200006,
    "<|end|>": 200007,
    "<|message|>": 200008,
    "<|reversed200008|>": 200008,
    "<|reversed200009|>": 200009,
    "<|reversed200010|>": 200010,
    "<|reversed200011|>": 200011,
    "<|call|>": 200012,
    "<|refusal|>": 200013,
}
def header_and_table_off(f):
    """Skip the model.bin header; return tokenizer metadata and table offset.

    Returns ``(tok_uuid, ns, nt, rs, ts, table_off)``; the caller prints
    ns/nt as num_special/num_text.

    Fix: rewind to offset 0 first (the sibling tests/bin_header_dump.py
    already did this), so the result no longer depends on the caller's
    current file position.
    """
    f.seek(0)
    f.read(16)  # magic
    f.read(16)  # model uuid
    f.read(struct.calcsize(FMT_MODEL))  # model config struct
    f.read(16)  # apple uuid
    tok_uuid = uuid.UUID(bytes=f.read(16))
    ns, nt, rs, ts = struct.unpack("<IIII", f.read(16))
    table_off = f.tell()  # start of the special-token UUID table
    return tok_uuid, ns, nt, rs, ts, table_off
def show(path, token):
    """Print header counts and the stored UUID for *token* in a model.bin.

    Args:
        path: path to a metal model.bin.
        token: special-token literal, e.g. "<|channel|>" (a SPECIAL key).
    """
    token_id = SPECIAL[token]
    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    o200k = tiktoken.get_encoding("o200k_base")
    # Same text/special partition as the weight-conversion script.
    num_text = sum(1 for t in range(o200k.n_vocab) if not enc.is_special_token(t))
    slot = token_id - num_text  # index into the 16-byte-per-entry UUID table
    with open(path, "rb") as fh:
        tok_uuid, ns, nt, rs, ts, table_off = header_and_table_off(fh)
        fh.seek(table_off + 16 * slot)
        entry = uuid.UUID(bytes=fh.read(16))
        print(f"{path}\n tokenizer_uuid: {tok_uuid}\n num_text={nt}, num_special={ns}")
        print(f" token={token} (id={token_id}) -> slot={slot}, uuid={entry}\n")
if __name__ == "__main__":
    # Usage: python token_uuid_slot.py <bin> "<|channel|>"
    show(sys.argv[1], sys.argv[2])
# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|channel|>"
# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|channel|>"
# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|message|>"
# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|message|>"
# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|return|>"
# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|return|>"