增加权重转化时候的检查脚本
This commit is contained in:
55
tests/bin_header_dump.py
Normal file
55
tests/bin_header_dump.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# tests/bin_header_dump.py
|
||||
import struct, uuid, sys, tiktoken
|
||||
from openai_harmony import load_harmony_encoding, HarmonyEncodingName
|
||||
|
||||
# struct format of the fixed model-hyperparameter section of model.bin
# (little-endian). NOTE(review): tests/token_uuid_slot.py uses the same
# format string with one FEWER trailing "f", so the two scripts compute
# different struct.calcsize() offsets — confirm which matches the writer.
FMT_MODEL = "<IIIIII f IIII f f f f f f"

# Special-token name -> token id in the o200k/harmony special range.
# NOTE(review): "reversed" in these names looks like a typo of "reserved".
# NOTE(review): both "<|message|>" and "<|reversed200008|>" map to 200008 —
# two names claim the same id; confirm against the tokenizer definition.
SPECIAL = {
    "<|reversed199998|>": 199998,
    "": 199999,
    "<|untrusted|>": 200000,
    "<|endofuntrusted|>": 200001,
    "<|return|>": 200002,
    "<|constrain|>": 200003,
    "<|reversed200004|>": 200004,
    "<|channel|>": 200005,
    "<|start|>": 200006,
    "<|end|>": 200007,
    "<|message|>": 200008,
    "<|reversed200008|>": 200008,
    "<|reversed200009|>": 200009,
    "<|reversed200010|>": 200010,
    "<|reversed200011|>": 200011,
    "<|call|>": 200012,
    "<|refusal|>": 200013,
}
|
||||
|
||||
def table_start_offset(f):
    """Parse the fixed model.bin header and locate the special-token UUID table.

    Returns (tokenizer_uuid, ns, nt, rs, ts, table_off) where table_off is
    the absolute file offset at which the per-token UUID table begins.
    """
    # Fixed-size prefix: magic (16) + model uuid (16) + packed model
    # hyper-parameters + apple uuid (16). Seek straight past it instead of
    # reading and discarding each field.
    f.seek(16 + 16 + struct.calcsize(FMT_MODEL) + 16)
    tok_uuid = uuid.UUID(bytes=f.read(16))
    # Four little-endian u32 counters stored immediately before the table.
    counters = struct.unpack("<IIII", f.read(16))
    return (tok_uuid, *counters, f.tell())
|
||||
|
||||
def show(path, token):
    """Print the UUID stored in *token*'s slot of the .bin special-token table.

    Resolves the token id via SPECIAL, derives its slot index relative to the
    number of "text" tokens, reads the 16-byte UUID at that slot from the
    model.bin at *path*, and prints it together with the header counters.
    """
    tok_id = SPECIAL[token]
    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    o200k = tiktoken.get_encoding("o200k_base")

    # Count "text" tokens with the same harmony predicate used when the
    # .bin was written, so the slot arithmetic stays consistent.
    text_ids = [t for t in range(o200k.n_vocab) if not enc.is_special_token(t)]
    num_text = len(text_ids)
    slot = tok_id - num_text

    with open(path, "rb") as f:
        tok_uuid, ns, nt, rs, ts, table_off = table_start_offset(f)
        # Each table entry is a 16-byte UUID.
        f.seek(table_off + 16 * slot)
        slot_uuid = uuid.UUID(bytes=f.read(16))

    print(
        f"{path}\n tokenizer_uuid: {tok_uuid}\n header_nt={nt}, header_ns={ns}\n"
        f" token={token} (id={tok_id}) -> slot={slot}, uuid={slot_uuid}\n"
    )
|
||||
|
||||
if __name__ == "__main__":
    # Usage: python tests/bin_header_dump.py <model.bin> "<|special_token|>"
    show(sys.argv[1], sys.argv[2])
|
||||
52
tests/peek_scales.py
Normal file
52
tests/peek_scales.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# peek_scales.py — supports sharded safetensors checkpoints
|
||||
import json, os, sys
|
||||
from safetensors import safe_open
|
||||
|
||||
def iter_keys(srcdir):
    """Yield (handle_or_handles, key_list) for a safetensors checkpoint dir.

    Single-file checkpoints yield (safe_open handle, keys); sharded
    checkpoints yield ({shard_rel_path: handle}, keys).

    WARNING(review): in both branches the yield happens inside a context
    manager, so the underlying file handles stay open only while this
    generator object is alive. A caller that discards the generator right
    after next() (e.g. ``next(iter_keys(d))``) lets it be finalized, which
    exits the with-block and closes every handle before any tensor is read.
    """
    single = os.path.join(srcdir, "model.safetensors")
    index = os.path.join(srcdir, "model.safetensors.index.json")
    if os.path.exists(single):
        with safe_open(single, framework="pt", device="cpu") as f:
            yield f, list(f.keys())
    else:
        # NOTE(review): the index file object is never closed.
        idx = json.load(open(index))
        wm = idx["weight_map"]
        opened = {}
        from contextlib import ExitStack
        with ExitStack() as stack:
            # Open every shard referenced by the weight map; the ExitStack
            # closes them when the generator is closed or finalized.
            for rel in set(wm.values()):
                opened[rel] = stack.enter_context(safe_open(os.path.join(srcdir, rel), framework="pt", device="cpu"))
            yield opened, list(wm.keys())
|
||||
|
||||
def peek(srcdir):
    """Print the max of every mlp{1,2}_weight.scales tensor under *srcdir*.

    Probes block indices 0..999, printing each per-tensor max, then reports
    the global max and warns when the biased scale (m + 14) could overflow
    uint8 when written back to the .bin file.
    """
    # BUGFIX: keep a reference to the generator. iter_keys() yields from
    # inside a with/ExitStack, so ``next(iter_keys(srcdir))`` would discard
    # the generator, let it be finalized, and close every shard handle
    # before any tensor is read. Holding ``gen`` keeps the handles open for
    # the rest of this function.
    gen = iter_keys(srcdir)
    opened, keys = next(gen)

    def get(k):
        if isinstance(opened, dict):
            # Sharded mode: find the shard holding this key by scanning
            # every open handle (simple linear search; fine for a script).
            for f in opened.values():
                if k in f.keys():
                    return f.get_tensor(k)
            raise KeyError(k)
        # Single-file mode: `opened` is one safe_open handle.
        return opened.get_tensor(k)

    # (removed an unused ``import torch``; safe_open(framework="pt") already
    # returns torch tensors without importing torch here)
    mx = []
    for n in range(0, 1000):  # generously probe naming up to 1000 blocks
        for which in ("mlp1_weight.scales", "mlp2_weight.scales"):
            k = f"block.{n}.mlp.{which}"
            try:
                t = get(k)
                mx.append(t.max().item())
                print(k, "max=", float(mx[-1]))
            except Exception:
                # Best-effort probing: absent keys are simply skipped.
                pass
    if mx:
        m = max(mx)
        print("\nGLOBAL MAX SCALE:", m, " (m + 14 =", m+14, ")")
        # 241 + 14 = 255, the largest uint8 value — conservative threshold
        # (peek_scales_v2.py only warns at m + 14 >= 256).
        if m >= 241:
            print("⚠️ 警告:m+14 可能溢出 uint8!请用 int16+clamp 写回。")
|
||||
|
||||
if __name__ == "__main__":
    # Usage: python tests/peek_scales.py <checkpoint_dir>
    peek(sys.argv[1])
|
||||
48
tests/peek_scales_v2.py
Normal file
48
tests/peek_scales_v2.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# tests/peek_scales_v2.py
|
||||
import os, json, sys
|
||||
from safetensors import safe_open
|
||||
|
||||
def open_any(srcdir):
    """Open a safetensors checkpoint dir, single-file or sharded.

    Returns (handles, get, keys): the list of open safe_open handles, a
    getter ``get(key) -> tensor`` (raises KeyError when absent), and the
    list of tensor names. Handles are intentionally left open for the
    lifetime of the script.
    """
    single = os.path.join(srcdir, "model.safetensors")
    index = os.path.join(srcdir, "model.safetensors.index.json")
    if os.path.exists(single):
        f = safe_open(single, framework="pt", device="cpu")
        return [f], lambda k: f.get_tensor(k), list(f.keys())
    # Sharded layout: the index maps tensor name -> shard file.
    # BUGFIX: close the index file promptly (json.load(open(index)) leaked
    # the file object).
    with open(index) as fh:
        wm = json.load(fh)["weight_map"]
    files = sorted(set(wm.values()))
    opened = [safe_open(os.path.join(srcdir, fp), framework="pt", device="cpu") for fp in files]
    keys = list(wm.keys())

    def get(k):
        # Linear scan across shards; fine for a diagnostic script.
        for f in opened:
            if k in f.keys():
                return f.get_tensor(k)
        raise KeyError(k)

    return opened, get, keys
|
||||
|
||||
def main(srcdir):
    """Scan known scale-tensor names and report the global max scale.

    Tries both the openharmony-mlx native names and the Jinx/HF-style
    names for layers 0..63, prints each per-tensor max, and warns when the
    biased scale (m + 14) would overflow uint8.
    """
    opened, get, keys = open_any(srcdir)
    patterns = [
        "block.{n}.mlp.mlp1_weight.scales",  # openharmony-mlx native naming
        "block.{n}.mlp.mlp2_weight.scales",
        "model.layers.{n}.mlp.experts.gate_up_proj_scales",  # Jinx naming
        "model.layers.{n}.mlp.experts.down_proj_scales",
    ]
    # (removed an unused ``import torch``; safe_open(framework="pt") already
    # returns torch tensors without importing torch here)
    mx = []
    for n in range(0, 64):  # enough layers for the known model sizes
        for pat in patterns:
            k = pat.format(n=n)
            try:
                t = get(k)
            except Exception:
                # Best-effort probing: names absent from this checkpoint
                # are simply skipped.
                continue
            v = float(t.max().item())
            mx.append(v)
            print(k, "max=", v)
    if mx:
        m = max(mx)
        print("\nGLOBAL MAX SCALE:", m, " (m + 14 =", m+14, ")")
        if m + 14 >= 256:  # 256 is the first value that no longer fits uint8
            print("⚠️ 会发生 uint8 溢出,必须用 int16+clamp 的写法。")
|
||||
|
||||
if __name__ == "__main__":
    # Usage: python tests/peek_scales_v2.py <checkpoint_dir>
    main(sys.argv[1])
|
||||
16
tests/smoke_metal.py
Normal file
16
tests/smoke_metal.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# smoke_metal.py — run 8 new tokens directly through the metal backend
from gpt_oss.responses_api.inference.metal import setup_model
from openai_harmony import load_harmony_encoding, HarmonyEncodingName

enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

# Load the converted model.bin (change the path to your own checkout).
infer_next_token = setup_model("/Volumes/long990max/project/openharmony-mlx/model.bin")
# Prompt token ids, capped at 128.
ids = enc.encode("你好,给我一句话的回答:(英文)")[:128]

# Sample 8 tokens autoregressively, feeding each new token back in.
new = []
for _ in range(8):
    tid = infer_next_token(ids + new, temperature=0.7, new_request=False)
    new.append(tid)

print("new token ids:", new)
print("decoded:", enc.decode(ids + new))
|
||||
62
tests/token_uuid_slot.py
Normal file
62
tests/token_uuid_slot.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# token_uuid_slot.py
|
||||
import struct, uuid, sys, pathlib, tiktoken
|
||||
from openai_harmony import load_harmony_encoding, HarmonyEncodingName
|
||||
|
||||
# struct format of the fixed model-hyperparameter section of model.bin
# (little-endian). NOTE(review): tests/bin_header_dump.py uses the same
# format string with one MORE trailing "f", so the two scripts compute
# different struct.calcsize() offsets — confirm which matches the writer.
FMT_MODEL = "<IIIIII f IIII f f f f f"

# Special-token name -> token id in the o200k/harmony special range.
# NOTE(review): "reversed" in these names looks like a typo of "reserved".
# NOTE(review): both "<|message|>" and "<|reversed200008|>" map to 200008 —
# two names claim the same id; confirm against the tokenizer definition.
SPECIAL = {
    "<|reversed199998|>": 199998,
    "": 199999,
    "<|untrusted|>": 200000,
    "<|endofuntrusted|>": 200001,
    "<|return|>": 200002,
    "<|constrain|>": 200003,
    "<|reversed200004|>": 200004,
    "<|channel|>": 200005,
    "<|start|>": 200006,
    "<|end|>": 200007,
    "<|message|>": 200008,
    "<|reversed200008|>": 200008,
    "<|reversed200009|>": 200009,
    "<|reversed200010|>": 200010,
    "<|reversed200011|>": 200011,
    "<|call|>": 200012,
    "<|refusal|>": 200013,
}
|
||||
|
||||
def header_and_table_off(f):
    """Parse the fixed model.bin header and locate the special-token UUID table.

    Returns (tokenizer_uuid, ns, nt, rs, ts, table_off) where table_off is
    the absolute file offset at which the per-token UUID table begins.
    """
    # FIX: rewind explicitly so the parse does not depend on the caller's
    # current file position (matches table_start_offset in
    # tests/bin_header_dump.py, which seeks to 0 first).
    f.seek(0)
    f.read(16)  # magic
    f.read(16)  # model uuid
    f.read(struct.calcsize(FMT_MODEL))  # packed model hyper-parameters
    f.read(16)  # apple uuid
    tok_uuid = uuid.UUID(bytes=f.read(16))
    # Four little-endian u32 counters; per the caller's printout, nt is
    # num_text and ns is num_special — NOTE(review): confirm the ordering
    # against the writer.
    ns, nt, rs, ts = struct.unpack("<IIII", f.read(16))
    table_off = f.tell()  # the UUID table starts right after the header
    return tok_uuid, ns, nt, rs, ts, table_off
|
||||
|
||||
def show(path, token):
    """Print the UUID stored in *token*'s slot of the .bin special-token table."""
    tok_id = SPECIAL[token]
    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    o200k = tiktoken.get_encoding("o200k_base")
    # Same text/special split as used when converting the weights.
    num_text = sum(1 for t in range(o200k.n_vocab) if not enc.is_special_token(t))
    # Slot index inside the UUID table = token id minus the text-token count.
    slot = tok_id - num_text
    with open(path, "rb") as f:
        tok_uuid, ns, nt, rs, ts, table_off = header_and_table_off(f)
        f.seek(table_off + 16*slot)  # each table entry is a 16-byte UUID
        u = uuid.UUID(bytes=f.read(16))
    print(f"{path}\n tokenizer_uuid: {tok_uuid}\n num_text={nt}, num_special={ns}")
    print(f" token={token} (id={tok_id}) -> slot={slot}, uuid={u}\n")
|
||||
|
||||
if __name__ == "__main__":
    # Usage: python token_uuid_slot.py <bin> "<|channel|>"
    show(sys.argv[1], sys.argv[2])
|
||||
|
||||
# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|channel|>"
|
||||
# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|channel|>"
|
||||
|
||||
# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|message|>"
|
||||
# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|message|>"
|
||||
|
||||
# python tests/token_uuid_slot.py /Volumes/long990max/gpustack_data/openai/gpt-oss-20b/metal/model.bin "<|return|>"
|
||||
# python tests/token_uuid_slot.py /Volumes/long990max/project/openharmony-mlx/model.bin "<|return|>"
|
||||
Reference in New Issue
Block a user