Initial commit

Co-authored-by: Zhuohan Li <zhuohan@openai.com>
Co-authored-by: Maratyszcza <marat@openai.com>
Co-authored-by: Volodymyr Kyrylov <vol@wilab.org.ua>
This commit is contained in:
Dominik Kundel
2025-08-05 08:19:49 -07:00
commit 243a1b0276
124 changed files with 20405 additions and 0 deletions

View File

@@ -0,0 +1,177 @@
# Top-level project definition: C, C++ and Objective-C sources are built.
cmake_minimum_required(VERSION 3.24)

project(
  GPTOSS
  VERSION 1.0
  DESCRIPTION "Local GPT-OSS inference"
  LANGUAGES C CXX OBJC
)

# Default language standards for every target created in this file.
# Only the Objective-C standard is marked as a hard requirement here.
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_OBJC_STANDARD 11)
set(CMAKE_OBJC_STANDARD_REQUIRED ON)

# Apple frameworks that the metal-kernels library links against.
# REQUIRED makes configuration fail if any of them cannot be located.
find_library(FOUNDATION_FRAMEWORK NAMES Foundation REQUIRED)
find_library(METAL_FRAMEWORK NAMES Metal REQUIRED)
find_library(IOKIT_FRAMEWORK NAMES IOKit REQUIRED)
# Metal shader sources that make up the compute library. This list is the
# single source of truth: each entry gets its own compile rule below, so a
# shader added here is automatically compiled and linked into the library.
set(METAL_SOURCES
  ${CMAKE_CURRENT_SOURCE_DIR}/source/accumulate.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/convert.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/embeddings.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/matmul.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/moematmul.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/random.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/rmsnorm.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/rope.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/sample.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/sdpa.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/source/topk.metal
)

# File name of the final shader library, produced in the current binary dir.
set(METAL_LIB default.metallib)

include_directories(BEFORE include source/include)

# Intermediate .air files are written to <binary dir>/source/.
set(METAL_AIR_DIR "${CMAKE_CURRENT_BINARY_DIR}/source")
set(METAL_AIR_FILES)

# One compile rule per shader, so editing a single .metal file only
# recompiles that file instead of the whole set.
# NOTE: headers under source/include are not listed in DEPENDS, so editing
# a header alone does not retrigger shader compilation.
foreach(metal_source IN LISTS METAL_SOURCES)
  cmake_path(GET metal_source STEM LAST_ONLY metal_stem)
  set(metal_air "${METAL_AIR_DIR}/${metal_stem}.air")
  add_custom_command(
    OUTPUT "${metal_air}"
    COMMAND ${CMAKE_COMMAND} -E make_directory "${METAL_AIR_DIR}"
    COMMAND xcrun -sdk macosx metal -g
            "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include"
            -c "${metal_source}"
            -o "${metal_air}"
    DEPENDS "${metal_source}"
    COMMENT "Compiling Metal shader ${metal_stem}.metal"
    VERBATIM
  )
  list(APPEND METAL_AIR_FILES "${metal_air}")
endforeach()

# Combine all .air files into the final library. The output path is absolute
# so the rule does not depend on the command's working directory.
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}"
  COMMAND xcrun -sdk macosx metallib ${METAL_AIR_FILES}
          -o "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}"
  DEPENDS ${METAL_AIR_FILES}
  COMMENT "Compiling Metal compute library"
  VERBATIM
)

# Named target that other targets can depend on to ensure the shader library
# has been built; ALL makes it part of the default build.
add_custom_target(build_metallib ALL
  DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}")
# Logging sources, compiled as an object library and linked into the static
# libraries below.
add_library(log OBJECT source/log.c)

# Static library built from source/metal.m and source/metal-kernels.c.
add_library(metal-kernels STATIC source/metal.m source/metal-kernels.c)
target_link_libraries(metal-kernels
  PRIVATE
    log
    ${FOUNDATION_FRAMEWORK}
    ${METAL_FRAMEWORK}
    ${IOKIT_FRAMEWORK}
)

# The shader library must be built before metal-kernels so the POST_BUILD
# step below has a file to copy.
add_dependencies(metal-kernels build_metallib)

# Place the shader library next to the built metal-kernels archive.
add_custom_command(TARGET metal-kernels POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy
          "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}"
          "$<TARGET_FILE_DIR:metal-kernels>"
  VERBATIM
)

# Static library built from the model, tokenizer and context sources.
add_library(gptoss STATIC source/model.c source/tokenizer.c source/context.c)
target_link_libraries(gptoss PRIVATE log metal-kernels)

# Executable built from source/generate.c. The explicit PRIVATE keyword
# replaces the legacy keyword-less signature of target_link_libraries.
add_executable(generate source/generate.c)
target_link_libraries(generate PRIVATE gptoss)
# --- [ Tests
# GoogleTest is downloaded at configure time via FetchContent.
include(FetchContent)
FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/refs/tags/v1.17.0.zip
  DOWNLOAD_EXTRACT_TIMESTAMP OFF
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

enable_testing()

# Each entry <name> builds test/<name>.cc into the executable <name>-test,
# links it against gtest_main and metal-kernels, and registers it with CTest
# under the same name.
foreach(kernel_test IN ITEMS
    u32-random
    f32-random
    mf4-f32-convert
    bf16-f32-embeddings
    f32-bf16w-rmsnorm
    f32-bf16w-matmul
    f32-rope)
  add_executable(${kernel_test}-test test/${kernel_test}.cc)
  target_link_libraries(${kernel_test}-test PRIVATE GTest::gtest_main metal-kernels)
  target_include_directories(${kernel_test}-test PRIVATE source/include)
  add_test(NAME ${kernel_test}-test COMMAND ${kernel_test}-test)
endforeach()
# --- [ Benchmarks
# Google Benchmark is downloaded at configure time via FetchContent, with its
# own self-tests and install rules switched off.
include(FetchContent)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable self-tests in Google Benchmark" FORCE)
set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Disable installation of Google Benchmark" FORCE)
FetchContent_Declare(
  benchmark
  URL https://github.com/google/benchmark/archive/refs/tags/v1.9.4.zip
  DOWNLOAD_EXTRACT_TIMESTAMP OFF
)
FetchContent_MakeAvailable(benchmark)

# Each entry <name> builds benchmark/<name>.cc into the executable
# <name>-bench, linked against Google Benchmark and metal-kernels.
foreach(kernel_bench IN ITEMS
    f32-random
    u32-random
    mf4-f32-convert
    f32-bf16w-rmsnorm)
  add_executable(${kernel_bench}-bench benchmark/${kernel_bench}.cc)
  target_link_libraries(${kernel_bench}-bench PRIVATE benchmark::benchmark metal-kernels)
  target_include_directories(${kernel_bench}-bench PRIVATE source/include)
endforeach()
# --- [ Python extension ] -----------------------------------------------
find_package(pybind11 CONFIG REQUIRED)  # provides pybind11_add_module

# Extension module named _metal, built from the sources under python/.
pybind11_add_module(_metal
  python/module.c
  python/context.c
  python/model.c
  python/tokenizer.c
)
set_target_properties(_metal PROPERTIES PREFIX "")
target_link_libraries(_metal PRIVATE gptoss)

# The shader library is embedded into the module as the __METAL,__shaders
# section at link time, so it has to be built before the module is linked.
add_dependencies(_metal build_metallib)
target_link_options(_metal PRIVATE
  "LINKER:-sectcreate,__METAL,__shaders,${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}"
)
# Relink the module whenever the embedded shader library changes; without
# this the link step has no file-level dependency on it. LINK_DEPENDS is
# honored by the Makefile and Ninja generators.
set_property(TARGET _metal APPEND PROPERTY
  LINK_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}")

# Also place a copy of the shader library next to the built module.
add_custom_command(TARGET _metal POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy
          "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}"
          "$<TARGET_FILE_DIR:_metal>"
  VERBATIM
)

# 1. Install the extension module into the Python package.
install(TARGETS _metal LIBRARY DESTINATION gpt_oss/metal)
# 2. Make sure the Metal shader archive travels with it.
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}"
        DESTINATION gpt_oss/metal)
# ------------------------------------------------------------------------