Files
openharmony-mlx/gpt_oss/metal/CMakeLists.txt
Dominik Kundel 243a1b0276 Initial commit
Co-authored-by: Zhuohan Li <zhuohan@openai.com>
Co-authored-by: Maratyszcza <marat@openai.com>
Co-authored-by: Volodymyr Kyrylov <vol@wilab.org.ua>
2025-08-05 08:19:49 -07:00

177 lines
9.3 KiB
CMake
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
cmake_minimum_required(VERSION 3.24)
project(GPTOSS
VERSION 1.0
DESCRIPTION "Local GPT-OSS inference"
LANGUAGES C CXX OBJC)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_OBJC_STANDARD 11)
set(CMAKE_OBJC_STANDARD_REQUIRED ON)
find_library(FOUNDATION_FRAMEWORK Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(IOKIT_FRAMEWORK IOKit REQUIRED)
set(METAL_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/source/accumulate.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/convert.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/embeddings.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/matmul.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/moematmul.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/random.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/rmsnorm.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/rope.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/sample.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/sdpa.metal
${CMAKE_CURRENT_SOURCE_DIR}/source/topk.metal
)
set(METAL_LIB default.metallib)
include_directories(BEFORE include source/include)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/source/"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/accumulate.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/accumulate.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/convert.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/convert.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/embeddings.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/embeddings.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/matmul.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/matmul.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/moematmul.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/moematmul.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/random.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/random.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/rmsnorm.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/rmsnorm.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/rope.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/rope.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/sample.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/sample.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/sdpa.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/sdpa.air"
COMMAND xcrun -sdk macosx metal -g "-I${CMAKE_CURRENT_SOURCE_DIR}/source/include" -c "${CMAKE_CURRENT_SOURCE_DIR}/source/topk.metal" -o "${CMAKE_CURRENT_BINARY_DIR}/source/topk.air"
COMMAND xcrun -sdk macosx metallib "${CMAKE_CURRENT_BINARY_DIR}/source/accumulate.air" "${CMAKE_CURRENT_BINARY_DIR}/source/convert.air" "${CMAKE_CURRENT_BINARY_DIR}/source/embeddings.air" "${CMAKE_CURRENT_BINARY_DIR}/source/matmul.air" "${CMAKE_CURRENT_BINARY_DIR}/source/moematmul.air" "${CMAKE_CURRENT_BINARY_DIR}/source/random.air" "${CMAKE_CURRENT_BINARY_DIR}/source/rmsnorm.air" "${CMAKE_CURRENT_BINARY_DIR}/source/rope.air" "${CMAKE_CURRENT_BINARY_DIR}/source/sample.air" "${CMAKE_CURRENT_BINARY_DIR}/source/sdpa.air" "${CMAKE_CURRENT_BINARY_DIR}/source/topk.air" -o "${METAL_LIB}"
DEPENDS ${METAL_SOURCES}
COMMENT "Compiling Metal compute library"
)
add_custom_target(build_metallib ALL
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB})
add_library(log OBJECT source/log.c)
add_library(metal-kernels STATIC source/metal.m source/metal-kernels.c)
target_link_libraries(metal-kernels PRIVATE log)
add_dependencies(metal-kernels build_metallib)
add_custom_command(TARGET metal-kernels POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}
$<TARGET_FILE_DIR:metal-kernels>)
target_link_libraries(metal-kernels PRIVATE ${FOUNDATION_FRAMEWORK} ${METAL_FRAMEWORK} ${IOKIT_FRAMEWORK})
add_library(gptoss STATIC source/model.c source/tokenizer.c source/context.c)
target_link_libraries(gptoss PRIVATE log metal-kernels)
add_executable(generate source/generate.c)
target_link_libraries(generate gptoss)
# --- [ Tests
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/refs/tags/v1.17.0.zip
DOWNLOAD_EXTRACT_TIMESTAMP OFF
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
enable_testing()
add_executable(u32-random-test test/u32-random.cc)
target_link_libraries(u32-random-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(u32-random-test PRIVATE source/include)
add_test(NAME u32-random-test COMMAND u32-random-test)
add_executable(f32-random-test test/f32-random.cc)
target_link_libraries(f32-random-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(f32-random-test PRIVATE source/include)
add_test(NAME f32-random-test COMMAND f32-random-test)
add_executable(mf4-f32-convert-test test/mf4-f32-convert.cc)
target_link_libraries(mf4-f32-convert-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(mf4-f32-convert-test PRIVATE source/include)
add_test(NAME mf4-f32-convert-test COMMAND mf4-f32-convert-test)
add_executable(bf16-f32-embeddings-test test/bf16-f32-embeddings.cc)
target_link_libraries(bf16-f32-embeddings-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(bf16-f32-embeddings-test PRIVATE source/include)
add_test(NAME bf16-f32-embeddings-test COMMAND bf16-f32-embeddings-test)
add_executable(f32-bf16w-rmsnorm-test test/f32-bf16w-rmsnorm.cc)
target_link_libraries(f32-bf16w-rmsnorm-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(f32-bf16w-rmsnorm-test PRIVATE source/include)
add_test(NAME f32-bf16w-rmsnorm-test COMMAND f32-bf16w-rmsnorm-test)
add_executable(f32-bf16w-matmul-test test/f32-bf16w-matmul.cc)
target_link_libraries(f32-bf16w-matmul-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(f32-bf16w-matmul-test PRIVATE source/include)
add_test(NAME f32-bf16w-matmul-test COMMAND f32-bf16w-matmul-test)
add_executable(f32-rope-test test/f32-rope.cc)
target_link_libraries(f32-rope-test PRIVATE GTest::gtest_main metal-kernels)
target_include_directories(f32-rope-test PRIVATE source/include)
add_test(NAME f32-rope-test COMMAND f32-rope-test)
# --- [ Benchmarks
include(FetchContent)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable self-tests in Google Benchmark" FORCE)
set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Disable installation of Google Benchmark" FORCE)
FetchContent_Declare(
benchmark
URL https://github.com/google/benchmark/archive/refs/tags/v1.9.4.zip
DOWNLOAD_EXTRACT_TIMESTAMP OFF
)
FetchContent_MakeAvailable(benchmark)
add_executable(f32-random-bench benchmark/f32-random.cc)
target_link_libraries(f32-random-bench PRIVATE benchmark::benchmark metal-kernels)
target_include_directories(f32-random-bench PRIVATE source/include)
add_executable(u32-random-bench benchmark/u32-random.cc)
target_link_libraries(u32-random-bench PRIVATE benchmark::benchmark metal-kernels)
target_include_directories(u32-random-bench PRIVATE source/include)
add_executable(mf4-f32-convert-bench benchmark/mf4-f32-convert.cc)
target_link_libraries(mf4-f32-convert-bench PRIVATE benchmark::benchmark metal-kernels)
target_include_directories(mf4-f32-convert-bench PRIVATE source/include)
add_executable(f32-bf16w-rmsnorm-bench benchmark/f32-bf16w-rmsnorm.cc)
target_link_libraries(f32-bf16w-rmsnorm-bench PRIVATE benchmark::benchmark metal-kernels)
target_include_directories(f32-bf16w-rmsnorm-bench PRIVATE source/include)
# --- [ Python extension ] -----------------------------------------------
find_package(pybind11 CONFIG REQUIRED) # provides pybind11_add_module
pybind11_add_module(_metal
python/module.c
python/context.c
python/model.c
python/tokenizer.c
)
set_target_properties(_metal PROPERTIES PREFIX "")
target_link_libraries(_metal PRIVATE gptoss)
add_dependencies(_metal build_metallib)
target_link_options(_metal PRIVATE
LINKER:-sectcreate,__METAL,__shaders,${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}
)
add_custom_command(TARGET _metal POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}
$<TARGET_FILE_DIR:_metal>)
# 1⃣ install the extension module into the Python package
install(TARGETS _metal LIBRARY DESTINATION gpt_oss/metal)
# 2⃣ make sure the Metal shader archive travels with it
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${METAL_LIB}
DESTINATION gpt_oss/metal)
# ------------------------------------------------------------------------