Files
openharmony-mlx/gpt_oss/metal/test/f32-bf16w-matmul.cc
Dominik Kundel 243a1b0276 Initial commit
Co-authored-by: Zhuohan Li <zhuohan@openai.com>
Co-authored-by: Maratyszcza <marat@openai.com>
Co-authored-by: Volodymyr Kyrylov <vol@wilab.org.ua>
2025-08-05 08:19:49 -07:00

61 lines
1.7 KiB
C++

#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>
#include "matmul-kernel-tester.hpp"
using gptoss::MatMulKernelTester;
// Simdgroup width used to size every test case below; fixed in the kernel.
// Spelled std::size_t to match the other declarations in this file (<cstddef>
// only guarantees the std-qualified name).
constexpr std::size_t kSimdgroupSize = 32;
// Runs TestF32_BF16W for a single row with a threadgroup of exactly one
// simdgroup width and a column count of four times that width.
TEST(F32_BF16W_MATMUL, single_simdgroup_single_iteration) {
    // NOTE(review): the factor of 4 presumably matches how many columns each
    // thread consumes per kernel iteration - confirm against the kernel.
    constexpr std::size_t column_count = 4 * kSimdgroupSize;

    MatMulKernelTester()
        .num_rows(1)
        .num_cols(column_count)
        .threadgroup_size(kSimdgroupSize)
        .TestF32_BF16W();
}
// Runs TestF32_BF16W for a single row with a threadgroup of one simdgroup
// width, using a column count of (2 * simdgroup width + 1) * 4 so that it is
// not an exact multiple of 4 * simdgroup width.
TEST(F32_BF16W_MATMUL, single_simdgroup_multiple_iteration) {
    // Two full simdgroup-width groups plus one extra, each scaled by 4.
    constexpr std::size_t column_groups = 2 * kSimdgroupSize + 1;
    constexpr std::size_t column_count = column_groups * 4;

    MatMulKernelTester()
        .num_rows(1)
        .num_cols(column_count)
        .threadgroup_size(kSimdgroupSize)
        .TestF32_BF16W();
}
// Runs TestF32_BF16W with a threadgroup twice the simdgroup width and a row
// count equal to the number of simdgroup widths in that threadgroup (2).
TEST(F32_BF16W_MATMUL, single_threadgroup) {
    constexpr std::size_t threads_per_group = 2 * kSimdgroupSize;
    // NOTE(review): presumably one output row per simdgroup - confirm against
    // the kernel.
    constexpr std::size_t row_count = threads_per_group / kSimdgroupSize;
    constexpr std::size_t column_count = (2 * kSimdgroupSize + 1) * 4;

    MatMulKernelTester()
        .num_rows(row_count)
        .num_cols(column_count)
        .threadgroup_size(threads_per_group)
        .TestF32_BF16W();
}
// Runs TestF32_BF16W with a threadgroup twice the simdgroup width and enough
// rows (6) to cover three such threadgroups at two rows each.
TEST(F32_BF16W_MATMUL, multiple_threadgroups) {
    constexpr std::size_t threads_per_group = 2 * kSimdgroupSize;
    constexpr std::uint32_t group_count = 3;
    // threads_per_group is an exact multiple of kSimdgroupSize, so dividing
    // first yields the same row count as multiplying first.
    constexpr std::size_t rows_per_group = threads_per_group / kSimdgroupSize;
    constexpr std::size_t row_count = group_count * rows_per_group;
    constexpr std::size_t column_count = (2 * kSimdgroupSize + 1) * 4;

    MatMulKernelTester()
        .num_rows(row_count)
        .num_cols(column_count)
        .threadgroup_size(threads_per_group)
        .TestF32_BF16W();
}
// Same row, column and threadgroup configuration as the three-threadgroup
// case (6 rows, threadgroup of twice the simdgroup width), but with the
// tester's token count set to 2.
TEST(F32_BF16W_MATMUL, multiple_tokens) {
    constexpr std::size_t threads_per_group = 2 * kSimdgroupSize;
    constexpr std::uint32_t group_count = 3;
    // threads_per_group is an exact multiple of kSimdgroupSize, so dividing
    // first yields the same row count as multiplying first.
    constexpr std::size_t rows_per_group = threads_per_group / kSimdgroupSize;
    constexpr std::size_t row_count = group_count * rows_per_group;
    constexpr std::size_t column_count = (2 * kSimdgroupSize + 1) * 4;

    MatMulKernelTester()
        .num_rows(row_count)
        .num_cols(column_count)
        .num_tokens(2)
        .threadgroup_size(threads_per_group)
        .TestF32_BF16W();
}