#include #include #include #include using gptoss::Check; using namespace gptoss::metal; static void u32_fill_random(benchmark::State& state) { const size_t numel = state.range(0); Device device; CommandQueue command_queue{device}; Library library{device}; Function u32_fill_random_fn{library, "gptoss_u32_fill_random"}; Buffer buffer{device, numel * sizeof(float)}; constexpr uint64_t seed = UINT64_C(1019827666124465388); constexpr uint64_t offset = UINT64_C(12345678901234567890); for (auto _ : state) { CommandBuffer command_buffer{command_queue}; Check(gptoss_metal_command_buffer_encode_launch_u32_fill_random( command_buffer.handle(), u32_fill_random_fn.handle(), /*threadgroup_size=*/0, /*max_threadgroups=*/120, /*output_buffer=*/buffer.handle(), /*output_offset=*/0, numel, seed, offset), "gptoss_metal_command_buffer_encode_launch_u32_fill_random"); command_buffer.commit(); const double elapsed_seconds = command_buffer.wait_completion(); state.SetIterationTime(elapsed_seconds); } const int64_t elements_per_iteration = numel; state.counters["elements"] = benchmark::Counter(state.iterations() * elements_per_iteration, benchmark::Counter::kIsRate); const int64_t bytes_per_iteration = numel * sizeof(float); state.counters["bytes"] = benchmark::Counter(state.iterations() * bytes_per_iteration, benchmark::Counter::kIsRate); } constexpr int64_t giga = INT64_C(1073741824); BENCHMARK(u32_fill_random)->Arg(2 * giga)->UseManualTime()->Unit(benchmark::kMicrosecond); BENCHMARK_MAIN();