Files
openharmony-mlx/gpt_oss/metal/include/gpt-oss/functions.h
Arthur Colle 92f5b57da3 Initial release: OpenHarmony-MLX - High-Performance Apple Silicon GPT-OSS Implementation
This is a complete rebranding and optimization of the original GPT-OSS codebase for Apple Silicon:

🚀 Features:
- Native MLX acceleration for M1/M2/M3/M4 chips
- Complete MLX implementation with Mixture of Experts (MoE)
- Memory-efficient quantization (4-bit MXFP4)
- Drop-in replacement APIs for existing backends
- Full tool integration (browser, python, apply_patch)
- Comprehensive build system with Metal kernels

📦 What's Included:
- gpt_oss/mlx_gpt_oss/ - Complete MLX implementation
- All original inference backends (torch, triton, metal, vllm)
- Command-line interfaces and Python APIs
- Developer tools and evaluation suite
- Updated branding and documentation

🍎 Apple Silicon Optimized:
- Up to 40 tokens/sec performance on Apple Silicon
- Run GPT-OSS-120b in 30GB with quantization
- Native Metal kernel acceleration
- Memory-mapped weight loading

🔧 Ready to Deploy:
- Updated package name to openharmony-mlx
- Comprehensive .gitignore for clean releases
- Updated README with Apple Silicon focus
- All build artifacts cleaned up

🧠 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 19:28:25 -04:00

415 lines
16 KiB
C

#pragma once
#include <stddef.h>
#include <stdint.h>
#include <gpt-oss/macros.h>
#include <gpt-oss/types.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Creates a Model object from a file in the filesystem.
*
* @param path Path to the file containing the model in GPT-OSS format.
* @param model_out Pointer to the Model object that will be created. Must be released with gptoss_release_model.
*
* On success, returns gptoss_status_success and saves a pointer to the created Model in the model_out argument.
* On failure, returns an error code and stores null pointer in the model_out argument.
*/
enum gptoss_status GPTOSS_ABI gptoss_model_create_from_file(
const char* path,
gptoss_model_t* model_out);
/*
* Query the Tokenizer object associated with the Model.
*
* @param model Pointer to the Model object created by gptoss_model_create_from_file.
* @param tokenizer_out Pointer to the variable where the Tokenizer reference will be stored.
*
* On success, returns gptoss_status_success and stores reference to the Tokenizer object in the tokenizer_out argument.
* On failure, returns an error code and stores NULL in the tokenizer_out argument.
*/
enum gptoss_status GPTOSS_ABI gptoss_model_get_tokenizer(
gptoss_model_t model,
gptoss_tokenizer_t* tokenizer_out);
/*
* Query the maximum context length supported by the Model.
*
* @param model Pointer to the Model object created by gptoss_model_create_from_file.
* @param max_context_length_out Pointer to the variable where the maximum context length will be stored.
*
* On success, returns gptoss_status_success and stores maximum context length in the max_context_length_out argument.
* On failure, returns an error code and leaves the value specified by max_context_length_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_model_get_max_context_length(
gptoss_model_t model,
size_t* max_context_length_out);
/*
* Increments a Model object's reference count.
*
* @param model Pointer to the Model object created by gptoss_model_create_from_file.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_model_retain(
gptoss_model_t model);
/*
* Decrements a Model object's reference count and possibly release associated resources.
*
* @param model Pointer to the Model object created by gptoss_model_create_from_file.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_model_release(
gptoss_model_t model);
/*
* Query the token ID for a special token in the Tokenizer vocabulary.
*
* @param tokenizer Pointer to the Tokenizer object created by gptoss_model_get_tokenizer.
* @param token_type Type of the special token to query an ID for.
* @param token_id_out Pointer to the variable where the token ID will be stored.
*
* On success, returns gptoss_status_success and stores the token ID in the token_id_out argument.
* On failure, returns an error code and leaves the value specified by token_id_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_get_special_token_id(
gptoss_tokenizer_t tokenizer,
enum gptoss_special_token token_type,
uint32_t* token_id_out);
/*
* Query the number of text tokens in the Tokenizer vocabulary.
*
* @param tokenizer Pointer to the Tokenizer object created by gptoss_model_get_tokenizer.
* @param num_text_tokens_out Pointer to the variable where the number of text tokens will be stored.
*
* On success, returns gptoss_status_success and stores the number of text tokens in the num_text_tokens_out argument.
* On failure, returns an error code and leaves the value specified by num_text_tokens_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_get_num_text_tokens(
gptoss_tokenizer_t tokenizer,
uint32_t* num_text_tokens_out);
/*
* Query the number of special tokens in the Tokenizer vocabulary.
*
* @param tokenizer Pointer to the Tokenizer object created by gptoss_model_get_tokenizer.
* @param num_special_tokens_out Pointer to the variable where the number of special tokens will be stored.
*
* On success, returns gptoss_status_success and stores the number of text tokens in the num_special_tokens_out argument.
* On failure, returns an error code and leaves the value specified by num_special_tokens_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_get_num_special_tokens(
gptoss_tokenizer_t tokenizer,
uint32_t* num_special_tokens_out);
/*
* Query the total number of tokens in the Tokenizer vocabulary.
*
* @param tokenizer Pointer to the Tokenizer object created by gptoss_model_get_tokenizer.
* @param num_tokens_out Pointer to the variable where the total number of tokens will be stored.
*
* On success, returns gptoss_status_success and stores the total number of tokens in the num_special_tokens_out argument.
* On failure, returns an error code and leaves the value specified by num_special_tokens_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_get_num_tokens(
gptoss_tokenizer_t tokenizer,
uint32_t* num_tokens_out);
/*
* Convert a text token ID to byte representation.
*
* @param tokenizer Pointer to the Tokenizer object returned by gptoss_model_get_tokenizer. The lifetime of the returned
* byte representation would match the lifetime of this Tokenizer object.
* @param token_ptr_out Pointer to the variable where the pointer to the byte representation of the token will be
* stored.
* @param token_size_out Pointer to the variable where the size of the byte representation of the token will be stored.
*
* On success, returns gptoss_status_success and stores pointer and size of the byte representation of the token in the
* token_ptr_out and token_size_out arguments.
* On failure, returns an error code and leaves the values specified in token_ptr_out and token_size_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_decode(
gptoss_tokenizer_t tokenizer,
uint32_t token_id,
const void** token_ptr_out,
size_t* token_size_out);
/*
* Increments a Tokenizer object's reference count.
*
* @param tokenizer Pointer to the Tokenizer object returned by gptoss_model_get_tokenizer.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_retain(
gptoss_tokenizer_t tokenizer);
/*
* Decrements a Tokenizer object's reference count and possibly release associated resources.
*
* @param tokenizer Pointer to the Tokenizer object returned by gptoss_model_get_tokenizer.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_tokenizer_release(
gptoss_tokenizer_t tokenizer);
/*
* Creates a Context object for use with the particular Model object.
*
* @param model Model object to create a context for.
* @param context_length Maximum number of tokens in the context.
* Specify 0 to use the maximum context length supported by the model.
* @param batch_size Maximum number of tokens that can be processed in a single batch.
* Larger values may improve performance, but require more memory.
* @param context_out Pointer to the Context object that will be created.
* Must be released with gptoss_release_context.
*
* On success, returns gptoss_status_success and saves a pointer to the created Context in the context_out argument.
* On failure, returns an error code and stores null pointer in the context_out argument.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_create(
gptoss_model_t model,
size_t context_length,
gptoss_context_t* context_out);
/*
* Query the current number of tokens cached in the Context.
*
* @param context Pointer to the Context object created by gptoss_context_create.
* @param num_tokens_out Pointer to the variable where the current number of cached tokens will be stored.
*
* On success, returns gptoss_status_success and stores current number of cached tokens in the num_tokens_out argument.
* On failure, returns an error code and leaves the value specified by num_tokens_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_get_num_tokens(
gptoss_context_t context,
size_t* num_tokens_out);
/*
* Query the maximum number of tokens cached in the Context.
*
* @param context Pointer to the Context object created by gptoss_context_create.
* @param max_tokens_out Pointer to the variable where the maximum number of cached tokens will be stored.
*
* On success, returns gptoss_status_success and stores maximum number of cached tokens in the max_tokens_out argument.
* On failure, returns an error code and leaves the value specified by max_tokens_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_get_max_tokens(
gptoss_context_t context,
size_t* max_tokens_out);
/*
* Query the list of token IDs cached in the Context.
*
* @param context Pointer to the Context object created by gptoss_context_create.
* @param tokens_out Pointer to the array where up to max_tokens_out of cached tokens will be stored.
* @param max_tokens Maximum capacity of the buffer specified by tokens_out.
* @param num_tokens_out Pointer to the variable where the actual number of cached tokens will be stored.
* This value can exceed max_tokens if the buffer capacity is insufficient.
*
* On success, returns gptoss_status_success and stores cached token IDs in the tokens_out argument and the number of
* cached tokens in the num_tokens_out argument.
* On failure, returns an error code and leaves the values specified by tokend_out and num_tokens_out unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_get_tokens(
gptoss_context_t context,
uint32_t* tokens_out,
size_t max_tokens,
size_t* num_tokens_out);
/*
* Tokenize and appends a character string to the Context object.
*
* @param context Context object created by gptoss_context_create.
* @param text Pointer to the character string to tokenizer and append.
* @param text_length Length of the string, in chars.
* @param num_tokens_out Optional pointer to the variable where the number of appended tokens will be stored. Ignored if a null pointer is provided.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_append_chars(
gptoss_context_t context,
const char* text,
size_t text_length,
size_t* num_tokens_out);
/*
* Appends a list of tokens to the context.
*
* @param context Context object created by gptoss_context_create.
* @param num_tokens Number of tokens to be appended.
* @param tokens Pointer to the array of tokens to be appended.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_append_tokens(
gptoss_context_t context,
size_t num_tokens,
const uint32_t* tokens);
/*
* Resets the context, clearing its state.
*
* @param context Context object created by gptoss_context_create.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_reset(
gptoss_context_t context);
/*
* Pre-process the tokens in the Context and generate probability distrubution over the next token.
*
* @param context Context object created by gptoss_context_create.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_process(
gptoss_context_t context);
/*
* Generate a token probability distribution over the next token conditioned on the Context.
*
* @param context Context object created by gptoss_context_create.
* @param temperature Sampling temperature. Must be non-negative.
* @param seed Random number generator seed to use for sampling.
* @param token_out Pointer to the variable where the token ID will be stored.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_sample(
gptoss_context_t context,
float temperature,
uint64_t seed,
uint32_t* token_out);
/*
* Get the raw logits (scores) from the last forward pass.
*
* @param context Context object created by gptoss_context_create.
* @param logits_out Pointer to the array where logits will be stored.
* @param max_logits Maximum capacity of the buffer specified by logits_out.
* @param num_logits_out Pointer to the variable where the actual number of logits will be stored.
*
* On success, returns gptoss_status_success and stores logits in the logits_out argument.
* On failure, returns an error code and leaves the values unchanged.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_get_logits(
gptoss_context_t context,
float* logits_out,
size_t max_logits,
size_t* num_logits_out);
/*
* Increments a Context object's reference count.
*
* @param context Pointer to the Context object created by gptoss_create_context.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_retain(
gptoss_context_t context);
/*
* Decrements a Context object's reference count and possibly release associated resources.
*
* @param context Pointer to the Context object created by gptoss_create_context.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_context_release(
gptoss_context_t context);
/*
* Creates a Sampler object.
*
* @param sampler_out Pointer to the Sampler object that will be created.
* Must be released with gptoss_sampler_release.
*
* On success, returns gptoss_status_success and saves a pointer to the created Sampler in the sampler_out argument.
* On failure, returns an error code and stores a null pointer in the sampler_out argument.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_create(
gptoss_sampler_t* sampler_out);
/*
* Sets the sampling temperature for the Sampler.
*
* @param sampler Sampler object created by gptoss_sampler_create.
* @param temperature Temperature value to be set. Must be in the [0.0, 1.0] range.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_set_temperature(
gptoss_sampler_t sampler,
float temperature);
/*
* Sets the Top-P nucleus sampling parameter for the Sampler.
*
* @param sampler Sampler object created by gptoss_sampler_create.
* @param top_p Top-P value to be set. Must be in the (0.0, 1.0] range.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_set_top_p(
gptoss_sampler_t sampler,
float top_p);
/*
* Sets the presence penalty for the Sampler.
*
* @param sampler Sampler object created by gptoss_sampler_create.
* @param presence_penalty Presence penalty value to be set. Must be in the [-2.0, 2.0] range.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_set_presence_penalty(
gptoss_sampler_t sampler,
float presence_penalty);
/*
* Sets the frequency penalty for the Sampler.
*
* @param sampler Sampler object created by gptoss_sampler_create.
* @param frequency_penalty Frequency penalty value to be set. Must be in the [-2.0, 2.0] range.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_set_frequency_penalty(
gptoss_sampler_t sampler,
float frequency_penalty);
/*
* Increments a Sampler object's reference count.
*
* @param sampler Pointer to the Sampler object created by gptoss_sampler_create.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_retain(
gptoss_sampler_t sampler);
/*
* Decrements a Sampler object's reference count and possibly releases associated resources.
*
* @param sampler Pointer to the Sampler object created by gptoss_sampler_create.
*
* On success, returns gptoss_status_success, otherwise returns an error code.
*/
enum gptoss_status GPTOSS_ABI gptoss_sampler_release(
gptoss_sampler_t sampler);
#ifdef __cplusplus
} // extern "C"
#endif