feat: add Osaurus API authentication support
- Add OSAURUS_API_KEY config for Bearer token authentication
- Handle Osaurus streaming format responses even with stream: false
- Update .env.example with Osaurus configuration

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,12 @@
|
|||||||
# 下游 Ollama 地址
|
# 下游 Ollama 地址
|
||||||
OLLAMA_PROXY_TARGET=http://127.0.0.1:11434
|
OLLAMA_PROXY_TARGET=http://127.0.0.1:11434
|
||||||
|
|
||||||
# 默认模型名 (可以为空,如果需要强制指定一个,也可以从配置中配)
|
# 默认模型名
|
||||||
# OLLAMA_DEFAULT_MODEL=Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit
|
OLLAMA_DEFAULT_MODEL=hotwa/qwen35-opus-distilled-agent:latest
|
||||||
|
|
||||||
# 代理服务监听端口
|
# 代理服务监听端口
|
||||||
PROXY_PORT=11435
|
PROXY_PORT=11435
|
||||||
PROXY_HOST=127.0.0.1
|
PROXY_HOST=127.0.0.1
|
||||||
|
|
||||||
|
# Osaurus API Key (可选,如果下游需要认证)
|
||||||
|
# OSAURUS_API_KEY=your-api-key-here
|
||||||
|
|||||||
@@ -9,9 +9,12 @@ export const config = {
|
|||||||
host: process.env.PROXY_HOST || '127.0.0.1',
|
host: process.env.PROXY_HOST || '127.0.0.1',
|
||||||
port: process.env.PROXY_PORT ? parseInt(process.env.PROXY_PORT, 10) : 11435,
|
port: process.env.PROXY_PORT ? parseInt(process.env.PROXY_PORT, 10) : 11435,
|
||||||
|
|
||||||
// 下游 Ollama 的真实地此
|
// 下游 Ollama/Osaurus 的真实地址
|
||||||
targetUrl: process.env.OLLAMA_PROXY_TARGET || 'http://127.0.0.1:11434',
|
targetUrl: process.env.OLLAMA_PROXY_TARGET || 'http://127.0.0.1:11434',
|
||||||
|
|
||||||
// 默认模型:当上游没有带模型时,或者为了默认测试配置,提供一个默认模型名称
|
// 默认模型:当上游没有带模型时,或者为了默认测试配置,提供一个默认模型名称
|
||||||
defaultModel: process.env.OLLAMA_DEFAULT_MODEL || 'Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit',
|
defaultModel: process.env.OLLAMA_DEFAULT_MODEL || '',
|
||||||
|
|
||||||
|
// Osaurus API Key (可选,如果下游需要认证)
|
||||||
|
apiKey: process.env.OSAURUS_API_KEY || '',
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -16,30 +16,93 @@ export async function forwardChatRequest(requestBody: any): Promise<any> {
|
|||||||
requestBody = normalizeRequest(requestBody);
|
requestBody = normalizeRequest(requestBody);
|
||||||
|
|
||||||
logger.info(`Forwarding chat request to ${targetEndpoint} for model: ${requestBody.model}`);
|
logger.info(`Forwarding chat request to ${targetEndpoint} for model: ${requestBody.model}`);
|
||||||
logger.info(`Request body: ${JSON.stringify(requestBody, null, 2)}`);
|
|
||||||
|
const headers: Record<string, string> = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Accept': 'application/json'
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add Authorization header if API key is configured
|
||||||
|
if (config.apiKey) {
|
||||||
|
headers['Authorization'] = `Bearer ${config.apiKey}`;
|
||||||
|
}
|
||||||
|
|
||||||
const options: RequestInit = {
|
const options: RequestInit = {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers,
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Accept': 'application/json'
|
|
||||||
},
|
|
||||||
body: JSON.stringify(requestBody)
|
body: JSON.stringify(requestBody)
|
||||||
};
|
};
|
||||||
|
|
||||||
const response = await fetch(targetEndpoint, options);
|
const response = await fetch(targetEndpoint, options);
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
const errorText = await response.text();
|
const errorText = await response.text();
|
||||||
logger.error(`Ollama upstream error ${response.status}: ${errorText}`);
|
logger.error(`Ollama upstream error ${response.status}: ${errorText}`);
|
||||||
throw new Error(`Upstream returned ${response.status}: ${errorText}`);
|
throw new Error(`Upstream returned ${response.status}: ${errorText}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assuming it's not a stream for now
|
// Handle response - Osaurus may return streaming format even with stream: false
|
||||||
const responseData = await response.json();
|
const responseText = await response.text();
|
||||||
|
|
||||||
|
// Try to parse as single JSON first (standard Ollama behavior)
|
||||||
|
let responseData: any;
|
||||||
|
try {
|
||||||
|
responseData = JSON.parse(responseText);
|
||||||
|
} catch (e) {
|
||||||
|
// If that fails, it might be streaming format (multiple JSON lines)
|
||||||
|
// Combine all chunks into a single response
|
||||||
|
const lines = responseText.trim().split('\n').filter(line => line.trim());
|
||||||
|
if (lines.length === 0) {
|
||||||
|
throw new Error('Empty response from upstream');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine streaming chunks
|
||||||
|
let combinedContent = '';
|
||||||
|
let lastChunk: any = null;
|
||||||
|
let message: any = { role: 'assistant', content: '' };
|
||||||
|
|
||||||
|
for (const line of lines) {
|
||||||
|
try {
|
||||||
|
const chunk = JSON.parse(line);
|
||||||
|
lastChunk = chunk;
|
||||||
|
if (chunk.message?.content) {
|
||||||
|
combinedContent += chunk.message.content;
|
||||||
|
}
|
||||||
|
// Preserve tool_calls from any chunk
|
||||||
|
if (chunk.message?.tool_calls) {
|
||||||
|
message.tool_calls = chunk.message.tool_calls;
|
||||||
|
}
|
||||||
|
// Preserve thinking if present
|
||||||
|
if (chunk.message?.thinking) {
|
||||||
|
message.thinking = chunk.message.thinking;
|
||||||
|
}
|
||||||
|
} catch (parseError) {
|
||||||
|
logger.warn(`Failed to parse chunk: ${line}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
message.content = combinedContent;
|
||||||
|
|
||||||
|
// Build the combined response using the last chunk's metadata
|
||||||
|
responseData = {
|
||||||
|
model: lastChunk?.model || requestBody.model,
|
||||||
|
created_at: lastChunk?.created_at || new Date().toISOString(),
|
||||||
|
message,
|
||||||
|
done: true,
|
||||||
|
done_reason: lastChunk?.done_reason || 'stop',
|
||||||
|
total_duration: lastChunk?.total_duration,
|
||||||
|
load_duration: lastChunk?.load_duration,
|
||||||
|
prompt_eval_count: lastChunk?.prompt_eval_count,
|
||||||
|
prompt_eval_duration: lastChunk?.prompt_eval_duration,
|
||||||
|
eval_count: lastChunk?.eval_count,
|
||||||
|
eval_duration: lastChunk?.eval_duration,
|
||||||
|
};
|
||||||
|
|
||||||
|
logger.info(`Combined ${lines.length} streaming chunks into single response`);
|
||||||
|
}
|
||||||
|
|
||||||
// Rewrite if necessary
|
// Rewrite if necessary
|
||||||
const rewrittenData = rewriteResponse(responseData);
|
const rewrittenData = rewriteResponse(responseData);
|
||||||
|
|
||||||
return rewrittenData;
|
return rewrittenData;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user