diff --git a/.env.example b/.env.example index 09032e0..ff4a49e 100755 --- a/.env.example +++ b/.env.example @@ -1,9 +1,12 @@ # 下游 Ollama 地址 OLLAMA_PROXY_TARGET=http://127.0.0.1:11434 -# 默认模型名 (可以为空,如果需要强制指定一个,也可以从配置中配) -# OLLAMA_DEFAULT_MODEL=Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit +# 默认模型名 +OLLAMA_DEFAULT_MODEL=hotwa/qwen35-opus-distilled-agent:latest # 代理服务监听端口 PROXY_PORT=11435 PROXY_HOST=127.0.0.1 + +# Osaurus API Key (可选,如果下游需要认证) +# OSAURUS_API_KEY=your-api-key-here diff --git a/src/config.ts b/src/config.ts index 38b8a18..9578d85 100755 --- a/src/config.ts +++ b/src/config.ts @@ -9,9 +9,12 @@ export const config = { host: process.env.PROXY_HOST || '127.0.0.1', port: process.env.PROXY_PORT ? parseInt(process.env.PROXY_PORT, 10) : 11435, - // 下游 Ollama 的真实地此 + // 下游 Ollama/Osaurus 的真实地址 targetUrl: process.env.OLLAMA_PROXY_TARGET || 'http://127.0.0.1:11434', // 默认模型:当上游没有带模型时,或者为了默认测试配置,提供一个默认模型名称 - defaultModel: process.env.OLLAMA_DEFAULT_MODEL || 'Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit', + defaultModel: process.env.OLLAMA_DEFAULT_MODEL || '', + + // Osaurus API Key (可选,如果下游需要认证) + apiKey: process.env.OSAURUS_API_KEY || '', }; diff --git a/src/proxy/forward.ts b/src/proxy/forward.ts index 7ddb5a3..b9b80a1 100755 --- a/src/proxy/forward.ts +++ b/src/proxy/forward.ts @@ -16,30 +16,93 @@ export async function forwardChatRequest(requestBody: any): Promise<any> { requestBody = normalizeRequest(requestBody); logger.info(`Forwarding chat request to ${targetEndpoint} for model: ${requestBody.model}`); - logger.info(`Request body: ${JSON.stringify(requestBody, null, 2)}`); + + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + 'Accept': 'application/json' + }; + + // Add Authorization header if API key is configured + if (config.apiKey) { + headers['Authorization'] = `Bearer ${config.apiKey}`; + } const options: RequestInit = { method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }, + 
headers, body: JSON.stringify(requestBody) }; const response = await fetch(targetEndpoint, options); - + if (!response.ok) { const errorText = await response.text(); logger.error(`Ollama upstream error ${response.status}: ${errorText}`); throw new Error(`Upstream returned ${response.status}: ${errorText}`); } - // Assuming it's not a stream for now - const responseData = await response.json(); - + // Handle response - Osaurus may return streaming format even with stream: false + const responseText = await response.text(); + + // Try to parse as single JSON first (standard Ollama behavior) + let responseData: any; + try { + responseData = JSON.parse(responseText); + } catch (e) { + // If that fails, it might be streaming format (multiple JSON lines) + // Combine all chunks into a single response + const lines = responseText.trim().split('\n').filter(line => line.trim()); + if (lines.length === 0) { + throw new Error('Empty response from upstream'); + } + + // Combine streaming chunks + let combinedContent = ''; + let lastChunk: any = null; + let message: any = { role: 'assistant', content: '' }; + + for (const line of lines) { + try { + const chunk = JSON.parse(line); + lastChunk = chunk; + if (chunk.message?.content) { + combinedContent += chunk.message.content; + } + // Preserve tool_calls from any chunk + if (chunk.message?.tool_calls) { + message.tool_calls = chunk.message.tool_calls; + } + // Preserve thinking if present + if (chunk.message?.thinking) { + message.thinking = chunk.message.thinking; + } + } catch (parseError) { + logger.warn(`Failed to parse chunk: ${line}`); + } + } + + message.content = combinedContent; + + // Build the combined response using the last chunk's metadata + responseData = { + model: lastChunk?.model || requestBody.model, + created_at: lastChunk?.created_at || new Date().toISOString(), + message, + done: true, + done_reason: lastChunk?.done_reason || 'stop', + total_duration: lastChunk?.total_duration, + load_duration: 
lastChunk?.load_duration, + prompt_eval_count: lastChunk?.prompt_eval_count, + prompt_eval_duration: lastChunk?.prompt_eval_duration, + eval_count: lastChunk?.eval_count, + eval_duration: lastChunk?.eval_duration, + }; + + logger.info(`Combined ${lines.length} streaming chunks into single response`); + } + // Rewrite if necessary const rewrittenData = rewriteResponse(responseData); - + return rewrittenData; }