import { config } from '../config';
import { rewriteResponse } from './response-rewriter';
import { normalizeRequest } from './request-normalizer';
import { logger } from '../utils/logger';

/**
 * Merges a newline-delimited sequence of JSON chat chunks into one
 * non-streaming response object.
 *
 * `message.content` and string `message.thinking` fragments are concatenated
 * in order, `message.tool_calls` arrays are concatenated in order, and the
 * top-level metadata (model, timings, counters) is taken from the last chunk
 * that parsed successfully. Lines that are not valid JSON objects are logged
 * and skipped.
 *
 * @param responseText - Raw upstream body that failed to parse as a single JSON document.
 * @param fallbackModel - Model name used when the last chunk does not carry one.
 * @returns The combined response with `done: true`.
 * @throws Error if the body has no non-blank lines, or if none of them is a JSON object.
 */
function combineStreamedChunks(responseText: string, fallbackModel: unknown): any {
  const lines = responseText.trim().split('\n').filter(line => line.trim());

  if (lines.length === 0) {
    throw new Error('Empty response from upstream');
  }

  let combinedContent = '';
  let combinedThinking = '';
  let toolCalls: any[] | undefined;
  let lastChunk: any = null;

  for (const line of lines) {
    let chunk: any;
    try {
      chunk = JSON.parse(line);
    } catch {
      logger.warn(`Failed to parse chunk: ${line}`);
      continue;
    }

    // A line such as `null` or `42` is valid JSON but not a chunk; skip it
    // rather than letting it become the metadata source.
    if (chunk === null || typeof chunk !== 'object') {
      logger.warn(`Failed to parse chunk: ${line}`);
      continue;
    }

    lastChunk = chunk;

    if (chunk.message?.content) {
      combinedContent += chunk.message.content;
    }

    // Thinking text arrives as fragments, like content, so it is appended
    // instead of being replaced by each new chunk.
    if (typeof chunk.message?.thinking === 'string') {
      combinedThinking += chunk.message.thinking;
    }

    // Different chunks may each carry tool calls; keep all of them.
    if (Array.isArray(chunk.message?.tool_calls)) {
      toolCalls = (toolCalls ?? []).concat(chunk.message.tool_calls);
    }
  }

  if (lastChunk === null) {
    // Nothing usable came back; do not fabricate an empty assistant reply.
    throw new Error('Upstream response contained no parseable JSON chunks');
  }

  const message: any = { role: 'assistant', content: combinedContent };
  if (toolCalls) {
    message.tool_calls = toolCalls;
  }
  if (combinedThinking) {
    message.thinking = combinedThinking;
  }

  logger.info(`Combined ${lines.length} streaming chunks into single response`);

  // Build the combined response using the last chunk's metadata.
  return {
    model: lastChunk.model || fallbackModel,
    created_at: lastChunk.created_at || new Date().toISOString(),
    message,
    done: true,
    done_reason: lastChunk.done_reason || 'stop',
    total_duration: lastChunk.total_duration,
    load_duration: lastChunk.load_duration,
    prompt_eval_count: lastChunk.prompt_eval_count,
    prompt_eval_duration: lastChunk.prompt_eval_duration,
    eval_count: lastChunk.eval_count,
    eval_duration: lastChunk.eval_duration,
  };
}

/**
 * Forwards a chat request to `${config.targetUrl}/api/chat` and returns the
 * upstream reply after passing it through `rewriteResponse`.
 *
 * Before sending, `config.defaultModel` is filled in when the request names
 * no model, and the body is passed through `normalizeRequest`. The caller's
 * `requestBody` object is not modified by the default-model injection.
 *
 * @param requestBody - Chat request payload.
 * @param authorization - Value for the `Authorization` header; when omitted,
 *   `Bearer ${config.apiKey}` is used if an API key is configured.
 * @returns The rewritten upstream response.
 * @throws Error when the upstream status is not OK, when the body is empty,
 *   or when the body contains no parseable JSON.
 */
export async function forwardChatRequest(requestBody: any, authorization?: string): Promise<any> {
  const targetEndpoint = `${config.targetUrl}/api/chat`;

  // Inject default model if not provided, on a copy so the caller's object is untouched.
  const bodyWithModel =
    !requestBody.model && config.defaultModel
      ? { ...requestBody, model: config.defaultModel }
      : requestBody;

  // Normalize request (fix tool_calls format issues).
  const outgoingBody: any = normalizeRequest(bodyWithModel);

  logger.info(`Forwarding chat request to ${targetEndpoint} for model: ${outgoingBody.model}`);

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
  };

  // Use Authorization from request header, fallback to config.
  if (authorization) {
    headers['Authorization'] = authorization;
  } else if (config.apiKey) {
    headers['Authorization'] = `Bearer ${config.apiKey}`;
  }

  const options: RequestInit = {
    method: 'POST',
    headers,
    body: JSON.stringify(outgoingBody)
  };

  const response = await fetch(targetEndpoint, options);

  if (!response.ok) {
    const errorText = await response.text();
    logger.error(`Ollama upstream error ${response.status}: ${errorText}`);
    throw new Error(`Upstream returned ${response.status}: ${errorText}`);
  }

  // Handle response - Osaurus may return streaming format even with stream: false.
  const responseText = await response.text();

  // Try to parse as single JSON first (standard Ollama behavior); if that
  // fails, treat the body as multiple JSON lines and merge them.
  let responseData: any;
  try {
    responseData = JSON.parse(responseText);
  } catch {
    responseData = combineStreamedChunks(responseText, outgoingBody.model);
  }

  // Rewrite if necessary.
  return rewriteResponse(responseData);
}