Prioritize Authorization header from incoming request over environment variable, allowing per-request API key routing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
111 lines
3.6 KiB
TypeScript
Executable File
import { config } from '../config';
import { rewriteResponse } from './response-rewriter';
import { normalizeRequest } from './request-normalizer';
import { logger } from '../utils/logger';

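/**
 * Forward a chat request to the configured Ollama-compatible upstream
 * (e.g. Osaurus), preferring the caller's Authorization header over the
 * configured API key so each request can carry its own credentials.
 */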
export async function forwardChatRequest(requestBody: any, authorization?: string): Promise<any> {
  const targetHost = config.targetUrl;
  const targetEndpoint = `${targetHost}/api/chat`;

  // Inject the default model if none was provided
  if (!requestBody.model && config.defaultModel) {
    requestBody.model = config.defaultModel;
  }

  // Normalize the request (fixes tool_calls format issues)
  requestBody = normalizeRequest(requestBody);

  logger.info(`Forwarding chat request to ${targetEndpoint} for model: ${requestBody.model}`);

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
  };

  // Prefer the Authorization header from the incoming request, falling back to the configured API key
  if (authorization) {
    headers['Authorization'] = authorization;
  } else if (config.apiKey) {
    headers['Authorization'] = `Bearer ${config.apiKey}`;
  }
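
  // For example (header value assumed for illustration): a request sent with
  // `Authorization: Bearer sk-tenant-a` is forwarded with that key as-is,
  // while requests without a header fall back to the server-wide config.apiKey.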

  const options: RequestInit = {
    method: 'POST',
    headers,
    body: JSON.stringify(requestBody)
  };

  const response = await fetch(targetEndpoint, options);

  if (!response.ok) {
    const errorText = await response.text();
    logger.error(`Ollama upstream error ${response.status}: ${errorText}`);
    throw new Error(`Upstream returned ${response.status}: ${errorText}`);
  }

  // Handle the response: Osaurus may return the streaming format even when stream: false is set
  const responseText = await response.text();

  // Try to parse as a single JSON object first (standard Ollama behavior)
  let responseData: any;
  try {
    responseData = JSON.parse(responseText);
  } catch (e) {
    // If that fails, the body is likely in streaming format (multiple JSON lines);
    // combine all the chunks into a single response.
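    // Illustrative chunk shape (field values assumed here, following Ollama's
    // newline-delimited JSON streaming format): content arrives incrementally
    // and the final chunk carries done: true plus timing metadata.
    //   {"model":"m","message":{"role":"assistant","content":"Hel"},"done":false}
    //   {"model":"m","message":{"role":"assistant","content":"lo"},"done":false}
    //   {"model":"m","message":{"content":""},"done":true,"done_reason":"stop","eval_count":2}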
    const lines = responseText.trim().split('\n').filter(line => line.trim());
    if (lines.length === 0) {
      throw new Error('Empty response from upstream');
    }

    // Combine the streaming chunks
    let combinedContent = '';
    let lastChunk: any = null;
    const message: any = { role: 'assistant', content: '' };

    for (const line of lines) {
      try {
        const chunk = JSON.parse(line);
        lastChunk = chunk;
        if (chunk.message?.content) {
          combinedContent += chunk.message.content;
        }
        // Preserve tool_calls from any chunk
        if (chunk.message?.tool_calls) {
          message.tool_calls = chunk.message.tool_calls;
        }
        // Preserve thinking if present
        if (chunk.message?.thinking) {
          message.thinking = chunk.message.thinking;
        }
      } catch (parseError) {
        logger.warn(`Failed to parse chunk: ${line}`);
      }
    }

    message.content = combinedContent;

    // Build the combined response using the last chunk's metadata
    responseData = {
      model: lastChunk?.model || requestBody.model,
      created_at: lastChunk?.created_at || new Date().toISOString(),
      message,
      done: true,
      done_reason: lastChunk?.done_reason || 'stop',
      total_duration: lastChunk?.total_duration,
      load_duration: lastChunk?.load_duration,
      prompt_eval_count: lastChunk?.prompt_eval_count,
      prompt_eval_duration: lastChunk?.prompt_eval_duration,
      eval_count: lastChunk?.eval_count,
      eval_duration: lastChunk?.eval_duration,
    };

    logger.info(`Combined ${lines.length} streaming chunks into a single response`);
  }

  // Rewrite the response if necessary
  const rewrittenData = rewriteResponse(responseData);

  return rewrittenData;
}
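
// Usage sketch (illustrative: the Express-style handler below is an assumption,
// not part of this module). Forwarding each client's own Authorization header
// enables per-request API key routing; when no header is present, the proxy
// falls back to `Bearer ${config.apiKey}`.
//
//   app.post('/api/chat', async (req, res) => {
//     const data = await forwardChatRequest(req.body, req.headers.authorization);
//     res.json(data);
//   });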