fix: add content-sanitizer to strip think blocks and orphan closing tags from all responses

2026-03-22 19:00:53 +08:00
parent 6c097fceb2
commit 3f51c4a6b4
3 changed files with 52 additions and 2 deletions
--- a/src/proxy/response-rewriter.ts
+++ b/src/proxy/response-rewriter.ts
@@ -1,6 +1,7 @@
 import { OllamaChatResponse, ToolCall } from '../types/ollama';
 import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
 import { sanitizeContent } from '../utils/content-sanitizer';
 /**
 * Rewrites the Ollama response to include structured tool calls if missing
@@ -59,7 +60,15 @@ export function rewriteResponse(response: OllamaChatResponse): OllamaChatRespons
    // Usually, if we parsed tool calls, we clear the content to avoid confusion
    // But retaining it is also fine. Let's clear the XML parts or the whole content to be safe.
    response.message.tool_calls = standardToolCalls;
-    response.message.content = ''; 
+    // Erase tool XML then sanitize orphan tags and think blocks
    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
    response.message.content = sanitizeContent(cleanedContent);
  }
  // Sanitize plain text responses too
  if ((!response.message.tool_calls || response.message.tool_calls.length === 0) && response.message.content) {
    response.message.content = sanitizeContent(response.message.content);
  }
  return response;
--- a/src/proxy/vllm-response-rewriter.ts
+++ b/src/proxy/vllm-response-rewriter.ts
@@ -1,5 +1,6 @@
 import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
 import { sanitizeContent } from '../utils/content-sanitizer';
 /**
 * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
@@ -47,7 +48,18 @@ export function rewriteVllmResponse(response: any): any {
    });
    message.tool_calls = standardToolCalls;
-    message.content = ''; 
+    
    // Erase function/tool_call XML blocks, then sanitize orphan tags and think blocks
    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
    message.content = sanitizeContent(cleanedContent);
  }
  // Even if no tool call was found, sanitize the plain text content too
  if (!message.tool_calls || message.tool_calls.length === 0) {
    if (message.content) {
      message.content = sanitizeContent(message.content);
    }
  }
  return response;
--- a/src/utils/content-sanitizer.ts
+++ b/src/utils/content-sanitizer.ts
@@ -0,0 +1,29 @@
 /**
 * Strips internal model artifacts from content before it is sent to the client.
 *
 * Processing order:
 * 1. Complete <think>...</think> blocks are removed (non-greedy, may span lines).
 * 2. Orphan closing tags are removed: </function>, </think>, </tool>,
 *    </tool_call>, </total>. Text inside a closed fenced code block
 *    (``` ... ``` or ~~~ ... ~~~) is left untouched in this step so that
 *    code samples quoting these tags are not corrupted.
 * 3. Runs of three or more newlines are collapsed to exactly two, and the
 *    result is trimmed of leading/trailing whitespace.
 *
 * @param content - Raw message content; a falsy value (e.g. '') is returned as-is.
 * @returns The sanitized content.
 */
 export function sanitizeContent(content: string): string {
  if (!content) return content;

  // 1. Strip complete <think>...</think> blocks anywhere in the content.
  let cleaned = content.replace(/<think>[\s\S]*?<\/think>/g, '');

  // 2. Strip orphan closing tags, but only outside fenced code blocks.
  //    The tags are applied one after another, in this fixed order.
  const orphanClosers = [
    /<\/function>/g,
    /<\/think>/g,
    /<\/tool>/g,
    /<\/tool_call>/g,
    /<\/total>/g,
  ];
  // Splitting on a regex with a single capturing group keeps the fences in
  // the result: even indices are plain text, odd indices are fenced blocks.
  const fencedBlock = /(```[\s\S]*?```|~~~[\s\S]*?~~~)/;
  cleaned = cleaned
    .split(fencedBlock)
    .map((segment, index) => {
      if (index % 2 === 1) return segment; // fenced block: keep verbatim
      return orphanClosers.reduce((text, closer) => text.replace(closer, ''), segment);
    })
    .join('');

  // 3. Collapse excessive blank lines left after stripping.
  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
  return cleaned.trim();
 }