diff --git a/src/proxy/response-rewriter.ts b/src/proxy/response-rewriter.ts
index d53ecbf..7837828 100755
--- a/src/proxy/response-rewriter.ts
+++ b/src/proxy/response-rewriter.ts
@@ -1,6 +1,7 @@
 import { OllamaChatResponse, ToolCall } from '../types/ollama';
 import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
+import { sanitizeContent } from '../utils/content-sanitizer';
 
 /**
  * Rewrites the Ollama response to include structured tool calls if missing
@@ -59,7 +60,15 @@ export function rewriteResponse(response: OllamaChatResponse): OllamaChatRespons
     // Usually, if we parsed tool calls, we clear the content to avoid confusion
     // But retaining it is also fine. Let's clear the XML parts or the whole content to be safe.
     response.message.tool_calls = standardToolCalls;
-    response.message.content = '';
+    // Erase tool XML then sanitize orphan tags and think blocks
+    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
+    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
+    response.message.content = sanitizeContent(cleanedContent);
+  }
+
+  // Sanitize plain text responses too
+  if ((!response.message.tool_calls || response.message.tool_calls.length === 0) && response.message.content) {
+    response.message.content = sanitizeContent(response.message.content);
   }
 
   return response;
diff --git a/src/proxy/vllm-response-rewriter.ts b/src/proxy/vllm-response-rewriter.ts
index 85dca96..e4ffe3c 100755
--- a/src/proxy/vllm-response-rewriter.ts
+++ b/src/proxy/vllm-response-rewriter.ts
@@ -1,5 +1,6 @@
 import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
+import { sanitizeContent } from '../utils/content-sanitizer';
 
 /**
  * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
@@ -47,7 +48,18 @@ export function rewriteVllmResponse(response: any): any {
     });
 
     message.tool_calls = standardToolCalls;
-    message.content = '';
+
+    // Erase function/tool_call XML blocks, then sanitize orphan tags and think blocks
+    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
+    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
+    message.content = sanitizeContent(cleanedContent);
+  }
+
+  // Even if no tool call was found, sanitize the plain text content too
+  if (!message.tool_calls || message.tool_calls.length === 0) {
+    if (message.content) {
+      message.content = sanitizeContent(message.content);
+    }
   }
 
   return response;
diff --git a/src/utils/content-sanitizer.ts b/src/utils/content-sanitizer.ts
new file mode 100755
index 0000000..976016d
--- /dev/null
+++ b/src/utils/content-sanitizer.ts
@@ -0,0 +1,29 @@
+/**
+ * Strips internal model artifacts from content before sending to client.
+ *
+ * Removes:
+ * - <think>...</think> blocks (internal reasoning, never visible)
+ * - Orphan closing tags like </function>, </think>, </tool>, </tool_call>
+ *   (these have no semantic value to the client)
+ */
+export function sanitizeContent(content: string): string {
+  if (!content) return content;
+
+  let cleaned = content;
+
+  // 1. Strip complete <think>...</think> blocks
+  cleaned = cleaned.replace(/<think>[\s\S]*?<\/think>/g, '');
+
+  // 2. Strip orphan closing tags that have no matching opener in this output
+  //    Only orphan tags (not ones inside legitimate code/markdown fences)
+  cleaned = cleaned.replace(/<\/function>/g, '');
+  cleaned = cleaned.replace(/<\/think>/g, '');
+  cleaned = cleaned.replace(/<\/tool>/g, '');
+  cleaned = cleaned.replace(/<\/tool_call>/g, '');
+  cleaned = cleaned.replace(/<\/total>/g, '');
+
+  // 3. Collapse excessive blank lines left after stripping
+  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
+
+  return cleaned.trim();
+}