diff --git a/src/proxy/response-rewriter.ts b/src/proxy/response-rewriter.ts
index d53ecbf..7837828 100755
--- a/src/proxy/response-rewriter.ts
+++ b/src/proxy/response-rewriter.ts
@@ -1,6 +1,7 @@
 import { OllamaChatResponse, ToolCall } from '../types/ollama';
 import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
+import { sanitizeContent } from '../utils/content-sanitizer';
 
 /**
  * Rewrites the Ollama response to include structured tool calls if missing
@@ -59,7 +60,15 @@ export function rewriteResponse(response: OllamaChatResponse): OllamaChatRespons
     // Usually, if we parsed tool calls, we clear the content to avoid confusion
     // But retaining it is also fine. Let's clear the XML parts or the whole content to be safe.
     response.message.tool_calls = standardToolCalls;
-    response.message.content = ''; 
+    // Erase tool XML then sanitize orphan tags and think blocks
+    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
+    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
+    response.message.content = sanitizeContent(cleanedContent);
+  }
+  
+  // Sanitize plain text responses too
+  if ((!response.message.tool_calls || response.message.tool_calls.length === 0) && response.message.content) {
+    response.message.content = sanitizeContent(response.message.content);
   }
 
   return response;
diff --git a/src/proxy/vllm-response-rewriter.ts b/src/proxy/vllm-response-rewriter.ts
index 85dca96..e4ffe3c 100755
--- a/src/proxy/vllm-response-rewriter.ts
+++ b/src/proxy/vllm-response-rewriter.ts
@@ -1,5 +1,6 @@
 import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
+import { sanitizeContent } from '../utils/content-sanitizer';
 
 /**
  * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
@@ -47,7 +48,18 @@ export function rewriteVllmResponse(response: any): any {
     });
 
     message.tool_calls = standardToolCalls;
-    message.content = ''; 
+    
+    // Erase function/tool_call XML blocks, then sanitize orphan tags and think blocks
+    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
+    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
+    message.content = sanitizeContent(cleanedContent);
+  }
+
+  // Even if no tool call was found, sanitize the plain text content too
+  if (!message.tool_calls || message.tool_calls.length === 0) {
+    if (message.content) {
+      message.content = sanitizeContent(message.content);
+    }
   }
 
   return response;
diff --git a/src/utils/content-sanitizer.ts b/src/utils/content-sanitizer.ts
new file mode 100755
index 0000000..976016d
--- /dev/null
+++ b/src/utils/content-sanitizer.ts
@@ -0,0 +1,29 @@
+/**
+ * Strips model markup artifacts from `content` and returns the trimmed result.
+ *
+ * Removes:
+ * - complete <think>...</think> blocks (non-greedy match, may span lines)
+ * - every remaining </function>, </think>, </tool>, </tool_call>, </total>
+ *   closing tag, wherever it occurs. Falsy input is returned unchanged.
+ */
+export function sanitizeContent(content: string): string {
+  if (!content) return content;
+
+  let cleaned = content;
+
+  // 1. Drop each complete <think>...</think> block; an unclosed <think> is left as-is
+  cleaned = cleaned.replace(/<think>[\s\S]*?<\/think>/g, '');
+
+  // 2. Strip every remaining occurrence of the closing tags below.
+  //    NOTE(review): no opener/fence detection - tags inside code fences are removed too
+  cleaned = cleaned.replace(/<\/function>/g, '');
+  cleaned = cleaned.replace(/<\/think>/g, '');
+  cleaned = cleaned.replace(/<\/tool>/g, '');
+  cleaned = cleaned.replace(/<\/tool_call>/g, '');
+  cleaned = cleaned.replace(/<\/total>/g, '');
+
+  // 3. Collapse runs of three or more newlines down to a single blank line
+  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
+
+  return cleaned.trim();
+}