From 7a718d89835452dbca5f123d05e34ae427dead00 Mon Sep 17 00:00:00 2001
From: lingyuzeng <lingyuzeng@example.com>
Date: Sun, 22 Mar 2026 19:56:45 +0800
Subject: [PATCH] fix: rewrite vllm tool calls from reasoning content

---
 src/proxy/vllm-response-rewriter.ts  | 102 +++++++++++++++++----------
 test/fixtures/vllm-xml-response.json |   3 +-
 test/vllm-rewriter.test.ts           |  25 +++++++
 3 files changed, 92 insertions(+), 38 deletions(-)
diff --git a/src/proxy/vllm-response-rewriter.ts b/src/proxy/vllm-response-rewriter.ts
index e4ffe3c..a111aa0 100755
--- a/src/proxy/vllm-response-rewriter.ts
+++ b/src/proxy/vllm-response-rewriter.ts
@@ -2,6 +2,26 @@ import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
 import { sanitizeContent } from '../utils/content-sanitizer';
 
+/** Converts parsed XML tool calls into OpenAI-style `tool_calls` entries with JSON-string arguments. */
+function buildVllmToolCalls(parsedCalls: ReturnType<typeof parseXmlToolCalls>) {
+  const createdAt = Date.now(); // read once so every id in this batch shares one timestamp
+  return parsedCalls.map((call, index) => {
+    let argumentsString = '{}';
+    try {
+      // JSON.stringify returns undefined (without throwing) for undefined args; keep '{}' then.
+      argumentsString = JSON.stringify(call.args) || argumentsString;
+    } catch (e) {
+      // Non-serializable args (e.g. circular references): log and fall back to '{}'.
+      logger.error('Failed to stringify arguments for tool call', call.args);
+    }
+    return {
+      id: `call_${createdAt}_${index}`,
+      type: 'function',
+      function: { name: call.name, arguments: argumentsString },
+    };
+  });
+}
+
 /**
  * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
  * but present in XML tags within the content.
@@ -19,47 +39,55 @@ export function rewriteVllmResponse(response: any): any {
     return response;
   }
 
-  const content = message.content;
-  if (!content) {
+  const content = message.content || '';
+  // Only the first non-empty reasoning field is scanned for tool calls.
+  const reasoningContent = message.reasoning_content || message.reasoning || message.thinking || '';
+  const parsedCalls = [
+    ...parseXmlToolCalls(content),
+    ...parseXmlToolCalls(reasoningContent),
+  ];
+
+  if (parsedCalls.length > 0) {
+    logger.info(`Rewriting vLLM response: found ${parsedCalls.length} tool calls in content/reasoning`);
+
+    // Erase function/tool_call XML blocks, then let sanitizeContent clean up what is left.
+    const stripToolCallXml = (text: string) =>
+      sanitizeContent(
+        text
+          .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
+          .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
+      );
+
+    message.tool_calls = buildVllmToolCalls(parsedCalls);
+
+    // Keep any prose the model wrote around the tool-call markup; only the XML blocks
+    // are removed. Blanking the content here would silently drop that text.
+    // An empty or missing content still ends up as '' because `content` defaults to ''
+    // (assuming sanitizeContent('') returns '' — TODO confirm).
+    message.content = stripToolCallXml(content);
+
+    // The same cleanup applies to every reasoning field that is present, so the raw
+    // tool-call markup is not echoed back next to the structured tool_calls.
+    for (const field of ['reasoning_content', 'reasoning', 'thinking']) {
+      if (message[field]) {
+        message[field] = stripToolCallXml(message[field]);
+      }
+    }
     return response;
   }
 
-  const parsedCalls = parseXmlToolCalls(content);
-
-  if (parsedCalls.length > 0) {
-    logger.info(`Rewriting vLLM response: found ${parsedCalls.length} tool calls in XML content`);
-    
-    const standardToolCalls = parsedCalls.map((call, index) => {
-      let argumentsString = '{}';
-      try {
-        argumentsString = JSON.stringify(call.args);
-      } catch (e) {
-        logger.error('Failed to stringify arguments for tool call', call.args);
-      }
-
-      return {
-        id: `call_${Date.now()}_${index}`,
-        type: 'function',
-        function: {
-          name: call.name,
-          arguments: argumentsString,
-        }
-      };
-    });
-
-    message.tool_calls = standardToolCalls;
-    
-    // Erase function/tool_call XML blocks, then sanitize orphan tags and think blocks
-    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
-    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
-    message.content = sanitizeContent(cleanedContent);
-  }
-
   // Even if no tool call was found, sanitize the plain text content too
-  if (!message.tool_calls || message.tool_calls.length === 0) {
-    if (message.content) {
-      message.content = sanitizeContent(message.content);
-    }
+  if (message.content) {
+    message.content = sanitizeContent(message.content);
+  }
+  if (message.reasoning_content) {
+    message.reasoning_content = sanitizeContent(message.reasoning_content);
+  }
+  if (message.reasoning) {
+    message.reasoning = sanitizeContent(message.reasoning);
+  }
+  if (message.thinking) {
+    message.thinking = sanitizeContent(message.thinking);
   }
 
   return response;
diff --git a/test/fixtures/vllm-xml-response.json b/test/fixtures/vllm-xml-response.json
index 12957d3..7dc45b5 100755
--- a/test/fixtures/vllm-xml-response.json
+++ b/test/fixtures/vllm-xml-response.json
@@ -8,7 +8,8 @@
       "index": 0,
       "message": {
         "role": "assistant",
-        "content": "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>"
+        "content": "",
+        "reasoning_content": "<tool_call>\n{\"name\":\"read\",\"arguments\":{\"path\":\"/tmp/test.txt\"}}\n</tool_call>"
       },
       "finish_reason": "stop"
     }
diff --git a/test/vllm-rewriter.test.ts b/test/vllm-rewriter.test.ts
index 9d662cf..59e3d5a 100755
--- a/test/vllm-rewriter.test.ts
+++ b/test/vllm-rewriter.test.ts
@@ -50,4 +50,29 @@ describe('vLLM Response Rewriter', () => {
     expect(result.choices[0].message.content).toBe("Here are the calls");
     expect(result.choices[0].message.tool_calls).toHaveLength(1);
   });
+
+  it('rewrites tool call found in reasoning_content into structured tool_calls', () => {
+    // The tool call is present only in reasoning_content; the visible content is empty.
+    const toolCallXml = "<tool_call>\n{\"name\":\"read\",\"arguments\":{\"path\":\"/tmp/test.txt\"}}\n</tool_call>";
+    const inputResponse = {
+      id: "chatcmpl-123",
+      choices: [{
+        index: 0,
+        message: { role: "assistant", content: "", reasoning_content: toolCallXml }
+      }]
+    };
+
+    const { message } = rewriteVllmResponse(inputResponse).choices[0];
+
+    expect(message.content).toBe("");
+    expect(message.tool_calls).toBeDefined();
+    expect(message.tool_calls).toHaveLength(1);
+
+    // The single structured call must carry the name and arguments from the XML payload.
+    const [readCall] = message.tool_calls;
+    expect(readCall.function.name).toBe("read");
+    expect(JSON.parse(readCall.function.arguments)).toEqual({ path: "/tmp/test.txt" });
+    // Once extracted, the markup must no longer appear in the reasoning text.
+    expect(message.reasoning_content).toBe("");
+  });
 });