From 7a718d89835452dbca5f123d05e34ae427dead00 Mon Sep 17 00:00:00 2001 From: lingyuzeng Date: Sun, 22 Mar 2026 19:56:45 +0800 Subject: [PATCH] fix: rewrite vllm tool calls from reasoning content --- src/proxy/vllm-response-rewriter.ts | 102 +++++++++++++++++---------- test/fixtures/vllm-xml-response.json | 3 +- test/vllm-rewriter.test.ts | 25 +++++++ 3 files changed, 92 insertions(+), 38 deletions(-) diff --git a/src/proxy/vllm-response-rewriter.ts b/src/proxy/vllm-response-rewriter.ts index e4ffe3c..a111aa0 100755 --- a/src/proxy/vllm-response-rewriter.ts +++ b/src/proxy/vllm-response-rewriter.ts @@ -2,6 +2,26 @@ import { parseXmlToolCalls } from '../parsers'; import { logger } from '../utils/logger'; import { sanitizeContent } from '../utils/content-sanitizer'; +function buildVllmToolCalls(parsedCalls: ReturnType<typeof parseXmlToolCalls>) { + return parsedCalls.map((call, index) => { + let argumentsString = '{}'; + try { + argumentsString = JSON.stringify(call.args); + } catch (e) { + logger.error('Failed to stringify arguments for tool call', call.args); + } + + return { + id: `call_${Date.now()}_${index}`, + type: 'function', + function: { + name: call.name, + arguments: argumentsString, + } + }; + }); +} + /** * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing * but present in XML tags within the content. 
@@ -19,47 +39,55 @@ export function rewriteVllmResponse(response: any): any { return response; } - const content = message.content; - if (!content) { + const content = message.content || ''; + const reasoningContent = message.reasoning_content || message.reasoning || message.thinking || ''; + const parsedCalls = [ + ...parseXmlToolCalls(content), + ...parseXmlToolCalls(reasoningContent), + ]; + + if (parsedCalls.length > 0) { + logger.info(`Rewriting vLLM response: found ${parsedCalls.length} tool calls in content/reasoning`); + + message.tool_calls = buildVllmToolCalls(parsedCalls); + message.content = ''; + + if (message.reasoning_content) { + message.reasoning_content = sanitizeContent( + message.reasoning_content + .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') + ); + } + if (message.reasoning) { + message.reasoning = sanitizeContent( + message.reasoning + .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') + ); + } + if (message.thinking) { + message.thinking = sanitizeContent( + message.thinking + .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') + ); + } return response; } - const parsedCalls = parseXmlToolCalls(content); - - if (parsedCalls.length > 0) { - logger.info(`Rewriting vLLM response: found ${parsedCalls.length} tool calls in XML content`); - - const standardToolCalls = parsedCalls.map((call, index) => { - let argumentsString = '{}'; - try { - argumentsString = JSON.stringify(call.args); - } catch (e) { - logger.error('Failed to stringify arguments for tool call', call.args); - } - - return { - id: `call_${Date.now()}_${index}`, - type: 'function', - function: { - name: call.name, - arguments: argumentsString, - } - }; - }); - - message.tool_calls = standardToolCalls; - - // Erase function/tool_call XML blocks, then sanitize orphan tags and think blocks - let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, ''); - 
cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); - message.content = sanitizeContent(cleanedContent); - } - // Even if no tool call was found, sanitize the plain text content too - if (!message.tool_calls || message.tool_calls.length === 0) { - if (message.content) { - message.content = sanitizeContent(message.content); - } + if (message.content) { + message.content = sanitizeContent(message.content); + } + if (message.reasoning_content) { + message.reasoning_content = sanitizeContent(message.reasoning_content); + } + if (message.reasoning) { + message.reasoning = sanitizeContent(message.reasoning); + } + if (message.thinking) { + message.thinking = sanitizeContent(message.thinking); } return response; diff --git a/test/fixtures/vllm-xml-response.json b/test/fixtures/vllm-xml-response.json index 12957d3..7dc45b5 100755 --- a/test/fixtures/vllm-xml-response.json +++ b/test/fixtures/vllm-xml-response.json @@ -8,7 +8,8 @@ "index": 0, "message": { "role": "assistant", - "content": "\n\n/tmp/test.txt\n\n" + "content": "", + "reasoning_content": "<tool_call>\n{\"name\":\"read\",\"arguments\":{\"path\":\"/tmp/test.txt\"}}\n</tool_call>" }, "finish_reason": "stop" } diff --git a/test/vllm-rewriter.test.ts b/test/vllm-rewriter.test.ts index 9d662cf..59e3d5a 100755 --- a/test/vllm-rewriter.test.ts +++ b/test/vllm-rewriter.test.ts @@ -50,4 +50,29 @@ describe('vLLM Response Rewriter', () => { expect(result.choices[0].message.content).toBe("Here are the calls"); expect(result.choices[0].message.tool_calls).toHaveLength(1); }); + + it('rewrites tool call found in reasoning_content into structured tool_calls', () => { + const inputResponse = { + id: "chatcmpl-123", + choices: [{ + index: 0, + message: { + role: "assistant", + content: "", + reasoning_content: "<tool_call>\n{\"name\":\"read\",\"arguments\":{\"path\":\"/tmp/test.txt\"}}\n</tool_call>" + } + }] + }; + + const result = rewriteVllmResponse(inputResponse); + + expect(result.choices[0].message.content).toBe(""); + 
expect(result.choices[0].message.tool_calls).toBeDefined(); + expect(result.choices[0].message.tool_calls).toHaveLength(1); + expect(result.choices[0].message.tool_calls[0].function.name).toBe("read"); + expect(JSON.parse(result.choices[0].message.tool_calls[0].function.arguments)).toEqual({ + path: "/tmp/test.txt" + }); + expect(result.choices[0].message.reasoning_content).toBe(""); + }); });