fix: rewrite vllm tool calls from reasoning content

2026-03-22 19:56:45 +08:00
parent 8eb7b25ec9
commit 7a718d8983
3 changed files with 92 additions and 38 deletions
--- a/src/proxy/vllm-response-rewriter.ts
+++ b/src/proxy/vllm-response-rewriter.ts
@@ -2,6 +2,26 @@ import { parseXmlToolCalls } from '../parsers';
 import { logger } from '../utils/logger';
 import { sanitizeContent } from '../utils/content-sanitizer';
 /**
 * Converts parsed XML tool calls into OpenAI-style structured `tool_calls` entries.
 *
 * Each entry gets an id of the form `call_<timestamp>_<index>`, the type
 * `'function'`, and the call's arguments serialized as a JSON string.
 * If the arguments cannot be serialized, `'{}'` is used instead.
 *
 * @param parsedCalls - Tool calls as returned by `parseXmlToolCalls`.
 * @returns One structured tool-call object per parsed call, in the same order.
 */
 function buildVllmToolCalls(parsedCalls: ReturnType<typeof parseXmlToolCalls>) {
  return parsedCalls.map((call, index) => {
    let argumentsString = '{}';
    try {
      // JSON.stringify returns undefined (without throwing) for inputs such as
      // `undefined`, functions or symbols; keep the '{}' default in that case
      // so `function.arguments` is always a JSON string.
      const serialized = JSON.stringify(call.args);
      if (typeof serialized === 'string') {
        argumentsString = serialized;
      }
    } catch (e) {
      // Thrown for e.g. circular structures or BigInt values; fall back to '{}'.
      logger.error('Failed to stringify arguments for tool call', call.args);
    }
    return {
      id: `call_${Date.now()}_${index}`,
      type: 'function',
      function: {
        name: call.name,
        arguments: argumentsString,
      }
    };
  });
 }
 /**
 * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
 * but present as XML tags within the content or reasoning fields.
@@ -19,47 +39,55 @@ export function rewriteVllmResponse(response: any): any {
    return response;
  }
-  const content = message.content;
+  const content = message.content || '';
-  if (!content) {
+  const reasoningContent = message.reasoning_content || message.reasoning || message.thinking || '';
  const parsedCalls = [
    ...parseXmlToolCalls(content),
    ...parseXmlToolCalls(reasoningContent),
  ];
  if (parsedCalls.length > 0) {
    logger.info(`Rewriting vLLM response: found ${parsedCalls.length} tool calls in content/reasoning`);
    message.tool_calls = buildVllmToolCalls(parsedCalls);
    message.content = '';
    if (message.reasoning_content) {
      message.reasoning_content = sanitizeContent(
        message.reasoning_content
          .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
          .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
      );
    }
    if (message.reasoning) {
      message.reasoning = sanitizeContent(
        message.reasoning
          .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
          .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
      );
    }
    if (message.thinking) {
      message.thinking = sanitizeContent(
        message.thinking
          .replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
          .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
      );
    }
    return response;
  }
  const parsedCalls = parseXmlToolCalls(content);
  if (parsedCalls.length > 0) {
    logger.info(`Rewriting vLLM response: found ${parsedCalls.length} tool calls in XML content`);
    const standardToolCalls = parsedCalls.map((call, index) => {
      let argumentsString = '{}';
      try {
        argumentsString = JSON.stringify(call.args);
      } catch (e) {
        logger.error('Failed to stringify arguments for tool call', call.args);
      }
      return {
        id: `call_${Date.now()}_${index}`,
        type: 'function',
        function: {
          name: call.name,
          arguments: argumentsString,
        }
      };
    });
    message.tool_calls = standardToolCalls;
    // Erase function/tool_call XML blocks, then sanitize orphan tags and think blocks
    let cleanedContent = content.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '');
    cleanedContent = cleanedContent.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
    message.content = sanitizeContent(cleanedContent);
  }
  // Even if no tool call was found, sanitize the plain text content too
-  if (!message.tool_calls || message.tool_calls.length === 0) {
+  if (message.content) {
-    if (message.content) {
+    message.content = sanitizeContent(message.content);
-      message.content = sanitizeContent(message.content);
+  }
-    }
+  if (message.reasoning_content) {
    message.reasoning_content = sanitizeContent(message.reasoning_content);
  }
  if (message.reasoning) {
    message.reasoning = sanitizeContent(message.reasoning);
  }
  if (message.thinking) {
    message.thinking = sanitizeContent(message.thinking);
  }
  return response;
--- a/test/fixtures/vllm-xml-response.json
+++ b/test/fixtures/vllm-xml-response.json
@@ -8,7 +8,8 @@
      "index": 0,
      "message": {
        "role": "assistant",
-        "content": "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>"
+        "content": "",
        "reasoning_content": "<tool_call>\n{\"name\":\"read\",\"arguments\":{\"path\":\"/tmp/test.txt\"}}\n</tool_call>"
      },
      "finish_reason": "stop"
    }
--- a/test/vllm-rewriter.test.ts
+++ b/test/vllm-rewriter.test.ts
@@ -50,4 +50,29 @@ describe('vLLM Response Rewriter', () => {
    expect(result.choices[0].message.content).toBe("Here are the calls");
    expect(result.choices[0].message.tool_calls).toHaveLength(1);
  });
  it('rewrites tool call found in reasoning_content into structured tool_calls', () => {
    // The tool call is carried only in reasoning_content; content is empty.
    const reasoningOnlyMessage = {
      role: "assistant",
      content: "",
      reasoning_content: "<tool_call>\n{\"name\":\"read\",\"arguments\":{\"path\":\"/tmp/test.txt\"}}\n</tool_call>"
    };
    const rewritten = rewriteVllmResponse({
      id: "chatcmpl-123",
      choices: [{
        index: 0,
        message: reasoningOnlyMessage
      }]
    });
    const message = rewritten.choices[0].message;

    // Content stays empty and exactly one structured call is produced.
    expect(message.content).toBe("");
    expect(message.tool_calls).toBeDefined();
    expect(message.tool_calls).toHaveLength(1);

    // The structured call carries the name and arguments from the XML payload.
    const firstCall = message.tool_calls[0];
    expect(firstCall.function.name).toBe("read");
    expect(JSON.parse(firstCall.function.arguments)).toEqual({
      path: "/tmp/test.txt"
    });

    // The XML block is stripped from the reasoning text.
    expect(message.reasoning_content).toBe("");
  });
 });