diff --git a/src/proxy/vllm-forward.ts b/src/proxy/vllm-forward.ts index d77fa66..a093cbd 100755 --- a/src/proxy/vllm-forward.ts +++ b/src/proxy/vllm-forward.ts @@ -115,8 +115,11 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori const message = rewrittenData.choices?.[0]?.message || {}; - // 2. Content chunk MUST come strictly before tool calls - if (message.content) { + const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0; + + // 2. Content chunk MUST come strictly before tool calls, but suppress it when + // the response is a tool-calling turn to keep OpenClaw in a tool-first state. + if (!hasToolCalls && message.content) { // Fragment the content to simulate real token streaming and prevent UI double-rendering bugs const chunkSize = 16; for (let i = 0; i < message.content.length; i += chunkSize) { @@ -125,7 +128,7 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori } // 3. Tool calls chunk - if (message.tool_calls && message.tool_calls.length > 0) { + if (hasToolCalls) { const streamToolCalls = message.tool_calls.map((tc: any, idx: number) => ({ index: idx, id: tc.id, @@ -139,7 +142,9 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori } // 4. Finish reason chunk - const finalFinishReason = rewrittenData.choices?.[0]?.finish_reason || (message.tool_calls?.length > 0 ? 'tool_calls' : 'stop'); + const finalFinishReason = hasToolCalls + ? 'tool_calls' + : (rewrittenData.choices?.[0]?.finish_reason || 'stop'); pushChunk({}, finalFinishReason); // 5. 
Done diff --git a/src/proxy/vllm-response-rewriter.ts b/src/proxy/vllm-response-rewriter.ts index a111aa0..32599b3 100755 --- a/src/proxy/vllm-response-rewriter.ts +++ b/src/proxy/vllm-response-rewriter.ts @@ -22,6 +22,18 @@ function buildVllmToolCalls(parsedCalls: ReturnType) { }); } +function sanitizeVllmReasoning(message: any) { + if (message.reasoning_content) { + message.reasoning_content = sanitizeContent(message.reasoning_content); + } + if (message.reasoning) { + message.reasoning = sanitizeContent(message.reasoning); + } + if (message.thinking) { + message.thinking = sanitizeContent(message.thinking); + } +} + /** * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing * but present in XML tags within the content. @@ -34,8 +46,10 @@ export function rewriteVllmResponse(response: any): any { const message = response.choices[0].message; if (!message) return response; - // If already has tool_calls, do nothing + // If already has tool_calls, normalize into a tool-first shape. 
if (message.tool_calls && message.tool_calls.length > 0) { + message.content = ''; + sanitizeVllmReasoning(message); return response; } @@ -53,26 +67,21 @@ export function rewriteVllmResponse(response: any): any { message.content = ''; if (message.reasoning_content) { - message.reasoning_content = sanitizeContent( - message.reasoning_content - .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') - .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') - ); + message.reasoning_content = message.reasoning_content + .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); } if (message.reasoning) { - message.reasoning = sanitizeContent( - message.reasoning - .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') - .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') - ); + message.reasoning = message.reasoning + .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); } if (message.thinking) { - message.thinking = sanitizeContent( - message.thinking - .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') - .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') - ); + message.thinking = message.thinking + .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); } + sanitizeVllmReasoning(message); return response; } @@ -80,15 +89,7 @@ export function rewriteVllmResponse(response: any): any { if (message.content) { message.content = sanitizeContent(message.content); } - if (message.reasoning_content) { - message.reasoning_content = sanitizeContent(message.reasoning_content); - } - if (message.reasoning) { - message.reasoning = sanitizeContent(message.reasoning); - } - if (message.thinking) { - message.thinking = sanitizeContent(message.thinking); - } + sanitizeVllmReasoning(message); return response; } diff --git a/test/integration.vllm.test.ts b/test/integration.vllm.test.ts index f927b92..38867e3 100755 --- a/test/integration.vllm.test.ts +++ b/test/integration.vllm.test.ts @@ -59,4 +59,37 @@ describe('vLLM Proxy Integration Test', () => { path: 
"/tmp/test.txt" }); }); + + it('spoofs streaming responses for tool-calling turns without content chunks', async () => { + const requestFixturePath = path.join(__dirname, 'fixtures', 'vllm-like-request.json'); + const responseFixturePath = path.join(__dirname, 'fixtures', 'vllm-xml-response.json'); + + const requestJson = JSON.parse(fs.readFileSync(requestFixturePath, 'utf8')); + const responseJson = JSON.parse(fs.readFileSync(responseFixturePath, 'utf8')); + requestJson.stream = true; + + (global.fetch as any).mockResolvedValue({ + ok: true, + json: async () => responseJson + }); + + const response = await server.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: requestJson + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toContain('text/event-stream'); + + const fetchArgs = (global.fetch as any).mock.calls[0]; + const upstreamBody = JSON.parse(fetchArgs[1].body); + expect(upstreamBody.stream).toBe(false); + + expect(response.payload).toContain('"role":"assistant"'); + expect(response.payload).toContain('"tool_calls"'); + expect(response.payload).toContain('"finish_reason":"tool_calls"'); + expect(response.payload).not.toContain('"content"'); + expect(response.payload).toContain('data: [DONE]'); + }); }); diff --git a/test/vllm-rewriter.test.ts b/test/vllm-rewriter.test.ts index 59e3d5a..62f5a62 100755 --- a/test/vllm-rewriter.test.ts +++ b/test/vllm-rewriter.test.ts @@ -28,12 +28,13 @@ describe('vLLM Response Rewriter', () => { expect(argsObject).toEqual({ path: '/tmp/test.txt' }); }); - it('does not touch response that already has tool_calls', () => { + it('normalizes response that already has tool_calls into a tool-first shape', () => { const inputResponse = { choices: [{ message: { role: "assistant", content: "Here are the calls", + thinking: "internal", tool_calls: [ { id: "123", @@ -47,7 +48,8 @@ describe('vLLM Response Rewriter', () => { const result = rewriteVllmResponse(inputResponse); - 
expect(result.choices[0].message.content).toBe("Here are the calls"); + expect(result.choices[0].message.content).toBe(""); + expect(result.choices[0].message.thinking).toBe(""); expect(result.choices[0].message.tool_calls).toHaveLength(1); });