fix: make vllm tool-calling turns tool-first
This commit is contained in:
@@ -115,8 +115,11 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori
|
|||||||
|
|
||||||
const message = rewrittenData.choices?.[0]?.message || {};
|
const message = rewrittenData.choices?.[0]?.message || {};
|
||||||
|
|
||||||
// 2. Content chunk MUST come strictly before tool calls
|
const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
|
||||||
if (message.content) {
|
|
||||||
|
// 2. Content chunk MUST come strictly before tool calls, but suppress it when
|
||||||
|
// the response is a tool-calling turn to keep OpenClaw in a tool-first state.
|
||||||
|
if (!hasToolCalls && message.content) {
|
||||||
// Fragment the content to simulate real token streaming and prevent UI double-rendering bugs
|
// Fragment the content to simulate real token streaming and prevent UI double-rendering bugs
|
||||||
const chunkSize = 16;
|
const chunkSize = 16;
|
||||||
for (let i = 0; i < message.content.length; i += chunkSize) {
|
for (let i = 0; i < message.content.length; i += chunkSize) {
|
||||||
@@ -125,7 +128,7 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 3. Tool calls chunk
|
// 3. Tool calls chunk
|
||||||
if (message.tool_calls && message.tool_calls.length > 0) {
|
if (hasToolCalls) {
|
||||||
const streamToolCalls = message.tool_calls.map((tc: any, idx: number) => ({
|
const streamToolCalls = message.tool_calls.map((tc: any, idx: number) => ({
|
||||||
index: idx,
|
index: idx,
|
||||||
id: tc.id,
|
id: tc.id,
|
||||||
@@ -139,7 +142,9 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 4. Finish reason chunk
|
// 4. Finish reason chunk
|
||||||
const finalFinishReason = rewrittenData.choices?.[0]?.finish_reason || (message.tool_calls?.length > 0 ? 'tool_calls' : 'stop');
|
const finalFinishReason = hasToolCalls
|
||||||
|
? 'tool_calls'
|
||||||
|
: (rewrittenData.choices?.[0]?.finish_reason || 'stop');
|
||||||
pushChunk({}, finalFinishReason);
|
pushChunk({}, finalFinishReason);
|
||||||
|
|
||||||
// 5. Done
|
// 5. Done
|
||||||
|
|||||||
@@ -22,6 +22,18 @@ function buildVllmToolCalls(parsedCalls: ReturnType<typeof parseXmlToolCalls>) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function sanitizeVllmReasoning(message: any) {
|
||||||
|
if (message.reasoning_content) {
|
||||||
|
message.reasoning_content = sanitizeContent(message.reasoning_content);
|
||||||
|
}
|
||||||
|
if (message.reasoning) {
|
||||||
|
message.reasoning = sanitizeContent(message.reasoning);
|
||||||
|
}
|
||||||
|
if (message.thinking) {
|
||||||
|
message.thinking = sanitizeContent(message.thinking);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
|
* Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing
|
||||||
* but present in XML tags within the content.
|
* but present in XML tags within the content.
|
||||||
@@ -34,8 +46,10 @@ export function rewriteVllmResponse(response: any): any {
|
|||||||
const message = response.choices[0].message;
|
const message = response.choices[0].message;
|
||||||
if (!message) return response;
|
if (!message) return response;
|
||||||
|
|
||||||
// If already has tool_calls, do nothing
|
// If already has tool_calls, normalize into a tool-first shape.
|
||||||
if (message.tool_calls && message.tool_calls.length > 0) {
|
if (message.tool_calls && message.tool_calls.length > 0) {
|
||||||
|
message.content = '';
|
||||||
|
sanitizeVllmReasoning(message);
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -53,26 +67,21 @@ export function rewriteVllmResponse(response: any): any {
|
|||||||
message.content = '';
|
message.content = '';
|
||||||
|
|
||||||
if (message.reasoning_content) {
|
if (message.reasoning_content) {
|
||||||
message.reasoning_content = sanitizeContent(
|
message.reasoning_content = message.reasoning_content
|
||||||
message.reasoning_content
|
.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
|
||||||
.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
|
.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
|
||||||
.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
if (message.reasoning) {
|
if (message.reasoning) {
|
||||||
message.reasoning = sanitizeContent(
|
message.reasoning = message.reasoning
|
||||||
message.reasoning
|
.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
|
||||||
.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
|
.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
|
||||||
.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
if (message.thinking) {
|
if (message.thinking) {
|
||||||
message.thinking = sanitizeContent(
|
message.thinking = message.thinking
|
||||||
message.thinking
|
.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
|
||||||
.replace(/<function=([^>]+)>([\s\S]*?)<\/function>/g, '')
|
.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '');
|
||||||
.replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '')
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
sanitizeVllmReasoning(message);
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,15 +89,7 @@ export function rewriteVllmResponse(response: any): any {
|
|||||||
if (message.content) {
|
if (message.content) {
|
||||||
message.content = sanitizeContent(message.content);
|
message.content = sanitizeContent(message.content);
|
||||||
}
|
}
|
||||||
if (message.reasoning_content) {
|
sanitizeVllmReasoning(message);
|
||||||
message.reasoning_content = sanitizeContent(message.reasoning_content);
|
|
||||||
}
|
|
||||||
if (message.reasoning) {
|
|
||||||
message.reasoning = sanitizeContent(message.reasoning);
|
|
||||||
}
|
|
||||||
if (message.thinking) {
|
|
||||||
message.thinking = sanitizeContent(message.thinking);
|
|
||||||
}
|
|
||||||
|
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,4 +59,37 @@ describe('vLLM Proxy Integration Test', () => {
|
|||||||
path: "/tmp/test.txt"
|
path: "/tmp/test.txt"
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('spoofs streaming responses for tool-calling turns without content chunks', async () => {
|
||||||
|
const requestFixturePath = path.join(__dirname, 'fixtures', 'vllm-like-request.json');
|
||||||
|
const responseFixturePath = path.join(__dirname, 'fixtures', 'vllm-xml-response.json');
|
||||||
|
|
||||||
|
const requestJson = JSON.parse(fs.readFileSync(requestFixturePath, 'utf8'));
|
||||||
|
const responseJson = JSON.parse(fs.readFileSync(responseFixturePath, 'utf8'));
|
||||||
|
requestJson.stream = true;
|
||||||
|
|
||||||
|
(global.fetch as any).mockResolvedValue({
|
||||||
|
ok: true,
|
||||||
|
json: async () => responseJson
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await server.inject({
|
||||||
|
method: 'POST',
|
||||||
|
url: '/v1/chat/completions',
|
||||||
|
payload: requestJson
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(response.statusCode).toBe(200);
|
||||||
|
expect(response.headers['content-type']).toContain('text/event-stream');
|
||||||
|
|
||||||
|
const fetchArgs = (global.fetch as any).mock.calls[0];
|
||||||
|
const upstreamBody = JSON.parse(fetchArgs[1].body);
|
||||||
|
expect(upstreamBody.stream).toBe(false);
|
||||||
|
|
||||||
|
expect(response.payload).toContain('"role":"assistant"');
|
||||||
|
expect(response.payload).toContain('"tool_calls"');
|
||||||
|
expect(response.payload).toContain('"finish_reason":"tool_calls"');
|
||||||
|
expect(response.payload).not.toContain('"content"');
|
||||||
|
expect(response.payload).toContain('data: [DONE]');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -28,12 +28,13 @@ describe('vLLM Response Rewriter', () => {
|
|||||||
expect(argsObject).toEqual({ path: '/tmp/test.txt' });
|
expect(argsObject).toEqual({ path: '/tmp/test.txt' });
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does not touch response that already has tool_calls', () => {
|
it('normalizes response that already has tool_calls into a tool-first shape', () => {
|
||||||
const inputResponse = {
|
const inputResponse = {
|
||||||
choices: [{
|
choices: [{
|
||||||
message: {
|
message: {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content: "Here are the calls",
|
content: "Here are the calls",
|
||||||
|
thinking: "<think>internal</think>",
|
||||||
tool_calls: [
|
tool_calls: [
|
||||||
{
|
{
|
||||||
id: "123",
|
id: "123",
|
||||||
@@ -47,7 +48,8 @@ describe('vLLM Response Rewriter', () => {
|
|||||||
|
|
||||||
const result = rewriteVllmResponse(inputResponse);
|
const result = rewriteVllmResponse(inputResponse);
|
||||||
|
|
||||||
expect(result.choices[0].message.content).toBe("Here are the calls");
|
expect(result.choices[0].message.content).toBe("");
|
||||||
|
expect(result.choices[0].message.thinking).toBe("");
|
||||||
expect(result.choices[0].message.tool_calls).toHaveLength(1);
|
expect(result.choices[0].message.tool_calls).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user