fix: make vLLM tool-calling turns tool-first
This commit is contained in:
@@ -59,4 +59,37 @@ describe('vLLM Proxy Integration Test', () => {
|
||||
path: "/tmp/test.txt"
|
||||
});
|
||||
});
|
||||
|
||||
it('spoofs streaming responses for tool-calling turns without content chunks', async () => {
  // Both fixtures are read from the ./fixtures directory next to this test file.
  const loadFixture = (fileName: string) =>
    JSON.parse(fs.readFileSync(path.join(__dirname, 'fixtures', fileName), 'utf8'));

  // The client explicitly asks for a streamed completion...
  const clientRequest = loadFixture('vllm-like-request.json');
  const upstreamReply = loadFixture('vllm-xml-response.json');
  clientRequest.stream = true;

  // ...while the mocked upstream fetch only ever resolves with a single JSON body.
  (global.fetch as any).mockResolvedValue({
    ok: true,
    json: () => Promise.resolve(upstreamReply),
  });

  const reply = await server.inject({
    url: '/v1/chat/completions',
    method: 'POST',
    payload: clientRequest,
  });

  // The client-facing response must be a successful event stream.
  expect(reply.statusCode).toBe(200);
  expect(reply.headers['content-type']).toContain('text/event-stream');

  // The body forwarded upstream (second fetch argument) must have streaming turned off,
  // even though the incoming request had stream = true.
  const [, upstreamInit] = (global.fetch as any).mock.calls[0];
  const { stream: upstreamStream } = JSON.parse(upstreamInit.body);
  expect(upstreamStream).toBe(false);

  // The emitted stream carries the assistant role and the tool calls, finishes with
  // the tool_calls reason, contains no "content" key at all, and is terminated by [DONE].
  const sse = reply.payload;
  for (const fragment of ['"role":"assistant"', '"tool_calls"', '"finish_reason":"tool_calls"']) {
    expect(sse).toContain(fragment);
  }
  expect(sse).not.toContain('"content"');
  expect(sse).toContain('data: [DONE]');
});
|
||||
});
|
||||
|
||||
@@ -28,12 +28,13 @@ describe('vLLM Response Rewriter', () => {
|
||||
expect(argsObject).toEqual({ path: '/tmp/test.txt' });
|
||||
});
|
||||
|
||||
it('does not touch response that already has tool_calls', () => {
|
||||
it('normalizes response that already has tool_calls into a tool-first shape', () => {
|
||||
const inputResponse = {
|
||||
choices: [{
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: "Here are the calls",
|
||||
thinking: "<think>internal</think>",
|
||||
tool_calls: [
|
||||
{
|
||||
id: "123",
|
||||
@@ -47,7 +48,8 @@ describe('vLLM Response Rewriter', () => {
|
||||
|
||||
const result = rewriteVllmResponse(inputResponse);
|
||||
|
||||
expect(result.choices[0].message.content).toBe("Here are the calls");
|
||||
expect(result.choices[0].message.content).toBe("");
|
||||
expect(result.choices[0].message.thinking).toBe("");
|
||||
expect(result.choices[0].message.tool_calls).toHaveLength(1);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user