feat: implement Stream Spoofing interceptor and robust format preservation patches
This commit is contained in:
27
test/fixtures/vllm-like-request.json
vendored
Executable file
27
test/fixtures/vllm-like-request.json
vendored
Executable file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"model": "Qwen3.5-27B",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Please tell me what's in /tmp/test.txt"
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read",
|
||||
"description": "Read a file",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
21
test/fixtures/vllm-xml-response.json
vendored
Executable file
21
test/fixtures/vllm-xml-response.json
vendored
Executable file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"id": "chatcmpl-123",
|
||||
"object": "chat.completion",
|
||||
"created": 1715012345,
|
||||
"model": "Qwen3.5-27B",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 56,
|
||||
"completion_tokens": 31,
|
||||
"total_tokens": 87
|
||||
}
|
||||
}
|
||||
@@ -3,11 +3,13 @@ import { buildServer } from '../src/server';
|
||||
import { FastifyInstance } from 'fastify';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { config } from '../src/config';
|
||||
|
||||
describe('Proxy Integration Test', () => {
|
||||
let server: FastifyInstance;
|
||||
|
||||
beforeEach(() => {
|
||||
config.proxyMode = 'ollama';
|
||||
server = buildServer();
|
||||
// In vitest we can mock the global fetch
|
||||
global.fetch = vi.fn();
|
||||
@@ -29,6 +31,7 @@ describe('Proxy Integration Test', () => {
|
||||
// Mock fetch to return the ollama-xml-response.json
|
||||
(global.fetch as any).mockResolvedValue({
|
||||
ok: true,
|
||||
text: async () => JSON.stringify(responseJson),
|
||||
json: async () => responseJson
|
||||
});
|
||||
|
||||
@@ -47,14 +50,14 @@ describe('Proxy Integration Test', () => {
|
||||
expect(fetchArgs[0]).toContain('/api/chat');
|
||||
|
||||
const upstreamBody = JSON.parse(fetchArgs[1].body);
|
||||
expect(upstreamBody.model).toBe('Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit');
|
||||
expect(upstreamBody.model).toBe('hotwa/qwen35-9b-agent:latest');
|
||||
|
||||
// Verify response was rewritten
|
||||
expect(body.message.content).toBe("");
|
||||
expect(body.message.tool_calls).toBeDefined();
|
||||
expect(body.message.tool_calls).toHaveLength(1);
|
||||
expect(body.message.tool_calls[0].function.name).toBe('read');
|
||||
expect(JSON.parse(body.message.tool_calls[0].function.arguments)).toEqual({
|
||||
expect(body.message.tool_calls[0].function.arguments).toEqual({
|
||||
path: "/tmp/test.txt"
|
||||
});
|
||||
});
|
||||
|
||||
62
test/integration.vllm.test.ts
Executable file
62
test/integration.vllm.test.ts
Executable file
@@ -0,0 +1,62 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { buildServer } from '../src/server';
|
||||
import { FastifyInstance } from 'fastify';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { config } from '../src/config';
|
||||
|
||||
/**
 * Integration test for the proxy running in vLLM mode.
 *
 * A request fixture is injected at `/v1/chat/completions`, the upstream call is
 * answered by a mocked `fetch` that returns an XML-style tool call inside
 * `choices[0].message.content`, and the test checks that the proxy's reply
 * carries that call as a structured `tool_calls` entry instead.
 */
describe('vLLM Proxy Integration Test', () => {
  let server: FastifyInstance;
  let fetchMock: ReturnType<typeof vi.fn>;
  let originalProxyMode: typeof config.proxyMode;

  beforeEach(() => {
    // `config` is the shared, already-loaded instance: remember the current
    // mode so afterEach can put it back instead of leaking 'vllm' to other tests.
    originalProxyMode = config.proxyMode;
    config.proxyMode = 'vllm';

    server = buildServer();

    // stubGlobal (rather than assigning global.fetch directly) lets
    // vi.unstubAllGlobals() restore the real fetch afterwards.
    fetchMock = vi.fn();
    vi.stubGlobal('fetch', fetchMock);
  });

  afterEach(async () => {
    try {
      await server.close();
    } finally {
      // Always undo global state, even if closing the server fails.
      vi.unstubAllGlobals();
      vi.restoreAllMocks();
      config.proxyMode = originalProxyMode;
    }
  });

  it('proxies request and rewrites XML response to tool_calls for vLLM', async () => {
    const fixturesDir = path.join(__dirname, 'fixtures');
    const requestJson = JSON.parse(
      fs.readFileSync(path.join(fixturesDir, 'vllm-like-request.json'), 'utf8')
    );
    const responseJson = JSON.parse(
      fs.readFileSync(path.join(fixturesDir, 'vllm-xml-response.json'), 'utf8')
    );

    // Expose both body readers so the test does not depend on whether the
    // proxy consumes the upstream response via text() or json().
    fetchMock.mockResolvedValue({
      ok: true,
      text: async () => JSON.stringify(responseJson),
      json: async () => responseJson
    });

    const response = await server.inject({
      method: 'POST',
      url: '/v1/chat/completions',
      payload: requestJson
    });

    expect(response.statusCode).toBe(200);
    const body = JSON.parse(response.payload);

    // Verify the proxy forwarded exactly one request to the upstream endpoint
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const [upstreamUrl, upstreamInit] = fetchMock.mock.calls[0];
    expect(upstreamUrl).toContain('/v1/chat/completions');

    const upstreamBody = JSON.parse(upstreamInit.body);
    expect(upstreamBody.model).toBe('Qwen3.5-27B');

    // Verify the response was rewritten: XML content removed, tool call structured
    const message = body.choices[0].message;
    expect(message.content).toBe("");
    expect(message.tool_calls).toBeDefined();
    expect(message.tool_calls).toHaveLength(1);
    expect(message.tool_calls[0].function.name).toBe('read');
    // In the OpenAI-style payload the arguments arrive as a JSON string
    expect(JSON.parse(message.tool_calls[0].function.arguments)).toEqual({
      path: "/tmp/test.txt"
    });
  });
});
|
||||
@@ -23,7 +23,7 @@ describe('Response Rewriter', () => {
|
||||
expect(toolCall.type).toBe('function');
|
||||
expect(toolCall.function.name).toBe('read');
|
||||
|
||||
const argsObject = JSON.parse(toolCall.function.arguments);
|
||||
const argsObject = toolCall.function.arguments;
|
||||
expect(argsObject).toEqual({ path: '/tmp/test.txt' });
|
||||
});
|
||||
|
||||
|
||||
53
test/vllm-rewriter.test.ts
Executable file
53
test/vllm-rewriter.test.ts
Executable file
@@ -0,0 +1,53 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { rewriteVllmResponse } from '../src/proxy/vllm-response-rewriter';
|
||||
|
||||
/**
 * Unit tests for `rewriteVllmResponse`, exercised on OpenAI-style
 * `choices[].message` payloads.
 */
describe('vLLM Response Rewriter', () => {
  it('rewrites XML tool call in OpenAI choices content into structured tool_calls', () => {
    // Assistant message whose content is an XML-style `read` call
    const upstreamResponse = {
      id: "chatcmpl-123",
      choices: [
        {
          index: 0,
          message: {
            role: "assistant",
            content: "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>"
          }
        }
      ]
    };

    const rewritten = rewriteVllmResponse(upstreamResponse);
    const rewrittenMessage = rewritten.choices[0].message;

    // Content is emptied and exactly one tool call is present
    expect(rewrittenMessage.content).toBe("");
    expect(rewrittenMessage.tool_calls).toBeDefined();
    expect(rewrittenMessage.tool_calls).toHaveLength(1);

    const firstCall = rewrittenMessage.tool_calls![0];
    expect(firstCall.type).toBe('function');
    expect(firstCall.function.name).toBe('read');

    // Arguments are a JSON string here, so decode before comparing
    const decodedArgs = JSON.parse(firstCall.function.arguments);
    expect(decodedArgs).toEqual({ path: '/tmp/test.txt' });
  });

  it('does not touch response that already has tool_calls', () => {
    // Message that already carries a structured tool call next to plain content
    const upstreamResponse = {
      choices: [
        {
          message: {
            role: "assistant",
            content: "Here are the calls",
            tool_calls: [
              {
                id: "123",
                type: "function",
                function: { name: "read", arguments: "{}" }
              }
            ]
          }
        }
      ]
    };

    const rewritten = rewriteVllmResponse(upstreamResponse);
    const rewrittenMessage = rewritten.choices[0].message;

    expect(rewrittenMessage.content).toBe("Here are the calls");
    expect(rewrittenMessage.tool_calls).toHaveLength(1);
  });
});
|
||||
Reference in New Issue
Block a user