feat: implement Stream Spoofing interceptor and robust format preservation patches

This commit is contained in:
lingyuzeng
2026-03-22 17:55:47 +08:00
parent ebd73e1c69
commit ce99e8b418
12 changed files with 426 additions and 11 deletions

27
test/fixtures/vllm-like-request.json vendored Executable file
View File

@@ -0,0 +1,27 @@
{
"model": "Qwen3.5-27B",
"messages": [
{
"role": "user",
"content": "Please tell me what's in /tmp/test.txt"
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "read",
"description": "Read a file",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string"
}
}
}
}
}
]
}

21
test/fixtures/vllm-xml-response.json vendored Executable file
View File

@@ -0,0 +1,21 @@
{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1715012345,
"model": "Qwen3.5-27B",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 56,
"completion_tokens": 31,
"total_tokens": 87
}
}

View File

@@ -3,11 +3,13 @@ import { buildServer } from '../src/server';
import { FastifyInstance } from 'fastify';
import fs from 'fs';
import path from 'path';
import { config } from '../src/config';
describe('Proxy Integration Test', () => {
let server: FastifyInstance;
beforeEach(() => {
config.proxyMode = 'ollama';
server = buildServer();
// In vitest we can mock the global fetch
global.fetch = vi.fn();
@@ -29,6 +31,7 @@ describe('Proxy Integration Test', () => {
// Mock fetch to return the ollama-xml-response.json
(global.fetch as any).mockResolvedValue({
ok: true,
text: async () => JSON.stringify(responseJson),
json: async () => responseJson
});
@@ -47,14 +50,14 @@ describe('Proxy Integration Test', () => {
expect(fetchArgs[0]).toContain('/api/chat');
const upstreamBody = JSON.parse(fetchArgs[1].body);
expect(upstreamBody.model).toBe('Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit');
expect(upstreamBody.model).toBe('hotwa/qwen35-9b-agent:latest');
// Verify response was rewritten
expect(body.message.content).toBe("");
expect(body.message.tool_calls).toBeDefined();
expect(body.message.tool_calls).toHaveLength(1);
expect(body.message.tool_calls[0].function.name).toBe('read');
expect(JSON.parse(body.message.tool_calls[0].function.arguments)).toEqual({
expect(body.message.tool_calls[0].function.arguments).toEqual({
path: "/tmp/test.txt"
});
});

62
test/integration.vllm.test.ts Executable file
View File

@@ -0,0 +1,62 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { buildServer } from '../src/server';
import { FastifyInstance } from 'fastify';
import fs from 'fs';
import path from 'path';
import { config } from '../src/config';

// Read and parse a JSON fixture from test/fixtures by file name.
const loadFixture = (name: string) =>
  JSON.parse(fs.readFileSync(path.join(__dirname, 'fixtures', name), 'utf8'));

describe('vLLM Proxy Integration Test', () => {
  let server: FastifyInstance;

  beforeEach(() => {
    // Mutate the loaded config instance
    config.proxyMode = 'vllm';
    server = buildServer();
    global.fetch = vi.fn();
  });

  afterEach(async () => {
    await server.close();
    vi.restoreAllMocks();
  });

  it('proxies request and rewrites XML response to tool_calls for vLLM', async () => {
    const requestJson = loadFixture('vllm-like-request.json');
    const responseJson = loadFixture('vllm-xml-response.json');

    // Upstream fetch returns the canned vLLM response carrying an XML tool call.
    (global.fetch as any).mockResolvedValue({
      ok: true,
      json: async () => responseJson
    });

    const injected = await server.inject({
      method: 'POST',
      url: '/v1/chat/completions',
      payload: requestJson
    });
    expect(injected.statusCode).toBe(200);
    const body = JSON.parse(injected.payload);

    // The proxy must have forwarded exactly one upstream call, untouched model name included.
    expect(global.fetch).toHaveBeenCalledTimes(1);
    const [upstreamUrl, upstreamInit] = (global.fetch as any).mock.calls[0];
    expect(upstreamUrl).toContain('/v1/chat/completions');
    const upstreamBody = JSON.parse(upstreamInit.body);
    expect(upstreamBody.model).toBe('Qwen3.5-27B');

    // The XML tool call embedded in content must be rewritten into structured tool_calls.
    const message = body.choices[0].message;
    expect(message.content).toBe("");
    expect(message.tool_calls).toBeDefined();
    expect(message.tool_calls).toHaveLength(1);
    expect(message.tool_calls[0].function.name).toBe('read');
    // OpenAI format: function.arguments is a JSON-encoded string, so parse before comparing.
    expect(JSON.parse(message.tool_calls[0].function.arguments)).toEqual({
      path: "/tmp/test.txt"
    });
  });
});

View File

@@ -23,7 +23,7 @@ describe('Response Rewriter', () => {
expect(toolCall.type).toBe('function');
expect(toolCall.function.name).toBe('read');
const argsObject = JSON.parse(toolCall.function.arguments);
const argsObject = toolCall.function.arguments;
expect(argsObject).toEqual({ path: '/tmp/test.txt' });
});

53
test/vllm-rewriter.test.ts Executable file
View File

@@ -0,0 +1,53 @@
import { describe, it, expect } from 'vitest';
import { rewriteVllmResponse } from '../src/proxy/vllm-response-rewriter';

describe('vLLM Response Rewriter', () => {
  it('rewrites XML tool call in OpenAI choices content into structured tool_calls', () => {
    // OpenAI-shaped completion whose content carries an XML-encoded tool call.
    const xmlContent =
      "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>";
    const input = {
      id: "chatcmpl-123",
      choices: [{
        index: 0,
        message: { role: "assistant", content: xmlContent }
      }]
    };

    const rewritten = rewriteVllmResponse(input);
    const message = rewritten.choices[0].message;

    // Content is emptied and replaced by exactly one structured call.
    expect(message.content).toBe("");
    expect(message.tool_calls).toBeDefined();
    expect(message.tool_calls).toHaveLength(1);

    const call = message.tool_calls![0];
    expect(call.type).toBe('function');
    expect(call.function.name).toBe('read');
    // OpenAI format: function.arguments is a JSON-encoded string, so parse before comparing.
    expect(JSON.parse(call.function.arguments)).toEqual({ path: '/tmp/test.txt' });
  });

  it('does not touch response that already has tool_calls', () => {
    // Already-structured response: the rewriter must pass it through unchanged.
    const input = {
      choices: [{
        message: {
          role: "assistant",
          content: "Here are the calls",
          tool_calls: [{
            id: "123",
            type: "function",
            function: { name: "read", arguments: "{}" }
          }]
        }
      }]
    };

    const rewritten = rewriteVllmResponse(input);
    expect(rewritten.choices[0].message.content).toBe("Here are the calls");
    expect(rewritten.choices[0].message.tool_calls).toHaveLength(1);
  });
});