feat: implement Stream Spoofing interceptor and robust format preservation patches

This commit is contained in:
lingyuzeng
2026-03-22 17:55:47 +08:00
parent ebd73e1c69
commit ce99e8b418
12 changed files with 426 additions and 11 deletions

27
test/fixtures/vllm-like-request.json vendored Executable file
View File

@@ -0,0 +1,27 @@
{
"model": "Qwen3.5-27B",
"messages": [
{
"role": "user",
"content": "Please tell me what's in /tmp/test.txt"
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "read",
"description": "Read a file",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string"
}
}
}
}
}
]
}

21
test/fixtures/vllm-xml-response.json vendored Executable file
View File

@@ -0,0 +1,21 @@
{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1715012345,
"model": "Qwen3.5-27B",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 56,
"completion_tokens": 31,
"total_tokens": 87
}
}

View File

@@ -3,11 +3,13 @@ import { buildServer } from '../src/server';
import { FastifyInstance } from 'fastify';
import fs from 'fs';
import path from 'path';
import { config } from '../src/config';
describe('Proxy Integration Test', () => {
let server: FastifyInstance;
beforeEach(() => {
config.proxyMode = 'ollama';
server = buildServer();
// In vitest we can mock the global fetch
global.fetch = vi.fn();
@@ -29,6 +31,7 @@ describe('Proxy Integration Test', () => {
// Mock fetch to return the ollama-xml-response.json
(global.fetch as any).mockResolvedValue({
ok: true,
text: async () => JSON.stringify(responseJson),
json: async () => responseJson
});
@@ -47,14 +50,14 @@ describe('Proxy Integration Test', () => {
expect(fetchArgs[0]).toContain('/api/chat');
const upstreamBody = JSON.parse(fetchArgs[1].body);
expect(upstreamBody.model).toBe('Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit');
expect(upstreamBody.model).toBe('hotwa/qwen35-9b-agent:latest');
// Verify response was rewritten
expect(body.message.content).toBe("");
expect(body.message.tool_calls).toBeDefined();
expect(body.message.tool_calls).toHaveLength(1);
expect(body.message.tool_calls[0].function.name).toBe('read');
expect(JSON.parse(body.message.tool_calls[0].function.arguments)).toEqual({
expect(body.message.tool_calls[0].function.arguments).toEqual({
path: "/tmp/test.txt"
});
});

62
test/integration.vllm.test.ts Executable file
View File

@@ -0,0 +1,62 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { buildServer } from '../src/server';
import { FastifyInstance } from 'fastify';
import fs from 'fs';
import path from 'path';
import { config } from '../src/config';

// Read and parse a JSON fixture from test/fixtures by file name.
const loadFixture = (name: string) =>
  JSON.parse(fs.readFileSync(path.join(__dirname, 'fixtures', name), 'utf8'));

describe('vLLM Proxy Integration Test', () => {
  let server: FastifyInstance;

  beforeEach(() => {
    // Mutate the loaded config instance
    config.proxyMode = 'vllm';
    server = buildServer();
    global.fetch = vi.fn();
  });

  afterEach(async () => {
    await server.close();
    vi.restoreAllMocks();
  });

  it('proxies request and rewrites XML response to tool_calls for vLLM', async () => {
    const requestJson = loadFixture('vllm-like-request.json');
    const responseJson = loadFixture('vllm-xml-response.json');

    // Upstream fetch returns the canned vLLM response carrying an XML tool call.
    (global.fetch as any).mockResolvedValue({
      ok: true,
      json: async () => responseJson
    });

    const injected = await server.inject({
      method: 'POST',
      url: '/v1/chat/completions',
      payload: requestJson
    });
    expect(injected.statusCode).toBe(200);
    const body = JSON.parse(injected.payload);

    // The proxy must have forwarded exactly one upstream call, untouched model name included.
    expect(global.fetch).toHaveBeenCalledTimes(1);
    const [upstreamUrl, upstreamInit] = (global.fetch as any).mock.calls[0];
    expect(upstreamUrl).toContain('/v1/chat/completions');
    const upstreamBody = JSON.parse(upstreamInit.body);
    expect(upstreamBody.model).toBe('Qwen3.5-27B');

    // The XML tool call embedded in content must be rewritten into structured tool_calls.
    const message = body.choices[0].message;
    expect(message.content).toBe("");
    expect(message.tool_calls).toBeDefined();
    expect(message.tool_calls).toHaveLength(1);
    expect(message.tool_calls[0].function.name).toBe('read');
    // OpenAI format: function.arguments is a JSON-encoded string, so parse before comparing.
    expect(JSON.parse(message.tool_calls[0].function.arguments)).toEqual({
      path: "/tmp/test.txt"
    });
  });
});

View File

@@ -23,7 +23,7 @@ describe('Response Rewriter', () => {
expect(toolCall.type).toBe('function');
expect(toolCall.function.name).toBe('read');
const argsObject = JSON.parse(toolCall.function.arguments);
const argsObject = toolCall.function.arguments;
expect(argsObject).toEqual({ path: '/tmp/test.txt' });
});

53
test/vllm-rewriter.test.ts Executable file
View File

@@ -0,0 +1,53 @@
import { describe, it, expect } from 'vitest';
import { rewriteVllmResponse } from '../src/proxy/vllm-response-rewriter';

describe('vLLM Response Rewriter', () => {
  it('rewrites XML tool call in OpenAI choices content into structured tool_calls', () => {
    // OpenAI-shaped completion whose content carries an XML-encoded tool call.
    const xmlContent =
      "<function=read>\n<parameter=path>\n/tmp/test.txt\n</parameter>\n</function>";
    const input = {
      id: "chatcmpl-123",
      choices: [{
        index: 0,
        message: { role: "assistant", content: xmlContent }
      }]
    };

    const rewritten = rewriteVllmResponse(input);
    const message = rewritten.choices[0].message;

    // Content is emptied and replaced by exactly one structured call.
    expect(message.content).toBe("");
    expect(message.tool_calls).toBeDefined();
    expect(message.tool_calls).toHaveLength(1);

    const call = message.tool_calls![0];
    expect(call.type).toBe('function');
    expect(call.function.name).toBe('read');
    // OpenAI format: function.arguments is a JSON-encoded string, so parse before comparing.
    expect(JSON.parse(call.function.arguments)).toEqual({ path: '/tmp/test.txt' });
  });

  it('does not touch response that already has tool_calls', () => {
    // Already-structured response: the rewriter must pass it through unchanged.
    const input = {
      choices: [{
        message: {
          role: "assistant",
          content: "Here are the calls",
          tool_calls: [{
            id: "123",
            type: "function",
            function: { name: "read", arguments: "{}" }
          }]
        }
      }]
    };

    const rewritten = rewriteVllmResponse(input);
    expect(rewritten.choices[0].message.content).toBe("Here are the calls");
    expect(rewritten.choices[0].message.tool_calls).toHaveLength(1);
  });
});