fix: spoof ollama streaming responses for tool calls

This commit is contained in:
lingyuzeng
2026-03-22 20:15:30 +08:00
parent 7a718d8983
commit 649958677e
3 changed files with 103 additions and 14 deletions

View File

@@ -2,6 +2,7 @@ import { config } from '../config';
import { rewriteResponse } from './response-rewriter';
import { normalizeRequest } from './request-normalizer';
import { logger } from '../utils/logger';
import { Readable } from 'stream';
export async function forwardChatRequest(requestBody: any, authorization?: string): Promise<any> {
const targetHost = config.targetUrl;
@@ -108,3 +109,75 @@ export async function forwardChatRequest(requestBody: any, authorization?: strin
return rewrittenData;
}
/**
 * Handles a streaming (`stream: true`) chat request by fetching the complete
 * response from upstream with streaming disabled, then replaying it to the
 * client as a synthetic SSE stream of OpenAI-style `chat.completion.chunk`
 * events (role delta, sliced content deltas, tool calls, final finish chunk,
 * then `data: [DONE]`).
 *
 * @param requestBody    Original client request body; `stream` is forced off
 *                       before forwarding upstream.
 * @param authorization  Optional Authorization header forwarded upstream.
 * @returns A Readable that emits the spoofed SSE payload and then ends.
 */
export async function forwardAndSpoofOllamaStreamRequest(requestBody: any, authorization?: string): Promise<Readable> {
  const upstreamBody = { ...requestBody, stream: false };
  const rewrittenData = await forwardChatRequest(upstreamBody, authorization);

  const sseStream = new Readable({
    // No-op: all data is pushed eagerly below; the consumer just drains it.
    read() {}
  });

  // Emit one SSE `chat.completion.chunk` event carrying the given delta.
  const pushChunk = (delta: any, finishReason: string | null = null) => {
    const chunk = {
      id: rewrittenData.id || `chatcmpl-${Date.now()}`,
      object: 'chat.completion.chunk',
      created: Math.floor(Date.now() / 1000),
      model: rewrittenData.model || upstreamBody.model,
      choices: [
        {
          index: 0,
          delta,
          finish_reason: finishReason
        }
      ]
    };
    sseStream.push(`data: ${JSON.stringify(chunk)}\n\n`);
  };

  // Defer emission one tick so the caller can attach the stream to the HTTP
  // reply before any data is pushed.
  process.nextTick(() => {
    try {
      pushChunk({ role: 'assistant' });
      // NOTE(review): reads Ollama-shaped fields (`message`, `done_reason`)
      // off the rewritten upstream response — confirm against response-rewriter.
      const message = rewrittenData.message || {};
      if (message.content) {
        // Slice content into small pieces so clients observe incremental deltas.
        const chunkSize = 16;
        for (let i = 0; i < message.content.length; i += chunkSize) {
          pushChunk({ content: message.content.substring(i, i + chunkSize) });
        }
      }
      if (message.tool_calls && message.tool_calls.length > 0) {
        const streamToolCalls = message.tool_calls.map((tc: any, idx: number) => ({
          index: idx,
          id: tc.id,
          type: tc.type,
          function: {
            name: tc.function.name,
            // Upstream may supply `arguments` either as an object (Ollama
            // style) or as an already-serialized JSON string (OpenAI style).
            // Only stringify objects — stringifying a string would
            // double-encode it (e.g. '"{\"a\":1}"').
            arguments:
              typeof tc.function.arguments === 'string'
                ? tc.function.arguments
                : JSON.stringify(tc.function.arguments ?? {})
          }
        }));
        pushChunk({ tool_calls: streamToolCalls });
      }
      const finalFinishReason =
        message.tool_calls?.length > 0
          ? 'tool_calls'
          : (rewrittenData.done_reason || 'stop');
      pushChunk({}, finalFinishReason);
      sseStream.push('data: [DONE]\n\n');
      sseStream.push(null);
    } catch (e: any) {
      logger.error('Error generating Ollama SSE stream:', e.message);
      // Best-effort error event; still terminate the stream cleanly so the
      // client's SSE parser does not hang.
      sseStream.push('data: {"error":"Internal stream spoofing error"}\n\n');
      sseStream.push('data: [DONE]\n\n');
      sseStream.push(null);
    }
  });

  return sseStream;
}

View File

@@ -1,5 +1,5 @@
import { FastifyInstance, FastifyPluginAsync } from 'fastify';
import { forwardChatRequest } from '../proxy/forward';
import { forwardAndSpoofOllamaStreamRequest, forwardChatRequest } from '../proxy/forward';
import { logger } from '../utils/logger';
const ollamaRoutes: FastifyPluginAsync = async (server: FastifyInstance) => {
@@ -7,14 +7,14 @@ const ollamaRoutes: FastifyPluginAsync = async (server: FastifyInstance) => {
try {
const body = request.body as any;
// Currently only supporting non-streaming requests in this proxy MVP
if (body?.stream === true) {
// As per requirements: return clear error or pass through without rewriting
// We'll return a clear error for now, because stream parsing is out of scope for MVP
reply.status(400).send({
error: "Streaming is not supported by this proxy MVP. Please set stream=false."
});
return;
const spoofedStream = await forwardAndSpoofOllamaStreamRequest(body, request.headers.authorization);
reply.raw.setHeader('Content-Type', 'text/event-stream');
reply.raw.setHeader('Cache-Control', 'no-cache');
reply.raw.setHeader('Connection', 'keep-alive');
return reply.send(spoofedStream);
}
const response = await forwardChatRequest(body, request.headers.authorization);

View File

@@ -62,20 +62,36 @@ describe('Proxy Integration Test', () => {
});
});
it('rejects streaming requests cleanly', async () => {
it('spoofs streaming responses for stream=true requests', async () => {
const requestFixturePath = path.join(__dirname, 'fixtures', 'openclaw-like-request.json');
const responseFixturePath = path.join(__dirname, 'fixtures', 'ollama-xml-response.json');
const requestJson = JSON.parse(fs.readFileSync(requestFixturePath, 'utf8'));
const responseJson = JSON.parse(fs.readFileSync(responseFixturePath, 'utf8'));
requestJson.stream = true;
(global.fetch as any).mockResolvedValue({
ok: true,
text: async () => JSON.stringify(responseJson),
json: async () => responseJson
});
const response = await server.inject({
method: 'POST',
url: '/api/chat',
payload: requestJson
});
expect(response.statusCode).toBe(400);
const body = JSON.parse(response.payload);
expect(body.error).toContain('Streaming is not supported');
expect(global.fetch).not.toHaveBeenCalled();
expect(response.statusCode).toBe(200);
expect(response.headers['content-type']).toContain('text/event-stream');
expect(global.fetch).toHaveBeenCalledTimes(1);
const fetchArgs = (global.fetch as any).mock.calls[0];
const upstreamBody = JSON.parse(fetchArgs[1].body);
expect(upstreamBody.stream).toBe(false);
expect(response.payload).toContain('"role":"assistant"');
expect(response.payload).toContain('"tool_calls"');
expect(response.payload).toContain('"finish_reason":"tool_calls"');
expect(response.payload).toContain('data: [DONE]');
});
});