fix: spoof ollama streaming responses for tool calls

This commit is contained in:
lingyuzeng
2026-03-22 20:15:30 +08:00
parent 7a718d8983
commit 649958677e
3 changed files with 103 additions and 14 deletions

View File

@@ -1,20 +1,20 @@
import { FastifyInstance, FastifyPluginAsync } from 'fastify';
import { forwardChatRequest } from '../proxy/forward';
import { forwardAndSpoofOllamaStreamRequest, forwardChatRequest } from '../proxy/forward';
import { logger } from '../utils/logger';
const ollamaRoutes: FastifyPluginAsync = async (server: FastifyInstance) => {
server.post('/api/chat', async (request, reply) => {
try {
const body = request.body as any;
// Currently only supporting non-streaming requests in this proxy MVP
if (body?.stream === true) {
// As per requirements: return clear error or pass through without rewriting
// We'll return a clear error for now, because stream parsing is out of scope for MVP
reply.status(400).send({
error: "Streaming is not supported by this proxy MVP. Please set stream=false."
});
return;
const spoofedStream = await forwardAndSpoofOllamaStreamRequest(body, request.headers.authorization);
reply.raw.setHeader('Content-Type', 'text/event-stream');
reply.raw.setHeader('Cache-Control', 'no-cache');
reply.raw.setHeader('Connection', 'keep-alive');
return reply.send(spoofedStream);
}
const response = await forwardChatRequest(body, request.headers.authorization);