fix: spoof ollama streaming responses for tool calls
This commit is contained in:
@@ -1,20 +1,20 @@
|
||||
import { FastifyInstance, FastifyPluginAsync } from 'fastify';
|
||||
import { forwardChatRequest } from '../proxy/forward';
|
||||
import { forwardAndSpoofOllamaStreamRequest, forwardChatRequest } from '../proxy/forward';
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
const ollamaRoutes: FastifyPluginAsync = async (server: FastifyInstance) => {
|
||||
server.post('/api/chat', async (request, reply) => {
|
||||
try {
|
||||
const body = request.body as any;
|
||||
|
||||
// Currently only supporting non-streaming requests in this proxy MVP
|
||||
|
||||
if (body?.stream === true) {
|
||||
// As per requirements: return clear error or pass through without rewriting
|
||||
// We'll return a clear error for now, because stream parsing is out of scope for MVP
|
||||
reply.status(400).send({
|
||||
error: "Streaming is not supported by this proxy MVP. Please set stream=false."
|
||||
});
|
||||
return;
|
||||
const spoofedStream = await forwardAndSpoofOllamaStreamRequest(body, request.headers.authorization);
|
||||
|
||||
reply.raw.setHeader('Content-Type', 'text/event-stream');
|
||||
reply.raw.setHeader('Cache-Control', 'no-cache');
|
||||
reply.raw.setHeader('Connection', 'keep-alive');
|
||||
|
||||
return reply.send(spoofedStream);
|
||||
}
|
||||
|
||||
const response = await forwardChatRequest(body, request.headers.authorization);
|
||||
|
||||
Reference in New Issue
Block a user