diff --git a/.env.example b/.env.example index 09032e0..ff4a49e 100755 --- a/.env.example +++ b/.env.example @@ -1,9 +1,12 @@ # 下游 Ollama 地址 OLLAMA_PROXY_TARGET=http://127.0.0.1:11434 -# 默认模型名 (可以为空,如果需要强制指定一个,也可以从配置中配) -# OLLAMA_DEFAULT_MODEL=Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit +# 默认模型名 +OLLAMA_DEFAULT_MODEL=hotwa/qwen35-opus-distilled-agent:latest # 代理服务监听端口 PROXY_PORT=11435 PROXY_HOST=127.0.0.1 + +# Osaurus API Key (可选,如果下游需要认证) +# OSAURUS_API_KEY=your-api-key-here diff --git a/src/config.ts b/src/config.ts index 38b8a18..9578d85 100755 --- a/src/config.ts +++ b/src/config.ts @@ -9,9 +9,12 @@ export const config = { host: process.env.PROXY_HOST || '127.0.0.1', port: process.env.PROXY_PORT ? parseInt(process.env.PROXY_PORT, 10) : 11435, - // 下游 Ollama 的真实地此 + // 下游 Ollama/Osaurus 的真实地址 targetUrl: process.env.OLLAMA_PROXY_TARGET || 'http://127.0.0.1:11434', // 默认模型:当上游没有带模型时,或者为了默认测试配置,提供一个默认模型名称 - defaultModel: process.env.OLLAMA_DEFAULT_MODEL || 'Huihui-Qwen3.5-27B-Claude-4.6-Opus-abliterated-4bit', + defaultModel: process.env.OLLAMA_DEFAULT_MODEL || '', + + // Osaurus API Key (可选,如果下游需要认证) + apiKey: process.env.OSAURUS_API_KEY || '', }; diff --git a/src/proxy/forward.ts b/src/proxy/forward.ts index 7ddb5a3..b9b80a1 100755 --- a/src/proxy/forward.ts +++ b/src/proxy/forward.ts @@ -16,30 +16,93 @@ export async function forwardChatRequest(requestBody: any): Promise<any> { requestBody = normalizeRequest(requestBody); logger.info(`Forwarding chat request to ${targetEndpoint} for model: ${requestBody.model}`); - logger.info(`Request body: ${JSON.stringify(requestBody, null, 2)}`); + + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + 'Accept': 'application/json' + }; + + // Add Authorization header if API key is configured + if (config.apiKey) { + headers['Authorization'] = `Bearer ${config.apiKey}`; + } const options: RequestInit = { method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }, + 
headers, body: JSON.stringify(requestBody) }; const response = await fetch(targetEndpoint, options); - + if (!response.ok) { const errorText = await response.text(); logger.error(`Ollama upstream error ${response.status}: ${errorText}`); throw new Error(`Upstream returned ${response.status}: ${errorText}`); } - // Assuming it's not a stream for now - const responseData = await response.json(); - + // Handle response - Osaurus may return streaming format even with stream: false + const responseText = await response.text(); + + // Try to parse as single JSON first (standard Ollama behavior) + let responseData: any; + try { + responseData = JSON.parse(responseText); + } catch (e) { + // If that fails, it might be streaming format (multiple JSON lines) + // Combine all chunks into a single response + const lines = responseText.trim().split('\n').filter(line => line.trim()); + if (lines.length === 0) { + throw new Error('Empty response from upstream'); + } + + // Combine streaming chunks + let combinedContent = ''; + let lastChunk: any = null; + let message: any = { role: 'assistant', content: '' }; + + for (const line of lines) { + try { + const chunk = JSON.parse(line); + lastChunk = chunk; + if (chunk.message?.content) { + combinedContent += chunk.message.content; + } + // Preserve tool_calls from any chunk + if (chunk.message?.tool_calls) { + message.tool_calls = chunk.message.tool_calls; + } + // Preserve thinking if present + if (chunk.message?.thinking) { + message.thinking = chunk.message.thinking; + } + } catch (parseError) { + logger.warn(`Failed to parse chunk: ${line}`); + } + } + + message.content = combinedContent; + + // Build the combined response using the last chunk's metadata + responseData = { + model: lastChunk?.model || requestBody.model, + created_at: lastChunk?.created_at || new Date().toISOString(), + message, + done: true, + done_reason: lastChunk?.done_reason || 'stop', + total_duration: lastChunk?.total_duration, + load_duration: 
lastChunk?.load_duration, + prompt_eval_count: lastChunk?.prompt_eval_count, + prompt_eval_duration: lastChunk?.prompt_eval_duration, + eval_count: lastChunk?.eval_count, + eval_duration: lastChunk?.eval_duration, + }; + + logger.info(`Combined ${lines.length} streaming chunks into single response`); + } + // Rewrite if necessary const rewrittenData = rewriteResponse(responseData); - + return rewrittenData; }