diff --git a/src/proxy/vllm-forward.ts b/src/proxy/vllm-forward.ts index d77fa66..a093cbd 100755 --- a/src/proxy/vllm-forward.ts +++ b/src/proxy/vllm-forward.ts @@ -115,8 +115,11 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori const message = rewrittenData.choices?.[0]?.message || {}; - // 2. Content chunk MUST come strictly before tool calls - if (message.content) { + const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0; + + // 2. Content chunk MUST come strictly before tool calls, but suppress it when + // the response is a tool-calling turn to keep OpenClaw in a tool-first state. + if (!hasToolCalls && message.content) { // Fragment the content to simulate real token streaming and prevent UI double-rendering bugs const chunkSize = 16; for (let i = 0; i < message.content.length; i += chunkSize) { @@ -125,7 +128,7 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori } // 3. Tool calls chunk - if (message.tool_calls && message.tool_calls.length > 0) { + if (hasToolCalls) { const streamToolCalls = message.tool_calls.map((tc: any, idx: number) => ({ index: idx, id: tc.id, @@ -139,7 +142,9 @@ export async function forwardAndSpoofVllmStreamRequest(requestBody: any, authori } // 4. Finish reason chunk - const finalFinishReason = rewrittenData.choices?.[0]?.finish_reason || (message.tool_calls?.length > 0 ? 'tool_calls' : 'stop'); + const finalFinishReason = hasToolCalls + ? 'tool_calls' + : (rewrittenData.choices?.[0]?.finish_reason || 'stop'); pushChunk({}, finalFinishReason); // 5. 
Done diff --git a/src/proxy/vllm-response-rewriter.ts b/src/proxy/vllm-response-rewriter.ts index a111aa0..32599b3 100755 --- a/src/proxy/vllm-response-rewriter.ts +++ b/src/proxy/vllm-response-rewriter.ts @@ -22,6 +22,18 @@ function buildVllmToolCalls(parsedCalls: ReturnType) { }); } +function sanitizeVllmReasoning(message: any) { + if (message.reasoning_content) { + message.reasoning_content = sanitizeContent(message.reasoning_content); + } + if (message.reasoning) { + message.reasoning = sanitizeContent(message.reasoning); + } + if (message.thinking) { + message.thinking = sanitizeContent(message.thinking); + } +} + /** * Rewrites the vLLM/OpenAI standard response to include structured tool calls if missing * but present in XML tags within the content. @@ -34,8 +46,10 @@ export function rewriteVllmResponse(response: any): any { const message = response.choices[0].message; if (!message) return response; - // If already has tool_calls, do nothing + // If already has tool_calls, normalize into a tool-first shape. 
if (message.tool_calls && message.tool_calls.length > 0) { + message.content = ''; + sanitizeVllmReasoning(message); return response; } @@ -53,26 +67,21 @@ export function rewriteVllmResponse(response: any): any { message.content = ''; if (message.reasoning_content) { - message.reasoning_content = sanitizeContent( - message.reasoning_content - .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') - .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') - ); + message.reasoning_content = message.reasoning_content + .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); } if (message.reasoning) { - message.reasoning = sanitizeContent( - message.reasoning - .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') - .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') - ); + message.reasoning = message.reasoning + .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); } if (message.thinking) { - message.thinking = sanitizeContent( - message.thinking - .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') - .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, '') - ); + message.thinking = message.thinking + .replace(/<function([^>]+)>([\s\S]*?)<\/function>/g, '') + .replace(/<tool_call>([\s\S]*?)<\/tool_call>/g, ''); } + sanitizeVllmReasoning(message); return response; } @@ -80,15 +89,7 @@ export function rewriteVllmResponse(response: any): any { if (message.content) { message.content = sanitizeContent(message.content); } - if (message.reasoning_content) { - message.reasoning_content = sanitizeContent(message.reasoning_content); - } - if (message.reasoning) { - message.reasoning = sanitizeContent(message.reasoning); - } - if (message.thinking) { - message.thinking = sanitizeContent(message.thinking); - } + sanitizeVllmReasoning(message); return response; } diff --git a/test/integration.vllm.test.ts b/test/integration.vllm.test.ts index f927b92..38867e3 100755 --- a/test/integration.vllm.test.ts +++ b/test/integration.vllm.test.ts @@ -59,4 +59,37 @@ describe('vLLM Proxy Integration Test', () => { path: 
"/tmp/test.txt" }); }); + + it('spoofs streaming responses for tool-calling turns without content chunks', async () => { + const requestFixturePath = path.join(__dirname, 'fixtures', 'vllm-like-request.json'); + const responseFixturePath = path.join(__dirname, 'fixtures', 'vllm-xml-response.json'); + + const requestJson = JSON.parse(fs.readFileSync(requestFixturePath, 'utf8')); + const responseJson = JSON.parse(fs.readFileSync(responseFixturePath, 'utf8')); + requestJson.stream = true; + + (global.fetch as any).mockResolvedValue({ + ok: true, + json: async () => responseJson + }); + + const response = await server.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: requestJson + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toContain('text/event-stream'); + + const fetchArgs = (global.fetch as any).mock.calls[0]; + const upstreamBody = JSON.parse(fetchArgs[1].body); + expect(upstreamBody.stream).toBe(false); + + expect(response.payload).toContain('"role":"assistant"'); + expect(response.payload).toContain('"tool_calls"'); + expect(response.payload).toContain('"finish_reason":"tool_calls"'); + expect(response.payload).not.toContain('"content"'); + expect(response.payload).toContain('data: [DONE]'); + }); }); diff --git a/test/vllm-rewriter.test.ts b/test/vllm-rewriter.test.ts index 59e3d5a..62f5a62 100755 --- a/test/vllm-rewriter.test.ts +++ b/test/vllm-rewriter.test.ts @@ -28,12 +28,13 @@ describe('vLLM Response Rewriter', () => { expect(argsObject).toEqual({ path: '/tmp/test.txt' }); }); - it('does not touch response that already has tool_calls', () => { + it('normalizes response that already has tool_calls into a tool-first shape', () => { const inputResponse = { choices: [{ message: { role: "assistant", content: "Here are the calls", + thinking: "internal", tool_calls: [ { id: "123", @@ -47,7 +48,8 @@ describe('vLLM Response Rewriter', () => { const result = rewriteVllmResponse(inputResponse); - 
expect(result.choices[0].message.content).toBe("Here are the calls"); + expect(result.choices[0].message.content).toBe(""); + expect(result.choices[0].message.thinking).toBe(""); expect(result.choices[0].message.tool_calls).toHaveLength(1); });