diff --git a/extensions/cli/src/stream/handleToolCalls.test.ts b/extensions/cli/src/stream/handleToolCalls.test.ts
new file mode 100644
index 00000000000..a6587e7bd0a
--- /dev/null
+++ b/extensions/cli/src/stream/handleToolCalls.test.ts
@@ -0,0 +1,392 @@
+import type { ChatHistoryItem, ToolStatus } from "core/index.js";
+import { convertFromUnifiedHistory } from "core/util/messageConversion.js";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+import { services } from "../services/index.js";
+
+// eslint-disable-next-line
+import { handleToolCalls } from "./handleToolCalls.js"; // for some reason can't resolve import groups here
+
+// Replace the services module with stubs so chatHistory calls can be asserted on
+vi.mock("../services/index.js", () => ({
+  services: {
+    chatHistory: {
+      isReady: vi.fn(), // set per test to pick the service path or the fallback path
+      addAssistantMessage: vi.fn(),
+      addToolResult: vi.fn(),
+      addHistoryItem: vi.fn(),
+    },
+    toolPermissions: {
+      getState: vi.fn().mockReturnValue({
+        permissions: { defaultPermission: "allow" },
+      }),
+    },
+  },
+  serviceContainer: {
+    get: vi.fn().mockResolvedValue({
+      permissions: { defaultPermission: "allow" },
+    }),
+  },
+  SERVICE_NAMES: {
+    TOOL_PERMISSIONS: "toolPermissions",
+  },
+}));
+
+// Stub the streaming helpers; each test supplies the values they resolve to
+vi.mock("./streamChatResponse.helpers.js", () => ({
+  preprocessStreamedToolCalls: vi.fn(),
+  executeStreamedToolCalls: vi.fn(),
+}));
+
+// Import the mocked modules
+import {
+  executeStreamedToolCalls,
+  preprocessStreamedToolCalls,
+} from "./streamChatResponse.helpers.js";
+
+const mockPreprocess = vi.mocked(preprocessStreamedToolCalls); // stubbed per test
+const mockExecute = vi.mocked(executeStreamedToolCalls); // stubbed per test
+
+describe("handleToolCalls - duplicate tool_result prevention", () => {
+  let chatHistory: ChatHistoryItem[];
+
+  beforeEach(() => {
+    vi.clearAllMocks(); // drop calls recorded by earlier tests
+    chatHistory = [
+      {
+        message: { role: "user", content: "Test prompt" },
+        contextItems: [],
+      },
+    ];
+
+    // Default: chat history service reports ready (the fallback test overrides this)
+    vi.mocked(services.chatHistory.isReady).mockReturnValue(true);
+  });
+
+  /**
+   * This test verifies that preprocessing errors are stored in toolCallStates
+   * (on the assistant message) rather than as separate tool history items.
+   *
+   * If errors were added as separate history items, convertFromUnifiedHistory
+   * would generate duplicate tool_result messages - one from the standalone
+   * tool history item and one from the toolCallStates.
+   */
+  it("should store preprocessing errors in toolCallStates, not as separate history items", async () => {
+    const toolCalls = [
+      {
+        id: "tool-call-1",
+        name: "invalid_tool",
+        arguments: {},
+        argumentsStr: "{}",
+        startNotified: false,
+      },
+    ];
+
+    // Simulate preprocessing rejecting the call: no valid calls, one error entry
+    mockPreprocess.mockResolvedValue({
+      preprocessedCalls: [],
+      errorChatEntries: [
+        {
+          role: "tool",
+          tool_call_id: "tool-call-1",
+          content: "Tool invalid_tool not found",
+        },
+      ],
+    });
+
+    mockExecute.mockResolvedValue({
+      hasRejection: false,
+      chatHistoryEntries: [],
+    });
+
+    await handleToolCalls({
+      toolCalls,
+      chatHistory,
+      content: "Let me try that tool",
+      callbacks: undefined,
+      isHeadless: false,
+    });
+
+    // CRITICAL: the error must be recorded through addToolResult with status "errored"
+    expect(services.chatHistory.addToolResult).toHaveBeenCalledWith(
+      "tool-call-1",
+      "Tool invalid_tool not found",
+      "errored",
+    );
+
+    // CRITICAL: addHistoryItem should NOT be called
+    // (previously, errors were added as separate history items which caused duplicates)
+    expect(services.chatHistory.addHistoryItem).not.toHaveBeenCalled();
+  });
+
+  /**
+   * Calls convertFromUnifiedHistory directly (synchronously) and verifies that
+   * a toolCallStates entry yields exactly one tool message - not duplicates.
+   */
+  it("should produce exactly one tool_result per tool call when converting to API format", () => {
+    // Simulate history with an assistant message that has toolCallStates with output
+    const historyWithToolResults: ChatHistoryItem[] = [
+      {
+        message: { role: "user", content: "Run a command" },
+        contextItems: [],
+      },
+      {
+        message: {
+          role: "assistant",
+          content: "I'll run that for you",
+          toolCalls: [
+            {
+              id: "tool-call-1",
+              type: "function",
+              function: { name: "run_command", arguments: '{"cmd":"ls"}' },
+            },
+          ],
+        },
+        contextItems: [],
+        toolCallStates: [
+          {
+            toolCallId: "tool-call-1",
+            toolCall: {
+              id: "tool-call-1",
+              type: "function",
+              function: { name: "run_command", arguments: '{"cmd":"ls"}' },
+            },
+            status: "done" as ToolStatus,
+            parsedArgs: { cmd: "ls" },
+            output: [
+              {
+                content: "file1.txt\nfile2.txt",
+                name: "Tool Result",
+                description: "Tool execution result",
+              },
+            ],
+          },
+        ],
+      },
+    ];
+
+    const messages = convertFromUnifiedHistory(historyWithToolResults);
+
+    // Count tool messages
+    const toolMessages = messages.filter((m) => m.role === "tool");
+
+    // Should have exactly ONE tool message, not duplicates
+    expect(toolMessages).toHaveLength(1);
+    expect(toolMessages[0]).toEqual({
+      role: "tool",
+      content: "file1.txt\nfile2.txt",
+      tool_call_id: "tool-call-1",
+    });
+  });
+
+  /**
+   * Counterpart of the bug scenario (errors stored both as separate history items
+   * AND in toolCallStates): here the error lives only in toolCallStates, so
+   * convertFromUnifiedHistory must produce a single tool message for the call.
+   */
+  it("should NOT produce duplicate tool_results even with errored tool calls", () => {
+    // Simulate history where error is stored in toolCallStates (correct approach)
+    const historyWithErroredTool: ChatHistoryItem[] = [
+      {
+        message: { role: "user", content: "Use invalid tool" },
+        contextItems: [],
+      },
+      {
+        message: {
+          role: "assistant",
+          content: "",
+          toolCalls: [
+            {
+              id: "tool-call-error",
+              type: "function",
+              function: { name: "nonexistent", arguments: "{}" },
+            },
+          ],
+        },
+        contextItems: [],
+        toolCallStates: [
+          {
+            toolCallId: "tool-call-error",
+            toolCall: {
+              id: "tool-call-error",
+              type: "function",
+              function: { name: "nonexistent", arguments: "{}" },
+            },
+            status: "errored" as ToolStatus,
+            parsedArgs: {},
+            output: [
+              {
+                content: "Tool nonexistent not found",
+                name: "Tool Result",
+                description: "Tool execution result",
+              },
+            ],
+          },
+        ],
+      },
+    ];
+
+    const messages = convertFromUnifiedHistory(historyWithErroredTool);
+
+    // Count tool messages with this specific ID
+    const toolMessagesForCall = messages.filter(
+      (m) => m.role === "tool" && (m as any).tool_call_id === "tool-call-error",
+    );
+
+    // Should have exactly ONE tool message for this tool call
+    expect(toolMessagesForCall).toHaveLength(1);
+    expect(toolMessagesForCall[0]).toEqual({
+      role: "tool",
+      content: "Tool nonexistent not found",
+      tool_call_id: "tool-call-error",
+    });
+  });
+
+  /**
+   * Documents the problematic pattern that WOULD cause duplicates (what we're
+   * preventing). Pins current converter output; revisit if it ever de-duplicates.
+   */
+  it("demonstrates the duplicate bug when tool messages are stored separately", () => {
+    // BAD PATTERN: Having both a standalone tool message AND toolCallStates
+    // This is what the old code did, and it caused the duplicate tool_result error
+    const badHistoryWithDuplicates: ChatHistoryItem[] = [
+      {
+        message: { role: "user", content: "Run command" },
+        contextItems: [],
+      },
+      {
+        message: {
+          role: "assistant",
+          content: "",
+          toolCalls: [
+            {
+              id: "duplicate-call",
+              type: "function",
+              function: { name: "run_command", arguments: '{"cmd":"ls"}' },
+            },
+          ],
+        },
+        contextItems: [],
+        toolCallStates: [
+          {
+            toolCallId: "duplicate-call",
+            toolCall: {
+              id: "duplicate-call",
+              type: "function",
+              function: { name: "run_command", arguments: '{"cmd":"ls"}' },
+            },
+            status: "done" as ToolStatus,
+            parsedArgs: { cmd: "ls" },
+            output: [
+              {
+                content: "Result from toolCallStates",
+                name: "Tool Result",
+                description: "Tool execution result",
+              },
+            ],
+          },
+        ],
+      },
+      // BAD: Standalone tool message that should NOT exist
+      // (This is what the old buggy code was adding)
+      {
+        message: {
+          role: "tool",
+          content: "Result from standalone message",
+          toolCallId: "duplicate-call",
+        },
+        contextItems: [],
+      },
+    ];
+
+    const messages = convertFromUnifiedHistory(badHistoryWithDuplicates);
+
+    // Count tool messages with this ID - this demonstrates the bug
+    const toolMessagesForCall = messages.filter(
+      (m) => m.role === "tool" && (m as any).tool_call_id === "duplicate-call",
+    );
+
+    // With the bad pattern, we get TWO tool messages (the bug!)
+    // This would cause: "each tool_use must have a single result"
+    expect(toolMessagesForCall).toHaveLength(2);
+
+    // handleToolCalls avoids this by never adding the standalone tool item
+  });
+
+  /**
+   * Test that the fallback path (when service is not ready) also
+   * updates toolCallStates instead of adding separate items.
+   */
+  it("should update toolCallStates in fallback mode for preprocessing errors", async () => {
+    // Service not ready - use fallback path
+    vi.mocked(services.chatHistory.isReady).mockReturnValue(false);
+
+    const toolCalls = [
+      {
+        id: "fallback-tool-1",
+        name: "broken_tool",
+        arguments: { arg: "value" },
+        argumentsStr: '{"arg":"value"}',
+        startNotified: false,
+      },
+    ];
+
+    // Simulate preprocessing rejecting the call: no valid calls, one error entry
+    mockPreprocess.mockResolvedValue({
+      preprocessedCalls: [],
+      errorChatEntries: [
+        {
+          role: "tool",
+          tool_call_id: "fallback-tool-1",
+          content: "Tool broken_tool not found",
+        },
+      ],
+    });
+
+    mockExecute.mockResolvedValue({
+      hasRejection: false,
+      chatHistoryEntries: [],
+    });
+
+    await handleToolCalls({
+      toolCalls,
+      chatHistory,
+      content: "",
+      callbacks: undefined,
+      isHeadless: false,
+    });
+
+    // In fallback mode the error must not be routed through the service at all
+    expect(services.chatHistory.addToolResult).not.toHaveBeenCalled();
+    expect(services.chatHistory.addHistoryItem).not.toHaveBeenCalled();
+
+    // The local chatHistory should have the assistant message with toolCallStates
+    // (handleToolCalls adds this via createHistoryItem in fallback mode)
+    const assistantItem = chatHistory.find(
+      (item) =>
+        item.message.role === "assistant" && item.toolCallStates?.length,
+    );
+
+    expect(assistantItem).toBeDefined();
+    const toolState = assistantItem!.toolCallStates!.find(
+      (ts) => ts.toolCallId === "fallback-tool-1",
+    );
+
+    // The toolState should be updated with the error
+    expect(toolState).toBeDefined();
+    expect(toolState!.status).toBe("errored");
+    expect(toolState!.output).toEqual([
+      {
+        content: "Tool broken_tool not found",
+        name: "Tool Result",
+        description: "Tool execution result",
+      },
+    ]);
+
+    // CRITICAL: No separate tool message should be added to chatHistory
+    const separateToolMessages = chatHistory.filter(
+      (item) => item.message.role === "tool",
+    );
+    expect(separateToolMessages).toHaveLength(0);
+  });
+});
diff --git a/extensions/cli/src/stream/handleToolCalls.ts b/extensions/cli/src/stream/handleToolCalls.ts
index 5150a436bd5..bc5d99176e0 100644
--- a/extensions/cli/src/stream/handleToolCalls.ts
+++ b/extensions/cli/src/stream/handleToolCalls.ts
@@ -109,51 +109,15 @@ export async function handleToolCalls(
   const { preprocessedCalls, errorChatEntries } =
     await preprocessStreamedToolCalls(isHeadless, toolCalls, callbacks);
 
-  // Add any preprocessing errors to chat history
-  // Convert error entries from OpenAI format to ChatHistoryItem format
+  // Add any preprocessing errors to the toolCallStates on the assistant message
+  // (NOT as separate history items, which would cause duplicate tool_result messages)
   errorChatEntries.forEach((errorEntry) => {
-    const item = createHistoryItem({
-      role: "tool",
-      content: stripImages(errorEntry.content) || "",
-      toolCallId: errorEntry.tool_call_id,
-    });
-    if (useService) {
-      chatHistorySvc.addHistoryItem(item);
-    } else {
-      // Fallback only when service is unavailable
-      chatHistory.push(item);
-    }
-  });
-
-  // Execute the valid preprocessed tool calls
-  const { chatHistoryEntries: toolResults, hasRejection } =
-    await executeStreamedToolCalls(preprocessedCalls, callbacks, isHeadless);
-
-  if (isHeadless && hasRejection) {
-    logger.debug(
-      "Tool call rejected in headless mode - returning current content",
-    );
-    return true; // Signal early return needed
-  }
-
-  // Convert tool results and add them to the chat history with status from execution
-  toolResults.forEach((toolResult) => {
-    const resultContent =
-      typeof toolResult.content === "string" ? toolResult.content : "";
-
-    // Use the status from the tool execution result instead of text matching
-    const status = toolResult.status;
-
-    logger.debug("Tool result status", {
-      status,
-      toolCallId: toolResult.tool_call_id,
-    });
-
+    const errorContent = stripImages(errorEntry.content) || "";
     if (useService) {
       chatHistorySvc.addToolResult(
-        toolResult.tool_call_id,
-        resultContent,
-        status,
+        errorEntry.tool_call_id,
+        errorContent,
+        "errored",
       );
     } else {
       // Fallback only when service is unavailable: update local tool state
@@ -165,13 +129,13 @@ export async function handleToolCalls(
         chatHistory[lastAssistantIndex].toolCallStates
       ) {
         const toolState = chatHistory[lastAssistantIndex].toolCallStates.find(
-          (ts) => ts.toolCallId === toolResult.tool_call_id,
+          (ts) => ts.toolCallId === errorEntry.tool_call_id,
         );
         if (toolState) {
-          toolState.status = status;
+          toolState.status = "errored";
           toolState.output = [
             {
-              content: resultContent,
+              content: errorContent,
               name: `Tool Result`,
               description: "Tool execution result",
             },
@@ -180,6 +144,27 @@ export async function handleToolCalls(
       }
     }
   });
+
+  // Execute the valid preprocessed tool calls
+  // Note: executeStreamedToolCalls adds tool results to toolCallStates via
+  // services.chatHistory.addToolResult() internally
+  const { hasRejection } = await executeStreamedToolCalls(
+    preprocessedCalls,
+    callbacks,
+    isHeadless,
+  );
+
+  if (isHeadless && hasRejection) {
+    logger.debug(
+      "Tool call rejected in headless mode - returning current content",
+    );
+    return true; // Signal early return needed
+  }
+
+  // Tool results are already added to toolCallStates in executeStreamedToolCalls via
+  // services.chatHistory.addToolResult(); adding them again here would be redundant (and
+  // previously caused duplicate tool_result messages alongside separate tool history items).
+  // NOTE(review): confirm results are still recorded when the service is unavailable.
   return false;
 }