streaming response fixes (Ollama)
parent 61ba0e4412 · commit e0df415237
@@ -170,14 +170,14 @@ export default function ChatSessionView() {
           // Mode changed, append new block and update index
           newTurn.blocks = [...oldTurn.blocks, ...turnUpdates.blocks];
           if (state) {
-            state.currentBlockIndex = oldTurn.blocks.length;
+            state.currentBlockIndex = newTurn.blocks.length - 1;
           }
         }
       } else {
         // No current block, append and set index
         newTurn.blocks = [...(oldTurn.blocks || []), ...turnUpdates.blocks];
         if (state && turnUpdates.blocks.length > 0) {
-          state.currentBlockIndex = oldTurn.blocks ? oldTurn.blocks.length : 0;
+          state.currentBlockIndex = newTurn.blocks.length - 1;
         }
       }
     }
@@ -225,7 +225,6 @@ export default function ChatSessionView() {
             content: state.respondingContent,
           }],
         });
         scheduleUpdate();
         state.respondingContent = '';
-        state.currentBlockIndex = null;
       }
@@ -267,7 +266,6 @@ export default function ChatSessionView() {
             content: state.thinkingContent,
           }],
         });
         scheduleUpdate();
         state.thinkingContent = '';
-        state.currentBlockIndex = null;
       }
@@ -295,28 +293,32 @@ export default function ChatSessionView() {
       // Flush current streaming state
       const state = streamingStateRef.current.get(turnId);
       if (state) {
+        const blocksToFlush: ChatTurnBlock[] = [];
+
         if (state.currentMode === 'thinking' && state.thinkingContent) {
-          pendingUpdatesRef.current.set(turnId, {
-            blocks: [{
-              mode: 'thinking' as const,
-              createdAt: new Date().toISOString(),
-              content: state.thinkingContent,
-            }],
+          blocksToFlush.push({
+            mode: 'thinking' as const,
+            createdAt: new Date().toISOString(),
+            content: state.thinkingContent,
           });
           state.thinkingContent = '';
-          state.currentBlockIndex = null;
         }
         if (state.currentMode === 'responding' && state.respondingContent) {
-          pendingUpdatesRef.current.set(turnId, {
-            blocks: [{
-              mode: 'responding' as const,
-              createdAt: new Date().toISOString(),
-              content: state.respondingContent,
-            }],
+          blocksToFlush.push({
+            mode: 'responding' as const,
+            createdAt: new Date().toISOString(),
+            content: state.respondingContent,
          });
           state.respondingContent = '';
-          state.currentBlockIndex = null;
         }
+
+        if (blocksToFlush.length > 0) {
+          pendingUpdatesRef.current.set(turnId, {
+            blocks: blocksToFlush,
+          });
+          scheduleUpdate();
+        }
+
         state.currentMode = null;
         state.currentBlockIndex = null;
       }
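Note on the hunk above: pendingUpdatesRef keys pending updates by turnId, so the old code's two separate set() calls (one per mode) let a responding flush silently overwrite a thinking flush for the same turn. A minimal sketch of that hazard, with simplified types rather than the component's actual ones:

// Two successive set() calls on the same key keep only the last value.
const pending = new Map<string, { blocks: string[] }>();
pending.set('turn-1', { blocks: ['thinking block'] });
pending.set('turn-1', { blocks: ['responding block'] }); // the thinking flush is lost
// Accumulating into one array first, as blocksToFlush does, keeps both:
pending.set('turn-1', { blocks: ['thinking block', 'responding block'] });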
@@ -334,33 +336,10 @@ export default function ChatSessionView() {
   }, [scheduleUpdate]);

   const handleWorkOrderComplete = useCallback((turnId: string, success: boolean, message?: string) => {
-    // Flush any remaining streaming state
+    // Backend has already flushed and persisted all streaming content
+    // Just clean up frontend streaming state and update status
     const state = streamingStateRef.current.get(turnId);
     if (state) {
-      const blocks: ChatTurnBlock[] = [];
-      if (state.currentMode === 'thinking' && state.thinkingContent) {
-        blocks.push({
-          mode: 'thinking' as const,
-          createdAt: new Date().toISOString(),
-          content: state.thinkingContent,
-        });
-      }
-      if (state.currentMode === 'responding' && state.respondingContent) {
-        blocks.push({
-          mode: 'responding' as const,
-          createdAt: new Date().toISOString(),
-          content: state.respondingContent,
-        });
-      }
-      if (blocks.length > 0) {
-        setTurns(prevTurns =>
-          prevTurns.map(turn =>
-            turn._id === turnId
-              ? { ...turn, blocks: [...(turn.blocks || []), ...blocks] }
-              : turn
-          )
-        );
-      }
       streamingStateRef.current.delete(turnId);
     }

@@ -123,6 +123,11 @@ class AgentService extends GadgetService {
       onStreamChunk,
     );

+    // Check for model loading failure
+    if (response.doneReason === 'load' && !response.response && !response.thinking && (!response.toolCalls || response.toolCalls.length === 0)) {
+      throw new Error('Model failed to respond (still loading or error)');
+    }
+
     // Emit thinking content if present
     if (response.thinking) {
      socket.emit("thinking", response.thinking);
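For context: when an Ollama request only triggers a model (re)load and no tokens are generated, the final stream chunk reports done_reason 'load' with empty content, which the guard above now surfaces as an error instead of an empty reply. A sketch of the chunk shape being guarded against, with values taken from the test fixtures added later in this commit rather than an exhaustive schema:

const loadOnlyChunk = {
  message: { content: '' }, // no tokens were generated
  done: true,
  done_reason: 'load',      // the request only loaded the model
};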
@@ -194,6 +199,7 @@ class AgentService extends GadgetService {
         createdAt: turn.createdAt,
         role: "tool",
+        callId: toolCall.callId,
         toolName: toolCall.name,
         content: toolCall.response,
       });
     }

@@ -16,7 +16,7 @@
     "dev": "tsc --watch",
     "clean": "rm -rf dist/",
     "typecheck": "tsc --noEmit",
-    "test": "echo \"No tests configured yet\""
+    "test": "vitest run"
   },
   "keywords": [
     "gadget",
@@ -36,6 +36,7 @@
   "devDependencies": {
     "@types/node": "^25.6.0",
     "@types/numeral": "^2.0.5",
-    "typescript": "^6.0.3"
+    "typescript": "^6.0.3",
+    "vitest": "^4.1.5"
   }
 }

@@ -54,6 +54,7 @@ export interface IContextChatMessage {
   createdAt: Date;
   role: string;
+  callId?: string;
   toolName?: string;
   content: string;
   user?: {
     _id: string;

packages/ai/src/ollama.test.ts (new file, 375 lines)
@@ -0,0 +1,375 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import type { ChatResponseStream } from 'ollama';
+
+// Mock the Ollama client BEFORE importing the module
+const mockOllamaClient = {
+  chat: vi.fn(),
+  generate: vi.fn(),
+  list: vi.fn(),
+  show: vi.fn(),
+};
+
+vi.mock('ollama', () => {
+  return {
+    Ollama: class MockOllama {
+      constructor() {
+        return mockOllamaClient;
+      }
+    },
+  };
+});
+
+import { OllamaAiApi } from './ollama';
+
+// Mock logger
+const mockLogger = {
+  debug: vi.fn(),
+  info: vi.fn(),
+  warn: vi.fn(),
+  error: vi.fn(),
+};
+
+// Mock environment and provider
+const mockEnv = {
+  NODE_ENV: 'test',
+  services: {
+    google: {
+      cse: {
+        apiKey: 'test-key',
+        engineId: 'test-engine',
+      },
+    },
+  },
+};
+
+const mockProvider = {
+  _id: 'test-provider',
+  name: 'Test Ollama',
+  sdk: 'ollama' as const,
+  baseUrl: 'http://localhost:11434',
+  apiKey: 'test-key',
+};
+
+describe('OllamaAiApi', () => {
+  let api: OllamaAiApi;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    api = new OllamaAiApi(mockEnv as any, mockProvider as any, mockLogger as any);
+  });
+
+  describe('chat', () => {
+    it('should handle normal response streaming', async () => {
+      // Mock streaming response
+      const mockStream = async function* () {
+        yield {
+          message: { content: 'Hello' },
+          done: false,
+        };
+        yield {
+          message: { content: ' world' },
+          done: false,
+        };
+        yield {
+          message: { content: '!' },
+          done: true,
+          done_reason: 'stop',
+          total_duration: 100,
+          prompt_eval_count: 10,
+          eval_count: 3,
+        };
+      };
+
+      mockOllamaClient.chat.mockResolvedValue(mockStream());
+
+      const streamCallback = vi.fn();
+      const response = await api.chat(
+        {
+          provider: mockProvider as any,
+          modelId: 'test-model',
+          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        },
+        {
+          userPrompt: 'Test prompt',
+          context: [],
+        },
+        streamCallback,
+      );
+
+      // Verify stream callback was called for each chunk
+      expect(streamCallback).toHaveBeenCalledTimes(3);
+      expect(streamCallback).toHaveBeenCalledWith({
+        type: 'response',
+        data: 'Hello',
+      });
+      expect(streamCallback).toHaveBeenCalledWith({
+        type: 'response',
+        data: ' world',
+      });
+      expect(streamCallback).toHaveBeenCalledWith({
+        type: 'response',
+        data: '!',
+      });
+
+      // Verify response
+      expect(response.done).toBe(true);
+      expect(response.doneReason).toBe('stop');
+      expect(response.response).toBe('!');
+    });
+
+    it('should handle tool calls', async () => {
+      // Mock streaming response with tool call
+      const mockStream = async function* () {
+        yield {
+          message: {
+            content: '',
+            tool_calls: [
+              {
+                function: {
+                  name: 'search_google',
+                  arguments: { query: 'test query' },
+                },
+              },
+            ],
+          },
+          done: false,
+        };
+        yield {
+          message: { content: '' },
+          done: true,
+          done_reason: 'stop',
+          total_duration: 100,
+          prompt_eval_count: 10,
+          eval_count: 1,
+        };
+      };
+
+      mockOllamaClient.chat.mockResolvedValue(mockStream());
+
+      const mockTool = {
+        definition: {
+          type: 'function',
+          function: {
+            name: 'search_google',
+            description: 'Search Google',
+            parameters: { type: 'object', properties: {} },
+          },
+        },
+        execute: vi.fn().mockResolvedValue({ result: 'search results' }),
+      };
+
+      const streamCallback = vi.fn();
+      const response = await api.chat(
+        {
+          provider: mockProvider as any,
+          modelId: 'test-model',
+          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        },
+        {
+          userPrompt: 'Test prompt',
+          context: [],
+          tools: [mockTool as any],
+        },
+        streamCallback,
+      );
+
+      // Verify tool call was emitted via stream callback
+      expect(streamCallback).toHaveBeenCalledWith(
+        expect.objectContaining({
+          type: 'toolCall',
+          toolName: 'search_google',
+        }),
+      );
+
+      // Verify tool was executed
+      expect(mockTool.execute).toHaveBeenCalled();
+
+      // Verify response indicates tool calls were processed
+      expect(response.toolCalls).toBeDefined();
+    });
+
+    it('should handle thinking content when reasoning is enabled', async () => {
+      // Mock streaming response with thinking
+      const mockStream = async function* () {
+        yield {
+          message: {
+            thinking: 'Let me think about this...',
+            content: '',
+          },
+          done: false,
+        };
+        yield {
+          message: {
+            thinking: ' The answer is',
+            content: '',
+          },
+          done: false,
+        };
+        yield {
+          message: { content: '42' },
+          done: true,
+          done_reason: 'stop',
+          total_duration: 100,
+          prompt_eval_count: 10,
+          eval_count: 1,
+        };
+      };
+
+      mockOllamaClient.chat.mockResolvedValue(mockStream());
+
+      const streamCallback = vi.fn();
+      const response = await api.chat(
+        {
+          provider: mockProvider as any,
+          modelId: 'test-model',
+          params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
+        },
+        {
+          userPrompt: 'What is the answer?',
+          context: [],
+        },
+        streamCallback,
+      );
+
+      // Verify thinking was emitted
+      expect(streamCallback).toHaveBeenCalledWith({
+        type: 'thinking',
+        data: 'Let me think about this...',
+      });
+      expect(streamCallback).toHaveBeenCalledWith({
+        type: 'thinking',
+        data: ' The answer is',
+      });
+
+      // Verify response was emitted
+      expect(streamCallback).toHaveBeenCalledWith({
+        type: 'response',
+        data: '42',
+      });
+
+      // Verify final response includes thinking
+      expect(response.thinking).toBe('Let me think about this... The answer is');
+    });
+
+    it('should handle empty response on load failure', async () => {
+      // Mock streaming response with load failure
+      const mockStream = async function* () {
+        yield {
+          message: { content: '' },
+          done: true,
+          done_reason: 'load',
+          total_duration: 5000,
+          prompt_eval_count: 0,
+          eval_count: 0,
+        };
+      };
+
+      mockOllamaClient.chat.mockResolvedValue(mockStream());
+
+      const streamCallback = vi.fn();
+      const response = await api.chat(
+        {
+          provider: mockProvider as any,
+          modelId: 'test-model',
+          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        },
+        {
+          userPrompt: 'Test prompt',
+          context: [],
+        },
+        streamCallback,
+      );
+
+      // Verify response indicates load failure
+      expect(response.done).toBe(true);
+      expect(response.doneReason).toBe('load');
+      expect(response.response).toBe('');
+
+      // Verify no stream callbacks for empty content
+      expect(streamCallback).not.toHaveBeenCalled();
+    });
+
+    it('should iterate tool calling loop when tools are present', async () => {
+      let callCount = 0;
+
+      // Mock streaming response that requires tool call then returns
+      const mockStream = async function* () {
+        callCount++;
+        if (callCount === 1) {
+          // First call: return tool call
+          yield {
+            message: {
+              content: '',
+              tool_calls: [
+                {
+                  function: {
+                    name: 'search_google',
+                    arguments: { query: 'test' },
+                  },
+                },
+              ],
+            },
+            done: false,
+          };
+          yield {
+            message: { content: '' },
+            done: true,
+            done_reason: 'stop',
+            total_duration: 100,
+            prompt_eval_count: 10,
+            eval_count: 1,
+          };
+        } else {
+          // Second call: return final response
+          yield {
+            message: { content: 'Here are the results' },
+            done: true,
+            done_reason: 'stop',
+            total_duration: 100,
+            prompt_eval_count: 15,
+            eval_count: 5,
+          };
+        }
+      };
+
+      mockOllamaClient.chat.mockImplementation(() => mockStream());
+
+      const mockTool = {
+        definition: {
+          type: 'function',
+          function: {
+            name: 'search_google',
+            description: 'Search Google',
+            parameters: { type: 'object', properties: {} },
+          },
+        },
+        execute: vi.fn().mockResolvedValue({ result: 'search results' }),
+      };
+
+      const streamCallback = vi.fn();
+      const response = await api.chat(
+        {
+          provider: mockProvider as any,
+          modelId: 'test-model',
+          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        },
+        {
+          userPrompt: 'Test prompt',
+          context: [],
+          tools: [mockTool as any],
+        },
+        streamCallback,
+      );
+
+      // Verify chat was called twice (once for tool call, once for response)
+      expect(mockOllamaClient.chat).toHaveBeenCalledTimes(2);
+
+      // Verify tool was executed
+      expect(mockTool.execute).toHaveBeenCalled();
+
+      // Verify final response
+      expect(response.done).toBe(true);
+      expect(response.response).toBe('Here are the results');
+    });
+  });
+});
@@ -10,6 +10,7 @@ import {
   AiApi,
   IAiChatOptions,
   IAiChatResponse,
+  IToolCall,
   IToolCallResult,
   IAiGenerateOptions,
   IAiGenerateResponse,
@@ -212,16 +213,68 @@ export class OllamaAiApi extends AiApi {
       modelId: model.modelId,
     });

+    // VALIDATE: Ensure we have at least one message with content
+    if (!options.userPrompt || !options.userPrompt.trim()) {
+      throw new Error("userPrompt is required and cannot be empty");
+    }
+
     const maxIterations = options.maxToolIterations ?? 5;
     let iteration = 0;

-    const messages: OllamaMessage[] = options.context
-      ? options.context.map((msg) => ({
-          role: msg.role,
-          content: msg.content,
-        }))
-      : [];
+    // Build messages array like OpenAI does
+    const messages: OllamaMessage[] = [];
+
+    // Add system prompt if present
+    if (options.systemPrompt) {
+      messages.push({
+        role: 'system',
+        content: options.systemPrompt,
+      });
+    }
+
+    // Add context messages
+    if (options.context) {
+      for (const msg of options.context) {
+        if (msg.content && msg.content.trim()) {
+          if (msg.role === 'tool') {
+            messages.push({
+              role: 'tool',
+              content: msg.content,
+              tool_name: msg.toolName,
+            });
+          } else {
+            messages.push({
+              role: msg.role as 'user' | 'assistant' | 'system',
+              content: msg.content,
+            });
+          }
+        }
+      }
+    }
+
+    // Add user prompt (required)
+    messages.push({
+      role: 'user',
+      content: options.userPrompt,
+    });
+
+    // VALIDATE: Ensure messages array is not empty before calling API
+    if (messages.length === 0) {
+      throw new Error("Messages array is empty - cannot call Ollama API with no messages");
+    }
+
+    // DEBUG: Log what we're sending to Ollama
+    await this.log.debug("Ollama chat request", {
+      messagesCount: messages.length,
+      messages: messages.map(m => ({ role: m.role, contentLength: m.content?.length || 0 })),
+      userPrompt: options.userPrompt?.slice(0, 100),
+      contextCount: options.context?.length || 0,
+    });
+
     const allToolCallResults: IToolCallResult[] = [];
+    const allToolCalls: IToolCall[] = [];
+    let totalAccumulatedResponse = "";
+    let totalAccumulatedThinking = "";

     while (iteration < maxIterations) {
       iteration++;
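The net effect of the rewritten setup above is a messages array ordered system prompt, then context (with tool turns preserved), then the current user prompt. A sketch of the resulting shape, with illustrative values and only the OllamaMessage fields used in this hunk:

const messages = [
  { role: 'system', content: 'system prompt, if present' },
  { role: 'user', content: 'earlier context message' },
  { role: 'tool', content: '{"result": 1}', tool_name: 'search_google' }, // tool context keeps tool_name
  { role: 'user', content: 'current user prompt' },                       // always appended last
];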
@@ -248,6 +301,10 @@ export class OllamaAiApi extends AiApi {
       let lastChunk;
       let accumulatedThinking = "";
       let accumulatedResponse = "";
+      const streamedToolCalls: Array<{
+        callId: string;
+        function: { name: string; arguments: any };
+      }> = [];

       for await (const chunk of response) {
         await this.log.debug("stream chunk received", { chunk });
@@ -271,10 +328,22 @@ export class OllamaAiApi extends AiApi {
         if (chunk.message.tool_calls) {
           for (const tc of chunk.message.tool_calls) {
             const params = JSON.stringify(tc.function.arguments);
+            const callId = `tool_${tc.function.name}_${Date.now()}`;
+
+            const toolCall: IToolCall = {
+              callId,
+              function: {
+                name: tc.function.name,
+                arguments: JSON.stringify(tc.function.arguments),
+              },
+            };
+            streamedToolCalls.push(toolCall);
+            allToolCalls.push(toolCall);
+
             await streamCallback({
               type: 'toolCall',
               data: params,
-              toolCallId: `tool_${tc.function.name}_${Date.now()}`,
+              toolCallId: callId,
               toolName: tc.function.name,
               params,
             });
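Hoisting callId matters because the old code rebuilt the ID from Date.now() at emit time and again after the stream loop, so one tool call could be reported under two different IDs. A sketch of the problem, illustrative rather than the module's code:

const name = 'search_google';
const idAtEmitTime = `tool_${name}_${Date.now()}`;
// ... the stream continues; the clock moves on ...
const idAfterLoop = `tool_${name}_${Date.now()}`; // can differ from idAtEmitTime
// Computing callId once and reusing it keeps the stream event and the final
// toolCalls entry pointing at the same call.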
@@ -284,21 +353,24 @@ export class OllamaAiApi extends AiApi {
       }
       assert(lastChunk, "no response chunks received");

-      const toolCalls = lastChunk.message.tool_calls?.map((tc) => ({
-        callId: `tool_${tc.function.name}_${Date.now()}`,
-        function: {
-          name: tc.function.name,
-          arguments: JSON.stringify(tc.function.arguments),
-        },
-      }));
+      // Use accumulated thinking/response for final response
+      const finalThinking = accumulatedThinking || lastChunk.message.thinking;
+      const finalResponse = accumulatedResponse || lastChunk.message.content;
+
+      // Accumulate across iterations
+      totalAccumulatedResponse += finalResponse || "";
+      totalAccumulatedThinking += finalThinking || "";
+
+      // Use accumulated tool calls from stream
+      const toolCalls = streamedToolCalls;

       if (!toolCalls || toolCalls.length === 0) {
         return {
           done: lastChunk.done,
           doneReason: lastChunk.done_reason,
-          response: lastChunk.message.content,
-          thinking: lastChunk.message.thinking,
-          toolCalls: undefined,
+          response: totalAccumulatedResponse,
+          thinking: totalAccumulatedThinking,
+          toolCalls: allToolCalls.length > 0 ? allToolCalls : undefined,
+          toolCallResults: allToolCallResults.length > 0 ? allToolCallResults : undefined,
           stats: {
             duration: {
@@ -320,9 +392,19 @@ export class OllamaAiApi extends AiApi {
       );
       allToolCallResults.push(...toolCallResults);

+      // DEBUG: Log tool results being added to context
+      await this.log.debug("tool results ready for context", {
+        toolCallResults: toolCallResults.map(r => ({
+          callId: r.callId,
+          functionName: r.functionName,
+          resultLength: r.result?.length || 0,
+          hasError: !!r.error,
+        })),
+      });
+
       const assistantMsg: OllamaMessage = {
         role: "assistant",
-        content: lastChunk.message.content,
+        content: accumulatedResponse || lastChunk.message.content,
       };
       if (lastChunk.message.thinking) {
         assistantMsg.thinking = lastChunk.message.thinking;
@@ -333,10 +415,38 @@ export class OllamaAiApi extends AiApi {
       messages.push(assistantMsg);

       for (const result of toolCallResults) {
-        messages.push({
-          role: "tool",
-          content: result.error || result.result,
-          tool_name: result.functionName,
-        });
+        const toolContent = result.error
+          ? `Error executing ${result.functionName}: ${result.error}`
+          : result.result;
+
+        const toolMsg = {
+          role: "tool" as const,
+          content: toolContent,
+        };
+        await this.log.debug("adding tool result to messages", {
+          contentLength: toolMsg.content?.length || 0,
+          hasContent: !!(toolMsg.content && toolMsg.content.length),
+        });
+        messages.push(toolMsg);
       }
+
+      // DEBUG: Log full messages array before next iteration
+      await this.log.debug("messages array for next Ollama API call", {
+        messageCount: messages.length,
+        messages: messages.map(m => ({
+          role: m.role,
+          contentLength: m.content?.length || 0,
+          tool_name: (m as any).tool_name,
+          contentPreview: m.content?.slice(0, 200),
+        })),
+      });
+
+      // VALIDATE: Ensure tool results are in messages
+      const toolMessages = messages.filter(m => m.role === 'tool');
+      if (toolMessages.length === 0 && toolCallResults.length > 0) {
+        await this.log.error("CRITICAL: tool results NOT in messages array", {
+          toolCallResultsCount: toolCallResults.length,
+          messagesCount: messages.length,
+        });
+      }
     }
@@ -346,7 +456,7 @@ export class OllamaAiApi extends AiApi {
       doneReason: "max_tool_iterations_reached",
       response: "",
       thinking: undefined,
-      toolCalls: undefined,
+      toolCalls: allToolCalls.length > 0 ? allToolCalls : undefined,
       toolCallResults: allToolCallResults,
       stats: {
         duration: {

@@ -8,6 +8,7 @@ import {
   AiApi,
   IAiChatOptions,
   IAiChatResponse,
+  IToolCall,
   IToolCallResult,
   IAiGenerateOptions,
   IAiGenerateResponse,
@@ -272,10 +273,18 @@ export class OpenAiApi extends AiApi {
     }
     if (options.context) {
       for (const msg of options.context) {
-        messages.push({
-          role: msg.role as "user" | "assistant" | "system",
-          content: msg.content,
-        });
+        if (msg.role === 'tool') {
+          messages.push({
+            role: 'tool',
+            content: msg.content,
+            tool_call_id: msg.callId || '',
+          });
+        } else {
+          messages.push({
+            role: msg.role as "user" | "assistant" | "system",
+            content: msg.content,
+          });
+        }
       }
     }
     if (options.userPrompt) {
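The branching above mirrors the Ollama change but diverges on the identifying field: as used in these hunks, the Ollama path tags tool messages with tool_name, while the OpenAI path ties them back to the originating call via tool_call_id. Side by side, with illustrative values:

const ollamaToolMsg = { role: 'tool', content: '{"result": 1}', tool_name: 'search_google' };
const openaiToolMsg = { role: 'tool', content: '{"result": 1}', tool_call_id: 'call_abc123' };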
@@ -283,6 +292,7 @@ export class OpenAiApi extends AiApi {
     }

     const allToolCallResults: IToolCallResult[] = [];
+    const allToolCalls: IToolCall[] = [];

     while (iteration < maxIterations) {
       iteration++;
@@ -337,6 +347,14 @@ export class OpenAiApi extends AiApi {
               finalToolCalls = delta.tool_calls;
               for (const tc of delta.tool_calls) {
                 if (tc.function) {
+                  const toolCall: IToolCall = {
+                    callId: tc.id || "",
+                    function: {
+                      name: tc.function.name || "",
+                      arguments: tc.function.arguments || "",
+                    },
+                  };
+                  allToolCalls.push(toolCall);
                   if (streamCallback) {
                     await streamCallback({
                       type: 'toolCall',
|
||||
done: true,
|
||||
response: accumulatedResponse,
|
||||
thinking: accumulatedThinking || undefined,
|
||||
toolCalls: undefined,
|
||||
toolCalls: allToolCalls.length > 0 ? allToolCalls : undefined,
|
||||
toolCallResults: allToolCallResults.length > 0 ? allToolCallResults : undefined,
|
||||
stats: {
|
||||
duration: {
|
||||
@@ -416,7 +434,7 @@ export class OpenAiApi extends AiApi {
       doneReason: "max_tool_iterations_reached",
       response: "",
       thinking: undefined,
-      toolCalls: undefined,
+      toolCalls: allToolCalls.length > 0 ? allToolCalls : undefined,
       toolCallResults: allToolCallResults,
       stats: {
         duration: {

packages/ai/vitest.config.ts (new file, 9 lines)
@@ -0,0 +1,9 @@
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({
+  test: {
+    globals: true,
+    environment: 'node',
+    include: ['src/**/*.test.ts'],
+  },
+});
@@ -337,6 +337,9 @@ importers:
       typescript:
         specifier: ^6.0.3
         version: 6.0.3
+      vitest:
+        specifier: ^4.1.5
+        version: 4.1.5(@types/node@25.6.0)(jsdom@29.1.0)(vite@8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0))

   packages/api:
     dependencies:
@@ -4671,6 +4674,14 @@ snapshots:
     optionalDependencies:
       vite: 8.0.10(@types/node@24.12.2)(esbuild@0.25.12)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0)

+  '@vitest/mocker@4.1.5(vite@8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0))':
+    dependencies:
+      '@vitest/spy': 4.1.5
+      estree-walker: 3.0.3
+      magic-string: 0.30.21
+    optionalDependencies:
+      vite: 8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0)
+
   '@vitest/pretty-format@4.1.5':
     dependencies:
       tinyrainbow: 3.1.0
@@ -6925,6 +6936,21 @@ snapshots:
     less: 4.6.4
     tsx: 4.21.0

+  vite@8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0):
+    dependencies:
+      lightningcss: 1.32.0
+      picomatch: 4.0.4
+      postcss: 8.5.12
+      rolldown: 1.0.0-rc.17
+      tinyglobby: 0.2.16
+    optionalDependencies:
+      '@types/node': 25.6.0
+      esbuild: 0.27.7
+      fsevents: 2.3.3
+      jiti: 2.6.1
+      less: 4.6.4
+      tsx: 4.21.0
+
   vitest@4.1.5(@types/node@24.12.2)(jsdom@29.1.0)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.25.12)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0)):
     dependencies:
       '@vitest/expect': 4.1.5
@@ -6953,6 +6979,34 @@ snapshots:
     transitivePeerDependencies:
       - msw

+  vitest@4.1.5(@types/node@25.6.0)(jsdom@29.1.0)(vite@8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0)):
+    dependencies:
+      '@vitest/expect': 4.1.5
+      '@vitest/mocker': 4.1.5(vite@8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0))
+      '@vitest/pretty-format': 4.1.5
+      '@vitest/runner': 4.1.5
+      '@vitest/snapshot': 4.1.5
+      '@vitest/spy': 4.1.5
+      '@vitest/utils': 4.1.5
+      es-module-lexer: 2.1.0
+      expect-type: 1.3.0
+      magic-string: 0.30.21
+      obug: 2.1.1
+      pathe: 2.0.3
+      picomatch: 4.0.4
+      std-env: 4.1.0
+      tinybench: 2.9.0
+      tinyexec: 1.1.1
+      tinyglobby: 0.2.16
+      tinyrainbow: 3.1.0
+      vite: 8.0.10(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(less@4.6.4)(tsx@4.21.0)
+      why-is-node-running: 2.3.0
+    optionalDependencies:
+      '@types/node': 25.6.0
+      jsdom: 29.1.0
+    transitivePeerDependencies:
+      - msw
+
   void-elements@3.1.0: {}

   w3c-xmlserializer@5.0.0: