gadget/packages/ai/src/ollama.test.ts
Rob Colbert 07a760c7b5 feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline
Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.

Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096, silently truncating output and context. We never overrode these.
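A minimal sketch of the override this fix relies on, using the ollama JavaScript
client directly (model name, host, and prompt are placeholders; this is not the
project's adapter code):

  import { Ollama } from 'ollama';

  const ollama = new Ollama({ host: 'http://localhost:11434' });

  // Without an explicit options block, the server applies its defaults
  // (num_predict 128, num_ctx 4096), which is what truncated responses.
  const stream = await ollama.chat({
    model: 'llama3.2',
    messages: [{ role: 'user', content: 'Summarize this repository.' }],
    stream: true,
    options: {
      num_ctx: 131072,  // full context window
      num_predict: -1,  // -1 = generate until the model stops on its own
    },
  });
  for await (const chunk of stream) {
    process.stdout.write(chunk.message.content);
  }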

Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
  expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
  (see the sketch after this list)
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI: add max_completion_tokens to all three create() calls
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
  and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from provider
  cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass
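
Rough sketch of the Modelfile-parameter parsing behind the extractSettings()
item above (a hypothetical standalone helper based on the test fixtures; the
real method lives on the Ollama adapter and may differ):

  // Parses the plain-text `parameters` blob returned by ollama.show(), e.g.
  // "temperature 0.7\ntop_k 40\ntop_p 0.9\nnum_ctx 4096\nnum_predict -1".
  function extractSettingsFromParameters(parameters: string): Record<string, number> {
    const keyMap: Record<string, string> = {
      temperature: 'temperature',
      top_k: 'topK',
      top_p: 'topP',
      num_ctx: 'numCtx',
      num_predict: 'numPredict',
    };
    const settings: Record<string, number> = {};
    for (const line of parameters.split('\n')) {
      const [key, value] = line.trim().split(/\s+/);
      const mapped = keyMap[key];
      if (mapped && value !== undefined && !Number.isNaN(Number(value))) {
        settings[mapped] = Number(value);
      }
    }
    return settings;
  }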

Defaults when model settings unavailable:
- numPredict: -1 (Ollama unlimited - generate until natural stop)
- numCtx: 131072 (128k - covers most modern models)
- maxCompletionTokens: 16384 (16k - reasonable OpenAI default)
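
Illustrative fallback logic (helper and constant names are made up for this
sketch; the real wiring goes through IDroneModelConfig in agent.ts):

  const DEFAULT_NUM_PREDICT = -1;               // Ollama: no cap, stop naturally
  const DEFAULT_NUM_CTX = 131072;               // 128k context window
  const DEFAULT_MAX_COMPLETION_TOKENS = 16384;  // 16k OpenAI completion cap

  interface ModelSettingsLike {
    numPredict?: number;
    numCtx?: number;
    maxCompletionTokens?: number;
  }

  function resolveInferenceParams(settings?: ModelSettingsLike) {
    return {
      numPredict: settings?.numPredict ?? DEFAULT_NUM_PREDICT,
      numCtx: settings?.numCtx ?? DEFAULT_NUM_CTX,
      maxCompletionTokens: settings?.maxCompletionTokens ?? DEFAULT_MAX_COMPLETION_TOKENS,
    };
  }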
2026-05-11 13:50:19 -04:00

import { describe, it, expect, vi, beforeEach } from 'vitest';
import type { ChatResponseStream } from 'ollama';

// Mock the Ollama client BEFORE importing the module
const mockOllamaClient = {
  chat: vi.fn(),
  generate: vi.fn(),
  list: vi.fn(),
  show: vi.fn(),
};

vi.mock('ollama', () => {
  return {
    Ollama: class MockOllama {
      constructor() {
        return mockOllamaClient;
      }
    },
  };
});

import { OllamaAiApi } from './ollama';

// Mock logger
const mockLogger = {
  debug: vi.fn(),
  info: vi.fn(),
  warn: vi.fn(),
  error: vi.fn(),
};

// Mock environment and provider
const mockEnv = {
  NODE_ENV: 'test',
  services: {
    google: {
      cse: {
        apiKey: 'test-key',
        engineId: 'test-engine',
      },
    },
  },
};

const mockProvider = {
  _id: 'test-provider',
  name: 'Test Ollama',
  sdk: 'ollama' as const,
  baseUrl: 'http://localhost:11434',
  apiKey: 'test-key',
};

describe('OllamaAiApi', () => {
  let api: OllamaAiApi;

  beforeEach(() => {
    vi.clearAllMocks();
    api = new OllamaAiApi(mockEnv as any, mockProvider as any, mockLogger as any);
  });
  describe('chat', () => {
    it('should handle normal response streaming', async () => {
      // Mock streaming response
      const mockStream = async function* () {
        yield {
          message: { content: 'Hello' },
          done: false,
        };
        yield {
          message: { content: ' world' },
          done: false,
        };
        yield {
          message: { content: '!' },
          done: true,
          done_reason: 'stop',
          total_duration: 100,
          prompt_eval_count: 10,
          eval_count: 3,
        };
      };
      mockOllamaClient.chat.mockResolvedValue(mockStream());

      const streamCallback = vi.fn();
      const response = await api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'Test prompt',
          context: [],
        },
        streamCallback,
      );

      // Verify stream callback was called for each chunk
      expect(streamCallback).toHaveBeenCalledTimes(3);
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: 'Hello',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: ' world',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: '!',
      });

      // Verify response
      expect(response.done).toBe(true);
      expect(response.doneReason).toBe('stop');
      expect(response.response).toBe('Hello world!');
    });

    it('should handle tool calls', async () => {
      const mockStream = async function* () {
        yield {
          message: {
            content: '',
            tool_calls: [
              {
                function: {
                  name: 'search_google',
                  arguments: { query: 'test query' },
                },
              },
            ],
          },
          done: false,
        };
        yield {
          message: { content: '' },
          done: true,
          done_reason: 'stop',
          total_duration: 100,
          prompt_eval_count: 10,
          eval_count: 1,
        };
      };
      mockOllamaClient.chat.mockResolvedValue(mockStream());

      const streamCallback = vi.fn();
      const response = await api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'Test prompt',
          context: [],
        },
        streamCallback,
      );

      // Verify tool calls are returned, not executed
      expect(response.toolCalls).toBeDefined();
      expect(response.toolCalls!.length).toBe(1);
      expect(response.toolCalls![0].function.name).toBe('search_google');

      // chat() should only be called once (no internal loop)
      expect(mockOllamaClient.chat).toHaveBeenCalledTimes(1);
    });
    it('should handle thinking content when reasoning is enabled', async () => {
      const mockStream = async function* () {
        yield {
          message: {
            thinking: 'Let me think about this...',
            content: '',
          },
          done: false,
        };
        yield {
          message: {
            thinking: ' The answer is',
            content: '',
          },
          done: false,
        };
        yield {
          message: { content: '42' },
          done: true,
          done_reason: 'stop',
          total_duration: 100,
          prompt_eval_count: 10,
          eval_count: 1,
        };
      };
      mockOllamaClient.chat.mockResolvedValue(mockStream());

      const streamCallback = vi.fn();
      const response = await api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'What is the answer?',
          context: [],
        },
        streamCallback,
      );

      expect(streamCallback).toHaveBeenCalledWith({
        type: 'thinking',
        data: 'Let me think about this...',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'thinking',
        data: ' The answer is',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: '42',
      });
      expect(response.thinking).toBe('Let me think about this... The answer is');
    });

    it('should reject empty response on load failure', async () => {
      const mockStream = async function* () {
        yield {
          message: { content: '' },
          done: true,
          done_reason: 'load',
          total_duration: 5000,
          prompt_eval_count: 0,
          eval_count: 0,
        };
      };
      mockOllamaClient.chat.mockResolvedValue(mockStream());

      await expect(api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'Test prompt',
          context: [],
        },
        vi.fn(),
      )).rejects.toThrow('Provider returned an empty chat response');
    });
  });
  describe('probeModel', () => {
    it('should detect thinking capability from "thinking" (Ollama convention)', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion', 'vision', 'tools', 'thinking'],
        details: { family: 'gemma4' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('gemma4:e4b');
      expect(result.capabilities.hasThinking).toBe(true);
      expect(result.capabilities.canCallTools).toBe(true);
      expect(result.capabilities.hasVision).toBe(true);
    });

    it('should detect thinking capability from "reasoning" (OpenAI convention)', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion', 'reasoning'],
        details: { family: 'deepseek' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('deepseek-r1');
      expect(result.capabilities.hasThinking).toBe(true);
    });

    it('should set hasThinking false when neither thinking nor reasoning in capabilities', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion'],
        details: { family: 'llama' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('llama3.2');
      expect(result.capabilities.hasThinking).toBe(false);
    });

    it('should detect vision, tools, and embedding capabilities', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion', 'vision', 'tools', 'embeddings'],
        details: { family: 'llama' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('some-model');
      expect(result.capabilities.hasVision).toBe(true);
      expect(result.capabilities.canCallTools).toBe(true);
      expect(result.capabilities.hasEmbedding).toBe(true);
    });

    it('should extract settings from Modelfile parameters', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion'],
        details: { family: 'llama' },
        model_info: {},
        parameters: 'temperature 0.7\ntop_k 40\ntop_p 0.9\nnum_ctx 4096',
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('llama3.2');
      expect(result.settings).toBeDefined();
      expect(result.settings!.temperature).toBe(0.7);
      expect(result.settings!.topK).toBe(40);
      expect(result.settings!.topP).toBe(0.9);
      expect(result.settings!.numCtx).toBe(4096);
    });
  });
});