gadget/packages/ai/src/ollama.ts

// src/ollama.ts
// Copyright (C) 2026 Rob Colbert <rob.colbert@openplatform.us>
// Licensed under the Apache License, Version 2.0
import assert from "node:assert";
import { Ollama } from "ollama";
import numeral from "numeral";
import {
AiApi,
IAiChatOptions,
IAiChatResponse,
IToolCall,
IAiGenerateOptions,
IAiGenerateResponse,
IAiLogger,
IAiModelConfig,
IAiModelListResult,
IAiModelProbeResult,
IAiProvider,
IAiResponseStreamFn,
} from "./api.js";
import type { IAiEnvironment } from "./config/env.js";
import type { Message as OllamaMessage, ShowResponse } from "ollama";
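/**
 * Ollama-backed implementation of the AiApi abstraction. Wraps the official
 * `ollama` client, authenticating with a Bearer token, and adapts its
 * list/show/generate/chat calls to the provider-agnostic interfaces from
 * api.js.
 */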
export class OllamaAiApi extends AiApi {
protected client: Ollama;
constructor(env: IAiEnvironment, provider: IAiProvider, logger?: IAiLogger) {
super(env, provider, logger);
this.client = new Ollama({
host: this.provider.baseUrl,
headers: { Authorization: `Bearer ${this.provider.apiKey}` },
});
}
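  /**
   * Lists the models available on the Ollama host. contextWindow is left
   * undefined because list() does not report it; probeModel() can recover
   * num_ctx when the Modelfile declares one.
   */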
async listModels(): Promise<IAiModelListResult> {
const response = await this.client.list();
const models = response.models.map((model) => {
const parameterCount = this.parseParameterCount(
model.details.parameter_size,
);
return {
id: model.name,
name: model.name,
parameterLabel: model.details.parameter_size,
parameterCount,
contextWindow: undefined,
};
});
return { models };
}
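  /**
   * Probes a single model via show(), deriving capability flags and any
   * sampling defaults declared in its Modelfile parameters.
   */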
async probeModel(modelId: string): Promise<IAiModelProbeResult> {
const response = await this.client.show({ model: modelId });
const capabilities = this.analyzeCapabilities(response, modelId);
const settings = this.extractSettings(response);
return {
capabilities,
settings,
};
}
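  /**
   * Normalizes Ollama's human-readable parameter size (e.g. "7.6B", "560M")
   * to a count in billions, so "560M" becomes 0.56.
   */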
private parseParameterCount(parameterSize?: string): number | undefined {
if (!parameterSize) return undefined;
const match = parameterSize.match(/^([\d.]+)[BbMm]?$/);
if (!match) return undefined;
const value = parseFloat(match[1]);
if (parameterSize.toLowerCase().includes("m")) {
return value / 1000;
}
return value;
}
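  /**
   * Derives capability flags from the show() response, falling back to
   * model-name heuristics ("instruct", "chat", "-it") to detect instruct
   * tuning, which Ollama does not report directly.
   */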
  private analyzeCapabilities(
    response: ShowResponse,
    modelId: string,
  ): IAiModelProbeResult["capabilities"] {
const capabilities = response.capabilities || [];
const modelInfo = response.model_info as unknown as
| Record<string, unknown>
| undefined;
return {
canCallTools:
capabilities.includes("tools") ||
capabilities.includes("function_calling"),
hasVision:
capabilities.includes("vision") ||
!!modelInfo?.["vision_model"] ||
!!modelInfo?.["clip"],
hasEmbedding: capabilities.includes("embeddings"),
hasThinking:
capabilities.includes("thinking") || capabilities.includes("reasoning"),
isInstructTuned:
modelId.toLowerCase().includes("instruct") ||
modelId.toLowerCase().includes("chat") ||
modelId.toLowerCase().includes("-it"),
};
}
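  /**
   * Extracts sampling defaults (temperature, top_p, top_k, num_ctx) from the
   * Modelfile parameter text, returning undefined when none are declared.
   */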
  private extractSettings(
    response: ShowResponse,
  ): IAiModelProbeResult["settings"] {
const parameters = response.parameters || "";
const settings: IAiModelProbeResult["settings"] = {};
const temperatureMatch = parameters.match(/temperature\s+(\d+\.?\d*)/i);
if (temperatureMatch) {
settings.temperature = parseFloat(temperatureMatch[1]);
}
const topPMatch = parameters.match(/top_p\s+(\d+\.?\d*)/i);
if (topPMatch) {
settings.topP = parseFloat(topPMatch[1]);
}
const topKMatch = parameters.match(/top_k\s+(\d+)/i);
if (topKMatch) {
settings.topK = parseInt(topKMatch[1], 10);
}
const numCtxMatch = parameters.match(/num_ctx\s+(\d+)/i);
if (numCtxMatch) {
settings.numCtx = parseInt(numCtxMatch[1], 10);
}
return Object.keys(settings).length > 0 ? settings : undefined;
}
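  /**
   * Runs a single-turn completion. The request always streams internally;
   * chunks are accumulated (and forwarded to streamCallback when provided),
   * and the final chunk supplies the done state and token statistics.
   */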
async generate(
model: IAiModelConfig,
options: IAiGenerateOptions,
streamCallback?: IAiResponseStreamFn,
): Promise<IAiGenerateResponse> {
await this.log.debug("OllamaAiApi.generate called", {
provider: model.provider.name,
modelId: model.modelId,
});
const response = await this.client.generate({
model: model.modelId,
prompt: options.prompt,
system: options.systemPrompt,
stream: true,
});
const content = {
response: "",
thinking: "",
};
let lastChunk;
for await (const chunk of response) {
lastChunk = chunk;
if (chunk.thinking) {
content.thinking += chunk.thinking;
if (streamCallback) {
await streamCallback({
type: "thinking",
data: chunk.thinking,
});
}
}
if (chunk.response) {
content.response += chunk.response;
if (streamCallback) {
await streamCallback({
type: "response",
data: chunk.response,
});
}
}
}
    await this.log.debug("generate call is done", content);
    assert(lastChunk, "no stream response chunks received");
    // Ollama reports total_duration in nanoseconds; convert to seconds.
    const totalSeconds = lastChunk.total_duration / 1e9;
return {
done: lastChunk.done,
doneReason: lastChunk.done_reason,
response: content.response,
thinking: content.thinking,
      stats: {
        duration: {
          seconds: totalSeconds,
          text: numeral(totalSeconds).format("00:00:00"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          // Ollama does not report a separate thinking-token count.
          thinking: 0,
        },
      },
};
}
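  /**
   * Runs a multi-turn chat. Prior conversation from options.context is
   * replayed as Ollama messages (including tool results), tool definitions
   * are mapped to Ollama's function schema, and tool calls emitted by the
   * model are collected with synthesized call IDs.
   */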
async chat(
model: IAiModelConfig,
options: IAiChatOptions,
streamCallback?: IAiResponseStreamFn,
): Promise<IAiChatResponse> {
await this.log.debug("OllamaAiApi.chat called", {
provider: model.provider.name,
modelId: model.modelId,
});
const messages: OllamaMessage[] = [];
if (options.systemPrompt) {
messages.push({ role: "system", content: options.systemPrompt });
}
if (options.context) {
for (const msg of options.context) {
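        // Skip context entries with no usable content.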
if (msg.content && msg.content.trim()) {
if (msg.role === "tool") {
messages.push({
role: "tool",
content: msg.content,
tool_name: msg.toolName,
});
} else {
messages.push({
role: msg.role as "user" | "assistant" | "system",
content: msg.content,
});
}
}
}
}
if (options.userPrompt) {
messages.push({ role: "user", content: options.userPrompt });
}
if (messages.length === 0) {
throw new Error(
"Messages array is empty - cannot call Ollama API with no messages",
);
}
const ollamaTools = options.tools
? options.tools.map((tool) => ({
type: tool.definition.type,
function: {
name: tool.definition.function.name,
description: tool.definition.function.description,
parameters: tool.definition.function.parameters,
},
}))
: undefined;
const response = await this.client.chat({
model: model.modelId,
messages,
stream: true,
think: model.params.reasoning,
tools: ollamaTools,
});
let lastChunk;
let accumulatedThinking = "";
let accumulatedResponse = "";
const toolCalls: IToolCall[] = [];
for await (const chunk of response) {
lastChunk = chunk;
if (chunk.message.thinking) {
accumulatedThinking += chunk.message.thinking;
if (streamCallback) {
await streamCallback({
type: "thinking",
data: chunk.message.thinking,
});
}
}
if (chunk.message.content) {
accumulatedResponse += chunk.message.content;
if (streamCallback) {
await streamCallback({
type: "response",
data: chunk.message.content,
});
}
}
if (chunk.message.tool_calls) {
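        // Ollama does not assign tool-call IDs, so synthesize one from the
        // function name, a timestamp, and the chunk-local index.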
for (const [index, tc] of chunk.message.tool_calls.entries()) {
const params = JSON.stringify(tc.function.arguments);
const callId = `tool_${tc.function.name}_${Date.now()}_${index}`;
toolCalls.push({
callId,
function: {
name: tc.function.name,
arguments: params,
},
});
}
}
}
    assert(lastChunk, "no response chunks received");
    // Ollama reports total_duration in nanoseconds; convert to seconds.
    const totalSeconds = lastChunk.total_duration / 1e9;
const chatResponse: IAiChatResponse = {
done: lastChunk.done,
doneReason: lastChunk.done_reason,
response: accumulatedResponse || lastChunk.message.content,
thinking: accumulatedThinking || lastChunk.message.thinking,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      stats: {
        duration: {
          seconds: totalSeconds,
          text: numeral(totalSeconds).format("00:00:00"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          // Ollama does not report a separate thinking-token count.
          thinking: 0,
        },
      },
};
this.assertNonEmptyChatResponse(chatResponse);
return chatResponse;
}
}
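
// A minimal usage sketch (env, provider, and logger values are hypothetical):
//
//   const api = new OllamaAiApi(env, {
//     name: "local-ollama",
//     baseUrl: "http://localhost:11434",
//     apiKey: "not-used-by-local-hosts",
//   } as IAiProvider, logger);
//   const { models } = await api.listModels();
//   const probe = await api.probeModel(models[0].id);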