// src/ollama.ts

// Copyright (C) 2026 Rob Colbert
// Licensed under the Apache License, Version 2.0

import assert from "node:assert";

import { Ollama } from "ollama";
import numeral from "numeral";

import {
  AiApi,
  IAiChatOptions,
  IAiChatResponse,
  IToolCall,
  IAiGenerateOptions,
  IAiGenerateResponse,
  IAiLogger,
  IAiModelConfig,
  IAiModelListResult,
  IAiModelProbeResult,
  IAiProvider,
  IAiResponseStreamFn,
} from "./api.js";
import { IAiEnvironment } from "./config/env.js";

import type { Message as OllamaMessage } from "ollama";

export class OllamaAiApi extends AiApi {
  protected client: Ollama;

  constructor(env: IAiEnvironment, provider: IAiProvider, logger?: IAiLogger) {
    super(env, provider, logger);
    this.client = new Ollama({
      host: this.provider.baseUrl,
      headers: { Authorization: `Bearer ${this.provider.apiKey}` },
    });
  }

  async listModels(): Promise<IAiModelListResult> {
    const response = await this.client.list();
    const models = response.models.map((model) => {
      const parameterCount = this.parseParameterCount(
        model.details.parameter_size,
      );
      return {
        id: model.name,
        name: model.name,
        parameterLabel: model.details.parameter_size,
        parameterCount,
        contextWindow: undefined,
      };
    });
    return { models };
  }

  async probeModel(modelId: string): Promise<IAiModelProbeResult> {
    const response = await this.client.show({ model: modelId });
    const capabilities = this.analyzeCapabilities(response, modelId);
    const settings = this.extractSettings(response);
    return {
      capabilities,
      settings,
    };
  }

  // Normalizes Ollama's human-readable parameter size (e.g. "7B", "125M")
  // to a count expressed in billions.
  private parseParameterCount(parameterSize?: string): number | undefined {
    if (!parameterSize) return undefined;
    const match = parameterSize.match(/^([\d.]+)[BbMm]?$/);
    if (!match) return undefined;
    const value = parseFloat(match[1]);
    if (parameterSize.toLowerCase().includes("m")) {
      return value / 1000;
    }
    return value;
  }

  private analyzeCapabilities(
    response: Awaited<ReturnType<Ollama["show"]>>,
    modelId: string,
  ): IAiModelProbeResult["capabilities"] {
    const capabilities = response.capabilities || [];
    const modelInfo = response.model_info as unknown as
      | Record<string, unknown>
      | undefined;
    return {
      canCallTools:
        capabilities.includes("tools") ||
        capabilities.includes("function_calling"),
      hasVision:
        capabilities.includes("vision") ||
        !!modelInfo?.["vision_model"] ||
        !!modelInfo?.["clip"],
      // Ollama reports the singular "embedding"; keep the plural check for safety.
      hasEmbedding:
        capabilities.includes("embedding") ||
        capabilities.includes("embeddings"),
      hasThinking:
        capabilities.includes("thinking") ||
        capabilities.includes("reasoning"),
      isInstructTuned:
        modelId.toLowerCase().includes("instruct") ||
        modelId.toLowerCase().includes("chat") ||
        modelId.toLowerCase().includes("-it"),
    };
  }

  private extractSettings(
    response: Awaited<ReturnType<Ollama["show"]>>,
  ): IAiModelProbeResult["settings"] {
    const parameters = response.parameters || "";
    const settings: IAiModelProbeResult["settings"] = {};

    const temperatureMatch = parameters.match(/temperature\s+(\d+\.?\d*)/i);
    if (temperatureMatch) {
      settings.temperature = parseFloat(temperatureMatch[1]);
    }

    const topPMatch = parameters.match(/top_p\s+(\d+\.?\d*)/i);
    if (topPMatch) {
      settings.topP = parseFloat(topPMatch[1]);
    }

    const topKMatch = parameters.match(/top_k\s+(\d+)/i);
    if (topKMatch) {
      settings.topK = parseInt(topKMatch[1], 10);
    }

    const numCtxMatch = parameters.match(/num_ctx\s+(\d+)/i);
    if (numCtxMatch) {
      settings.numCtx = parseInt(numCtxMatch[1], 10);
    }

    // Only return a settings object when at least one parameter was parsed.
    return Object.keys(settings).length > 0 ? settings : undefined;
  }
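  // For reference, the `parameters` blob returned by `Ollama.show()` is a
  // Modelfile-style dump that the regexes above are written against; the
  // values below are illustrative, not from any specific model:
  //
  //   temperature 0.7
  //   top_k 40
  //   top_p 0.9
  //   num_ctx 4096
  //
  // extractSettings would parse that dump into
  //   { temperature: 0.7, topK: 40, topP: 0.9, numCtx: 4096 }.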
  async generate(
    model: IAiModelConfig,
    options: IAiGenerateOptions,
    streamCallback?: IAiResponseStreamFn,
  ): Promise<IAiGenerateResponse> {
    await this.log.debug("OllamaAiApi.generate called", {
      provider: model.provider.name,
      modelId: model.modelId,
    });

    const response = await this.client.generate({
      model: model.modelId,
      prompt: options.prompt,
      system: options.systemPrompt,
      stream: true,
    });

    const content = {
      response: "",
      thinking: "",
    };

    let lastChunk;
    for await (const chunk of response) {
      lastChunk = chunk;
      if (chunk.thinking) {
        content.thinking += chunk.thinking;
        if (streamCallback) {
          await streamCallback({
            type: "thinking",
            data: chunk.thinking,
          });
        }
      }
      if (chunk.response) {
        content.response += chunk.response;
        if (streamCallback) {
          await streamCallback({
            type: "response",
            data: chunk.response,
          });
        }
      }
    }

    await this.log.debug("generate call is done", content);
    assert(lastChunk, "no stream response chunks received");

    // Ollama reports total_duration in nanoseconds; convert to seconds
    // before storing and formatting.
    const durationSeconds = lastChunk.total_duration / 1e9;

    return {
      done: lastChunk.done,
      doneReason: lastChunk.done_reason,
      response: content.response,
      thinking: content.thinking,
      stats: {
        duration: {
          seconds: durationSeconds,
          text: numeral(durationSeconds).format("00:00:00"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          thinking: 0,
        },
      },
    };
  }
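  // For reference, the final streamed chunk from `Ollama.generate()` carries
  // the run stats consumed above. Abbreviated shape (field names are from the
  // Ollama API; the numbers are illustrative):
  //
  //   { response: "", done: true, done_reason: "stop",
  //     total_duration: 5_043_500_667,   // nanoseconds
  //     prompt_eval_count: 26, eval_count: 290, ... }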
  async chat(
    model: IAiModelConfig,
    options: IAiChatOptions,
    streamCallback?: IAiResponseStreamFn,
  ): Promise<IAiChatResponse> {
    await this.log.debug("OllamaAiApi.chat called", {
      provider: model.provider.name,
      modelId: model.modelId,
    });

    const messages: OllamaMessage[] = [];
    if (options.systemPrompt) {
      messages.push({ role: "system", content: options.systemPrompt });
    }
    if (options.context) {
      for (const msg of options.context) {
        if (msg.content && msg.content.trim()) {
          if (msg.role === "tool") {
            messages.push({
              role: "tool",
              content: msg.content,
              tool_name: msg.toolName,
            });
          } else {
            messages.push({
              role: msg.role as "user" | "assistant" | "system",
              content: msg.content,
            });
          }
        }
      }
    }
    if (options.userPrompt) {
      messages.push({ role: "user", content: options.userPrompt });
    }
    if (messages.length === 0) {
      throw new Error(
        "Messages array is empty - cannot call Ollama API with no messages",
      );
    }

    const ollamaTools = options.tools
      ? options.tools.map((tool) => ({
          type: tool.definition.type,
          function: {
            name: tool.definition.function.name,
            description: tool.definition.function.description,
            parameters: tool.definition.function.parameters,
          },
        }))
      : undefined;

    const response = await this.client.chat({
      model: model.modelId,
      messages,
      stream: true,
      think: model.params.reasoning,
      tools: ollamaTools,
    });

    let lastChunk;
    let accumulatedThinking = "";
    let accumulatedResponse = "";
    const toolCalls: IToolCall[] = [];

    for await (const chunk of response) {
      lastChunk = chunk;
      if (chunk.message.thinking) {
        accumulatedThinking += chunk.message.thinking;
        if (streamCallback) {
          await streamCallback({
            type: "thinking",
            data: chunk.message.thinking,
          });
        }
      }
      if (chunk.message.content) {
        accumulatedResponse += chunk.message.content;
        if (streamCallback) {
          await streamCallback({
            type: "response",
            data: chunk.message.content,
          });
        }
      }
      if (chunk.message.tool_calls) {
        for (const [index, tc] of chunk.message.tool_calls.entries()) {
          const params = JSON.stringify(tc.function.arguments);
          // Ollama does not supply a call id; synthesize one that is unique
          // within this response.
          const callId = `tool_${tc.function.name}_${Date.now()}_${index}`;
          toolCalls.push({
            callId,
            function: {
              name: tc.function.name,
              arguments: params,
            },
          });
        }
      }
    }

    assert(lastChunk, "no response chunks received");

    // Ollama reports total_duration in nanoseconds; convert to seconds
    // before storing and formatting.
    const durationSeconds = lastChunk.total_duration / 1e9;

    const chatResponse: IAiChatResponse = {
      done: lastChunk.done,
      doneReason: lastChunk.done_reason,
      response: accumulatedResponse || lastChunk.message.content,
      thinking: accumulatedThinking || lastChunk.message.thinking,
      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      stats: {
        duration: {
          seconds: durationSeconds,
          text: numeral(durationSeconds).format("00:00:00"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          thinking: 0,
        },
      },
    };

    this.assertNonEmptyChatResponse(chatResponse);
    return chatResponse;
  }
}
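// Usage sketch (illustrative only; `env`, `provider`, `logger`, and `model`
// are assumed to be constructed elsewhere to the IAiEnvironment, IAiProvider,
// IAiLogger, and IAiModelConfig shapes imported from ./api.js):
//
//   const api = new OllamaAiApi(env, provider, logger);
//   const { models } = await api.listModels();
//   const probe = await api.probeModel(models[0].id);
//   const result = await api.chat(
//     model,
//     { systemPrompt: "You are a helpful assistant.", userPrompt: "Hello!" },
//     async (chunk) => process.stdout.write(chunk.data),
//   );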