gadget/packages/ai/src/ollama.ts

// src/ollama.ts
// Copyright (C) 2026 Rob Colbert <rob.colbert@openplatform.us>
// Licensed under the Apache License, Version 2.0
import assert from "node:assert";
import { Ollama } from "ollama";
import numeral from "numeral";
import {
AiApi,
IAiChatOptions,
IAiChatResponse,
IToolCall,
IAiGenerateOptions,
IAiGenerateResponse,
IAiLogger,
IAiModelConfig,
IAiModelListResult,
IAiModelProbeResult,
IAiProvider,
IAiResponseStreamFn,
} from "./api.js";
import type { IAiEnvironment } from "./config/env.js";
import type { Message as OllamaMessage, ShowResponse } from "ollama";
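/**
 * Ollama-backed implementation of the AiApi abstraction. Wraps the official
 * `ollama` client, authenticating with a Bearer token, and adapts its
 * list/show/generate/chat calls to the provider-agnostic interfaces from
 * api.js.
 */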
export class OllamaAiApi extends AiApi {
protected client: Ollama;
constructor(env: IAiEnvironment, provider: IAiProvider, logger?: IAiLogger) {
super(env, provider, logger);
this.client = new Ollama({
host: this.provider.baseUrl,
headers: { Authorization: `Bearer ${this.provider.apiKey}` },
});
}
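  /**
   * Lists the models available on the Ollama host. contextWindow is left
   * undefined because list() does not report it; probeModel() can recover
   * num_ctx when the Modelfile declares one.
   */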
async listModels(): Promise<IAiModelListResult> {
const response = await this.client.list();
const models = response.models.map((model) => {
const parameterCount = this.parseParameterCount(
model.details.parameter_size,
);
return {
id: model.name,
name: model.name,
parameterLabel: model.details.parameter_size,
parameterCount,
contextWindow: undefined,
};
});
return { models };
}
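  /**
   * Probes a single model via show(), deriving capability flags and any
   * sampling defaults declared in its Modelfile parameters.
   */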
async probeModel(modelId: string): Promise<IAiModelProbeResult> {
const response = await this.client.show({ model: modelId });
const capabilities = this.analyzeCapabilities(response, modelId);
const settings = this.extractSettings(response);
return {
capabilities,
settings,
};
}
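  /**
   * Normalizes Ollama's human-readable parameter size (e.g. "7.6B", "560M")
   * to a count in billions, so "560M" becomes 0.56.
   */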
private parseParameterCount(parameterSize?: string): number | undefined {
if (!parameterSize) return undefined;
const match = parameterSize.match(/^([\d.]+)[BbMm]?$/);
if (!match) return undefined;
const value = parseFloat(match[1]);
if (parameterSize.toLowerCase().includes("m")) {
return value / 1000;
}
return value;
}
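  /**
   * Derives capability flags from the show() response, falling back to
   * model-name heuristics ("instruct", "chat", "-it") to detect instruct
   * tuning, which Ollama does not report directly.
   */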
  private analyzeCapabilities(
    response: ShowResponse,
    modelId: string,
  ): IAiModelProbeResult["capabilities"] {
const capabilities = response.capabilities || [];
const modelInfo = response.model_info as unknown as
| Record<string, unknown>
| undefined;
return {
canCallTools:
capabilities.includes("tools") ||
capabilities.includes("function_calling"),
hasVision:
capabilities.includes("vision") ||
!!modelInfo?.["vision_model"] ||
!!modelInfo?.["clip"],
hasEmbedding: capabilities.includes("embeddings"),
hasThinking:
capabilities.includes("thinking") || capabilities.includes("reasoning"),
isInstructTuned:
modelId.toLowerCase().includes("instruct") ||
modelId.toLowerCase().includes("chat") ||
modelId.toLowerCase().includes("-it"),
};
}
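  /**
   * Extracts sampling defaults (temperature, top_p, top_k, num_ctx) from the
   * Modelfile parameter text, returning undefined when none are declared.
   */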
  private extractSettings(
    response: ShowResponse,
  ): IAiModelProbeResult["settings"] {
const parameters = response.parameters || "";
const settings: IAiModelProbeResult["settings"] = {};
const temperatureMatch = parameters.match(/temperature\s+(\d+\.?\d*)/i);
if (temperatureMatch) {
settings.temperature = parseFloat(temperatureMatch[1]);
}
const topPMatch = parameters.match(/top_p\s+(\d+\.?\d*)/i);
if (topPMatch) {
settings.topP = parseFloat(topPMatch[1]);
}
const topKMatch = parameters.match(/top_k\s+(\d+)/i);
if (topKMatch) {
settings.topK = parseInt(topKMatch[1], 10);
}
const numCtxMatch = parameters.match(/num_ctx\s+(\d+)/i);
if (numCtxMatch) {
settings.numCtx = parseInt(numCtxMatch[1], 10);
}
return Object.keys(settings).length > 0 ? settings : undefined;
}
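  /**
   * Runs a single-turn completion. The request always streams internally;
   * chunks are accumulated (and forwarded to streamCallback when provided),
   * and the final chunk supplies the done state and token statistics.
   */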
async generate(
model: IAiModelConfig,
options: IAiGenerateOptions,
streamCallback?: IAiResponseStreamFn,
): Promise<IAiGenerateResponse> {
await this.log.debug("OllamaAiApi.generate called", {
provider: model.provider.name,
modelId: model.modelId,
});
const response = await this.client.generate({
model: model.modelId,
prompt: options.prompt,
system: options.systemPrompt,
stream: true,
});
const content = {
response: "",
thinking: "",
};
let lastChunk;
for await (const chunk of response) {
lastChunk = chunk;
if (chunk.thinking) {
content.thinking += chunk.thinking;
if (streamCallback) {
await streamCallback({
type: "thinking",
data: chunk.thinking,
});
}
}
if (chunk.response) {
content.response += chunk.response;
if (streamCallback) {
await streamCallback({
type: "response",
data: chunk.response,
});
}
}
}
    await this.log.debug("generate call is done", content);
    assert(lastChunk, "no stream response chunks received");
    // Ollama reports total_duration in nanoseconds; convert to seconds.
    const totalSeconds = lastChunk.total_duration / 1e9;
return {
done: lastChunk.done,
doneReason: lastChunk.done_reason,
response: content.response,
thinking: content.thinking,
      stats: {
        duration: {
          seconds: totalSeconds,
          text: numeral(totalSeconds).format("00:00:00"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          // Ollama does not report a separate thinking-token count.
          thinking: 0,
        },
      },
};
}
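  /**
   * Runs a multi-turn chat. Prior conversation from options.context is
   * replayed as Ollama messages (including tool results), tool definitions
   * are mapped to Ollama's function schema, and tool calls emitted by the
   * model are collected with synthesized call IDs.
   */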
async chat(
model: IAiModelConfig,
options: IAiChatOptions,
streamCallback?: IAiResponseStreamFn,
): Promise<IAiChatResponse> {
await this.log.debug("OllamaAiApi.chat called", {
provider: model.provider.name,
modelId: model.modelId,
});
const messages: OllamaMessage[] = [];
if (options.systemPrompt) {
messages.push({ role: "system", content: options.systemPrompt });
}
if (options.context) {
for (const msg of options.context) {
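        // Skip context entries with no usable content.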
if (msg.content && msg.content.trim()) {
if (msg.role === "tool") {
messages.push({
role: "tool",
content: msg.content,
tool_name: msg.toolName,
});
} else {
messages.push({
role: msg.role as "user" | "assistant" | "system",
content: msg.content,
});
}
}
}
}
if (options.userPrompt) {
messages.push({ role: "user", content: options.userPrompt });
}
if (messages.length === 0) {
throw new Error(
"Messages array is empty - cannot call Ollama API with no messages",
);
}
const ollamaTools = options.tools
? options.tools.map((tool) => ({
type: tool.definition.type,
function: {
name: tool.definition.function.name,
description: tool.definition.function.description,
parameters: tool.definition.function.parameters,
},
}))
: undefined;
const response = await this.client.chat({
model: model.modelId,
messages,
stream: true,
think: model.params.reasoning,
tools: ollamaTools,
});
let lastChunk;
let accumulatedThinking = "";
let accumulatedResponse = "";
const toolCalls: IToolCall[] = [];
for await (const chunk of response) {
lastChunk = chunk;
if (chunk.message.thinking) {
accumulatedThinking += chunk.message.thinking;
if (streamCallback) {
await streamCallback({
type: "thinking",
data: chunk.message.thinking,
});
}
}
if (chunk.message.content) {
accumulatedResponse += chunk.message.content;
if (streamCallback) {
await streamCallback({
type: "response",
data: chunk.message.content,
});
}
}
if (chunk.message.tool_calls) {
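        // Ollama does not assign tool-call IDs, so synthesize one from the
        // function name, a timestamp, and the chunk-local index.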
for (const [index, tc] of chunk.message.tool_calls.entries()) {
const params = JSON.stringify(tc.function.arguments);
const callId = `tool_${tc.function.name}_${Date.now()}_${index}`;
toolCalls.push({
callId,
function: {
name: tc.function.name,
arguments: params,
},
});
}
}
}
    assert(lastChunk, "no response chunks received");
    // Ollama reports total_duration in nanoseconds; convert to seconds.
    const totalSeconds = lastChunk.total_duration / 1e9;
const chatResponse: IAiChatResponse = {
done: lastChunk.done,
doneReason: lastChunk.done_reason,
response: accumulatedResponse || lastChunk.message.content,
thinking: accumulatedThinking || lastChunk.message.thinking,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      stats: {
        duration: {
          seconds: totalSeconds,
          text: numeral(totalSeconds).format("00:00:00"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          // Ollama does not report a separate thinking-token count.
          thinking: 0,
        },
      },
};
this.assertNonEmptyChatResponse(chatResponse);
return chatResponse;
}
}
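
// A minimal usage sketch (env, provider, and logger values are hypothetical):
//
//   const api = new OllamaAiApi(env, {
//     name: "local-ollama",
//     baseUrl: "http://localhost:11434",
//     apiKey: "not-used-by-local-hosts",
//   } as IAiProvider, logger);
//   const { models } = await api.listModels();
//   const probe = await api.probeModel(models[0].id);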