Maintaining the previous agent's work had become ridiculous, so we had this agent rebuild it from scratch - and got a better result.
329 lines
9.0 KiB
TypeScript
// src/ollama.ts
// Copyright (C) 2026 Rob Colbert <rob.colbert@openplatform.us>
// Licensed under the Apache License, Version 2.0

import assert from "node:assert";

import { Ollama } from "ollama";
import numeral from "numeral";
import {
  AiApi,
  IAiChatOptions,
  IAiChatResponse,
  IToolCall,
  IAiGenerateOptions,
  IAiGenerateResponse,
  IAiLogger,
  IAiModelConfig,
  IAiModelListResult,
  IAiModelProbeResult,
  IAiProvider,
  IAiResponseStreamFn,
} from "./api.js";
import { IAiEnvironment } from "./config/env.ts";
import type { Message as OllamaMessage } from "ollama";

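/**
 * Ollama-backed implementation of the AiApi provider interface.
 * Wraps the official "ollama" client for model listing, capability
 * probing, streaming generation, and streaming chat with tool calls.
 */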
export class OllamaAiApi extends AiApi {
  protected client: Ollama;

  constructor(env: IAiEnvironment, provider: IAiProvider, logger?: IAiLogger) {
    super(env, provider, logger);
    this.client = new Ollama({
      host: this.provider.baseUrl,
      headers: { Authorization: `Bearer ${this.provider.apiKey}` },
    });
  }

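  /**
   * Lists the models available on the Ollama host, attaching a parsed
   * parameter count (in billions) where the size label can be parsed.
   */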
  async listModels(): Promise<IAiModelListResult> {
    const response = await this.client.list();
    const models = response.models.map((model) => {
      const parameterCount = this.parseParameterCount(
        model.details.parameter_size,
      );
      return {
        id: model.name,
        name: model.name,
        parameterLabel: model.details.parameter_size,
        parameterCount,
        contextWindow: undefined,
      };
    });

    return { models };
  }

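  /**
   * Inspects a single model via `show` and derives its capability flags
   * plus any default sampling settings found in its Modelfile parameters.
   */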
  async probeModel(modelId: string): Promise<IAiModelProbeResult> {
    const response = await this.client.show({ model: modelId });
    const capabilities = this.analyzeCapabilities(response, modelId);
    const settings = this.extractSettings(response);
    return {
      capabilities,
      settings,
    };
  }

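  /**
   * Parses a size label such as "7B" or "350M" into a numeric count in
   * billions; returns undefined when the label cannot be parsed.
   */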
  private parseParameterCount(parameterSize?: string): number | undefined {
    if (!parameterSize) return undefined;
    const match = parameterSize.match(/^([\d.]+)[BbMm]?$/);
    if (!match) return undefined;
    const value = parseFloat(match[1]);
    if (parameterSize.toLowerCase().includes("m")) {
      return value / 1000;
    }
    return value;
  }

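  /**
   * Derives capability flags from the `show` response, falling back to
   * model_info fields and model-id naming conventions where needed.
   */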
  private analyzeCapabilities(
    response: Awaited<ReturnType<typeof this.client.show>>,
    modelId: string,
  ): IAiModelProbeResult["capabilities"] {
    const capabilities = response.capabilities || [];
    const modelInfo = response.model_info as unknown as
      | Record<string, unknown>
      | undefined;

    return {
      canCallTools:
        capabilities.includes("tools") ||
        capabilities.includes("function_calling"),
      hasVision:
        capabilities.includes("vision") ||
        !!modelInfo?.["vision_model"] ||
        !!modelInfo?.["clip"],
      hasEmbedding: capabilities.includes("embeddings"),
      hasThinking:
        capabilities.includes("thinking") || capabilities.includes("reasoning"),
      isInstructTuned:
        modelId.toLowerCase().includes("instruct") ||
        modelId.toLowerCase().includes("chat") ||
        modelId.toLowerCase().includes("-it"),
    };
  }

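  /**
   * Extracts default sampling settings (temperature, top_p, top_k, num_ctx)
   * from the Modelfile parameter text, when present.
   */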
  private extractSettings(
    response: Awaited<ReturnType<typeof this.client.show>>,
  ): IAiModelProbeResult["settings"] {
    const parameters = response.parameters || "";
    const settings: IAiModelProbeResult["settings"] = {};

    const temperatureMatch = parameters.match(/temperature\s+(\d+\.?\d*)/i);
    if (temperatureMatch) {
      settings.temperature = parseFloat(temperatureMatch[1]);
    }

    const topPMatch = parameters.match(/top_p\s+(\d+\.?\d*)/i);
    if (topPMatch) {
      settings.topP = parseFloat(topPMatch[1]);
    }

    const topKMatch = parameters.match(/top_k\s+(\d+)/i);
    if (topKMatch) {
      settings.topK = parseInt(topKMatch[1], 10);
    }

    const numCtxMatch = parameters.match(/num_ctx\s+(\d+)/i);
    if (numCtxMatch) {
      settings.numCtx = parseInt(numCtxMatch[1], 10);
    }

    return Object.keys(settings).length > 0 ? settings : undefined;
  }

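  /**
   * Runs a streaming completion, forwarding "thinking" and "response"
   * chunks to the optional stream callback as they arrive.
   */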
  async generate(
    model: IAiModelConfig,
    options: IAiGenerateOptions,
    streamCallback?: IAiResponseStreamFn,
  ): Promise<IAiGenerateResponse> {
    await this.log.debug("OllamaAiApi.generate called", {
      provider: model.provider.name,
      modelId: model.modelId,
    });

    const response = await this.client.generate({
      model: model.modelId,
      prompt: options.prompt,
      system: options.systemPrompt,
      stream: true,
    });

    const content = {
      response: "",
      thinking: "",
    };
    let lastChunk;
    for await (const chunk of response) {
      lastChunk = chunk;

      if (chunk.thinking) {
        content.thinking += chunk.thinking;
        if (streamCallback) {
          await streamCallback({
            type: "thinking",
            data: chunk.thinking,
          });
        }
      }

      if (chunk.response) {
        content.response += chunk.response;
        if (streamCallback) {
          await streamCallback({
            type: "response",
            data: chunk.response,
          });
        }
      }
    }

    await this.log.debug("generate call is done", content);
    assert(lastChunk, "no stream response chunks received");

    // Ollama reports total_duration in nanoseconds; convert to seconds.
    const durationSeconds = lastChunk.total_duration / 1e9;

    return {
      done: lastChunk.done,
      doneReason: lastChunk.done_reason,
      response: content.response,
      thinking: content.thinking,
      stats: {
        duration: {
          seconds: durationSeconds,
          text: numeral(durationSeconds).format("hh:mm:ss"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          thinking: 0,
        },
      },
    };
  }

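  /**
   * Runs a streaming chat turn: builds the message list from the system
   * prompt, prior context, and user prompt, forwards streamed chunks to
   * the optional callback, and collects any tool calls the model emits.
   */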
  async chat(
    model: IAiModelConfig,
    options: IAiChatOptions,
    streamCallback?: IAiResponseStreamFn,
  ): Promise<IAiChatResponse> {
    await this.log.debug("OllamaAiApi.chat called", {
      provider: model.provider.name,
      modelId: model.modelId,
    });

    const messages: OllamaMessage[] = [];

    if (options.systemPrompt) {
      messages.push({ role: "system", content: options.systemPrompt });
    }

    if (options.context) {
      for (const msg of options.context) {
        if (msg.content && msg.content.trim()) {
          if (msg.role === "tool") {
            messages.push({
              role: "tool",
              content: msg.content,
              tool_name: msg.toolName,
            });
          } else {
            messages.push({
              role: msg.role as "user" | "assistant" | "system",
              content: msg.content,
            });
          }
        }
      }
    }

    if (options.userPrompt) {
      messages.push({ role: "user", content: options.userPrompt });
    }

    if (messages.length === 0) {
      throw new Error(
        "Messages array is empty - cannot call Ollama API with no messages",
      );
    }

    const ollamaTools = options.tools
      ? options.tools.map((tool) => ({
          type: tool.definition.type,
          function: {
            name: tool.definition.function.name,
            description: tool.definition.function.description,
            parameters: tool.definition.function.parameters,
          },
        }))
      : undefined;

    const response = await this.client.chat({
      model: model.modelId,
      messages,
      stream: true,
      think: model.params.reasoning,
      tools: ollamaTools,
    });

    let lastChunk;
    let accumulatedThinking = "";
    let accumulatedResponse = "";
    const toolCalls: IToolCall[] = [];

    for await (const chunk of response) {
      lastChunk = chunk;

      if (chunk.message.thinking) {
        accumulatedThinking += chunk.message.thinking;
        if (streamCallback) {
          await streamCallback({
            type: "thinking",
            data: chunk.message.thinking,
          });
        }
      }
      if (chunk.message.content) {
        accumulatedResponse += chunk.message.content;
        if (streamCallback) {
          await streamCallback({
            type: "response",
            data: chunk.message.content,
          });
        }
      }
      if (chunk.message.tool_calls) {
        for (const [index, tc] of chunk.message.tool_calls.entries()) {
          const params = JSON.stringify(tc.function.arguments);
          // Ollama does not return a call id; synthesize a unique one.
          const callId = `tool_${tc.function.name}_${Date.now()}_${index}`;

          toolCalls.push({
            callId,
            function: {
              name: tc.function.name,
              arguments: params,
            },
          });
        }
      }
    }
    assert(lastChunk, "no response chunks received");

    // Ollama reports total_duration in nanoseconds; convert to seconds.
    const durationSeconds = lastChunk.total_duration / 1e9;

    const chatResponse: IAiChatResponse = {
      done: lastChunk.done,
      doneReason: lastChunk.done_reason,
      response: accumulatedResponse || lastChunk.message.content,
      thinking: accumulatedThinking || lastChunk.message.thinking,
      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      stats: {
        duration: {
          seconds: durationSeconds,
          text: numeral(durationSeconds).format("hh:mm:ss"),
        },
        tokenCounts: {
          input: lastChunk.prompt_eval_count,
          response: lastChunk.eval_count,
          thinking: 0,
        },
      },
    };
    this.assertNonEmptyChatResponse(chatResponse);
    return chatResponse;
  }
}
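For anyone picking this up, here is a minimal usage sketch. It is not part of the module: the IAiEnvironment, IAiProvider, IAiModelConfig, and IAiChatOptions shapes live in ./api.js and ./config/env.ts, so the object literals below are stand-ins cast to those types, covering only the fields this file actually reads (baseUrl, apiKey, provider.name, modelId, params.reasoning, systemPrompt, userPrompt).

// usage-sketch.ts - illustrative only; values and casts are assumptions.
import { OllamaAiApi } from "./ollama.js";
import { IAiModelConfig, IAiProvider } from "./api.js";
import { IAiEnvironment } from "./config/env.ts";

const provider = {
  name: "local-ollama",
  baseUrl: "http://localhost:11434",
  apiKey: "",
} as IAiProvider;

const api = new OllamaAiApi({} as IAiEnvironment, provider);

const { models } = await api.listModels();
const probe = await api.probeModel(models[0].id);

const model = {
  modelId: models[0].id,
  provider,
  params: { reasoning: probe.capabilities.hasThinking },
} as IAiModelConfig;

const reply = await api.chat(
  model,
  { systemPrompt: "You are concise.", userPrompt: "Say hello." },
  async (chunk) => {
    // chunk.type is "thinking" or "response"; chunk.data is the streamed text.
    if (chunk.type === "response") process.stdout.write(chunk.data);
  },
);
console.log("\nstop reason:", reply.doneReason);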