From 07a760c7b5bd33e7e678ecbc06a8c2a2733573ea Mon Sep 17 00:00:00 2001
From: Rob Colbert
Date: Mon, 11 May 2026 13:05:40 -0400
Subject: [PATCH] feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.

Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to 4096,
silently truncating output and context. We never overrode these.

Changes:

- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
  expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI: add max_completion_tokens to all three create() calls
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama) and
  maxCompletionTokens (contextWindow for OpenAI) during the probe
- agent.ts main + subagent loops: read model settings from the provider's
  cached models and build IDroneModelConfig from the stored params
- ai.ts: remove the local IDroneModelConfig; import it from @gadget/api
- chat-session.ts: add the new params to the title generation call
- Tests: update all fixtures with the new params; all 19 tests pass

Defaults when model settings are unavailable:

- numPredict: -1 (Ollama unlimited; generate until natural stop)
- numCtx: 131072 (128k; covers most modern models)
- maxCompletionTokens: 16384 (16k; a reasonable OpenAI default)
---
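Reviewer note (kept below the --- so it stays out of the commit message):
a minimal TypeScript sketch of where each new parameter lands. DroneParams,
ollamaOptions, and openAiExtras are illustrative names that do not exist in
the codebase; options.num_ctx, options.num_predict, and max_completion_tokens
are the documented Ollama and OpenAI request fields the diff actually sets.

  // Sketch only; not part of this patch.
  type DroneParams = {
    reasoning: boolean | "low" | "medium" | "high";
    temperature: number;
    topP: number;
    topK: number;
    numPredict: number;          // Ollama: -1 = generate until natural stop
    numCtx: number;              // Ollama: context window; server default is 4096
    maxCompletionTokens: number; // OpenAI-compatible completion cap
  };

  // Ollama path: before this patch no "options" block was sent, so the
  // server silently applied num_predict=128 / num_ctx=4096 and truncated
  // long replies.
  function ollamaOptions(params: DroneParams): { num_ctx: number; num_predict: number } {
    return { num_ctx: params.numCtx, num_predict: params.numPredict };
  }

  // OpenAI-compatible path: an explicit completion cap, spread into the
  // create() payload only when a value is configured.
  function openAiExtras(params: DroneParams): { max_completion_tokens?: number } {
    return params.maxCompletionTokens
      ? { max_completion_tokens: params.maxCompletionTokens }
      : {};
  }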
 gadget-code/src/models/ai-provider.ts      |  2 +
 gadget-code/src/services/chat-session.ts   |  3 ++
 gadget-code/src/web-cli.ts                 | 18 +++++++-
 gadget-drone/src/services/agent.ts         | 53 ++++++++++++++++++----
 gadget-drone/src/services/ai.ts            | 16 +------
 packages/ai/src/api.ts                     | 10 ++++
 packages/ai/src/ollama.test.ts             |  8 ++--
 packages/ai/src/ollama.ts                  | 13 ++++++
 packages/ai/src/openai.test.ts             |  8 ++--
 packages/ai/src/openai.ts                  |  9 ++++
 packages/api/src/interfaces/ai-provider.ts | 27 +++++++++++
 11 files changed, 135 insertions(+), 32 deletions(-)

diff --git a/gadget-code/src/models/ai-provider.ts b/gadget-code/src/models/ai-provider.ts
index 42dc9c0..f367217 100644
--- a/gadget-code/src/models/ai-provider.ts
+++ b/gadget-code/src/models/ai-provider.ts
@@ -29,6 +29,8 @@ export const AiModelSettingsSchema = new Schema(
     topP: { type: Number },
     topK: { type: Number },
     numCtx: { type: Number },
+    numPredict: { type: Number },
+    maxCompletionTokens: { type: Number },
   },
   { _id: false },
 );
diff --git a/gadget-code/src/services/chat-session.ts b/gadget-code/src/services/chat-session.ts
index 9dc3a92..0ab9cca 100644
--- a/gadget-code/src/services/chat-session.ts
+++ b/gadget-code/src/services/chat-session.ts
@@ -439,6 +439,9 @@ class ChatSessionService extends DtpService {
           temperature: 1.0,
           topK: 0.6,
           topP: 0.4,
+          numPredict: -1,
+          numCtx: 131072,
+          maxCompletionTokens: 256,
         },
       },
       {
diff --git a/gadget-code/src/web-cli.ts b/gadget-code/src/web-cli.ts
index c66a5f6..03056c2 100644
--- a/gadget-code/src/web-cli.ts
+++ b/gadget-code/src/web-cli.ts
@@ -586,6 +586,22 @@ class DtpWebCli extends DtpProcess {
       try {
         const probeResult = await api.probeModel(modelInfo.id);
 
+        // Compute provider-specific inference settings
+        const settings: IAiModelSettings = {
+          ...(probeResult.settings as IAiModelSettings | undefined),
+        };
+
+        if (provider.apiType === 'ollama') {
+          // Ollama: always override numPredict to -1 (unlimited) for agentic workflows.
+          // The model must generate until its natural stop token or context limit.
+          settings.numPredict = -1;
+          // numCtx is already populated by probeResult.settings from extractSettings()
+        } else if (provider.apiType === 'openai') {
+          // OpenAI-compatible: set maxCompletionTokens to the model's context window.
+          // This prevents compatible providers (Gab AI, etc.) from imposing low defaults.
+          settings.maxCompletionTokens = modelInfo.contextWindow || 16384;
+        }
+
         const model: IAiModel = {
           id: modelInfo.id,
           name: modelInfo.name,
@@ -593,7 +609,7 @@ class DtpWebCli extends DtpProcess {
           parameterLabel: modelInfo.parameterLabel,
           contextWindow: modelInfo.contextWindow,
           capabilities: probeResult.capabilities as IAiModelCapabilities,
-          settings: probeResult.settings as IAiModelSettings | undefined,
+          settings,
         };
 
         models.push(model);
diff --git a/gadget-drone/src/services/agent.ts b/gadget-drone/src/services/agent.ts
index 06372e0..b755552 100644
--- a/gadget-drone/src/services/agent.ts
+++ b/gadget-drone/src/services/agent.ts
@@ -17,11 +17,14 @@ import {
 } from "@gadget/ai";
 import {
   type IAiProvider,
+  type IAiModelSettings,
+  type IDroneModelConfig,
   type IChatSession,
   type IChatTurn,
   type IChatSubagentProcess,
   type IChatToolCall,
   type IUser,
+  type GadgetId,
   type ServerToClientEvents,
   type ClientToServerEvents,
   ChatSessionMode,
@@ -209,10 +212,7 @@ class AgentService extends GadgetService {
       try {
         response = await AiService.chat(
           turn.provider,
-          {
-            modelId: turn.llm,
-            params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 },
-          },
+          this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
           chatOptions,
           this.makeStreamHandler(socket),
         );
@@ -410,6 +410,46 @@ class AgentService extends GadgetService {
     };
   }
 
+  /**
+   * Builds an IDroneModelConfig by looking up the model's stored settings
+   * from the provider's cached models array. Falls back to safe defaults
+   * when stored settings aren't available (e.g., model not yet probed).
+   */
+  private buildDroneModelConfig(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+    reasoning: boolean | "low" | "medium" | "high",
+  ): Omit<IDroneModelConfig, "provider"> {
+    const settings = this.getModelSettings(provider, modelId);
+    return {
+      modelId,
+      params: {
+        reasoning,
+        temperature: settings?.temperature ?? 0.8,
+        topP: settings?.topP ?? 0.9,
+        topK: settings?.topK ?? 40,
+        numPredict: settings?.numPredict ?? -1, // -1 = unlimited (Ollama)
+        numCtx: settings?.numCtx ?? 131072, // 128k fallback
+        maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
+      },
+    };
+  }
+
+  /**
+   * Looks up the stored IAiModelSettings for a model from the provider's
+   * cached models array. Returns undefined if the provider or model isn't
+   * found.
+   */
+  private getModelSettings(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+  ): IAiModelSettings | undefined {
+    if (typeof provider === "string") return undefined;
+    const modelRecord = (provider as IAiProvider).models?.find(
+      (m) => m.id === modelId,
+    );
+    return modelRecord?.settings;
+  }
+
   private async executeTool(name: string, argsJson: string): Promise<string> {
     const tool = this.toolbox.getTool(name);
     if (!tool) {
@@ -528,10 +568,7 @@ class AgentService extends GadgetService {
 
     const response = await AiService.chat(
       provider,
-      {
-        modelId: turn.llm,
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
-      },
+      this.buildDroneModelConfig(provider, turn.llm, false),
       chatOptions,
       streamHandler,
     );
diff --git a/gadget-drone/src/services/ai.ts b/gadget-drone/src/services/ai.ts
index 5ca2e8d..40d29f0 100644
--- a/gadget-drone/src/services/ai.ts
+++ b/gadget-drone/src/services/ai.ts
@@ -15,7 +15,7 @@ const aiEnv: IAiEnvironment = {
   },
 };
 
-import { IAiProvider as DbAiProvider, GadgetId } from "@gadget/api";
+import { IAiProvider as DbAiProvider, GadgetId, type IDroneModelConfig } from "@gadget/api";
 import { GadgetService } from "../lib/service.js";
 import {
   type IAiChatOptions,
@@ -29,20 +29,6 @@ import {
   IAiEnvironment,
 } from "@gadget/ai";
 
-/**
- * Drone-specific model config that accepts the database provider type.
- */
-export interface IDroneModelConfig {
-  provider: DbAiProvider | GadgetId;
-  modelId: string;
-  params: {
-    reasoning: boolean | "low" | "medium" | "high";
-    temperature: number;
-    topP: number;
-    topK: number;
-  };
-}
-
 /**
  * An abstraction of the backend AI APIs (Ollama, OpenAI) that provides one
  * common interface and contract for working with different AI APIs.
diff --git a/packages/ai/src/api.ts b/packages/ai/src/api.ts
index df7718c..ecb5981 100644
--- a/packages/ai/src/api.ts
+++ b/packages/ai/src/api.ts
@@ -22,6 +22,12 @@ export interface IAiModelConfig {
     temperature: number;
     topP: number;
     topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
   };
 }
 
@@ -146,6 +152,10 @@ export interface IAiModelProbeResult {
     topP?: number;
     topK?: number;
     numCtx?: number;
+    /** Ollama: discovered num_predict from model parameters (informational; overridden to -1 at inference time) */
+    numPredict?: number;
+    /** OpenAI-compatible: discovered from model info */
+    maxCompletionTokens?: number;
   };
 }
diff --git a/packages/ai/src/ollama.test.ts b/packages/ai/src/ollama.test.ts
index 64d9859..dfd93b0 100644
--- a/packages/ai/src/ollama.test.ts
+++ b/packages/ai/src/ollama.test.ts
@@ -87,7 +87,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
@@ -150,7 +150,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
@@ -201,7 +201,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'What is the answer?',
@@ -243,7 +243,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
diff --git a/packages/ai/src/ollama.ts b/packages/ai/src/ollama.ts
index 10e82ba..a9af2a6 100644
--- a/packages/ai/src/ollama.ts
+++ b/packages/ai/src/ollama.ts
@@ -126,6 +126,11 @@ export class OllamaAiApi extends AiApi {
       settings.numCtx = parseInt(numCtxMatch[1], 10);
     }
 
+    const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
+    if (numPredictMatch) {
+      settings.numPredict = parseInt(numPredictMatch[1], 10);
+    }
+
     return Object.keys(settings).length > 0 ? settings : undefined;
   }
 
@@ -144,6 +149,10 @@ export class OllamaAiApi extends AiApi {
       prompt: options.prompt,
       system: options.systemPrompt,
       stream: true,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     const content = {
@@ -259,6 +268,10 @@ export class OllamaAiApi extends AiApi {
       stream: true,
      think: model.params.reasoning,
       tools: ollamaTools,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     let lastChunk;
diff --git a/packages/ai/src/openai.test.ts b/packages/ai/src/openai.test.ts
index 80db455..8acc307 100644
--- a/packages/ai/src/openai.test.ts
+++ b/packages/ai/src/openai.test.ts
@@ -65,7 +65,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       { userPrompt: "Hello", context: [], tools: [] },
       vi.fn(),
@@ -105,7 +105,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       { userPrompt: "Read index.html", context: [], tools: [] },
       streamCallback,
@@ -133,7 +133,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       { userPrompt: "Hello", context: [], tools: [] },
       streamCallback,
@@ -153,7 +153,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: "Don't edit code. Just talk to me.",
Just talk to me.", diff --git a/packages/ai/src/openai.ts b/packages/ai/src/openai.ts index d7b329f..794f146 100644 --- a/packages/ai/src/openai.ts +++ b/packages/ai/src/openai.ts @@ -202,6 +202,9 @@ export class OpenAiApi extends AiApi { { role: "user" as const, content: options.prompt }, ], stream: true, + ...(model.params.maxCompletionTokens + ? { max_completion_tokens: model.params.maxCompletionTokens } + : {}), ...(typeof model.params.reasoning === "string" ? { reasoning_effort: model.params.reasoning as @@ -373,6 +376,9 @@ export class OpenAiApi extends AiApi { messages, tools, stream: true, + ...(model.params.maxCompletionTokens + ? { max_completion_tokens: model.params.maxCompletionTokens } + : {}), ...(typeof model.params.reasoning === "string" ? { reasoning_effort: model.params.reasoning as @@ -439,6 +445,9 @@ export class OpenAiApi extends AiApi { messages, tools, stream: false, + ...(model.params.maxCompletionTokens + ? { max_completion_tokens: model.params.maxCompletionTokens } + : {}), ...(typeof model.params.reasoning === "string" ? { reasoning_effort: model.params.reasoning as diff --git a/packages/api/src/interfaces/ai-provider.ts b/packages/api/src/interfaces/ai-provider.ts index f6745f9..88f56d3 100644 --- a/packages/api/src/interfaces/ai-provider.ts +++ b/packages/api/src/interfaces/ai-provider.ts @@ -17,6 +17,10 @@ export interface IAiModelSettings { topP?: number; topK?: number; numCtx?: number; + /** Ollama: maximum number of tokens to predict (-1 = unlimited, generate until natural stop or context limit) */ + numPredict?: number; + /** OpenAI-compatible: maximum number of completion tokens the model can generate */ + maxCompletionTokens?: number; } export interface IAiModelCapabilities { @@ -61,3 +65,26 @@ export interface IAiProvider { } export type AiProviderDocument = HydratedDocument; + +/** + * Drone-side model configuration that accepts the database IAiProvider type. + * This is the canonical interface used by the drone to configure AI API calls, + * carrying the model's stored inference settings (numCtx, numPredict, etc.) + * from the provider's cached model data through to the AI package. + */ +export interface IDroneModelConfig { + provider: IAiProvider | GadgetId; + modelId: string; + params: { + reasoning: boolean | "low" | "medium" | "high"; + temperature: number; + topP: number; + topK: number; + /** Ollama: -1 = unlimited (generate until natural stop or context limit) */ + numPredict: number; + /** Context window size (input + output tokens); Ollama passes as num_ctx */ + numCtx: number; + /** OpenAI-compatible: maximum completion tokens the model can generate */ + maxCompletionTokens: number; + }; +}