feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline

Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.

Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096, silently truncating output and context. We never overrode these.
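
For context, a minimal request-level sketch of the override (host and model
name are illustrative; the options block is Ollama's documented /api/generate
shape):

// Without an explicit options block, Ollama applies its own
// num_predict/num_ctx defaults, truncating long completions.
const res = await fetch("http://localhost:11434/api/generate", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "llama3.1",            // illustrative model name
    prompt: "Summarize the design doc.",
    stream: false,
    options: {
      num_ctx: 131072,            // context window (input + output tokens)
      num_predict: -1,            // -1 = generate until natural stop
    },
  }),
});
const { response } = await res.json();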

Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
  expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI: add max_completion_tokens to all three create() calls
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
  and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from provider
  cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass

Defaults when model settings unavailable (see the sketch after this list):
- numPredict: -1 (Ollama unlimited - generate until natural stop)
- numCtx: 131072 (128k - covers most modern models)
- maxCompletionTokens: 16384 (16k - reasonable OpenAI default)
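
Resolution is plain nullish coalescing against the stored probe settings, as
buildDroneModelConfig does below; a condensed sketch (resolveParams is a
hypothetical helper name):

import type { IAiModelSettings } from "@gadget/api";

// Stored probe settings win; otherwise the defaults listed above apply.
function resolveParams(settings?: IAiModelSettings) {
  return {
    numPredict: settings?.numPredict ?? -1,        // Ollama: unlimited
    numCtx: settings?.numCtx ?? 131072,            // 128k fallback
    maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
  };
}
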
Rob Colbert 2026-05-11 13:05:40 -04:00
parent 62578e8e56
commit 07a760c7b5
11 changed files with 135 additions and 32 deletions

View File

@@ -29,6 +29,8 @@ export const AiModelSettingsSchema = new Schema<IAiModelSettings>(
     topP: { type: Number },
     topK: { type: Number },
     numCtx: { type: Number },
+    numPredict: { type: Number },
+    maxCompletionTokens: { type: Number },
   },
   { _id: false },
 );
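
For illustration, a settings subdocument persisted through this schema might
look like the following (values are hypothetical, matching the probe defaults
described above):

import type { IAiModelSettings } from "@gadget/api";

// Hypothetical persisted settings after probing an Ollama model:
const stored: IAiModelSettings = {
  temperature: 0.8,
  topP: 0.9,
  topK: 40,
  numCtx: 131072,
  numPredict: -1,             // forced to -1 for Ollama during the probe
  maxCompletionTokens: 16384,
};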

View File

@@ -439,6 +439,9 @@ class ChatSessionService extends DtpService {
           temperature: 1.0,
           topK: 0.6,
           topP: 0.4,
+          numPredict: -1,
+          numCtx: 131072,
+          maxCompletionTokens: 256,
         },
       },
       {

View File

@@ -586,6 +586,22 @@ class DtpWebCli extends DtpProcess {
       try {
         const probeResult = await api.probeModel(modelInfo.id);
+
+        // Compute provider-specific inference settings
+        const settings: IAiModelSettings = {
+          ...(probeResult.settings as IAiModelSettings | undefined),
+        };
+        if (provider.apiType === 'ollama') {
+          // Ollama: always override numPredict to -1 (unlimited) for agentic workflows
+          // The model must generate until its natural stop token or context limit
+          settings.numPredict = -1;
+          // numCtx is already populated by probeResult.settings from extractSettings()
+        } else if (provider.apiType === 'openai') {
+          // OpenAI-compatible: set maxCompletionTokens to model's context window
+          // This prevents compatible providers (Gab AI, etc.) from imposing low defaults
+          settings.maxCompletionTokens = modelInfo.contextWindow || 16384;
+        }
+
         const model: IAiModel = {
           id: modelInfo.id,
           name: modelInfo.name,
@@ -593,7 +609,7 @@ class DtpWebCli extends DtpProcess {
           parameterLabel: modelInfo.parameterLabel,
           contextWindow: modelInfo.contextWindow,
           capabilities: probeResult.capabilities as IAiModelCapabilities,
-          settings: probeResult.settings as IAiModelSettings | undefined,
+          settings,
         };
         models.push(model);

View File

@@ -17,11 +17,14 @@ import {
 } from "@gadget/ai";
 import {
   type IAiProvider,
+  type IAiModelSettings,
+  type IDroneModelConfig,
   type IChatSession,
   type IChatTurn,
   type IChatSubagentProcess,
   type IChatToolCall,
   type IUser,
+  type GadgetId,
   type ServerToClientEvents,
   type ClientToServerEvents,
   ChatSessionMode,
@@ -209,10 +212,7 @@ class AgentService extends GadgetService {
     try {
       response = await AiService.chat(
         turn.provider,
-        {
-          modelId: turn.llm,
-          params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 },
-        },
+        this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
         chatOptions,
         this.makeStreamHandler(socket),
       );
@@ -410,6 +410,46 @@ class AgentService extends GadgetService {
     };
   }
 
+  /**
+   * Builds an IDroneModelConfig by looking up the model's stored settings
+   * from the provider's cached models array. Falls back to safe defaults
+   * when stored settings aren't available (e.g., model not yet probed).
+   */
+  private buildDroneModelConfig(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+    reasoning: boolean | "low" | "medium" | "high",
+  ): Omit<IDroneModelConfig, "provider"> {
+    const settings = this.getModelSettings(provider, modelId);
+    return {
+      modelId,
+      params: {
+        reasoning,
+        temperature: settings?.temperature ?? 0.8,
+        topP: settings?.topP ?? 0.9,
+        topK: settings?.topK ?? 40,
+        numPredict: settings?.numPredict ?? -1, // -1 = unlimited (Ollama)
+        numCtx: settings?.numCtx ?? 131072, // 128k fallback
+        maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
+      },
+    };
+  }
+
+  /**
+   * Looks up the stored IAiModelSettings for a model from the provider's
+   * cached models array. Returns undefined if the provider or model isn't found.
+   */
+  private getModelSettings(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+  ): IAiModelSettings | undefined {
+    if (typeof provider === "string") return undefined;
+    const modelRecord = (provider as IAiProvider).models?.find(
+      (m) => m.id === modelId,
+    );
+    return modelRecord?.settings;
+  }
+
   private async executeTool(name: string, argsJson: string): Promise<string> {
     const tool = this.toolbox.getTool(name);
     if (!tool) {
@@ -528,10 +568,7 @@ class AgentService extends GadgetService {
 
       const response = await AiService.chat(
         provider,
-        {
-          modelId: turn.llm,
-          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
-        },
+        this.buildDroneModelConfig(provider, turn.llm, false),
         chatOptions,
         streamHandler,
       );

View File

@@ -15,7 +15,7 @@ const aiEnv: IAiEnvironment = {
   },
 };
 
-import { IAiProvider as DbAiProvider, GadgetId } from "@gadget/api";
+import { IAiProvider as DbAiProvider, GadgetId, type IDroneModelConfig } from "@gadget/api";
 import { GadgetService } from "../lib/service.js";
 import {
   type IAiChatOptions,
@@ -29,20 +29,6 @@ import {
   IAiEnvironment,
 } from "@gadget/ai";
 
-/**
- * Drone-specific model config that accepts the database provider type.
- */
-export interface IDroneModelConfig {
-  provider: DbAiProvider | GadgetId;
-  modelId: string;
-  params: {
-    reasoning: boolean | "low" | "medium" | "high";
-    temperature: number;
-    topP: number;
-    topK: number;
-  };
-}
-
 /**
  * An abstraction of the backend AI APIs (Ollama, OpenAI) that provides one
  * common interface and contract for working with different AI APIs.

View File

@@ -22,6 +22,12 @@ export interface IAiModelConfig {
     temperature: number;
     topP: number;
     topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
   };
 }
 
@@ -146,6 +152,10 @@ export interface IAiModelProbeResult {
     topP?: number;
     topK?: number;
     numCtx?: number;
+    /** Ollama: discovered num_predict from model parameters (informational; overridden to -1 at inference time) */
+    numPredict?: number;
+    /** OpenAI-compatible: discovered from model info */
+    maxCompletionTokens?: number;
   };
 }

View File

@@ -87,7 +87,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
@@ -150,7 +150,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
@@ -201,7 +201,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'What is the answer?',
@@ -243,7 +243,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',

View File

@@ -126,6 +126,11 @@ export class OllamaAiApi extends AiApi {
       settings.numCtx = parseInt(numCtxMatch[1], 10);
     }
 
+    const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
+    if (numPredictMatch) {
+      settings.numPredict = parseInt(numPredictMatch[1], 10);
+    }
+
     return Object.keys(settings).length > 0 ? settings : undefined;
   }
 
@@ -144,6 +149,10 @@ export class OllamaAiApi extends AiApi {
       prompt: options.prompt,
       system: options.systemPrompt,
       stream: true,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     const content = {
@@ -259,6 +268,10 @@ export class OllamaAiApi extends AiApi {
       stream: true,
       think: model.params.reasoning,
       tools: ollamaTools,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     let lastChunk;
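
A standalone usage sketch, assuming the ollama npm client (model name is
illustrative); the options object mirrors what generate() and chat() now pass:

import ollama from "ollama";

// Streaming generate call with the same num_ctx/num_predict overrides.
const stream = await ollama.generate({
  model: "llama3.1",
  prompt: "Explain num_predict in one paragraph.",
  stream: true,
  options: { num_ctx: 131072, num_predict: -1 },
});
for await (const chunk of stream) {
  process.stdout.write(chunk.response);
}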

View File

@@ -65,7 +65,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      { userPrompt: "Hello", context: [], tools: [] },
      vi.fn(),
@@ -105,7 +105,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      { userPrompt: "Read index.html", context: [], tools: [] },
      streamCallback,
@@ -133,7 +133,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      { userPrompt: "Hello", context: [], tools: [] },
      streamCallback,
@@ -153,7 +153,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      {
        userPrompt: "Don't edit code. Just talk to me.",

View File

@@ -202,6 +202,9 @@ export class OpenAiApi extends AiApi {
         { role: "user" as const, content: options.prompt },
       ],
       stream: true,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as
@@ -373,6 +376,9 @@ export class OpenAiApi extends AiApi {
       messages,
       tools,
       stream: true,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as
@@ -439,6 +445,9 @@ export class OpenAiApi extends AiApi {
       messages,
       tools,
       stream: false,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
      ...(typeof model.params.reasoning === "string"
        ? {
            reasoning_effort: model.params.reasoning as
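
Note the conditional spread omits max_completion_tokens entirely when the
value is falsy, rather than sending 0 to the API; a condensed sketch with an
illustrative literal:

// With maxCompletionTokens undefined (or 0), spreading {} leaves the
// field out of the request body altogether.
const maxCompletionTokens: number | undefined = 16384;
const body = {
  model: "test-model",
  stream: true,
  ...(maxCompletionTokens
    ? { max_completion_tokens: maxCompletionTokens }
    : {}),
};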

View File

@@ -17,6 +17,10 @@ export interface IAiModelSettings {
   topP?: number;
   topK?: number;
   numCtx?: number;
+  /** Ollama: maximum number of tokens to predict (-1 = unlimited, generate until natural stop or context limit) */
+  numPredict?: number;
+  /** OpenAI-compatible: maximum number of completion tokens the model can generate */
+  maxCompletionTokens?: number;
 }
 
 export interface IAiModelCapabilities {
@@ -61,3 +65,26 @@ export interface IAiProvider {
 }
 
 export type AiProviderDocument = HydratedDocument<IAiProvider>;
+
+/**
+ * Drone-side model configuration that accepts the database IAiProvider type.
+ * This is the canonical interface used by the drone to configure AI API calls,
+ * carrying the model's stored inference settings (numCtx, numPredict, etc.)
+ * from the provider's cached model data through to the AI package.
+ */
+export interface IDroneModelConfig {
+  provider: IAiProvider | GadgetId;
+  modelId: string;
+  params: {
+    reasoning: boolean | "low" | "medium" | "high";
+    temperature: number;
+    topP: number;
+    topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
+  };
+}