feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline

Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.

Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096, silently truncating output and context. We never overrode these.

Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: move from gadget-drone to @gadget/api (shared),
  expand with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI all three create() calls: add max_completion_tokens
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
  and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from provider
  cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass

Defaults when model settings unavailable:
- numPredict: -1 (Ollama unlimited - generate until natural stop)
- numCtx: 131072 (128k - covers most modern models)
- maxCompletionTokens: 16384 (16k - reasonable OpenAI default)
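
For context, a minimal sketch of what the fix means at the Ollama wire
level (endpoint, port, and option names are from Ollama's REST API; the
host, model id, and prompt are illustrative, not from this commit):

  // Before: no options sent, so Ollama silently applies num_predict=128
  // and num_ctx=4096, truncating long completions and large contexts.
  // After: explicit overrides travel with every request.
  const response = await fetch("http://localhost:11434/api/generate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "llama3.1:8b",           // illustrative model id
      prompt: "Summarize this repo.", // illustrative prompt
      stream: true,
      options: { num_ctx: 131072, num_predict: -1 }, // the new defaults
    }),
  });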
Rob Colbert, 2026-05-11 13:05:40 -04:00
commit 07a760c7b5 (parent 62578e8e56)
11 changed files with 135 additions and 32 deletions

View File: AiModelSettingsSchema (Mongoose)

@@ -29,6 +29,8 @@ export const AiModelSettingsSchema = new Schema<IAiModelSettings>(
topP: { type: Number },
topK: { type: Number },
numCtx: { type: Number },
+   numPredict: { type: Number },
+   maxCompletionTokens: { type: Number },
},
{ _id: false },
);
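
For illustration, a settings subdocument stored under this schema might
look like the following after a probe (hypothetical values, not from the
commit):

  const storedSettings: IAiModelSettings = {
    temperature: 0.8,
    topP: 0.9,
    topK: 40,
    numCtx: 131072,
    numPredict: -1,             // forced to -1 for Ollama at probe time (see web-cli.ts)
    maxCompletionTokens: 16384,
  };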

View File: chat-session.ts

@@ -439,6 +439,9 @@ class ChatSessionService extends DtpService {
temperature: 1.0,
topK: 0.6,
topP: 0.4,
+       numPredict: -1,
+       numCtx: 131072,
+       maxCompletionTokens: 256,
},
},
{
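
Note the 256-token maxCompletionTokens here, far below the 16384 default:
this call only generates a short session title, so a small cap is
presumably ample and keeps the request cheap.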

View File: web-cli.ts

@@ -586,6 +586,22 @@ class DtpWebCli extends DtpProcess {
try {
const probeResult = await api.probeModel(modelInfo.id);
+ // Compute provider-specific inference settings
+ const settings: IAiModelSettings = {
+   ...(probeResult.settings as IAiModelSettings | undefined),
+ };
+ if (provider.apiType === 'ollama') {
+   // Ollama: always override numPredict to -1 (unlimited) for agentic workflows
+   // The model must generate until its natural stop token or context limit
+   settings.numPredict = -1;
+   // numCtx is already populated by probeResult.settings from extractSettings()
+ } else if (provider.apiType === 'openai') {
+   // OpenAI-compatible: set maxCompletionTokens to model's context window
+   // This prevents compatible providers (Gab AI, etc.) from imposing low defaults
+   settings.maxCompletionTokens = modelInfo.contextWindow || 16384;
+ }
const model: IAiModel = {
id: modelInfo.id,
name: modelInfo.name,
@@ -593,7 +609,7 @@ class DtpWebCli extends DtpProcess {
parameterLabel: modelInfo.parameterLabel,
contextWindow: modelInfo.contextWindow,
capabilities: probeResult.capabilities as IAiModelCapabilities,
-     settings: probeResult.settings as IAiModelSettings | undefined,
+     settings,
};
models.push(model);
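
In effect, probe-time policy is baked into the stored model record:
Ollama models persist numPredict = -1, while OpenAI-compatible models
persist an explicit completion cap, so downstream code (agent.ts below)
can read settings uniformly without provider-specific branching.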

View File: agent.ts

@@ -17,11 +17,14 @@ import {
} from "@gadget/ai";
import {
type IAiProvider,
+   type IAiModelSettings,
+   type IDroneModelConfig,
type IChatSession,
type IChatTurn,
type IChatSubagentProcess,
type IChatToolCall,
type IUser,
+   type GadgetId,
type ServerToClientEvents,
type ClientToServerEvents,
ChatSessionMode,
@@ -209,10 +212,7 @@ class AgentService extends GadgetService {
try {
response = await AiService.chat(
turn.provider,
-       {
-         modelId: turn.llm,
-         params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 },
-       },
+       this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
chatOptions,
this.makeStreamHandler(socket),
);
@@ -410,6 +410,46 @@ class AgentService extends GadgetService {
};
}
+
+ /**
+  * Builds an IDroneModelConfig by looking up the model's stored settings
+  * from the provider's cached models array. Falls back to safe defaults
+  * when stored settings aren't available (e.g., model not yet probed).
+  */
+ private buildDroneModelConfig(
+   provider: IAiProvider | GadgetId,
+   modelId: string,
+   reasoning: boolean | "low" | "medium" | "high",
+ ): Omit<IDroneModelConfig, "provider"> {
+   const settings = this.getModelSettings(provider, modelId);
+   return {
+     modelId,
+     params: {
+       reasoning,
+       temperature: settings?.temperature ?? 0.8,
+       topP: settings?.topP ?? 0.9,
+       topK: settings?.topK ?? 40,
+       numPredict: settings?.numPredict ?? -1, // -1 = unlimited (Ollama)
+       numCtx: settings?.numCtx ?? 131072, // 128k fallback
+       maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
+     },
+   };
+ }
+
+ /**
+  * Looks up the stored IAiModelSettings for a model from the provider's
+  * cached models array. Returns undefined if the provider or model isn't found.
+  */
+ private getModelSettings(
+   provider: IAiProvider | GadgetId,
+   modelId: string,
+ ): IAiModelSettings | undefined {
+   if (typeof provider === "string") return undefined;
+   const modelRecord = (provider as IAiProvider).models?.find(
+     (m) => m.id === modelId,
+   );
+   return modelRecord?.settings;
+ }
private async executeTool(name: string, argsJson: string): Promise<string> {
const tool = this.toolbox.getTool(name);
if (!tool) {
@@ -528,10 +568,7 @@ class AgentService extends GadgetService {
const response = await AiService.chat(
provider,
-       {
-         modelId: turn.llm,
-         params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
-       },
+       this.buildDroneModelConfig(provider, turn.llm, false),
chatOptions,
streamHandler,
);
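
As a quick sanity check on the fallback path: when the provider arrives as
a bare GadgetId string, getModelSettings() returns undefined and every ??
fallback fires, reproducing the documented defaults. A standalone sketch:

  const settings: IAiModelSettings | undefined = undefined; // model never probed
  const params = {
    reasoning: false,
    temperature: settings?.temperature ?? 0.8,
    topP: settings?.topP ?? 0.9,
    topK: settings?.topK ?? 40,
    numPredict: settings?.numPredict ?? -1,                      // Ollama: unlimited
    numCtx: settings?.numCtx ?? 131072,                          // 128k
    maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k
  };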

View File: ai.ts

@@ -15,7 +15,7 @@ const aiEnv: IAiEnvironment = {
},
};
- import { IAiProvider as DbAiProvider, GadgetId } from "@gadget/api";
+ import { IAiProvider as DbAiProvider, GadgetId, type IDroneModelConfig } from "@gadget/api";
import { GadgetService } from "../lib/service.js";
import {
type IAiChatOptions,
@@ -29,20 +29,6 @@ import {
IAiEnvironment,
} from "@gadget/ai";
- /**
-  * Drone-specific model config that accepts the database provider type.
-  */
- export interface IDroneModelConfig {
-   provider: DbAiProvider | GadgetId;
-   modelId: string;
-   params: {
-     reasoning: boolean | "low" | "medium" | "high";
-     temperature: number;
-     topP: number;
-     topK: number;
-   };
- }
/**
* An abstraction of the backend AI APIs (Ollama, OpenAI) that provides one
* common interface and contract for working with different AI APIs.

View File: IAiModelConfig / IAiModelProbeResult types

@@ -22,6 +22,12 @@ export interface IAiModelConfig {
temperature: number;
topP: number;
topK: number;
+     /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+     numPredict: number;
+     /** Context window size (input + output tokens); Ollama passes as num_ctx */
+     numCtx: number;
+     /** OpenAI-compatible: maximum completion tokens the model can generate */
+     maxCompletionTokens: number;
};
}
@@ -146,6 +152,10 @@ export interface IAiModelProbeResult {
topP?: number;
topK?: number;
numCtx?: number;
+     /** Ollama: discovered num_predict from model parameters (informational; overridden to -1 at inference time) */
+     numPredict?: number;
+     /** OpenAI-compatible: discovered from model info */
+     maxCompletionTokens?: number;
};
}
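
To make the root cause concrete: probing a stock Ollama model would surface
exactly the defaults the commit message blames (hypothetical result; only
the settings slice is shown):

  const probedSettings: IAiModelProbeResult["settings"] = {
    numCtx: 4096,    // Ollama's default context window
    numPredict: 128, // Ollama's default generation cap: the truncation culprit
  };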

View File: OllamaAiApi tests

@@ -87,7 +87,7 @@ describe('OllamaAiApi', () => {
{
provider: mockProvider as any,
modelId: 'test-model',
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{
userPrompt: 'Test prompt',
@@ -150,7 +150,7 @@ describe('OllamaAiApi', () => {
{
provider: mockProvider as any,
modelId: 'test-model',
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{
userPrompt: 'Test prompt',
@@ -201,7 +201,7 @@ describe('OllamaAiApi', () => {
{
provider: mockProvider as any,
modelId: 'test-model',
-       params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{
userPrompt: 'What is the answer?',
@@ -243,7 +243,7 @@ describe('OllamaAiApi', () => {
{
provider: mockProvider as any,
modelId: 'test-model',
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{
userPrompt: 'Test prompt',

View File: OllamaAiApi

@@ -126,6 +126,11 @@ export class OllamaAiApi extends AiApi {
settings.numCtx = parseInt(numCtxMatch[1], 10);
}
+   const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
+   if (numPredictMatch) {
+     settings.numPredict = parseInt(numPredictMatch[1], 10);
+   }
return Object.keys(settings).length > 0 ? settings : undefined;
}
@@ -144,6 +149,10 @@ export class OllamaAiApi extends AiApi {
prompt: options.prompt,
system: options.systemPrompt,
stream: true,
+     options: {
+       num_ctx: model.params.numCtx,
+       num_predict: model.params.numPredict,
+     },
});
const content = {
@@ -259,6 +268,10 @@ export class OllamaAiApi extends AiApi {
stream: true,
think: model.params.reasoning,
tools: ollamaTools,
+     options: {
+       num_ctx: model.params.numCtx,
+       num_predict: model.params.numPredict,
+     },
});
let lastChunk;
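
For reference, a standalone sketch of the extraction path, assuming a
parameters blob in the whitespace-separated key/value format the regex
expects (the blob is illustrative, and the num_ctx pattern is assumed to
be symmetric with the num_predict one shown above):

  const parameters = "num_ctx      4096\nnum_predict  128";

  const settings: { numCtx?: number; numPredict?: number } = {};
  const numCtxMatch = parameters.match(/num_ctx\s+(-?\d+)/i);
  if (numCtxMatch) settings.numCtx = parseInt(numCtxMatch[1], 10);
  const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
  if (numPredictMatch) settings.numPredict = parseInt(numPredictMatch[1], 10);
  // settings => { numCtx: 4096, numPredict: 128 }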

View File: OpenAiApi tests

@@ -65,7 +65,7 @@ describe("OpenAiApi", () => {
{
provider: mockProvider as any,
modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{ userPrompt: "Hello", context: [], tools: [] },
vi.fn(),
@@ -105,7 +105,7 @@ describe("OpenAiApi", () => {
{
provider: mockProvider as any,
modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{ userPrompt: "Read index.html", context: [], tools: [] },
streamCallback,
@@ -133,7 +133,7 @@ describe("OpenAiApi", () => {
{
provider: mockProvider as any,
modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{ userPrompt: "Hello", context: [], tools: [] },
streamCallback,
@@ -153,7 +153,7 @@ describe("OpenAiApi", () => {
{
provider: mockProvider as any,
modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
},
{
userPrompt: "Don't edit code. Just talk to me.",

View File: OpenAiApi

@@ -202,6 +202,9 @@ export class OpenAiApi extends AiApi {
{ role: "user" as const, content: options.prompt },
],
stream: true,
+     ...(model.params.maxCompletionTokens
+       ? { max_completion_tokens: model.params.maxCompletionTokens }
+       : {}),
...(typeof model.params.reasoning === "string"
? {
reasoning_effort: model.params.reasoning as
@@ -373,6 +376,9 @@ export class OpenAiApi extends AiApi {
messages,
tools,
stream: true,
+     ...(model.params.maxCompletionTokens
+       ? { max_completion_tokens: model.params.maxCompletionTokens }
+       : {}),
...(typeof model.params.reasoning === "string"
? {
reasoning_effort: model.params.reasoning as
@@ -439,6 +445,9 @@ export class OpenAiApi extends AiApi {
messages,
tools,
stream: false,
+     ...(model.params.maxCompletionTokens
+       ? { max_completion_tokens: model.params.maxCompletionTokens }
+       : {}),
...(typeof model.params.reasoning === "string"
? {
reasoning_effort: model.params.reasoning as
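
One subtlety of the conditional spread used in all three call sites: the
max_completion_tokens key is only included when the value is truthy, so an
unset (or zero) maxCompletionTokens sends no cap at all rather than a bogus
one. A minimal sketch of the pattern:

  const params = { maxCompletionTokens: 16384 };
  const body = {
    stream: true,
    ...(params.maxCompletionTokens
      ? { max_completion_tokens: params.maxCompletionTokens }
      : {}),
  };
  // body => { stream: true, max_completion_tokens: 16384 }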

View File: @gadget/api types (IAiModelSettings, IDroneModelConfig)

@@ -17,6 +17,10 @@ export interface IAiModelSettings {
topP?: number;
topK?: number;
numCtx?: number;
+   /** Ollama: maximum number of tokens to predict (-1 = unlimited, generate until natural stop or context limit) */
+   numPredict?: number;
+   /** OpenAI-compatible: maximum number of completion tokens the model can generate */
+   maxCompletionTokens?: number;
}
export interface IAiModelCapabilities {
@@ -61,3 +65,26 @@ export interface IAiProvider {
}
export type AiProviderDocument = HydratedDocument<IAiProvider>;
+
+ /**
+  * Drone-side model configuration that accepts the database IAiProvider type.
+  * This is the canonical interface used by the drone to configure AI API calls,
+  * carrying the model's stored inference settings (numCtx, numPredict, etc.)
+  * from the provider's cached model data through to the AI package.
+  */
+ export interface IDroneModelConfig {
+   provider: IAiProvider | GadgetId;
+   modelId: string;
+   params: {
+     reasoning: boolean | "low" | "medium" | "high";
+     temperature: number;
+     topP: number;
+     topK: number;
+     /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+     numPredict: number;
+     /** Context window size (input + output tokens); Ollama passes as num_ctx */
+     numCtx: number;
+     /** OpenAI-compatible: maximum completion tokens the model can generate */
+     maxCompletionTokens: number;
+   };
+ }
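
A hypothetical construction of the shared type, for orientation (the
provider document, model id, and parameter values below are illustrative):

  declare const providerDoc: IAiProvider;

  const config: IDroneModelConfig = {
    provider: providerDoc, // an IAiProvider document, or a bare GadgetId
    modelId: "llama3.1:8b",
    params: {
      reasoning: "medium",
      temperature: 0.8,
      topP: 0.9,
      topK: 40,
      numPredict: -1,             // Ollama: unlimited
      numCtx: 131072,             // 128k context
      maxCompletionTokens: 16384, // OpenAI-compatible cap
    },
  };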