feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline
Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.
Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096 tokens; because we never overrode these defaults, output and
context were silently truncated.
Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI all three create() calls: add max_completion_tokens
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from provider
cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass
Defaults when model settings unavailable:
- numPredict: -1 (Ollama: unlimited — generate until natural stop)
- numCtx: 131072 (128k — covers most modern models)
- maxCompletionTokens: 16384 (16k — reasonable OpenAI default)
This commit is contained in:
parent: 62578e8e56 · commit: 07a760c7b5
@ -29,6 +29,8 @@ export const AiModelSettingsSchema = new Schema<IAiModelSettings>(
|
||||
topP: { type: Number },
|
||||
topK: { type: Number },
|
||||
numCtx: { type: Number },
|
||||
numPredict: { type: Number },
|
||||
maxCompletionTokens: { type: Number },
|
||||
},
|
||||
{ _id: false },
|
||||
);
|
||||
|
||||
@ -439,6 +439,9 @@ class ChatSessionService extends DtpService {
|
||||
temperature: 1.0,
|
||||
topK: 0.6,
|
||||
topP: 0.4,
|
||||
numPredict: -1,
|
||||
numCtx: 131072,
|
||||
maxCompletionTokens: 256,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
||||
@ -586,6 +586,22 @@ class DtpWebCli extends DtpProcess {
|
||||
try {
|
||||
const probeResult = await api.probeModel(modelInfo.id);
|
||||
|
||||
// Compute provider-specific inference settings
|
||||
const settings: IAiModelSettings = {
|
||||
...(probeResult.settings as IAiModelSettings | undefined),
|
||||
};
|
||||
|
||||
if (provider.apiType === 'ollama') {
|
||||
// Ollama: always override numPredict to -1 (unlimited) for agentic workflows
|
||||
// The model must generate until its natural stop token or context limit
|
||||
settings.numPredict = -1;
|
||||
// numCtx is already populated by probeResult.settings from extractSettings()
|
||||
} else if (provider.apiType === 'openai') {
|
||||
// OpenAI-compatible: set maxCompletionTokens to model's context window
|
||||
// This prevents compatible providers (Gab AI, etc.) from imposing low defaults
|
||||
settings.maxCompletionTokens = modelInfo.contextWindow || 16384;
|
||||
}
|
||||
|
||||
const model: IAiModel = {
|
||||
id: modelInfo.id,
|
||||
name: modelInfo.name,
|
||||
@ -593,7 +609,7 @@ class DtpWebCli extends DtpProcess {
|
||||
parameterLabel: modelInfo.parameterLabel,
|
||||
contextWindow: modelInfo.contextWindow,
|
||||
capabilities: probeResult.capabilities as IAiModelCapabilities,
|
||||
settings: probeResult.settings as IAiModelSettings | undefined,
|
||||
settings,
|
||||
};
|
||||
|
||||
models.push(model);
|
||||
|
||||
@ -17,11 +17,14 @@ import {
|
||||
} from "@gadget/ai";
|
||||
import {
|
||||
type IAiProvider,
|
||||
type IAiModelSettings,
|
||||
type IDroneModelConfig,
|
||||
type IChatSession,
|
||||
type IChatTurn,
|
||||
type IChatSubagentProcess,
|
||||
type IChatToolCall,
|
||||
type IUser,
|
||||
type GadgetId,
|
||||
type ServerToClientEvents,
|
||||
type ClientToServerEvents,
|
||||
ChatSessionMode,
|
||||
@ -209,10 +212,7 @@ class AgentService extends GadgetService {
|
||||
try {
|
||||
response = await AiService.chat(
|
||||
turn.provider,
|
||||
{
|
||||
modelId: turn.llm,
|
||||
params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
},
|
||||
this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
|
||||
chatOptions,
|
||||
this.makeStreamHandler(socket),
|
||||
);
|
||||
@ -410,6 +410,46 @@ class AgentService extends GadgetService {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an IDroneModelConfig by looking up the model's stored settings
|
||||
* from the provider's cached models array. Falls back to safe defaults
|
||||
* when stored settings aren't available (e.g., model not yet probed).
|
||||
*/
|
||||
private buildDroneModelConfig(
|
||||
provider: IAiProvider | GadgetId,
|
||||
modelId: string,
|
||||
reasoning: boolean | "low" | "medium" | "high",
|
||||
): Omit<IDroneModelConfig, "provider"> {
|
||||
const settings = this.getModelSettings(provider, modelId);
|
||||
return {
|
||||
modelId,
|
||||
params: {
|
||||
reasoning,
|
||||
temperature: settings?.temperature ?? 0.8,
|
||||
topP: settings?.topP ?? 0.9,
|
||||
topK: settings?.topK ?? 40,
|
||||
numPredict: settings?.numPredict ?? -1, // -1 = unlimited (Ollama)
|
||||
numCtx: settings?.numCtx ?? 131072, // 128k fallback
|
||||
maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks up the stored IAiModelSettings for a model from the provider's
|
||||
* cached models array. Returns undefined if the provider or model isn't found.
|
||||
*/
|
||||
private getModelSettings(
|
||||
provider: IAiProvider | GadgetId,
|
||||
modelId: string,
|
||||
): IAiModelSettings | undefined {
|
||||
if (typeof provider === "string") return undefined;
|
||||
const modelRecord = (provider as IAiProvider).models?.find(
|
||||
(m) => m.id === modelId,
|
||||
);
|
||||
return modelRecord?.settings;
|
||||
}
|
||||
|
||||
private async executeTool(name: string, argsJson: string): Promise<string> {
|
||||
const tool = this.toolbox.getTool(name);
|
||||
if (!tool) {
|
||||
@ -528,10 +568,7 @@ class AgentService extends GadgetService {
|
||||
|
||||
const response = await AiService.chat(
|
||||
provider,
|
||||
{
|
||||
modelId: turn.llm,
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
},
|
||||
this.buildDroneModelConfig(provider, turn.llm, false),
|
||||
chatOptions,
|
||||
streamHandler,
|
||||
);
|
||||
|
||||
@ -15,7 +15,7 @@ const aiEnv: IAiEnvironment = {
|
||||
},
|
||||
};
|
||||
|
||||
import { IAiProvider as DbAiProvider, GadgetId } from "@gadget/api";
|
||||
import { IAiProvider as DbAiProvider, GadgetId, type IDroneModelConfig } from "@gadget/api";
|
||||
import { GadgetService } from "../lib/service.js";
|
||||
import {
|
||||
type IAiChatOptions,
|
||||
@ -29,20 +29,6 @@ import {
|
||||
IAiEnvironment,
|
||||
} from "@gadget/ai";
|
||||
|
||||
/**
|
||||
* Drone-specific model config that accepts the database provider type.
|
||||
*/
|
||||
export interface IDroneModelConfig {
|
||||
provider: DbAiProvider | GadgetId;
|
||||
modelId: string;
|
||||
params: {
|
||||
reasoning: boolean | "low" | "medium" | "high";
|
||||
temperature: number;
|
||||
topP: number;
|
||||
topK: number;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* An abstraction of the backend AI APIs (Ollama, OpenAI) that provides one
|
||||
* common interface and contract for working with different AI APIs.
|
||||
|
||||
@ -22,6 +22,12 @@ export interface IAiModelConfig {
|
||||
temperature: number;
|
||||
topP: number;
|
||||
topK: number;
|
||||
/** Ollama: -1 = unlimited (generate until natural stop or context limit) */
|
||||
numPredict: number;
|
||||
/** Context window size (input + output tokens); Ollama passes as num_ctx */
|
||||
numCtx: number;
|
||||
/** OpenAI-compatible: maximum completion tokens the model can generate */
|
||||
maxCompletionTokens: number;
|
||||
};
|
||||
}
|
||||
|
||||
@ -146,6 +152,10 @@ export interface IAiModelProbeResult {
|
||||
topP?: number;
|
||||
topK?: number;
|
||||
numCtx?: number;
|
||||
/** Ollama: discovered num_predict from model parameters (informational; overridden to -1 at inference time) */
|
||||
numPredict?: number;
|
||||
/** OpenAI-compatible: discovered from model info */
|
||||
maxCompletionTokens?: number;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -87,7 +87,7 @@ describe('OllamaAiApi', () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: 'test-model',
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{
|
||||
userPrompt: 'Test prompt',
|
||||
@ -150,7 +150,7 @@ describe('OllamaAiApi', () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: 'test-model',
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{
|
||||
userPrompt: 'Test prompt',
|
||||
@ -201,7 +201,7 @@ describe('OllamaAiApi', () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: 'test-model',
|
||||
params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{
|
||||
userPrompt: 'What is the answer?',
|
||||
@ -243,7 +243,7 @@ describe('OllamaAiApi', () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: 'test-model',
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{
|
||||
userPrompt: 'Test prompt',
|
||||
|
||||
@ -126,6 +126,11 @@ export class OllamaAiApi extends AiApi {
|
||||
settings.numCtx = parseInt(numCtxMatch[1], 10);
|
||||
}
|
||||
|
||||
const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
|
||||
if (numPredictMatch) {
|
||||
settings.numPredict = parseInt(numPredictMatch[1], 10);
|
||||
}
|
||||
|
||||
return Object.keys(settings).length > 0 ? settings : undefined;
|
||||
}
|
||||
|
||||
@ -144,6 +149,10 @@ export class OllamaAiApi extends AiApi {
|
||||
prompt: options.prompt,
|
||||
system: options.systemPrompt,
|
||||
stream: true,
|
||||
options: {
|
||||
num_ctx: model.params.numCtx,
|
||||
num_predict: model.params.numPredict,
|
||||
},
|
||||
});
|
||||
|
||||
const content = {
|
||||
@ -259,6 +268,10 @@ export class OllamaAiApi extends AiApi {
|
||||
stream: true,
|
||||
think: model.params.reasoning,
|
||||
tools: ollamaTools,
|
||||
options: {
|
||||
num_ctx: model.params.numCtx,
|
||||
num_predict: model.params.numPredict,
|
||||
},
|
||||
});
|
||||
|
||||
let lastChunk;
|
||||
|
||||
@ -65,7 +65,7 @@ describe("OpenAiApi", () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: "test-model",
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{ userPrompt: "Hello", context: [], tools: [] },
|
||||
vi.fn(),
|
||||
@ -105,7 +105,7 @@ describe("OpenAiApi", () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: "test-model",
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{ userPrompt: "Read index.html", context: [], tools: [] },
|
||||
streamCallback,
|
||||
@ -133,7 +133,7 @@ describe("OpenAiApi", () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: "test-model",
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{ userPrompt: "Hello", context: [], tools: [] },
|
||||
streamCallback,
|
||||
@ -153,7 +153,7 @@ describe("OpenAiApi", () => {
|
||||
{
|
||||
provider: mockProvider as any,
|
||||
modelId: "test-model",
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
|
||||
params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
|
||||
},
|
||||
{
|
||||
userPrompt: "Don't edit code. Just talk to me.",
|
||||
|
||||
@ -202,6 +202,9 @@ export class OpenAiApi extends AiApi {
|
||||
{ role: "user" as const, content: options.prompt },
|
||||
],
|
||||
stream: true,
|
||||
...(model.params.maxCompletionTokens
|
||||
? { max_completion_tokens: model.params.maxCompletionTokens }
|
||||
: {}),
|
||||
...(typeof model.params.reasoning === "string"
|
||||
? {
|
||||
reasoning_effort: model.params.reasoning as
|
||||
@ -373,6 +376,9 @@ export class OpenAiApi extends AiApi {
|
||||
messages,
|
||||
tools,
|
||||
stream: true,
|
||||
...(model.params.maxCompletionTokens
|
||||
? { max_completion_tokens: model.params.maxCompletionTokens }
|
||||
: {}),
|
||||
...(typeof model.params.reasoning === "string"
|
||||
? {
|
||||
reasoning_effort: model.params.reasoning as
|
||||
@ -439,6 +445,9 @@ export class OpenAiApi extends AiApi {
|
||||
messages,
|
||||
tools,
|
||||
stream: false,
|
||||
...(model.params.maxCompletionTokens
|
||||
? { max_completion_tokens: model.params.maxCompletionTokens }
|
||||
: {}),
|
||||
...(typeof model.params.reasoning === "string"
|
||||
? {
|
||||
reasoning_effort: model.params.reasoning as
|
||||
|
||||
@ -17,6 +17,10 @@ export interface IAiModelSettings {
|
||||
topP?: number;
|
||||
topK?: number;
|
||||
numCtx?: number;
|
||||
/** Ollama: maximum number of tokens to predict (-1 = unlimited, generate until natural stop or context limit) */
|
||||
numPredict?: number;
|
||||
/** OpenAI-compatible: maximum number of completion tokens the model can generate */
|
||||
maxCompletionTokens?: number;
|
||||
}
|
||||
|
||||
export interface IAiModelCapabilities {
|
||||
@ -61,3 +65,26 @@ export interface IAiProvider {
|
||||
}
|
||||
|
||||
export type AiProviderDocument = HydratedDocument<IAiProvider>;
|
||||
|
||||
/**
|
||||
* Drone-side model configuration that accepts the database IAiProvider type.
|
||||
* This is the canonical interface used by the drone to configure AI API calls,
|
||||
* carrying the model's stored inference settings (numCtx, numPredict, etc.)
|
||||
* from the provider's cached model data through to the AI package.
|
||||
*/
|
||||
export interface IDroneModelConfig {
|
||||
provider: IAiProvider | GadgetId;
|
||||
modelId: string;
|
||||
params: {
|
||||
reasoning: boolean | "low" | "medium" | "high";
|
||||
temperature: number;
|
||||
topP: number;
|
||||
topK: number;
|
||||
/** Ollama: -1 = unlimited (generate until natural stop or context limit) */
|
||||
numPredict: number;
|
||||
/** Context window size (input + output tokens); Ollama passes as num_ctx */
|
||||
numCtx: number;
|
||||
/** OpenAI-compatible: maximum completion tokens the model can generate */
|
||||
maxCompletionTokens: number;
|
||||
};
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user