feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline
Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.
Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096, silently truncating output and context. We never overrode these.
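For reference, the Ollama client only applies non-default limits when they
are passed explicitly per request. A minimal sketch of the fixed call shape
(assuming the ollama npm client; model/params names follow this change):

    import ollama from "ollama";

    // Without an explicit `options` object the server falls back to its
    // own defaults (num_predict 128, num_ctx 4096) and truncates silently.
    const stream = await ollama.chat({
      model: model.modelId,
      messages,
      stream: true,
      options: {
        num_ctx: model.params.numCtx,         // e.g. 131072
        num_predict: model.params.numPredict, // -1 = run to natural stop
      },
    });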
Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI all three create() calls: add max_completion_tokens
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from provider
cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass
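Call-site effect in agent.ts (abridged from the hunks below):

    // Before: inline literal with hardcoded params and no token limits
    response = await AiService.chat(
      turn.provider,
      { modelId: turn.llm, params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 } },
      chatOptions,
      this.makeStreamHandler(socket),
    );

    // After: stored per-model settings with safe fallbacks
    response = await AiService.chat(
      turn.provider,
      this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
      chatOptions,
      this.makeStreamHandler(socket),
    );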
Defaults when model settings unavailable:
- numPredict: -1 (Ollama unlimited - generate until natural stop)
- numCtx: 131072 (128k - covers most modern models)
- maxCompletionTokens: 16384 (16k - reasonable OpenAI default)
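The fallbacks are resolved with nullish coalescing in buildDroneModelConfig
(see the agent.ts hunk below), so stored values — including 0 or -1 — always
win over the defaults:

    params: {
      reasoning,
      temperature: settings?.temperature ?? 0.8,
      topP: settings?.topP ?? 0.9,
      topK: settings?.topK ?? 40,
      numPredict: settings?.numPredict ?? -1,
      numCtx: settings?.numCtx ?? 131072,
      maxCompletionTokens: settings?.maxCompletionTokens ?? 16384,
    },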
commit 07a760c7b5 (parent 62578e8e56)
@@ -29,6 +29,8 @@ export const AiModelSettingsSchema = new Schema<IAiModelSettings>(
     topP: { type: Number },
     topK: { type: Number },
     numCtx: { type: Number },
+    numPredict: { type: Number },
+    maxCompletionTokens: { type: Number },
   },
   { _id: false },
 );

@@ -439,6 +439,9 @@ class ChatSessionService extends DtpService {
         temperature: 1.0,
         topK: 0.6,
         topP: 0.4,
+        numPredict: -1,
+        numCtx: 131072,
+        maxCompletionTokens: 256,
       },
     },
     {

@@ -586,6 +586,22 @@ class DtpWebCli extends DtpProcess {
       try {
         const probeResult = await api.probeModel(modelInfo.id);
 
+        // Compute provider-specific inference settings
+        const settings: IAiModelSettings = {
+          ...(probeResult.settings as IAiModelSettings | undefined),
+        };
+
+        if (provider.apiType === 'ollama') {
+          // Ollama: always override numPredict to -1 (unlimited) for agentic workflows
+          // The model must generate until its natural stop token or context limit
+          settings.numPredict = -1;
+          // numCtx is already populated by probeResult.settings from extractSettings()
+        } else if (provider.apiType === 'openai') {
+          // OpenAI-compatible: set maxCompletionTokens to model's context window
+          // This prevents compatible providers (Gab AI, etc.) from imposing low defaults
+          settings.maxCompletionTokens = modelInfo.contextWindow || 16384;
+        }
+
         const model: IAiModel = {
           id: modelInfo.id,
           name: modelInfo.name,

@@ -593,7 +609,7 @@ class DtpWebCli extends DtpProcess {
           parameterLabel: modelInfo.parameterLabel,
           contextWindow: modelInfo.contextWindow,
           capabilities: probeResult.capabilities as IAiModelCapabilities,
-          settings: probeResult.settings as IAiModelSettings | undefined,
+          settings,
         };
 
         models.push(model);

@@ -17,11 +17,14 @@ import {
 } from "@gadget/ai";
 import {
   type IAiProvider,
+  type IAiModelSettings,
+  type IDroneModelConfig,
   type IChatSession,
   type IChatTurn,
   type IChatSubagentProcess,
   type IChatToolCall,
   type IUser,
+  type GadgetId,
   type ServerToClientEvents,
   type ClientToServerEvents,
   ChatSessionMode,

@@ -209,10 +212,7 @@ class AgentService extends GadgetService {
       try {
         response = await AiService.chat(
           turn.provider,
-          {
-            modelId: turn.llm,
-            params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 },
-          },
+          this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
           chatOptions,
           this.makeStreamHandler(socket),
         );

@@ -410,6 +410,46 @@ class AgentService extends GadgetService {
     };
   }
 
+  /**
+   * Builds an IDroneModelConfig by looking up the model's stored settings
+   * from the provider's cached models array. Falls back to safe defaults
+   * when stored settings aren't available (e.g., model not yet probed).
+   */
+  private buildDroneModelConfig(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+    reasoning: boolean | "low" | "medium" | "high",
+  ): Omit<IDroneModelConfig, "provider"> {
+    const settings = this.getModelSettings(provider, modelId);
+    return {
+      modelId,
+      params: {
+        reasoning,
+        temperature: settings?.temperature ?? 0.8,
+        topP: settings?.topP ?? 0.9,
+        topK: settings?.topK ?? 40,
+        numPredict: settings?.numPredict ?? -1, // -1 = unlimited (Ollama)
+        numCtx: settings?.numCtx ?? 131072, // 128k fallback
+        maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
+      },
+    };
+  }
+
+  /**
+   * Looks up the stored IAiModelSettings for a model from the provider's
+   * cached models array. Returns undefined if the provider or model isn't found.
+   */
+  private getModelSettings(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+  ): IAiModelSettings | undefined {
+    if (typeof provider === "string") return undefined;
+    const modelRecord = (provider as IAiProvider).models?.find(
+      (m) => m.id === modelId,
+    );
+    return modelRecord?.settings;
+  }
+
   private async executeTool(name: string, argsJson: string): Promise<string> {
     const tool = this.toolbox.getTool(name);
     if (!tool) {

@@ -528,10 +568,7 @@ class AgentService extends GadgetService {
 
     const response = await AiService.chat(
       provider,
-      {
-        modelId: turn.llm,
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
-      },
+      this.buildDroneModelConfig(provider, turn.llm, false),
       chatOptions,
       streamHandler,
     );

@@ -15,7 +15,7 @@ const aiEnv: IAiEnvironment = {
   },
 };
 
-import { IAiProvider as DbAiProvider, GadgetId } from "@gadget/api";
+import { IAiProvider as DbAiProvider, GadgetId, type IDroneModelConfig } from "@gadget/api";
 import { GadgetService } from "../lib/service.js";
 import {
   type IAiChatOptions,

@@ -29,20 +29,6 @@ import {
   IAiEnvironment,
 } from "@gadget/ai";
-
-/**
- * Drone-specific model config that accepts the database provider type.
- */
-export interface IDroneModelConfig {
-  provider: DbAiProvider | GadgetId;
-  modelId: string;
-  params: {
-    reasoning: boolean | "low" | "medium" | "high";
-    temperature: number;
-    topP: number;
-    topK: number;
-  };
-}
 
 /**
  * An abstraction of the backend AI APIs (Ollama, OpenAI) that provides one
  * common interface and contract for working with different AI APIs.

@@ -22,6 +22,12 @@ export interface IAiModelConfig {
     temperature: number;
     topP: number;
     topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
   };
 }

@@ -146,6 +152,10 @@ export interface IAiModelProbeResult {
     topP?: number;
     topK?: number;
     numCtx?: number;
+    /** Ollama: discovered num_predict from model parameters (informational; overridden to -1 at inference time) */
+    numPredict?: number;
+    /** OpenAI-compatible: discovered from model info */
+    maxCompletionTokens?: number;
   };
 }

@@ -87,7 +87,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',

@@ -150,7 +150,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',

@@ -201,7 +201,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'What is the answer?',

@@ -243,7 +243,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',

@@ -126,6 +126,11 @@ export class OllamaAiApi extends AiApi {
       settings.numCtx = parseInt(numCtxMatch[1], 10);
     }
 
+    const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
+    if (numPredictMatch) {
+      settings.numPredict = parseInt(numPredictMatch[1], 10);
+    }
+
     return Object.keys(settings).length > 0 ? settings : undefined;
   }

@@ -144,6 +149,10 @@ export class OllamaAiApi extends AiApi {
       prompt: options.prompt,
       system: options.systemPrompt,
       stream: true,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     const content = {

@@ -259,6 +268,10 @@ export class OllamaAiApi extends AiApi {
       stream: true,
       think: model.params.reasoning,
       tools: ollamaTools,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     let lastChunk;

@@ -65,7 +65,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       { userPrompt: "Hello", context: [], tools: [] },
       vi.fn(),

@@ -105,7 +105,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       { userPrompt: "Read index.html", context: [], tools: [] },
       streamCallback,

@@ -133,7 +133,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       { userPrompt: "Hello", context: [], tools: [] },
       streamCallback,

@@ -153,7 +153,7 @@ describe("OpenAiApi", () => {
       {
         provider: mockProvider as any,
         modelId: "test-model",
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: "Don't edit code. Just talk to me.",

@@ -202,6 +202,9 @@ export class OpenAiApi extends AiApi {
         { role: "user" as const, content: options.prompt },
       ],
       stream: true,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as

@@ -373,6 +376,9 @@ export class OpenAiApi extends AiApi {
       messages,
       tools,
       stream: true,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as

@@ -439,6 +445,9 @@ export class OpenAiApi extends AiApi {
       messages,
       tools,
       stream: false,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as

@@ -17,6 +17,10 @@ export interface IAiModelSettings {
   topP?: number;
   topK?: number;
   numCtx?: number;
+  /** Ollama: maximum number of tokens to predict (-1 = unlimited, generate until natural stop or context limit) */
+  numPredict?: number;
+  /** OpenAI-compatible: maximum number of completion tokens the model can generate */
+  maxCompletionTokens?: number;
 }
 
 export interface IAiModelCapabilities {

@@ -61,3 +65,26 @@ export interface IAiProvider {
 }
 
 export type AiProviderDocument = HydratedDocument<IAiProvider>;
+
+/**
+ * Drone-side model configuration that accepts the database IAiProvider type.
+ * This is the canonical interface used by the drone to configure AI API calls,
+ * carrying the model's stored inference settings (numCtx, numPredict, etc.)
+ * from the provider's cached model data through to the AI package.
+ */
+export interface IDroneModelConfig {
+  provider: IAiProvider | GadgetId;
+  modelId: string;
+  params: {
+    reasoning: boolean | "low" | "medium" | "high";
+    temperature: number;
+    topP: number;
+    topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
+  };
+}