feat: add numPredict, numCtx, maxCompletionTokens to model config pipeline

Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.

Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096, silently truncating output and context. We never overrode these.
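
For context, a minimal request-level sketch of the override (host and model
name are illustrative; the options block is Ollama's documented /api/generate
shape):

// Without an explicit options block, Ollama applies its own
// num_predict/num_ctx defaults, truncating long completions.
const res = await fetch("http://localhost:11434/api/generate", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "llama3.1",            // illustrative model name
    prompt: "Summarize the design doc.",
    stream: false,
    options: {
      num_ctx: 131072,            // context window (input + output tokens)
      num_predict: -1,            // -1 = generate until natural stop
    },
  }),
});
const { response } = await res.json();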

Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
  expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI: add max_completion_tokens to all three create() calls
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
  and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from provider
  cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass

Defaults when model settings unavailable (see the sketch after this list):
- numPredict: -1 (Ollama unlimited - generate until natural stop)
- numCtx: 131072 (128k - covers most modern models)
- maxCompletionTokens: 16384 (16k - reasonable OpenAI default)
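
Resolution is plain nullish coalescing against the stored probe settings, as
buildDroneModelConfig does below; a condensed sketch (resolveParams is a
hypothetical helper name):

import type { IAiModelSettings } from "@gadget/api";

// Stored probe settings win; otherwise the defaults listed above apply.
function resolveParams(settings?: IAiModelSettings) {
  return {
    numPredict: settings?.numPredict ?? -1,        // Ollama: unlimited
    numCtx: settings?.numCtx ?? 131072,            // 128k fallback
    maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
  };
}
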
Rob Colbert 2026-05-11 13:05:40 -04:00
parent 62578e8e56
commit 07a760c7b5
11 changed files with 135 additions and 32 deletions

View File

@@ -29,6 +29,8 @@ export const AiModelSettingsSchema = new Schema<IAiModelSettings>(
     topP: { type: Number },
     topK: { type: Number },
     numCtx: { type: Number },
+    numPredict: { type: Number },
+    maxCompletionTokens: { type: Number },
   },
   { _id: false },
 );
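
For illustration, a settings subdocument persisted through this schema might
look like the following (values are hypothetical, matching the probe defaults
described above):

import type { IAiModelSettings } from "@gadget/api";

// Hypothetical persisted settings after probing an Ollama model:
const stored: IAiModelSettings = {
  temperature: 0.8,
  topP: 0.9,
  topK: 40,
  numCtx: 131072,
  numPredict: -1,             // forced to -1 for Ollama during the probe
  maxCompletionTokens: 16384,
};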

View File

@@ -439,6 +439,9 @@ class ChatSessionService extends DtpService {
           temperature: 1.0,
           topK: 0.6,
           topP: 0.4,
+          numPredict: -1,
+          numCtx: 131072,
+          maxCompletionTokens: 256,
         },
       },
       {

View File

@@ -586,6 +586,22 @@ class DtpWebCli extends DtpProcess {
       try {
         const probeResult = await api.probeModel(modelInfo.id);
+
+        // Compute provider-specific inference settings
+        const settings: IAiModelSettings = {
+          ...(probeResult.settings as IAiModelSettings | undefined),
+        };
+        if (provider.apiType === 'ollama') {
+          // Ollama: always override numPredict to -1 (unlimited) for agentic workflows
+          // The model must generate until its natural stop token or context limit
+          settings.numPredict = -1;
+          // numCtx is already populated by probeResult.settings from extractSettings()
+        } else if (provider.apiType === 'openai') {
+          // OpenAI-compatible: set maxCompletionTokens to model's context window
+          // This prevents compatible providers (Gab AI, etc.) from imposing low defaults
+          settings.maxCompletionTokens = modelInfo.contextWindow || 16384;
+        }
+
         const model: IAiModel = {
           id: modelInfo.id,
           name: modelInfo.name,
@@ -593,7 +609,7 @@ class DtpWebCli extends DtpProcess {
           parameterLabel: modelInfo.parameterLabel,
           contextWindow: modelInfo.contextWindow,
           capabilities: probeResult.capabilities as IAiModelCapabilities,
-          settings: probeResult.settings as IAiModelSettings | undefined,
+          settings,
         };
         models.push(model);

View File

@@ -17,11 +17,14 @@ import {
 } from "@gadget/ai";
 import {
   type IAiProvider,
+  type IAiModelSettings,
+  type IDroneModelConfig,
   type IChatSession,
   type IChatTurn,
   type IChatSubagentProcess,
   type IChatToolCall,
   type IUser,
+  type GadgetId,
   type ServerToClientEvents,
   type ClientToServerEvents,
   ChatSessionMode,
@@ -209,10 +212,7 @@ class AgentService extends GadgetService {
     try {
       response = await AiService.chat(
         turn.provider,
-        {
-          modelId: turn.llm,
-          params: { reasoning: currentReasoning, temperature: 0.8, topP: 0.9, topK: 40 },
-        },
+        this.buildDroneModelConfig(turn.provider, turn.llm, currentReasoning),
         chatOptions,
         this.makeStreamHandler(socket),
       );
@@ -410,6 +410,46 @@ class AgentService extends GadgetService {
     };
   }
 
+  /**
+   * Builds an IDroneModelConfig by looking up the model's stored settings
+   * from the provider's cached models array. Falls back to safe defaults
+   * when stored settings aren't available (e.g., model not yet probed).
+   */
+  private buildDroneModelConfig(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+    reasoning: boolean | "low" | "medium" | "high",
+  ): Omit<IDroneModelConfig, "provider"> {
+    const settings = this.getModelSettings(provider, modelId);
+    return {
+      modelId,
+      params: {
+        reasoning,
+        temperature: settings?.temperature ?? 0.8,
+        topP: settings?.topP ?? 0.9,
+        topK: settings?.topK ?? 40,
+        numPredict: settings?.numPredict ?? -1, // -1 = unlimited (Ollama)
+        numCtx: settings?.numCtx ?? 131072, // 128k fallback
+        maxCompletionTokens: settings?.maxCompletionTokens ?? 16384, // 16k fallback
+      },
+    };
+  }
+
+  /**
+   * Looks up the stored IAiModelSettings for a model from the provider's
+   * cached models array. Returns undefined if the provider or model isn't found.
+   */
+  private getModelSettings(
+    provider: IAiProvider | GadgetId,
+    modelId: string,
+  ): IAiModelSettings | undefined {
+    if (typeof provider === "string") return undefined;
+    const modelRecord = (provider as IAiProvider).models?.find(
+      (m) => m.id === modelId,
+    );
+    return modelRecord?.settings;
+  }
+
   private async executeTool(name: string, argsJson: string): Promise<string> {
     const tool = this.toolbox.getTool(name);
     if (!tool) {
@@ -528,10 +568,7 @@ class AgentService extends GadgetService {
 
       const response = await AiService.chat(
         provider,
-        {
-          modelId: turn.llm,
-          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
-        },
+        this.buildDroneModelConfig(provider, turn.llm, false),
         chatOptions,
         streamHandler,
       );

View File

@@ -15,7 +15,7 @@ const aiEnv: IAiEnvironment = {
   },
 };
 
-import { IAiProvider as DbAiProvider, GadgetId } from "@gadget/api";
+import { IAiProvider as DbAiProvider, GadgetId, type IDroneModelConfig } from "@gadget/api";
 import { GadgetService } from "../lib/service.js";
 import {
   type IAiChatOptions,
@@ -29,20 +29,6 @@ import {
   IAiEnvironment,
 } from "@gadget/ai";
 
-/**
- * Drone-specific model config that accepts the database provider type.
- */
-export interface IDroneModelConfig {
-  provider: DbAiProvider | GadgetId;
-  modelId: string;
-  params: {
-    reasoning: boolean | "low" | "medium" | "high";
-    temperature: number;
-    topP: number;
-    topK: number;
-  };
-}
-
 /**
  * An abstraction of the backend AI APIs (Ollama, OpenAI) that provides one
  * common interface and contract for working with different AI APIs.

View File

@@ -22,6 +22,12 @@ export interface IAiModelConfig {
     temperature: number;
     topP: number;
     topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
   };
 }
 
@@ -146,6 +152,10 @@ export interface IAiModelProbeResult {
     topP?: number;
     topK?: number;
     numCtx?: number;
+    /** Ollama: discovered num_predict from model parameters (informational; overridden to -1 at inference time) */
+    numPredict?: number;
+    /** OpenAI-compatible: discovered from model info */
+    maxCompletionTokens?: number;
   };
 }

View File

@@ -87,7 +87,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
@@ -150,7 +150,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',
@@ -201,7 +201,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'What is the answer?',
@@ -243,7 +243,7 @@ describe('OllamaAiApi', () => {
       {
         provider: mockProvider as any,
         modelId: 'test-model',
-        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+        params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
       },
       {
         userPrompt: 'Test prompt',

View File

@@ -126,6 +126,11 @@ export class OllamaAiApi extends AiApi {
       settings.numCtx = parseInt(numCtxMatch[1], 10);
     }
 
+    const numPredictMatch = parameters.match(/num_predict\s+(-?\d+)/i);
+    if (numPredictMatch) {
+      settings.numPredict = parseInt(numPredictMatch[1], 10);
+    }
+
     return Object.keys(settings).length > 0 ? settings : undefined;
   }
 
@@ -144,6 +149,10 @@ export class OllamaAiApi extends AiApi {
       prompt: options.prompt,
       system: options.systemPrompt,
       stream: true,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     const content = {
@@ -259,6 +268,10 @@ export class OllamaAiApi extends AiApi {
       stream: true,
       think: model.params.reasoning,
       tools: ollamaTools,
+      options: {
+        num_ctx: model.params.numCtx,
+        num_predict: model.params.numPredict,
+      },
     });
 
     let lastChunk;
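
A standalone usage sketch, assuming the ollama npm client (model name is
illustrative); the options object mirrors what generate() and chat() now pass:

import ollama from "ollama";

// Streaming generate call with the same num_ctx/num_predict overrides.
const stream = await ollama.generate({
  model: "llama3.1",
  prompt: "Explain num_predict in one paragraph.",
  stream: true,
  options: { num_ctx: 131072, num_predict: -1 },
});
for await (const chunk of stream) {
  process.stdout.write(chunk.response);
}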

View File

@@ -65,7 +65,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      { userPrompt: "Hello", context: [], tools: [] },
      vi.fn(),
@@ -105,7 +105,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      { userPrompt: "Read index.html", context: [], tools: [] },
      streamCallback,
@@ -133,7 +133,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      { userPrompt: "Hello", context: [], tools: [] },
      streamCallback,
@@ -153,7 +153,7 @@ describe("OpenAiApi", () => {
      {
        provider: mockProvider as any,
        modelId: "test-model",
-       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40 },
+       params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
      },
      {
        userPrompt: "Don't edit code. Just talk to me.",

View File

@@ -202,6 +202,9 @@ export class OpenAiApi extends AiApi {
         { role: "user" as const, content: options.prompt },
       ],
       stream: true,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as
@@ -373,6 +376,9 @@ export class OpenAiApi extends AiApi {
       messages,
       tools,
       stream: true,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
       ...(typeof model.params.reasoning === "string"
         ? {
             reasoning_effort: model.params.reasoning as
@@ -439,6 +445,9 @@ export class OpenAiApi extends AiApi {
       messages,
       tools,
       stream: false,
+      ...(model.params.maxCompletionTokens
+        ? { max_completion_tokens: model.params.maxCompletionTokens }
+        : {}),
      ...(typeof model.params.reasoning === "string"
        ? {
            reasoning_effort: model.params.reasoning as
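
Note the conditional spread omits max_completion_tokens entirely when the
value is falsy, rather than sending 0 to the API; a condensed sketch with an
illustrative literal:

// With maxCompletionTokens undefined (or 0), spreading {} leaves the
// field out of the request body altogether.
const maxCompletionTokens: number | undefined = 16384;
const body = {
  model: "test-model",
  stream: true,
  ...(maxCompletionTokens
    ? { max_completion_tokens: maxCompletionTokens }
    : {}),
};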

View File

@@ -17,6 +17,10 @@ export interface IAiModelSettings {
   topP?: number;
   topK?: number;
   numCtx?: number;
+  /** Ollama: maximum number of tokens to predict (-1 = unlimited, generate until natural stop or context limit) */
+  numPredict?: number;
+  /** OpenAI-compatible: maximum number of completion tokens the model can generate */
+  maxCompletionTokens?: number;
 }
 
 export interface IAiModelCapabilities {
@@ -61,3 +65,26 @@ export interface IAiProvider {
 }
 
 export type AiProviderDocument = HydratedDocument<IAiProvider>;
+
+/**
+ * Drone-side model configuration that accepts the database IAiProvider type.
+ * This is the canonical interface used by the drone to configure AI API calls,
+ * carrying the model's stored inference settings (numCtx, numPredict, etc.)
+ * from the provider's cached model data through to the AI package.
+ */
+export interface IDroneModelConfig {
+  provider: IAiProvider | GadgetId;
+  modelId: string;
+  params: {
+    reasoning: boolean | "low" | "medium" | "high";
+    temperature: number;
+    topP: number;
+    topK: number;
+    /** Ollama: -1 = unlimited (generate until natural stop or context limit) */
+    numPredict: number;
+    /** Context window size (input + output tokens); Ollama passes as num_ctx */
+    numCtx: number;
+    /** OpenAI-compatible: maximum completion tokens the model can generate */
+    maxCompletionTokens: number;
+  };
+}