Fixes premature AI API response truncation by propagating inference
parameters through the entire probe → storage → runtime → API call chain.
Root cause: Ollama defaults num_predict to 128 tokens and num_ctx to
4096, silently truncating output and context. We never overrode these.
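For illustration, a minimal sketch of the fix at the Ollama call site, assuming
the ollama npm client; everything except the options fields (num_ctx,
num_predict) is a placeholder:

  import { Ollama } from 'ollama';

  // Hypothetical helper; the real call lives in the Ollama AiApi wrapper.
  async function chatWithLimits(client: Ollama, modelId: string, prompt: string) {
    return client.chat({
      model: modelId,
      messages: [{ role: 'user', content: prompt }],
      stream: true,
      options: {
        num_ctx: 131072,  // context window propagated from model settings
        num_predict: -1,  // -1 = generate until the model stops naturally
      },
    });
  }

Without an explicit options object, Ollama falls back to its own
num_predict/num_ctx defaults, which is what truncated long responses.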
Changes:
- IAiModelSettings: add numPredict, maxCompletionTokens fields
- IDroneModelConfig: moved from gadget-drone to @gadget/api (shared),
  expanded with numPredict, numCtx, maxCompletionTokens params
- IAiModelConfig.params: add numPredict, numCtx, maxCompletionTokens
- IAiModelProbeResult.settings: add numPredict, maxCompletionTokens
- AiModelSettingsSchema (Mongoose): add numPredict, maxCompletionTokens
- Ollama extractSettings(): extract num_predict from model parameters
- Ollama generate()/chat(): pass options: { num_ctx, num_predict }
- OpenAI: add max_completion_tokens to all three create() calls (see the sketch after this list)
- web-cli.ts onProviderProbe(): compute numPredict (-1 for Ollama)
  and maxCompletionTokens (contextWindow for OpenAI) during probe
- agent.ts main + subagent loops: read model settings from the provider's
  cached models, build IDroneModelConfig with stored params
- ai.ts: remove local IDroneModelConfig, import from @gadget/api
- chat-session.ts: add new params to title generation call
- Tests: update all fixtures with new params, all 19 tests pass
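For the OpenAI side referenced above, a hedged sketch using the openai Node
SDK; the model name and prompt are placeholders, and the real call reads its
limits from the stored model config:

  import OpenAI from 'openai';

  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

  const completion = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{ role: 'user', content: 'Test prompt' }],
    // Cap the response length explicitly instead of relying on defaults.
    max_completion_tokens: 16384,
  });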
Defaults when model settings are unavailable (see the sketch after this list):
- numPredict: -1 (Ollama unlimited - generate until natural stop)
- numCtx: 131072 (128k - covers most modern models)
- maxCompletionTokens: 16384 (16k - reasonable OpenAI default)
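A minimal sketch of how these fallbacks might be applied when a probe returned
no settings; the field names mirror the params listed above, but the helper
itself is hypothetical:

  // Hypothetical shape mirroring the shared inference params.
  interface InferenceParams {
    numPredict: number;
    numCtx: number;
    maxCompletionTokens: number;
  }

  // Fall back to the documented defaults when stored settings are missing.
  function withDefaults(stored?: Partial<InferenceParams>): InferenceParams {
    return {
      numPredict: stored?.numPredict ?? -1,
      numCtx: stored?.numCtx ?? 131072,
      maxCompletionTokens: stored?.maxCompletionTokens ?? 16384,
    };
  }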
import { describe, it, expect, vi, beforeEach } from 'vitest';
import type { ChatResponseStream } from 'ollama';

// Mock the Ollama client BEFORE importing the module
const mockOllamaClient = {
  chat: vi.fn(),
  generate: vi.fn(),
  list: vi.fn(),
  show: vi.fn(),
};

vi.mock('ollama', () => {
  return {
    Ollama: class MockOllama {
      constructor() {
        return mockOllamaClient;
      }
    },
  };
});

import { OllamaAiApi } from './ollama';

// Mock logger
const mockLogger = {
  debug: vi.fn(),
  info: vi.fn(),
  warn: vi.fn(),
  error: vi.fn(),
};

// Mock environment and provider
const mockEnv = {
  NODE_ENV: 'test',
  services: {
    google: {
      cse: {
        apiKey: 'test-key',
        engineId: 'test-engine',
      },
    },
  },
};

const mockProvider = {
  _id: 'test-provider',
  name: 'Test Ollama',
  sdk: 'ollama' as const,
  baseUrl: 'http://localhost:11434',
  apiKey: 'test-key',
};

describe('OllamaAiApi', () => {
  let api: OllamaAiApi;

  beforeEach(() => {
    vi.clearAllMocks();
    api = new OllamaAiApi(mockEnv as any, mockProvider as any, mockLogger as any);
  });

  describe('chat', () => {
    it('should handle normal response streaming', async () => {
      // Mock streaming response
      const mockStream = async function* () {
        yield {
          message: { content: 'Hello' },
          done: false,
        };
        yield {
          message: { content: ' world' },
          done: false,
        };
        yield {
          message: { content: '!' },
          done: true,
          done_reason: 'stop',
          total_duration: 100,
          prompt_eval_count: 10,
          eval_count: 3,
        };
      };

      mockOllamaClient.chat.mockResolvedValue(mockStream());

      const streamCallback = vi.fn();
      const response = await api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'Test prompt',
          context: [],
        },
        streamCallback,
      );

      // Verify stream callback was called for each chunk
      expect(streamCallback).toHaveBeenCalledTimes(3);
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: 'Hello',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: ' world',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: '!',
      });

      // Verify response
      expect(response.done).toBe(true);
      expect(response.doneReason).toBe('stop');
      expect(response.response).toBe('Hello world!');
    });

    it('should handle tool calls', async () => {
      const mockStream = async function* () {
        yield {
          message: {
            content: '',
            tool_calls: [
              {
                function: {
                  name: 'search_google',
                  arguments: { query: 'test query' },
                },
              },
            ],
          },
          done: false,
        };
        yield {
          message: { content: '' },
          done: true,
          done_reason: 'stop',
          total_duration: 100,
          prompt_eval_count: 10,
          eval_count: 1,
        };
      };

      mockOllamaClient.chat.mockResolvedValue(mockStream());

      const streamCallback = vi.fn();
      const response = await api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'Test prompt',
          context: [],
        },
        streamCallback,
      );

      // Verify tool calls are returned, not executed
      expect(response.toolCalls).toBeDefined();
      expect(response.toolCalls!.length).toBe(1);
      expect(response.toolCalls![0].function.name).toBe('search_google');

      // chat() should only be called once (no internal loop)
      expect(mockOllamaClient.chat).toHaveBeenCalledTimes(1);
    });

    it('should handle thinking content when reasoning is enabled', async () => {
      const mockStream = async function* () {
        yield {
          message: {
            thinking: 'Let me think about this...',
            content: '',
          },
          done: false,
        };
        yield {
          message: {
            thinking: ' The answer is',
            content: '',
          },
          done: false,
        };
        yield {
          message: { content: '42' },
          done: true,
          done_reason: 'stop',
          total_duration: 100,
          prompt_eval_count: 10,
          eval_count: 1,
        };
      };

      mockOllamaClient.chat.mockResolvedValue(mockStream());

      const streamCallback = vi.fn();
      const response = await api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: true, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'What is the answer?',
          context: [],
        },
        streamCallback,
      );

      expect(streamCallback).toHaveBeenCalledWith({
        type: 'thinking',
        data: 'Let me think about this...',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'thinking',
        data: ' The answer is',
      });
      expect(streamCallback).toHaveBeenCalledWith({
        type: 'response',
        data: '42',
      });
      expect(response.thinking).toBe('Let me think about this... The answer is');
    });

    it('should reject empty response on load failure', async () => {
      const mockStream = async function* () {
        yield {
          message: { content: '' },
          done: true,
          done_reason: 'load',
          total_duration: 5000,
          prompt_eval_count: 0,
          eval_count: 0,
        };
      };

      mockOllamaClient.chat.mockResolvedValue(mockStream());

      await expect(api.chat(
        {
          provider: mockProvider as any,
          modelId: 'test-model',
          params: { reasoning: false, temperature: 0.8, topP: 0.9, topK: 40, numPredict: -1, numCtx: 131072, maxCompletionTokens: 16384 },
        },
        {
          userPrompt: 'Test prompt',
          context: [],
        },
        vi.fn(),
      )).rejects.toThrow('Provider returned an empty chat response');
    });
  });

  describe('probeModel', () => {
    it('should detect thinking capability from "thinking" (Ollama convention)', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion', 'vision', 'tools', 'thinking'],
        details: { family: 'gemma4' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('gemma4:e4b');
      expect(result.capabilities.hasThinking).toBe(true);
      expect(result.capabilities.canCallTools).toBe(true);
      expect(result.capabilities.hasVision).toBe(true);
    });

    it('should detect thinking capability from "reasoning" (OpenAI convention)', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion', 'reasoning'],
        details: { family: 'deepseek' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('deepseek-r1');
      expect(result.capabilities.hasThinking).toBe(true);
    });

    it('should set hasThinking false when neither thinking nor reasoning in capabilities', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion'],
        details: { family: 'llama' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('llama3.2');
      expect(result.capabilities.hasThinking).toBe(false);
    });

    it('should detect vision, tools, and embedding capabilities', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion', 'vision', 'tools', 'embeddings'],
        details: { family: 'llama' },
        model_info: {},
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('some-model');
      expect(result.capabilities.hasVision).toBe(true);
      expect(result.capabilities.canCallTools).toBe(true);
      expect(result.capabilities.hasEmbedding).toBe(true);
    });

    it('should extract settings from Modelfile parameters', async () => {
      mockOllamaClient.show.mockResolvedValue({
        capabilities: ['completion'],
        details: { family: 'llama' },
        model_info: {},
        parameters: 'temperature 0.7\ntop_k 40\ntop_p 0.9\nnum_ctx 4096',
        modified_at: '2026-04-04T06:20:40.211Z',
      });

      const result = await api.probeModel('llama3.2');
      expect(result.settings).toBeDefined();
      expect(result.settings!.temperature).toBe(0.7);
      expect(result.settings!.topK).toBe(40);
      expect(result.settings!.topP).toBe(0.9);
      expect(result.settings!.numCtx).toBe(4096);
    });
  });
});