Skip to content

Commit e55d35c

Browse files
committed
fix: parse and respect user-configured num_ctx in Ollama models
- Parse num_ctx from the parameters field in the Ollama API response
- Use the configured num_ctx instead of the default context_length when available
- Remove the num_ctx override in native-ollama.ts to respect the model configuration
- Add tests to verify num_ctx parsing and usage

Fixes #7159
1 parent 8e4c0ae commit e55d35c

File tree

4 files changed

+111
-5
lines changed

4 files changed

+111
-5
lines changed

src/api/providers/__tests__/native-ollama.spec.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,64 @@ describe("NativeOllamaHandler", () => {
120120
})
121121
expect(result).toBe("This is the response")
122122
})
123+
124+
it("should not override num_ctx in options", async () => {
125+
mockChat.mockResolvedValue({
126+
message: { content: "Response" },
127+
})
128+
129+
await handler.completePrompt("Test prompt")
130+
131+
// Verify that num_ctx is NOT in the options
132+
expect(mockChat).toHaveBeenCalledWith({
133+
model: "llama2",
134+
messages: [{ role: "user", content: "Test prompt" }],
135+
stream: false,
136+
options: {
137+
temperature: 0,
138+
// num_ctx should NOT be present here
139+
},
140+
})
141+
142+
// Explicitly check that num_ctx is not in the options
143+
const callArgs = mockChat.mock.calls[0][0]
144+
expect(callArgs.options).not.toHaveProperty("num_ctx")
145+
})
146+
})
147+
148+
describe("createMessage num_ctx handling", () => {
149+
it("should not set num_ctx in options for createMessage", async () => {
150+
// Mock the chat response
151+
mockChat.mockImplementation(async function* () {
152+
yield {
153+
message: { content: "Test" },
154+
eval_count: 1,
155+
prompt_eval_count: 1,
156+
}
157+
})
158+
159+
const stream = handler.createMessage("System", [{ role: "user" as const, content: "Test" }])
160+
161+
// Consume the stream
162+
for await (const _ of stream) {
163+
// Just consume
164+
}
165+
166+
// Verify the call was made without num_ctx
167+
expect(mockChat).toHaveBeenCalledWith({
168+
model: "llama2",
169+
messages: expect.any(Array),
170+
stream: true,
171+
options: {
172+
temperature: 0,
173+
// num_ctx should NOT be present
174+
},
175+
})
176+
177+
// Explicitly verify num_ctx is not in options
178+
const callArgs = mockChat.mock.calls[0][0]
179+
expect(callArgs.options).not.toHaveProperty("num_ctx")
180+
})
123181
})
124182

125183
describe("error handling", () => {

src/api/providers/fetchers/__tests__/ollama.test.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,40 @@ describe("Ollama Fetcher", () => {
3232
})
3333
})
3434

35+
it("should parse num_ctx from parameters field when present", () => {
36+
const modelDataWithNumCtx = {
37+
...ollamaModelsData["qwen3-2to16:latest"],
38+
parameters: "num_ctx 16384\nstop_token <eos>",
39+
model_info: {
40+
"ollama.context_length": 40960,
41+
},
42+
}
43+
44+
const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)
45+
46+
// Should use the configured num_ctx (16384) instead of the default context_length (40960)
47+
expect(parsedModel.contextWindow).toBe(16384)
48+
expect(parsedModel.maxTokens).toBe(16384)
49+
expect(parsedModel.description).toBe("Family: qwen3, Context: 16384, Size: 32.8B")
50+
})
51+
52+
it("should use default context_length when num_ctx is not in parameters", () => {
53+
const modelDataWithoutNumCtx = {
54+
...ollamaModelsData["qwen3-2to16:latest"],
55+
parameters: "stop_token <eos>", // No num_ctx here
56+
model_info: {
57+
"ollama.context_length": 40960,
58+
},
59+
}
60+
61+
const parsedModel = parseOllamaModel(modelDataWithoutNumCtx as any)
62+
63+
// Should use the default context_length (40960)
64+
expect(parsedModel.contextWindow).toBe(40960)
65+
expect(parsedModel.maxTokens).toBe(40960)
66+
expect(parsedModel.description).toBe("Family: qwen3, Context: 40960, Size: 32.8B")
67+
})
68+
3569
it("should handle models with null families field", () => {
3670
const modelDataWithNullFamilies = {
3771
...ollamaModelsData["qwen3-2to16:latest"],

src/api/providers/fetchers/ollama.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,30 @@ type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>
3939

4040
export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
4141
const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
42-
const contextWindow =
42+
const defaultContextWindow =
4343
contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined
4444

45+
// Parse the parameters field to check for user-configured num_ctx
46+
let configuredNumCtx: number | undefined
47+
if (rawModel.parameters) {
48+
// The parameters field contains modelfile parameters as a string
49+
// Look for num_ctx setting in the format "num_ctx <value>"
50+
const numCtxMatch = rawModel.parameters.match(/num_ctx\s+(\d+)/i)
51+
if (numCtxMatch && numCtxMatch[1]) {
52+
configuredNumCtx = parseInt(numCtxMatch[1], 10)
53+
}
54+
}
55+
56+
// Use the configured num_ctx if available, otherwise fall back to the default
57+
const actualContextWindow = configuredNumCtx || defaultContextWindow || ollamaDefaultModelInfo.contextWindow
58+
4559
const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
46-
description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
47-
contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
60+
description: `Family: ${rawModel.details.family}, Context: ${actualContextWindow}, Size: ${rawModel.details.parameter_size}`,
61+
contextWindow: actualContextWindow,
4862
supportsPromptCache: true,
4963
supportsImages: rawModel.capabilities?.includes("vision"),
5064
supportsComputerUse: false,
51-
maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
65+
maxTokens: actualContextWindow,
5266
})
5367

5468
return modelInfo

src/api/providers/native-ollama.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
181181
messages: ollamaMessages,
182182
stream: true,
183183
options: {
184-
num_ctx: modelInfo.contextWindow,
184+
// Don't override num_ctx - let Ollama use the model's configured value
185185
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
186186
},
187187
})

0 commit comments

Comments (0)