Skip to content

Commit e55d35c

Browse files
committed
fix: parse and respect user-configured num_ctx in Ollama models
- Parse num_ctx from the parameters field in the Ollama API response
- Use the configured num_ctx instead of the default context_length when available
- Remove the num_ctx override in native-ollama.ts to respect the model configuration
- Add tests to verify num_ctx parsing and usage

Fixes #7159
1 parent 8e4c0ae commit e55d35c

File tree

4 files changed

+111
-5
lines changed

4 files changed

+111
-5
lines changed

src/api/providers/__tests__/native-ollama.spec.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,64 @@ describe("NativeOllamaHandler", () => {
120120
})
121121
expect(result).toBe("This is the response")
122122
})
123+
124+
it("should not override num_ctx in options", async () => {
125+
mockChat.mockResolvedValue({
126+
message: { content: "Response" },
127+
})
128+
129+
await handler.completePrompt("Test prompt")
130+
131+
// Verify that num_ctx is NOT in the options
132+
expect(mockChat).toHaveBeenCalledWith({
133+
model: "llama2",
134+
messages: [{ role: "user", content: "Test prompt" }],
135+
stream: false,
136+
options: {
137+
temperature: 0,
138+
// num_ctx should NOT be present here
139+
},
140+
})
141+
142+
// Explicitly check that num_ctx is not in the options
143+
const callArgs = mockChat.mock.calls[0][0]
144+
expect(callArgs.options).not.toHaveProperty("num_ctx")
145+
})
146+
})
147+
148+
describe("createMessage num_ctx handling", () => {
149+
it("should not set num_ctx in options for createMessage", async () => {
150+
// Mock the chat response
151+
mockChat.mockImplementation(async function* () {
152+
yield {
153+
message: { content: "Test" },
154+
eval_count: 1,
155+
prompt_eval_count: 1,
156+
}
157+
})
158+
159+
const stream = handler.createMessage("System", [{ role: "user" as const, content: "Test" }])
160+
161+
// Consume the stream
162+
for await (const _ of stream) {
163+
// Just consume
164+
}
165+
166+
// Verify the call was made without num_ctx
167+
expect(mockChat).toHaveBeenCalledWith({
168+
model: "llama2",
169+
messages: expect.any(Array),
170+
stream: true,
171+
options: {
172+
temperature: 0,
173+
// num_ctx should NOT be present
174+
},
175+
})
176+
177+
// Explicitly verify num_ctx is not in options
178+
const callArgs = mockChat.mock.calls[0][0]
179+
expect(callArgs.options).not.toHaveProperty("num_ctx")
180+
})
123181
})
124182

125183
describe("error handling", () => {

src/api/providers/fetchers/__tests__/ollama.test.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,40 @@ describe("Ollama Fetcher", () => {
3232
})
3333
})
3434

35+
it("should parse num_ctx from parameters field when present", () => {
36+
const modelDataWithNumCtx = {
37+
...ollamaModelsData["qwen3-2to16:latest"],
38+
parameters: "num_ctx 16384\nstop_token <eos>",
39+
model_info: {
40+
"ollama.context_length": 40960,
41+
},
42+
}
43+
44+
const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)
45+
46+
// Should use the configured num_ctx (16384) instead of the default context_length (40960)
47+
expect(parsedModel.contextWindow).toBe(16384)
48+
expect(parsedModel.maxTokens).toBe(16384)
49+
expect(parsedModel.description).toBe("Family: qwen3, Context: 16384, Size: 32.8B")
50+
})
51+
52+
it("should use default context_length when num_ctx is not in parameters", () => {
53+
const modelDataWithoutNumCtx = {
54+
...ollamaModelsData["qwen3-2to16:latest"],
55+
parameters: "stop_token <eos>", // No num_ctx here
56+
model_info: {
57+
"ollama.context_length": 40960,
58+
},
59+
}
60+
61+
const parsedModel = parseOllamaModel(modelDataWithoutNumCtx as any)
62+
63+
// Should use the default context_length (40960)
64+
expect(parsedModel.contextWindow).toBe(40960)
65+
expect(parsedModel.maxTokens).toBe(40960)
66+
expect(parsedModel.description).toBe("Family: qwen3, Context: 40960, Size: 32.8B")
67+
})
68+
3569
it("should handle models with null families field", () => {
3670
const modelDataWithNullFamilies = {
3771
...ollamaModelsData["qwen3-2to16:latest"],

src/api/providers/fetchers/ollama.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,30 @@ type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>
3939

4040
export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
4141
const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
42-
const contextWindow =
42+
const defaultContextWindow =
4343
contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined
4444

45+
// Parse the parameters field to check for user-configured num_ctx
46+
let configuredNumCtx: number | undefined
47+
if (rawModel.parameters) {
48+
// The parameters field contains modelfile parameters as a string
49+
// Look for num_ctx setting in the format "num_ctx <value>"
50+
const numCtxMatch = rawModel.parameters.match(/num_ctx\s+(\d+)/i)
51+
if (numCtxMatch && numCtxMatch[1]) {
52+
configuredNumCtx = parseInt(numCtxMatch[1], 10)
53+
}
54+
}
55+
56+
// Use the configured num_ctx if available, otherwise fall back to the default
57+
const actualContextWindow = configuredNumCtx || defaultContextWindow || ollamaDefaultModelInfo.contextWindow
58+
4559
const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
46-
description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
47-
contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
60+
description: `Family: ${rawModel.details.family}, Context: ${actualContextWindow}, Size: ${rawModel.details.parameter_size}`,
61+
contextWindow: actualContextWindow,
4862
supportsPromptCache: true,
4963
supportsImages: rawModel.capabilities?.includes("vision"),
5064
supportsComputerUse: false,
51-
maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
65+
maxTokens: actualContextWindow,
5266
})
5367

5468
return modelInfo

src/api/providers/native-ollama.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
181181
messages: ollamaMessages,
182182
stream: true,
183183
options: {
184-
num_ctx: modelInfo.contextWindow,
184+
// Don't override num_ctx - let Ollama use the model's configured value
185185
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
186186
},
187187
})

0 commit comments

Comments (0)