fix: prevent extension freeze when model calls unknown native tools

daniel-lxs · daniel-lxs · commit 9fe4dd28cf2f · 2025-12-04T14:39:02.000-05:00
When using the native tool protocol, if a model attempts to call a tool that doesn't exist (like 'edit_file'), the extension would freeze in a loop instead of returning an error to the model.

Root causes:
1. During streaming, partial blocks of unknown tools would reach the default case in the switch statement, showing an error on every streaming chunk
2. validateToolUse was being called for partial blocks, causing repeated validation errors

Changes:
- Add isValidToolName() function to validateToolUse.ts to explicitly check for unknown tools
- Only run tool validation for complete (non-partial) blocks in presentAssistantMessage
- Skip partial blocks in the default case to prevent error loops during streaming
- Fetch state early in tool_use case so mode/customModes are available throughout

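This ensures that when an invalid tool is called, the extension waits for the complete tool call before showing a single error and sending a tool_result back to the model. For the native protocol, that tool_result is pushed directly without setting didAlreadyUseTool, so the stream is not interrupted with "Response interrupted by tool use result".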
diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts
@@ -333,7 +333,11 @@ export async function presentAssistantMessage(cline: Task) {
 			await cline.say("text", content, undefined, block.partial)
 			break
 		}
-		case "tool_use":
+		case "tool_use": {
+			// Fetch state early so it's available for toolDescription and validation
+			const state = await cline.providerRef.deref()?.getState()
+			const { mode, customModes, experiments: stateExperiments, apiConfiguration } = state ?? {}
+
 			const toolDescription = (): string => {
 				switch (block.name) {
 					case "execute_command":
@@ -675,30 +679,46 @@ export async function presentAssistantMessage(cline: Task) {
 				TelemetryService.instance.captureToolUsage(cline.taskId, block.name, toolProtocol)
 			}
 
-			// Validate tool use before execution.
-			const {
-				mode,
-				customModes,
-				experiments: stateExperiments,
-				apiConfiguration,
-			} = (await cline.providerRef.deref()?.getState()) ?? {}
-			const modelInfo = cline.api.getModel()
-			const includedTools = modelInfo?.info?.includedTools
-
-			try {
-				validateToolUse(
-					block.name as ToolName,
-					mode ?? defaultModeSlug,
-					customModes ?? [],
-					{ apply_diff: cline.diffEnabled },
-					block.params,
-					stateExperiments,
-					includedTools,
-				)
-			} catch (error) {
-				cline.consecutiveMistakeCount++
-				pushToolResult(formatResponse.toolError(error.message, toolProtocol))
-				break
+			// Validate tool use before execution - ONLY for complete (non-partial) blocks.
+			// Validating partial blocks would cause validation errors to be thrown repeatedly
+			// during streaming, pushing multiple tool_results for the same tool_use_id and
+			// potentially causing the stream to appear frozen.
+			if (!block.partial) {
+				const modelInfo = cline.api.getModel()
+				const includedTools = modelInfo?.info?.includedTools
+
+				try {
+					validateToolUse(
+						block.name as ToolName,
+						mode ?? defaultModeSlug,
+						customModes ?? [],
+						{ apply_diff: cline.diffEnabled },
+						block.params,
+						stateExperiments,
+						includedTools,
+					)
+				} catch (error) {
+					cline.consecutiveMistakeCount++
+					// For validation errors (unknown tool, tool not allowed for mode), we need to:
+					// 1. Send a tool_result with the error (required for native protocol)
+					// 2. NOT set didAlreadyUseTool = true (the tool was never executed, just failed validation)
+					// This prevents the stream from being interrupted with "Response interrupted by tool use result"
+					// which would cause the extension to appear to hang
+					const errorContent = formatResponse.toolError(error.message, toolProtocol)
+					if (toolProtocol === TOOL_PROTOCOL.NATIVE && toolCallId) {
+						// For native protocol, push tool_result directly without setting didAlreadyUseTool
+						cline.userMessageContent.push({
+							type: "tool_result",
+							tool_use_id: toolCallId,
+							content: typeof errorContent === "string" ? errorContent : "(validation error)",
+							is_error: true,
+						} as Anthropic.ToolResultBlockParam)
+					} else {
+						// For XML protocol, use the standard pushToolResult
+						pushToolResult(errorContent)
+					}
+					break
+				}
 			}
 
 			// Check for identical consecutive tool calls.
@@ -998,16 +1018,37 @@ export async function presentAssistantMessage(cline: Task) {
 				default: {
 					// Handle unknown/invalid tool names
 					// This is critical for native protocol where every tool_use MUST have a tool_result
+					// Note: This case should rarely be reached since validateToolUse now checks for unknown tools
+
+					// CRITICAL: Don't process partial blocks for unknown tools - just let them stream in.
+					// If we try to show errors for partial blocks, we'd show the error on every streaming chunk,
+					// creating a loop that appears to freeze the extension. Only handle complete blocks.
+					if (block.partial) {
+						break
+					}
+
 					const errorMessage = `Unknown tool "${block.name}". This tool does not exist. Please use one of the available tools.`
 					cline.consecutiveMistakeCount++
 					cline.recordToolError(block.name as ToolName, errorMessage)
 					await cline.say("error", `Roo tried to use an unknown tool: "${block.name}". Retrying...`)
-					pushToolResult(formatResponse.toolError(errorMessage, toolProtocol))
+					// Push tool_result directly for native protocol WITHOUT setting didAlreadyUseTool
+					// This prevents the stream from being interrupted with "Response interrupted by tool use result"
+					if (toolProtocol === TOOL_PROTOCOL.NATIVE && toolCallId) {
+						cline.userMessageContent.push({
+							type: "tool_result",
+							tool_use_id: toolCallId,
+							content: formatResponse.toolError(errorMessage, toolProtocol),
+							is_error: true,
+						} as Anthropic.ToolResultBlockParam)
+					} else {
+						pushToolResult(formatResponse.toolError(errorMessage, toolProtocol))
+					}
 					break
 				}
 			}
 
 			break
+		}
 	}
 
 	// Seeing out of bounds is fine, it means that the next too call is being
diff --git a/src/core/tools/__tests__/validateToolUse.spec.ts b/src/core/tools/__tests__/validateToolUse.spec.ts
@@ -167,9 +167,17 @@ describe("mode-validator", () => {
 	})
 
 	describe("validateToolUse", () => {
-		it("throws error for disallowed tools in architect mode", () => {
+		it("throws error for unknown/invalid tools", () => {
+			// Unknown tools should throw with a specific "Unknown tool" error
 			expect(() => validateToolUse("unknown_tool" as any, "architect", [])).toThrow(
-				'Tool "unknown_tool" is not allowed in architect mode.',
+				'Unknown tool "unknown_tool". This tool does not exist.',
+			)
+		})
+
+		it("throws error for disallowed tools in architect mode", () => {
+			// execute_command is a valid tool but not allowed in architect mode
+			expect(() => validateToolUse("execute_command", "architect", [])).toThrow(
+				'Tool "execute_command" is not allowed in architect mode.',
 			)
 		})
 
diff --git a/src/core/tools/validateToolUse.ts b/src/core/tools/validateToolUse.ts
@@ -1,7 +1,27 @@
 import type { ToolName, ModeConfig } from "@roo-code/types"
+import { toolNames as validToolNames } from "@roo-code/types"
 
 import { Mode, isToolAllowedForMode } from "../../shared/modes"
 
+/**
+ * Type guard: true when `toolName` is listed in `toolNames` or starts with "mcp_".
+ * Note: This does NOT check if the tool is allowed for a specific mode, only that
+ * the name is recognized; "mcp_"-prefixed names are accepted without any lookup here.
+ */
+export function isValidToolName(toolName: string): toolName is ToolName {
+	// Static tools: exact match against the toolNames list imported from @roo-code/types
+	if ((validToolNames as readonly string[]).includes(toolName)) {
+		return true
+	}
+
+	// Dynamic MCP tools (mcp_serverName_toolName format): accepted on the "mcp_" prefix alone
+	if (toolName.startsWith("mcp_")) {
+		return true
+	}
+
+	return false
+}
+
 export function validateToolUse(
 	toolName: ToolName,
 	mode: Mode,
@@ -11,6 +31,15 @@ export function validateToolUse(
 	experiments?: Record<string, boolean>,
 	includedTools?: string[],
 ): void {
+	// First, check if the tool name is actually a valid/known tool
+	// This catches completely invalid tool names like "edit_file" that don't exist
+	if (!isValidToolName(toolName)) {
+		throw new Error(
+			`Unknown tool "${toolName}". This tool does not exist. Please use one of the available tools: ${validToolNames.join(", ")}.`,
+		)
+	}
+
+	// Then check if the tool is allowed for the current mode
 	if (
 		!isToolAllowedForMode(
 			toolName,