Skip to content

ACP mode does not support multimodal (images) #5832

@Pdash-exceeds

Description

@Pdash-exceeds

I am running goose in ACP mode with Haiku 4.5. When I send base64-encoded images in the prompt, goose does not handle them.

I am not sure how to test goose's image handling in every mode, but I fixed it locally for both the Anthropic and OpenAI API routes. It works for me (tested only in ACP mode). The patch is attached.

  • OS & Arch: macOS 15.4.1 (24E263)
  • Interface: ACP
  • Version: Built locally off main 0f8d9a7
  • Extensions enabled: Defaults
  • Provider & Model: Haiku 4.5

Patch:

From d56f0f0d71159a4ad86c086c9a213f953df82d20 Mon Sep 17 00:00:00 2001
From: Pradeepta Dash <[email protected]>
Date: Thu, 20 Nov 2025 19:28:55 -0800
Subject: [PATCH] Enhance message formatting for image content in Anthropic and
 OpenAI providers. Anthropic now supports image content, adding it to the
 message array. OpenAI's formatting logic is updated to build a content array
 instead of overwriting, ensuring proper handling of mixed content types.

---
 .../goose/src/providers/formats/anthropic.rs  |  6 +++-
 crates/goose/src/providers/formats/openai.rs  | 31 ++++++++++++++-----
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/crates/goose/src/providers/formats/anthropic.rs b/crates/goose/src/providers/formats/anthropic.rs
index e82c92235a6..78fba446492 100644
--- a/crates/goose/src/providers/formats/anthropic.rs
+++ b/crates/goose/src/providers/formats/anthropic.rs
@@ -2,6 +2,7 @@ use crate::conversation::message::{Message, MessageContent};
 use crate::model::ModelConfig;
 use crate::providers::base::Usage;
 use crate::providers::errors::ProviderError;
+use crate::providers::utils::{convert_image, ImageFormat};
 use anyhow::{anyhow, Result};
 use rmcp::model::{object, CallToolRequestParam, ErrorCode, ErrorData, JsonObject, Role, Tool};
 use rmcp::object as json_object;
@@ -106,7 +107,10 @@ pub fn format_messages(messages: &[Message]) -> Vec<Value> {
                         DATA_FIELD: redacted.data
                     }));
                 }
-                MessageContent::Image(_) => continue, // Anthropic doesn't support image content yet
+                MessageContent::Image(image) => {
+                    // Anthropic supports images - add to content array
+                    content.push(convert_image(image, &ImageFormat::Anthropic));
+                }
                 MessageContent::FrontendToolRequest(tool_request) => {
                     if let Ok(tool_call) = &tool_request.tool_call {
                         content.push(json!({
diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs
index b553260b568..67377fd65ce 100644
--- a/crates/goose/src/providers/formats/openai.rs
+++ b/crates/goose/src/providers/formats/openai.rs
@@ -63,6 +63,7 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
         });
 
         let mut output = Vec::new();
+        let mut content_array = Vec::new(); // Build content array instead of overwriting
 
         for content in &message.content {
             match content {
@@ -72,16 +73,14 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
                         if let Some(image_path) = detect_image_path(&text.text) {
                             // Try to load and convert the image
                             if let Ok(image) = load_image_file(image_path) {
-                                converted["content"] = json!([
-                                    {"type": "text", "text": text.text},
-                                    convert_image(&image, image_format)
-                                ]);
+                                content_array.push(json!({"type": "text", "text": text.text}));
+                                content_array.push(convert_image(&image, image_format));
                             } else {
                                 // If image loading fails, just use the text
-                                converted["content"] = json!(text.text);
+                                content_array.push(json!({"type": "text", "text": text.text}));
                             }
                         } else {
-                            converted["content"] = json!(text.text);
+                            content_array.push(json!({"type": "text", "text": text.text}));
                         }
                     }
                 }
@@ -205,8 +204,8 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
                     // Skip tool confirmation requests
                 }
                 MessageContent::Image(image) => {
-                    // Handle direct image content
-                    converted["content"] = json!([convert_image(image, image_format)]);
+                    // Add image to content array instead of overwriting
+                    content_array.push(convert_image(image, image_format));
                 }
                 MessageContent::FrontendToolRequest(request) => match &request.tool_call {
                     Ok(tool_call) => {
@@ -244,6 +243,22 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
             }
         }
 
+        // Set content once at the end - handle both array and string formats
+        if !content_array.is_empty() {
+            // If we have multiple items or any images, use array format
+            converted["content"] = json!(content_array);
+        } else if converted.get("content").is_none() {
+            // If no content was added, set empty array
+            converted["content"] = json!([]);
+        } else if let Some(existing_content) = converted.get("content") {
+            // If content was set elsewhere (e.g., from image path detection in text),
+            // ensure it's in the right format
+            if existing_content.is_string() && !content_array.is_empty() {
+                // This shouldn't happen with the new logic, but handle it just in case
+                converted["content"] = json!(content_array);
+            }
+        }
+
         if converted.get("content").is_some() || converted.get("tool_calls").is_some() {
             output.insert(0, converted);
         }
-- 
2.39.5 (Apple Git-154)


Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions