-
Notifications
You must be signed in to change notification settings - Fork 3.2k
ACP mode does not support multimodal (images) #5832
Copy link
Copy link
Closed
Labels
Description
I am running goose in ACP mode with Haiku 4.5. When I send images in the prompt (b64 encoded), those are not handled by goose.
I am not sure how to test goose for images in every modality. But I fixed it locally for both the Anthropic and OpenAI API routes. It works for me (tested only in ACP mode). A patch is attached.
- OS & Arch: macOS 15.4.1 (24E263)
- Interface: ACP
- Version: Built locally off main 0f8d9a7
- Extensions enabled: Defaults
- Provider & Model: Haiku 4.5
Patch:
From d56f0f0d71159a4ad86c086c9a213f953df82d20 Mon Sep 17 00:00:00 2001
From: Pradeepta Dash <[email protected]>
Date: Thu, 20 Nov 2025 19:28:55 -0800
Subject: [PATCH] Enhance message formatting for image content in Anthropic and
OpenAI providers. Anthropic now supports image content, adding it to the
message array. OpenAI's formatting logic is updated to build a content array
instead of overwriting, ensuring proper handling of mixed content types.
---
.../goose/src/providers/formats/anthropic.rs | 6 +++-
crates/goose/src/providers/formats/openai.rs | 31 ++++++++++++++-----
2 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/crates/goose/src/providers/formats/anthropic.rs b/crates/goose/src/providers/formats/anthropic.rs
index e82c92235a6..78fba446492 100644
--- a/crates/goose/src/providers/formats/anthropic.rs
+++ b/crates/goose/src/providers/formats/anthropic.rs
@@ -2,6 +2,7 @@ use crate::conversation::message::{Message, MessageContent};
use crate::model::ModelConfig;
use crate::providers::base::Usage;
use crate::providers::errors::ProviderError;
+use crate::providers::utils::{convert_image, ImageFormat};
use anyhow::{anyhow, Result};
use rmcp::model::{object, CallToolRequestParam, ErrorCode, ErrorData, JsonObject, Role, Tool};
use rmcp::object as json_object;
@@ -106,7 +107,10 @@ pub fn format_messages(messages: &[Message]) -> Vec<Value> {
DATA_FIELD: redacted.data
}));
}
- MessageContent::Image(_) => continue, // Anthropic doesn't support image content yet
+ MessageContent::Image(image) => {
+ // Anthropic supports images - add to content array
+ content.push(convert_image(image, &ImageFormat::Anthropic));
+ }
MessageContent::FrontendToolRequest(tool_request) => {
if let Ok(tool_call) = &tool_request.tool_call {
content.push(json!({
diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs
index b553260b568..67377fd65ce 100644
--- a/crates/goose/src/providers/formats/openai.rs
+++ b/crates/goose/src/providers/formats/openai.rs
@@ -63,6 +63,7 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
});
let mut output = Vec::new();
+ let mut content_array = Vec::new(); // Build content array instead of overwriting
for content in &message.content {
match content {
@@ -72,16 +73,14 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
if let Some(image_path) = detect_image_path(&text.text) {
// Try to load and convert the image
if let Ok(image) = load_image_file(image_path) {
- converted["content"] = json!([
- {"type": "text", "text": text.text},
- convert_image(&image, image_format)
- ]);
+ content_array.push(json!({"type": "text", "text": text.text}));
+ content_array.push(convert_image(&image, image_format));
} else {
// If image loading fails, just use the text
- converted["content"] = json!(text.text);
+ content_array.push(json!({"type": "text", "text": text.text}));
}
} else {
- converted["content"] = json!(text.text);
+ content_array.push(json!({"type": "text", "text": text.text}));
}
}
}
@@ -205,8 +204,8 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
// Skip tool confirmation requests
}
MessageContent::Image(image) => {
- // Handle direct image content
- converted["content"] = json!([convert_image(image, image_format)]);
+ // Add image to content array instead of overwriting
+ content_array.push(convert_image(image, image_format));
}
MessageContent::FrontendToolRequest(request) => match &request.tool_call {
Ok(tool_call) => {
@@ -244,6 +243,22 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<
}
}
+ // Set content once at the end - handle both array and string formats
+ if !content_array.is_empty() {
+ // If we have multiple items or any images, use array format
+ converted["content"] = json!(content_array);
+ } else if converted.get("content").is_none() {
+ // If no content was added, set empty array
+ converted["content"] = json!([]);
+ } else if let Some(existing_content) = converted.get("content") {
+ // If content was set elsewhere (e.g., from image path detection in text),
+ // ensure it's in the right format
+ if existing_content.is_string() && !content_array.is_empty() {
+ // This shouldn't happen with the new logic, but handle it just in case
+ converted["content"] = json!(content_array);
+ }
+ }
+
if converted.get("content").is_some() || converted.get("tool_calls").is_some() {
output.insert(0, converted);
}
--
2.39.5 (Apple Git-154)
Reactions are currently unavailable