Skip to content

Commit 6345af9

Browse files
authored
fix(ai): Truncate list-based message content in AI monitoring (#5631)
1 parent 01346a9 commit 6345af9

File tree

2 files changed: +116 −2 lines changed

2 files changed

+116
-2
lines changed

sentry_sdk/ai/utils.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -550,10 +550,25 @@ def _truncate_single_message_content_if_present(
550 550
return message
551 551
content = message["content"]
552 552

553-
if not isinstance(content, str) or len(content) <= max_chars:
553+
if isinstance(content, str):
554+
if len(content) <= max_chars:
555+
return message
556+
message["content"] = content[:max_chars] + "..."
557+
return message
558+
559+
if isinstance(content, list):
560+
remaining = max_chars
561+
for item in content:
562+
if isinstance(item, dict) and "text" in item:
563+
text = item["text"]
564+
if isinstance(text, str):
565+
if len(text) > remaining:
566+
item["text"] = text[:remaining] + "..."
567+
remaining = 0
568+
else:
569+
remaining -= len(text)
554 570
return message
555 571

556-
message["content"] = content[:max_chars] + "..."
557 572
return message
558 573

559 574

tests/test_ai_monitoring.py

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,105 @@ def test_single_message_truncation(self):
312 312
assert user_msgs[0]["content"].endswith("...")
313 313
assert len(user_msgs[0]["content"]) < len(large_content)
314 314

315+
def test_single_message_truncation_list_content_exceeds_limit(self):
316+
"""Test that list-based content (e.g. pydantic-ai multimodal format) is truncated."""
317+
large_text = "A" * 200_000
318+
319+
messages = [
320+
{
321+
"role": "user",
322+
"content": [
323+
{"type": "text", "text": large_text},
324+
],
325+
},
326+
]
327+
328+
result, _ = truncate_messages_by_size(messages)
329+
330+
text_part = result[0]["content"][0]
331+
assert text_part["text"].endswith("...")
332+
assert len(text_part["text"]) == MAX_SINGLE_MESSAGE_CONTENT_CHARS + 3
333+
334+
def test_single_message_truncation_list_content_under_limit(self):
335+
"""Test that small text parts are preserved when non-text parts push size over byte limit."""
336+
short_text = "Hello world"
337+
large_data_url = "data:image/png;base64," + "A" * 200_000
338+
339+
messages = [
340+
{
341+
"role": "user",
342+
"content": [
343+
{"type": "text", "text": short_text},
344+
{"type": "image_url", "image_url": {"url": large_data_url}},
345+
],
346+
},
347+
]
348+
349+
result, _ = truncate_messages_by_size(messages)
350+
351+
text_part = result[0]["content"][0]
352+
assert text_part["text"] == short_text
353+
354+
def test_single_message_truncation_list_content_mixed_parts(self):
355+
"""Test truncation with mixed content types (text + non-text parts)."""
356+
max_chars = 50
357+
large_data_url = "data:image/png;base64," + "X" * 200_000
358+
359+
messages = [
360+
{
361+
"role": "user",
362+
"content": [
363+
{"type": "text", "text": "A" * 30},
364+
{"type": "image_url", "image_url": {"url": large_data_url}},
365+
{"type": "text", "text": "B" * 30},
366+
],
367+
},
368+
]
369+
370+
result, _ = truncate_messages_by_size(
371+
messages, max_single_message_chars=max_chars
372+
)
373+
374+
parts = result[0]["content"]
375+
# First text part uses 30 chars of the 50 budget
376+
assert parts[0]["text"] == "A" * 30
377+
# Image part is unchanged
378+
assert parts[1]["type"] == "image_url"
379+
# Second text part is truncated to remaining 20 chars
380+
assert parts[2]["text"] == "B" * 20 + "..."
381+
382+
def test_single_message_truncation_list_content_multiple_text_parts(self):
383+
"""Test that budget is distributed across multiple text parts."""
384+
max_chars = 10
385+
# Two large text parts that together exceed 128KB byte limit
386+
messages = [
387+
{
388+
"role": "user",
389+
"content": [
390+
{"type": "text", "text": "A" * 100_000},
391+
{"type": "text", "text": "B" * 100_000},
392+
],
393+
},
394+
]
395+
396+
result, _ = truncate_messages_by_size(
397+
messages, max_single_message_chars=max_chars
398+
)
399+
400+
parts = result[0]["content"]
401+
# First part is truncated to the full budget
402+
assert parts[0]["text"] == "A" * 10 + "..."
403+
# Second part gets truncated to 0 chars + ellipsis
404+
assert parts[1]["text"] == "..."
405+
406+
@pytest.mark.parametrize("content", [None, 42, 3.14, True])
407+
def test_single_message_truncation_non_str_non_list_content(self, content):
408+
messages = [{"role": "user", "content": content}]
409+
410+
result, _ = truncate_messages_by_size(messages)
411+
412+
assert result[0]["content"] is content
413+
315 414

316 415
class TestTruncateAndAnnotateMessages:
317 416
def test_only_keeps_last_message(self, sample_messages):

0 commit comments

Comments
 (0)