fix: merging consecutive user messages corrupts multimodal (image) content (#1277)

* fix: preserve multimodal content blocks when merging consecutive user messages

* test: consecutive user-message merge keeps multimodal image blocks
This commit is contained in:
Afonso Coutinho
2026-06-02 17:21:57 +01:00
committed by GitHub
parent 83aa35b83e
commit 65751186bd
2 changed files with 60 additions and 6 deletions
+32 -6
View File
@@ -576,6 +576,20 @@ def _parse_anthropic_response(data: dict) -> str:
)
def _as_content_blocks(content) -> List[Dict]:
    """Normalize a message ``content`` value into a list of content blocks.

    Behavior by input:

    * ``list`` (already block-structured, e.g. text + image parts) is
      returned unchanged -- the very same list object, not a copy.
    * Any other truthy value is stringified and wrapped in a single
      ``{"type": "text", "text": ...}`` block.
    * ``None``, ``""`` and any other falsy non-list value produce ``[]``.

    Intended for merging consecutive user messages, where calling ``str()``
    on a block list would replace the structured content with its repr.
    """
    # Block lists pass straight through, including the empty list.
    if isinstance(content, list):
        return content

    # Nothing to carry over for None / empty string / other falsy values.
    if not content:
        return []

    text_block = {"type": "text", "text": str(content)}
    return [text_block]
def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
"""Strip Odysseus-only metadata before sending messages to providers.
@@ -689,13 +703,25 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
last = merged[-1]
if last.get("role") == "user" and item.get("role") == "user":
last_copy = dict(last)
last_str = str(last_copy.get("content")) if last_copy.get("content") is not None else ""
item_str = str(item.get("content")) if item.get("content") is not None else ""
new_content = "\n\n".join(part for part in (last_str, item_str) if part)
if new_content:
last_copy["content"] = new_content
lc = last_copy.get("content")
ic = item.get("content")
if isinstance(lc, list) or isinstance(ic, list):
# Preserve multimodal content blocks (e.g. an image part) by
# concatenating the block lists. str()-ing a list turned an
# image message into its Python repr and dropped the image.
merged_blocks = _as_content_blocks(lc) + _as_content_blocks(ic)
if merged_blocks:
last_copy["content"] = merged_blocks
else:
last_copy.pop("content", None)
else:
last_copy.pop("content", None)
last_str = str(lc) if lc is not None else ""
item_str = str(ic) if ic is not None else ""
new_content = "\n\n".join(part for part in (last_str, item_str) if part)
if new_content:
last_copy["content"] = new_content
else:
last_copy.pop("content", None)
merged[-1] = last_copy
else:
merged.append(item)