mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(images): render agent-generated images in chat (#2809)
* fix(images): render agent-generated images in chat When a chat model calls generate_image mid-conversation (agentic flow), the image does not display — it survives only as a URL the model echoes in prose. generate_image runs as a text-only MCP server, so result['image_url'] is never populated and the existing buildImageBubble render path never fires. Promote the image URL out of the tool's stdout in tool_execution so the agent loop's existing forwarding renders it via buildImageBubble — deterministically, no dependence on the model echoing the URL. Backend-only; reuses dev's image bubble, forwarding, and the tool's existing parseable output. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * feat(images): fully-qualified, valid generated-image links The chat model often mangled the generated-image URL it echoed in prose (relative path, or copying the 'image_url:' label into the link href). Build a fully-qualified link by prefixing the existing app_public_url setting (empty default keeps relative paths), and present it as a clean 'Direct link:' the model can echo verbatim (the frontend auto-links bare https URLs). One file; independent of how the image is rendered. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * test(images): cover _promote_image_fields; make exit-code guard self-contained Adds the unit tests requested in review on #2809: absolute URL, relative URL, no URL (result unchanged), and non-zero exit_code (not promoted). Moves the dict/exit_code==0 guard from the call site into _promote_image_fields so the function is self-contained and the failure case is unit-testable; call-site behavior is unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,10 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
|
||||
img = images[0]
|
||||
image_url = None
|
||||
# Prefix the instance's public base URL (existing app_public_url setting) so the
|
||||
# link is fully-qualified and clickable when the model echoes it. Empty = relative
|
||||
# same-origin path (unchanged default).
|
||||
_pub_base = (get_setting("app_public_url", "") or "").rstrip("/")
|
||||
|
||||
if img.get("b64_json"):
|
||||
img_dir = Path("data/generated_images")
|
||||
@@ -122,7 +126,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
filename = f"{uuid.uuid4().hex[:12]}.png"
|
||||
img_path = img_dir / filename
|
||||
img_path.write_bytes(base64.b64decode(img["b64_json"]))
|
||||
image_url = f"/api/generated-image/{filename}"
|
||||
image_url = f"{_pub_base}/api/generated-image/{filename}"
|
||||
|
||||
# Save to gallery
|
||||
try:
|
||||
@@ -146,7 +150,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
else:
|
||||
return [TextContent(type="text", text="Error: Unexpected image API response format")]
|
||||
|
||||
result = f"Generated image for: {prompt[:100]}\nimage_url: {image_url}\nmodel: {model_id}\nsize: {size}"
|
||||
# "Direct link:" rather than an "image_url:" label — small models copied the
|
||||
# label token ("image_url") into the link href, producing a broken link.
|
||||
result = (
|
||||
f"Generated image for: {prompt[:100]}\n"
|
||||
f"Direct link: {image_url}\n"
|
||||
f"model: {model_id}\nsize: {size}"
|
||||
)
|
||||
return [TextContent(type="text", text=result)]
|
||||
|
||||
except httpx.TimeoutException:
|
||||
|
||||
@@ -13,6 +13,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Awaitable, Callable, Dict, Optional, Tuple
|
||||
@@ -594,9 +595,40 @@ async def _call_mcp_tool(
|
||||
if fallback:
|
||||
return fallback
|
||||
|
||||
# generate_image runs as a text-only MCP tool, so the saved image URL never
|
||||
# reaches the agent loop's structured forwarding (which renders the image via
|
||||
# buildImageBubble on result["image_url"]). Lift it out of the tool's stdout so
|
||||
# the image renders deterministically — no dependence on the model echoing the
|
||||
# URL into its prose (which it mangles/hallucinates).
|
||||
if tool == "generate_image":
|
||||
_promote_image_fields(result)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _promote_image_fields(result: Dict) -> None:
|
||||
"""Lift the image URL (+ prompt/model/size) from a successful generate_image MCP
|
||||
text result into structured fields the agent loop already forwards to
|
||||
buildImageBubble. Only acts on a dict result with exit_code 0; matches the
|
||||
generated-image URL by pattern (absolute or relative) so it's robust to the
|
||||
result's wording."""
|
||||
if not isinstance(result, dict) or result.get("exit_code") != 0:
|
||||
return
|
||||
out = result.get("stdout") or ""
|
||||
m = re.search(r'(?:https?://[^\s)\]]+)?/api/generated-image/[A-Za-z0-9._-]+', out)
|
||||
if not m:
|
||||
return
|
||||
result["image_url"] = m.group(0).strip()
|
||||
for field, pat in (
|
||||
("image_prompt", r'^Generated image for:\s*(.+)$'),
|
||||
("image_model", r'^model:\s*(.+)$'),
|
||||
("image_size", r'^size:\s*(.+)$'),
|
||||
):
|
||||
fm = re.search(pat, out, re.M)
|
||||
if fm:
|
||||
result[field] = fm.group(1).strip()
|
||||
|
||||
|
||||
_BG_MARKERS = {"#!bg", "#bg", "# bg", "#background", "# background", "@background", "# @background"}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
"""Unit tests for `_promote_image_fields` (PR #2809).
|
||||
|
||||
`generate_image` is a text-only MCP tool, so the saved image URL never reaches
|
||||
the agent loop's structured forwarding (which renders the image via
|
||||
`buildImageBubble` on `result["image_url"]`). `_promote_image_fields` lifts the
|
||||
URL — plus prompt/model/size — out of the tool's stdout into structured fields so
|
||||
the image renders deterministically, without relying on the model echoing the URL
|
||||
into prose. These cases cover the absolute-URL, relative-URL, no-URL, and
|
||||
non-success-exit paths.
|
||||
"""
|
||||
from src.tool_execution import _promote_image_fields
|
||||
|
||||
|
||||
def _result(stdout, exit_code=0):
|
||||
return {"exit_code": exit_code, "stdout": stdout}
|
||||
|
||||
|
||||
def test_absolute_url_promoted_with_fields():
|
||||
"""An absolute https URL in stdout is lifted into image_url, along with the
|
||||
prompt/model/size lines."""
|
||||
r = _result(
|
||||
"Generated image for: a red fox in snow\n"
|
||||
"Direct link: https://odysseus.example.com/api/generated-image/abc123.png\n"
|
||||
"model: qwen-image\n"
|
||||
"size: 1024x1024"
|
||||
)
|
||||
_promote_image_fields(r)
|
||||
assert r["image_url"] == "https://odysseus.example.com/api/generated-image/abc123.png"
|
||||
assert r["image_prompt"] == "a red fox in snow"
|
||||
assert r["image_model"] == "qwen-image"
|
||||
assert r["image_size"] == "1024x1024"
|
||||
|
||||
|
||||
def test_relative_url_promoted():
|
||||
"""A relative /api/generated-image/... path (no host) is still matched."""
|
||||
r = _result(
|
||||
"Generated image for: a cat\n"
|
||||
"Direct link: /api/generated-image/def456.png"
|
||||
)
|
||||
_promote_image_fields(r)
|
||||
assert r["image_url"] == "/api/generated-image/def456.png"
|
||||
assert r["image_prompt"] == "a cat"
|
||||
|
||||
|
||||
def test_no_url_leaves_result_unchanged():
|
||||
"""No generated-image URL anywhere -> no image_url key is added."""
|
||||
r = _result("Generated image for: a dog\n(no link produced)")
|
||||
_promote_image_fields(r)
|
||||
assert "image_url" not in r
|
||||
assert "image_prompt" not in r
|
||||
|
||||
|
||||
def test_nonzero_exit_not_promoted():
|
||||
"""A non-success result is never promoted, even if stdout contains a URL."""
|
||||
r = _result("https://host/api/generated-image/zzz.png", exit_code=1)
|
||||
_promote_image_fields(r)
|
||||
assert "image_url" not in r
|
||||
Reference in New Issue
Block a user