fix(images): render agent-generated images in chat (#2809)

* fix(images): render agent-generated images in chat

When a chat model calls generate_image mid-conversation (agentic flow), the image does
not display — it survives only as a URL the model echoes in prose. generate_image runs
as a text-only MCP server, so result['image_url'] is never populated and the existing
buildImageBubble render path never fires. Promote the image URL out of the tool's stdout
in tool_execution so the agent loop's existing forwarding renders it via buildImageBubble
— deterministically, no dependence on the model echoing the URL. Backend-only; reuses
dev's image bubble, forwarding, and the tool's existing parseable output.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* feat(images): fully-qualified, valid generated-image links

The chat model often mangled the generated-image URL it echoed in prose (relative path,
or copying the 'image_url:' label into the link href). Build a fully-qualified link by
prefixing the existing app_public_url setting (empty default keeps relative paths), and
present it as a clean 'Direct link:' the model can echo verbatim (the frontend auto-links
bare https URLs). One file; independent of how the image is rendered.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* test(images): cover _promote_image_fields; make exit-code guard self-contained

Adds the unit tests requested in review on #2809: absolute URL, relative URL,
no URL (result unchanged), and non-zero exit_code (not promoted). Moves the
dict/exit_code==0 guard from the call site into _promote_image_fields so the
function is self-contained and the failure case is unit-testable; call-site
behavior is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
nsgds
2026-06-05 19:04:33 +08:00
committed by GitHub
parent 201e207b56
commit 0f8d12363a
3 changed files with 101 additions and 2 deletions
+12 -2
View File
@@ -115,6 +115,10 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
img = images[0] img = images[0]
image_url = None image_url = None
# Prefix the instance's public base URL (existing app_public_url setting) so the
# link is fully-qualified and clickable when the model echoes it. Empty = relative
# same-origin path (unchanged default).
_pub_base = (get_setting("app_public_url", "") or "").rstrip("/")
if img.get("b64_json"): if img.get("b64_json"):
img_dir = Path("data/generated_images") img_dir = Path("data/generated_images")
@@ -122,7 +126,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
filename = f"{uuid.uuid4().hex[:12]}.png" filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename img_path = img_dir / filename
img_path.write_bytes(base64.b64decode(img["b64_json"])) img_path.write_bytes(base64.b64decode(img["b64_json"]))
image_url = f"/api/generated-image/{filename}" image_url = f"{_pub_base}/api/generated-image/{filename}"
# Save to gallery # Save to gallery
try: try:
@@ -146,7 +150,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
else: else:
return [TextContent(type="text", text="Error: Unexpected image API response format")] return [TextContent(type="text", text="Error: Unexpected image API response format")]
result = f"Generated image for: {prompt[:100]}\nimage_url: {image_url}\nmodel: {model_id}\nsize: {size}" # "Direct link:" rather than an "image_url:" label — small models copied the
# label token ("image_url") into the link href, producing a broken link.
result = (
f"Generated image for: {prompt[:100]}\n"
f"Direct link: {image_url}\n"
f"model: {model_id}\nsize: {size}"
)
return [TextContent(type="text", text=result)] return [TextContent(type="text", text=result)]
except httpx.TimeoutException: except httpx.TimeoutException:
+32
View File
@@ -13,6 +13,7 @@ import json
import logging import logging
import os import os
import pathlib import pathlib
import re
import sys import sys
import time import time
from typing import Any, Awaitable, Callable, Dict, Optional, Tuple from typing import Any, Awaitable, Callable, Dict, Optional, Tuple
@@ -594,9 +595,40 @@ async def _call_mcp_tool(
if fallback: if fallback:
return fallback return fallback
# generate_image runs as a text-only MCP tool, so the saved image URL never
# reaches the agent loop's structured forwarding (which renders the image via
# buildImageBubble on result["image_url"]). Lift it out of the tool's stdout so
# the image renders deterministically — no dependence on the model echoing the
# URL into its prose (which it mangles/hallucinates).
if tool == "generate_image":
_promote_image_fields(result)
return result return result
def _promote_image_fields(result: Dict) -> None:
"""Lift the image URL (+ prompt/model/size) from a successful generate_image MCP
text result into structured fields the agent loop already forwards to
buildImageBubble. Only acts on a dict result with exit_code 0; matches the
generated-image URL by pattern (absolute or relative) so it's robust to the
result's wording."""
if not isinstance(result, dict) or result.get("exit_code") != 0:
return
out = result.get("stdout") or ""
m = re.search(r'(?:https?://[^\s)\]]+)?/api/generated-image/[A-Za-z0-9._-]+', out)
if not m:
return
result["image_url"] = m.group(0).strip()
for field, pat in (
("image_prompt", r'^Generated image for:\s*(.+)$'),
("image_model", r'^model:\s*(.+)$'),
("image_size", r'^size:\s*(.+)$'),
):
fm = re.search(pat, out, re.M)
if fm:
result[field] = fm.group(1).strip()
_BG_MARKERS = {"#!bg", "#bg", "# bg", "#background", "# background", "@background", "# @background"} _BG_MARKERS = {"#!bg", "#bg", "# bg", "#background", "# background", "@background", "# @background"}
+57
View File
@@ -0,0 +1,57 @@
"""Unit tests for `_promote_image_fields` (PR #2809).
`generate_image` is a text-only MCP tool, so the saved image URL never reaches
the agent loop's structured forwarding (which renders the image via
`buildImageBubble` on `result["image_url"]`). `_promote_image_fields` lifts the
URL — plus prompt/model/size — out of the tool's stdout into structured fields so
the image renders deterministically, without relying on the model echoing the URL
into prose. These cases cover the absolute-URL, relative-URL, no-URL, and
non-success-exit paths.
"""
from src.tool_execution import _promote_image_fields
def _result(stdout, exit_code=0):
return {"exit_code": exit_code, "stdout": stdout}
def test_absolute_url_promoted_with_fields():
    """A fully-qualified https link is promoted to image_url, and the
    prompt/model/size lines are promoted alongside it."""
    stdout = (
        "Generated image for: a red fox in snow\n"
        "Direct link: https://odysseus.example.com/api/generated-image/abc123.png\n"
        "model: qwen-image\n"
        "size: 1024x1024"
    )
    outcome = _result(stdout)
    _promote_image_fields(outcome)
    expected = {
        "image_url": "https://odysseus.example.com/api/generated-image/abc123.png",
        "image_prompt": "a red fox in snow",
        "image_model": "qwen-image",
        "image_size": "1024x1024",
    }
    for key, value in expected.items():
        assert outcome[key] == value
def test_relative_url_promoted():
    """A host-less /api/generated-image/... path is promoted just like an
    absolute URL."""
    stdout = "\n".join(
        [
            "Generated image for: a cat",
            "Direct link: /api/generated-image/def456.png",
        ]
    )
    outcome = _result(stdout)
    _promote_image_fields(outcome)
    assert outcome["image_url"] == "/api/generated-image/def456.png"
    assert outcome["image_prompt"] == "a cat"
def test_no_url_leaves_result_unchanged():
    """Without a generated-image URL in stdout, no image fields are added."""
    outcome = _result("Generated image for: a dog\n(no link produced)")
    _promote_image_fields(outcome)
    for absent_key in ("image_url", "image_prompt"):
        assert absent_key not in outcome
def test_nonzero_exit_not_promoted():
    """A failed tool result (exit_code != 0) is left alone even when its
    stdout carries a matching URL."""
    failed = _result("https://host/api/generated-image/zzz.png", exit_code=1)
    _promote_image_fields(failed)
    assert "image_url" not in failed