From edd5ea36adad0f283e93c3bb2e84920094de379c Mon Sep 17 00:00:00 2001
From: Afonso Coutinho
Date: Sat, 27 Jun 2026 14:31:51 +0100
Subject: [PATCH] Fix _parse_msg_content corrupting JSON-array-like text messages on reload (#2060)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_parse_msg_content deserializes stored multimodal content (image/audio
blocks) back into a list. It treated ANY string starting with '[{' and
containing the substring "type" as serialized content, requiring only
that each element be a dict — never that "type" be a real content-block
kind. So a plain text message whose content happens to be a JSON array
of typed objects (e.g. a user pasting an API schema sample like
[{"type": "object", ...}]) was silently parsed from str into a list on
the next hydration, destroying the original string. This runs on every
session load from the DB (_db_to_session -> get_session).

Restrict the round-trip to non-empty lists whose every element is a dict
whose "type" is a recognized block kind (text/image/image_url/audio/...);
real multimodal content (verified: document_processor emits exactly
these) still round-trips, JSON-looking text is left untouched.

"type" is also required to be a str before the set lookup. JSON Schema
permits "type" to be a list (e.g. ["string", "null"]); an unhashable
value would otherwise raise TypeError from the membership test, which
the existing except clause does not catch.
---
 core/session_manager.py                       | 16 ++-
 .../test_parse_msg_content_jsonlike_string.py | 95 +++++++++++++++++++
 2 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_parse_msg_content_jsonlike_string.py

diff --git a/core/session_manager.py b/core/session_manager.py
index 914205a7d..491fbc078 100644
--- a/core/session_manager.py
+++ b/core/session_manager.py
@@ -40,7 +40,21 @@ def _parse_msg_content(raw):
     if isinstance(raw, str) and raw.startswith('[{') and '"type"' in raw:
         try:
             parsed = json.loads(raw)
-            if isinstance(parsed, list) and all(isinstance(p, dict) for p in parsed):
+            # Only treat as serialized multimodal content when EVERY element is
+            # a dict whose "type" is a recognized content-block kind. Otherwise a
+            # plain text message that merely *looks* like a JSON array of objects
+            # (e.g. a user pasting an API schema/sample with a "type" field) was
+            # silently parsed back into a list, destroying the original string.
+            _BLOCK_TYPES = {
+                "text", "image", "image_url", "audio", "input_audio",
+                "input_image", "document", "file",
+            }
+            # Check "type" is a str before the set lookup: pasted JSON can hold
+            # an unhashable value there (JSON Schema permits a list of types),
+            # and `in` on a set would raise TypeError out of this function.
+            if (isinstance(parsed, list) and parsed and all(
+                    isinstance(p, dict) and isinstance(p.get("type"), str)
+                    and p.get("type") in _BLOCK_TYPES for p in parsed)):
                 return parsed
         except (json.JSONDecodeError, ValueError):
             pass
diff --git a/tests/test_parse_msg_content_jsonlike_string.py b/tests/test_parse_msg_content_jsonlike_string.py
new file mode 100644
index 000000000..87d44fe2b
--- /dev/null
+++ b/tests/test_parse_msg_content_jsonlike_string.py
@@ -0,0 +1,95 @@
+"""A plain text message that merely *looks* like a JSON array of objects must
+NOT be silently re-parsed into a list on reload.
+
+_parse_msg_content de-serializes multimodal (image/audio) content back into a
+list of content blocks. The old heuristic accepted ANY string that started
+with "[{" and contained the substring '"type"'. A user who pasted an API
+schema / sample such as `[{"type": "object", "name": "foo"}]` therefore had
+their text message permanently corrupted into a Python list on the next
+session hydration. The fix restricts the round-trip to lists whose elements
+are all recognized content-block types (text/image_url/audio/...).
+"""
+import tempfile
+import uuid
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.pool import NullPool
+
+import core.database as cdb
+from core.database import Session as DbSession
+from core.models import ChatMessage
+
+_TMPDB = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
+_ENGINE = create_engine(
+    f"sqlite:///{_TMPDB.name}",
+    connect_args={"check_same_thread": False},
+    poolclass=NullPool,
+)
+cdb.Base.metadata.create_all(_ENGINE)
+_TS = sessionmaker(bind=_ENGINE, autoflush=False, autocommit=False)
+
+
+@pytest.fixture
+def manager(monkeypatch):
+    import core.session_manager as sm
+    monkeypatch.setattr(sm, "SessionLocal", _TS)
+    mgr = sm.SessionManager.__new__(sm.SessionManager)
+    mgr.sessions = {}
+    return mgr
+
+
+def _make_session(sid, owner="alice"):
+    db = _TS()
+    try:
+        db.add(DbSession(id=sid, owner=owner, name="chat",
+                         endpoint_url="http://x", model="gpt-4o",
+                         archived=False, message_count=1))
+        db.commit()
+    finally:
+        db.close()
+
+
+def test_jsonlike_user_string_not_corrupted(manager):
+    sid = "sess-" + uuid.uuid4().hex[:8]
+    _make_session(sid)
+    text = '[{"type": "object", "name": "foo"}]'
+    msgs = [ChatMessage(role="user", content=text)]
+    assert manager.replace_messages(sid, msgs) is True
+
+    manager.sessions.clear()
+    reloaded = manager.get_session(sid)
+    # Must come back as the ORIGINAL STRING, not silently parsed into a list.
+    assert isinstance(reloaded.history[0].content, str)
+    assert reloaded.history[0].content == text
+
+
+def test_real_multimodal_content_still_round_trips(manager):
+    sid = "sess-" + uuid.uuid4().hex[:8]
+    _make_session(sid)
+    multimodal = [
+        {"type": "text", "text": "what is this?"},
+        {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
+    ]
+    msgs = [ChatMessage(role="user", content=multimodal)]
+    assert manager.replace_messages(sid, msgs) is True
+
+    manager.sessions.clear()
+    reloaded = manager.get_session(sid)
+    assert reloaded.history[0].content == multimodal
+
+
+def test_jsonlike_string_with_list_type_not_corrupted(manager):
+    # JSON Schema permits "type" to be a list; such an unhashable value must
+    # neither crash hydration nor be mistaken for a content block.
+    sid = "sess-" + uuid.uuid4().hex[:8]
+    _make_session(sid)
+    text = '[{"type": ["string", "null"], "name": "foo"}]'
+    msgs = [ChatMessage(role="user", content=text)]
+    assert manager.replace_messages(sid, msgs) is True
+
+    manager.sessions.clear()
+    reloaded = manager.get_session(sid)
+    assert isinstance(reloaded.history[0].content, str)
+    assert reloaded.history[0].content == text