fix(llm): route gpt-oss harmony commentary channel without leaking markers/tool-args (#4523)

The harmony stream router only recognized the `analysis` and `final` channels, so gpt-oss's standard `commentary` channel (tool-call preambles / function-arg bodies) was unhandled: the literal `<|channel|>commentary` marker, the `to=functions.*` recipient, and the commentary body all leaked into the visible answer. Add `commentary` to the marker regex (`_HARMONY_MARKER_RE`) and to the suffix-hold table (`_HARMONY_MARKERS`), and route its body to thinking (only `final` is user-facing). Also add a regression test covering a marker split across chunks, the `to=functions.*` recipient, and the commentary body; the test was verified to fail without the fix.
2026-06-19 11:15:24 -04:00 · 2026-06-18 19:12:25 +00:00
parent 790ef81b06
commit 0bfc7750a2
2 changed files with 36 additions and 2 deletions
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
 _model_activity: Dict[str, float] = {}

 _HARMONY_MARKER_RE = re.compile(
-    r"<\|channel\|>(analysis|final)"
+    r"<\|channel\|>(analysis|commentary|final)"
    r"|<\|start\|>(?:assistant|system|user|tool)?"
    r"|<\|message\|>"
    r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
 )
 _HARMONY_MARKERS = (
    "<|channel|>analysis",
+    "<|channel|>commentary",
    "<|channel|>final",
    "<|start|>assistant",
    "<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
            out.append((text, False))
            return
        if self._in_message:
-            out.append((text, self._channel == "analysis"))
+            # analysis + commentary (tool-call preambles / function-arg bodies)
+            # are internal, not user-facing — route them to thinking so they
+            # don't leak into the visible answer; only `final` is visible.
+            out.append((text, self._channel in ("analysis", "commentary")))

    def _handle_marker(self, match: re.Match[str]) -> None:
        marker = match.group(0)
@@ -206,3 +206,33 @@ def test_harmony_analysis_channel_routes_to_thinking(monkeypatch):
    assert answer == "Here are the files."
    assert "<|channel|>" not in thinking + answer
    assert "<|message|>" not in thinking + answer
+
+
+def test_harmony_commentary_channel_no_marker_or_toolarg_leak(monkeypatch):
+    # gpt-oss commentary channel (tool-call preambles / function-arg bodies) is
+    # internal — it must not leak the channel marker, the `to=functions.*`
+    # recipient, or its body into the visible answer. The `<|channel|>comm` /
+    # `entary` split also exercises the suffix-hold for the new marker.
+    deltas = _run_stream(
+        "gpt-oss:20b",
+        [
+            'data: {"choices":[{"delta":{"content":"<|channel|>comm"}}]}',
+            'data: {"choices":[{"delta":{"content":"entary to=functions.web_search<|message|>Let me search the web."}}]}',
+            'data: {"choices":[{"delta":{"content":"<|end|><|channel|>final<|message|>Here are the "}}]}',
+            'data: {"choices":[{"delta":{"content":"results.<|end|>"}}]}',
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    thinking = "".join(d["delta"] for d in deltas if d.get("thinking"))
+    answer = "".join(d["delta"] for d in deltas if not d.get("thinking"))
+
+    # final channel is the only user-facing text
+    assert answer == "Here are the results."
+    # commentary body routed to thinking, not the visible answer
+    assert thinking == "Let me search the web."
+    # no harmony markers, channel name, or tool recipient leak anywhere
+    assert "<|channel|>" not in thinking + answer
+    assert "<|message|>" not in thinking + answer
+    assert "commentary" not in answer
+    assert "to=functions.web_search" not in thinking + answer