fix(llm): route gpt-oss harmony commentary channel without leaking markers/tool-args (#4523)

The harmony stream router recognized only the `analysis` and `final` channels, so gpt-oss's standard `commentary` channel (tool-call preambles / function-arg bodies) went unhandled: the literal `<|channel|>commentary` marker, the `to=functions.*` recipient, and the commentary body all leaked into the visible answer. This change adds `commentary` to the marker regex and to the suffix-hold table, and routes its body to thinking (only `final` is user-facing). It also adds a regression test (split-chunk + recipient + body) that was verified to fail without the fix.
2026-06-23 21:25:33 -04:00 · 2026-06-18 19:12:25 +00:00
parent 790ef81b06
commit 0bfc7750a2
2 changed files with 36 additions and 2 deletions
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
 _model_activity: Dict[str, float] = {}

 _HARMONY_MARKER_RE = re.compile(
-    r"<\|channel\|>(analysis|final)"
+    r"<\|channel\|>(analysis|commentary|final)"
    r"|<\|start\|>(?:assistant|system|user|tool)?"
    r"|<\|message\|>"
    r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
 )
 _HARMONY_MARKERS = (
    "<|channel|>analysis",
+    "<|channel|>commentary",
    "<|channel|>final",
    "<|start|>assistant",
    "<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
            out.append((text, False))
            return
        if self._in_message:
-            out.append((text, self._channel == "analysis"))
+            # analysis + commentary (tool-call preambles / function-arg bodies)
+            # are internal, not user-facing — route them to thinking so they
+            # don't leak into the visible answer; only `final` is visible.
+            out.append((text, self._channel in ("analysis", "commentary")))

    def _handle_marker(self, match: re.Match[str]) -> None:
        marker = match.group(0)