fix(llm): route gpt-oss harmony commentary channel without leaking markers/tool-args (#4523)

The harmony stream router only recognized the analysis and final channels, so
gpt-oss's standard `commentary` channel (tool-call preambles / function-arg
bodies) was unhandled: the literal `<|channel|>commentary` marker, the
`to=functions.*` recipient, and the commentary body all leaked into the
visible answer. Add commentary to the marker regex and the suffix-hold table, and
route its body to thinking (only `final` is user-facing). Add a regression
test (split-chunk marker + recipient + body), verified to fail without the fix.
This commit is contained in:
nubs
2026-06-18 19:12:25 +00:00
committed by GitHub
parent 790ef81b06
commit 0bfc7750a2
2 changed files with 36 additions and 2 deletions
+6 -2
View File
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
_model_activity: Dict[str, float] = {}
_HARMONY_MARKER_RE = re.compile(
r"<\|channel\|>(analysis|final)"
r"<\|channel\|>(analysis|commentary|final)"
r"|<\|start\|>(?:assistant|system|user|tool)?"
r"|<\|message\|>"
r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
)
_HARMONY_MARKERS = (
"<|channel|>analysis",
"<|channel|>commentary",
"<|channel|>final",
"<|start|>assistant",
"<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
out.append((text, False))
return
if self._in_message:
out.append((text, self._channel == "analysis"))
# analysis + commentary (tool-call preambles / function-arg bodies)
# are internal, not user-facing — route them to thinking so they
# don't leak into the visible answer; only `final` is visible.
out.append((text, self._channel in ("analysis", "commentary")))
def _handle_marker(self, match: re.Match[str]) -> None:
marker = match.group(0)
+30
View File
@@ -206,3 +206,33 @@ def test_harmony_analysis_channel_routes_to_thinking(monkeypatch):
assert answer == "Here are the files."
assert "<|channel|>" not in thinking + answer
assert "<|message|>" not in thinking + answer
def test_harmony_commentary_channel_no_marker_or_toolarg_leak(monkeypatch):
    """Commentary-channel output must stay out of the visible answer.

    Streams a gpt-oss response whose first message is on the `commentary`
    channel (with a `to=functions.*` recipient) followed by a `final`
    message, and checks that only the final text is emitted as the answer
    while the commentary body is emitted as thinking. The channel marker is
    deliberately split across two chunks (`<|channel|>comm` / `entary`) so
    the partial-marker hold is exercised for the commentary marker too.
    """
    sse_lines = [
        'data: {"choices":[{"delta":{"content":"<|channel|>comm"}}]}',
        'data: {"choices":[{"delta":{"content":"entary to=functions.web_search<|message|>Let me search the web."}}]}',
        'data: {"choices":[{"delta":{"content":"<|end|><|channel|>final<|message|>Here are the "}}]}',
        'data: {"choices":[{"delta":{"content":"results.<|end|>"}}]}',
        "data: [DONE]",
    ]
    events = _run_stream("gpt-oss:20b", sse_lines, monkeypatch)

    def _joined(want_thinking):
        # Concatenate the delta text of every event whose `thinking` flag
        # (by truthiness) matches the requested side.
        return "".join(
            event["delta"]
            for event in events
            if bool(event.get("thinking")) is want_thinking
        )

    thinking = _joined(True)
    answer = _joined(False)
    everything = thinking + answer

    # Only the final channel's text is user-facing.
    assert answer == "Here are the results."
    # The commentary body ends up on the thinking side instead.
    assert thinking == "Let me search the web."
    # Neither side may carry harmony markers, the channel name, or the
    # tool recipient.
    assert "<|channel|>" not in everything
    assert "<|message|>" not in everything
    assert "commentary" not in answer
    assert "to=functions.web_search" not in everything