mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
Keep reasoning (thinking) tokens out of the saved chat reply (#856)
Streamed deltas flagged thinking:true (reasoning-model traces) were being folded into full_response and persisted as part of the assistant message, so saved replies were polluted with the model's chain-of-thought. Forward those deltas to the client (for a live thinking indicator) but exclude them from the accumulated saved reply, in both chat and research-stream paths. Mirrors the existing rewrite path's handling.
This commit is contained in:
+13
-4
@@ -708,8 +708,13 @@ def setup_chat_routes(
 try:
     data = json.loads(chunk[6:])
     if "delta" in data:
-        full_response += data["delta"]
-        _stream_set(session, partial=full_response)
+        # Reasoning tokens arrive flagged thinking:true.
+        # Forward them so the client can show a thinking
+        # indicator, but don't fold them into the saved
+        # reply (mirrors the rewrite path below).
+        if not data.get("thinking"):
+            full_response += data["delta"]
+            _stream_set(session, partial=full_response)
         yield chunk
     elif data.get("type") == "usage":
         last_metrics = data.get("data", {})
@@ -805,8 +810,12 @@ def setup_chat_routes(
 try:
     data = json.loads(chunk[6:])
     if "delta" in data:
-        full_response += data["delta"]
-        _stream_set(session, partial=full_response)
+        # Reasoning tokens arrive flagged thinking:true.
+        # Forward them for the live indicator, but keep
+        # them out of the saved reply (same as chat mode).
+        if not data.get("thinking"):
+            full_response += data["delta"]
+            _stream_set(session, partial=full_response)
         yield chunk
     elif data.get("type") == "web_sources":
         web_sources = data.get("data", [])
Reference in New Issue
Block a user