mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
Keep reasoning (thinking) tokens out of the saved chat reply (#856)
Streamed deltas flagged thinking:true (reasoning-model traces) were being folded into full_response and persisted as part of the assistant message, so saved replies were polluted with the model's chain-of-thought. Forward those deltas to the client (for a live thinking indicator) but exclude them from the accumulated saved reply, in both the chat and the research-stream paths. This mirrors the existing rewrite path's handling.
This commit is contained in:
@@ -708,6 +708,11 @@ def setup_chat_routes(
|
|||||||
try:
|
try:
|
||||||
data = json.loads(chunk[6:])
|
data = json.loads(chunk[6:])
|
||||||
if "delta" in data:
|
if "delta" in data:
|
||||||
|
# Reasoning tokens arrive flagged thinking:true.
|
||||||
|
# Forward them so the client can show a thinking
|
||||||
|
# indicator, but don't fold them into the saved
|
||||||
|
# reply (mirrors the rewrite path below).
|
||||||
|
if not data.get("thinking"):
|
||||||
full_response += data["delta"]
|
full_response += data["delta"]
|
||||||
_stream_set(session, partial=full_response)
|
_stream_set(session, partial=full_response)
|
||||||
yield chunk
|
yield chunk
|
||||||
@@ -805,6 +810,10 @@ def setup_chat_routes(
|
|||||||
try:
|
try:
|
||||||
data = json.loads(chunk[6:])
|
data = json.loads(chunk[6:])
|
||||||
if "delta" in data:
|
if "delta" in data:
|
||||||
|
# Reasoning tokens arrive flagged thinking:true.
|
||||||
|
# Forward them for the live indicator, but keep
|
||||||
|
# them out of the saved reply (same as chat mode).
|
||||||
|
if not data.get("thinking"):
|
||||||
full_response += data["delta"]
|
full_response += data["delta"]
|
||||||
_stream_set(session, partial=full_response)
|
_stream_set(session, partial=full_response)
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|||||||
Reference in New Issue
Block a user