mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-23 05:05:24 -04:00
Agent stream: 10s heartbeat keepalive on the SSE subscribe so long-running thinking models don't drop the connection
This commit is contained in:
+13
-1
@@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
|
|||||||
next_seq += 1
|
next_seq += 1
|
||||||
if run.status != "running":
|
if run.status != "running":
|
||||||
return
|
return
|
||||||
|
heartbeat_idx = 0
|
||||||
while True:
|
while True:
|
||||||
seq, ev = await q.get()
|
try:
|
||||||
|
seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
# Keep slow local models/proxies alive while they prefill before
|
||||||
|
# the first token. SSE comments are ignored by the UI but reset
|
||||||
|
# browser/proxy idle timers, which prevents "empty response"
|
||||||
|
# disconnects on llama.cpp first-token latencies of 30s+.
|
||||||
|
if run.status == "running":
|
||||||
|
heartbeat_idx += 1
|
||||||
|
yield f": heartbeat {heartbeat_idx}\n\n"
|
||||||
|
continue
|
||||||
|
seq, ev = (None, None)
|
||||||
if seq is None: # end sentinel
|
if seq is None: # end sentinel
|
||||||
while next_seq < len(run.buffer): # flush any tail the sentinel raced
|
while next_seq < len(run.buffer): # flush any tail the sentinel raced
|
||||||
yield run.buffer[next_seq]
|
yield run.buffer[next_seq]
|
||||||
|
|||||||
Reference in New Issue
Block a user