Agent stream: 10s heartbeat keepalive on the SSE subscribe so long-running thinking models don't drop the connection

This commit is contained in:
pewdiepie-archdaemon
2026-06-19 00:34:30 +00:00
parent 2fbfd22946
commit 9adb940ef9
+13 -1
View File
@@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
next_seq += 1
if run.status != "running":
return
heartbeat_idx = 0
while True:
seq, ev = await q.get()
try:
seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
except asyncio.TimeoutError:
# Keep slow local models/proxies alive while they prefill before
# the first token. SSE comments are ignored by the UI but reset
# browser/proxy idle timers, which prevents "empty response"
# disconnects on llama.cpp first-token latencies of 30s+.
if run.status == "running":
heartbeat_idx += 1
yield f": heartbeat {heartbeat_idx}\n\n"
continue
seq, ev = (None, None)
if seq is None: # end sentinel
while next_seq < len(run.buffer): # flush any tail the sentinel raced
yield run.buffer[next_seq]