Agent stream: add a 10s heartbeat keepalive to the SSE subscribe loop so long-running thinking models don't drop the connection

2026-06-23 05:05:24 -04:00 · 2026-06-19 00:34:30 +00:00
parent 2fbfd22946
commit 9adb940ef9
1 changed file with 13 additions and 1 deletion
@@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
            next_seq += 1
        if run.status != "running":
            return
        heartbeat_idx = 0
        while True:
-            seq, ev = await q.get()
+            try:
                seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
            except asyncio.TimeoutError:
                # Keep slow local models/proxies alive while they prefill before
                # the first token. SSE comments are ignored by the UI but reset
                # browser/proxy idle timers, which prevents "empty response"
                # disconnects on llama.cpp first-token latencies of 30s+.
                if run.status == "running":
                    heartbeat_idx += 1
                    yield f": heartbeat {heartbeat_idx}\n\n"
                    continue
                seq, ev = (None, None)
            if seq is None:            # end sentinel
                while next_seq < len(run.buffer):   # flush any tail the sentinel raced
                    yield run.buffer[next_seq]