From 9adb940ef975c19d52c9cd6c6632552cda7ffccf Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Fri, 19 Jun 2026 00:34:30 +0000 Subject: [PATCH] Agent stream: 10s heartbeat keepalive on the SSE subscribe so long-running thinking models don't drop the connection --- src/agent_runs.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/agent_runs.py b/src/agent_runs.py index 8adbab9c9..3431347c7 100644 --- a/src/agent_runs.py +++ b/src/agent_runs.py @@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]: next_seq += 1 if run.status != "running": return + heartbeat_idx = 0 while True: - seq, ev = await q.get() + try: + seq, ev = await asyncio.wait_for(q.get(), timeout=10.0) + except asyncio.TimeoutError: + # Keep slow local models/proxies alive while they prefill before + # the first token. SSE comments are ignored by the UI but reset + # browser/proxy idle timers, which prevents "empty response" + # disconnects on llama.cpp first-token latencies of 30s+. + if run.status == "running": + heartbeat_idx += 1 + yield f": heartbeat {heartbeat_idx}\n\n" + continue + seq, ev = (None, None) if seq is None: # end sentinel while next_seq < len(run.buffer): # flush any tail the sentinel raced yield run.buffer[next_seq]