fix(research): stop rescanning the research dir on every status poll (#3637)

get_status() called get_avg_duration() unconditionally, and that helper globs and JSON-parses every file under the research data dir. The SSE status stream polls get_status() roughly once a second, so with even a few saved reports each poll re-read and re-parsed all of them, including for sessions that are not active (where the disk branch never even used the value). Compute avg_duration only for active sessions and memoize it on the task entry, so a long stream computes it once instead of on every poll. Behaviour is unchanged: active streams still report avg_duration. Adds tests/test_research_status_avg_duration.py, which checks that an inactive session does no avg scan and that an active session computes the average once across many polls.
2026-06-21 20:25:23 -04:00 · 2026-06-10 18:40:44 +03:00
parent 800d391234
commit 4e210d3337
2 changed files with 49 additions and 1 deletion
@@ -390,7 +390,6 @@ class ResearchHandler:

    def get_status(self, session_id: str) -> Optional[dict]:
        """Get current research status for a session."""
-        avg = self.get_avg_duration()
        if session_id in self._active_tasks:
            entry = self._active_tasks[session_id]
            result = {
@@ -399,6 +398,14 @@ class ResearchHandler:
                "query": entry["query"],
                "started_at": entry["started_at"],
            }
+            # avg_duration is a historical figure over completed reports on
+            # disk; get_avg_duration() globs and JSON-parses the whole research
+            # dir, so compute it at most once per active stream (memoized on the
+            # entry) instead of on every ~1s SSE poll. The disk branch below
+            # never used it, so it no longer pays that cost at all.
+            if "_avg_duration" not in entry:
+                entry["_avg_duration"] = self.get_avg_duration()
+            avg = entry["_avg_duration"]
            if avg is not None:
                result["avg_duration"] = round(avg, 1)
            return result