fix: stop leaking DB connections when persisting session mode (#64)

chat_routes.py persisted a session's "mode" in three best-effort spots —
reading the current mode, writing the effective mode, and setting
research_pending on the stream path. Each opened a session with SessionLocal()
and called .close() as the LAST statement inside a try/except, so if anything
before close() raised (e.g. a SQLite "database is locked" under concurrent chat
streams) the except only logged and the connection was never returned to the
pool.

DATABASE_URL defaults to file-backed SQLite, whose engine uses SQLAlchemy's
default QueuePool (5 connections + 10 overflow). Repeated leaks on these hot
paths exhaust the pool; later requests then block for pool_timeout and fail
with "QueuePool limit ... reached", taking the app down until restart.

Move the logic into two best-effort helpers in core.database, next to the
existing session helpers (update_session_last_accessed, get_session_by_id):

  - get_session_mode(session_id) -> Optional[str]
  - set_session_mode(session_id, mode) -> bool

Both route through the existing get_db_session() context manager, which commits
on success, rolls back on error, and always closes in a finally, so the
connection is returned to the pool on every path. chat_routes.py now calls
these instead of hand-rolling sessions, also removing three copies of the same
try/except.

Add tests/test_session_mode_helpers.py: the helpers commit+close on success
and, on a mid-operation DB error, swallow + roll back + close (no leak). The
error-path tests fail against the old close()-inside-try pattern.
This commit is contained in:
Collin
2026-06-01 00:57:48 -04:00
committed by GitHub
parent 8218421733
commit 0a7de1fdf4
3 changed files with 94 additions and 24 deletions
+27
View File
@@ -1755,6 +1755,33 @@ def update_session_last_accessed(session_id: str):
return True return True
return False return False
def get_session_mode(session_id: str):
"""Return a session's persisted `mode`, or None if unset/unknown.
Best-effort: never raises (returns None on any DB error) so callers on hot
request paths needn't guard it. Routed through get_db_session() so the
connection is always returned to the pool."""
try:
with get_db_session() as db:
return db.query(Session.mode).filter(Session.id == session_id).scalar()
except Exception:
logger.warning("Failed to read mode for session %s", session_id)
return None
def set_session_mode(session_id: str, mode: str) -> bool:
"""Persist a session's `mode`. Best-effort: never raises, returns success.
Routed through get_db_session() so a failure mid-write (e.g. a SQLite
'database is locked' under concurrent streams) still returns the connection
to the pool instead of leaking it — repeated leaks would exhaust it."""
try:
with get_db_session() as db:
db.query(Session).filter(Session.id == session_id).update({"mode": mode})
return True
except Exception:
logger.warning("Failed to persist mode %r for session %s", mode, session_id)
return False
def get_session_by_id(session_id: str): def get_session_by_id(session_id: str):
"""Get a session by ID""" """Get a session by ID"""
with get_db_session() as db: with get_db_session() as db:
+4 -22
View File
@@ -23,7 +23,7 @@ from src.prompt_security import untrusted_context_message
from core.exceptions import SessionNotFoundError from core.exceptions import SessionNotFoundError
from src.auth_helpers import get_current_user from src.auth_helpers import get_current_user
from routes.session_routes import _verify_session_owner from routes.session_routes import _verify_session_owner
from core.database import SessionLocal from core.database import SessionLocal, get_session_mode, set_session_mode
from core.database import Session as DBSession, ChatMessage as DBChatMessage from core.database import Session as DBSession, ChatMessage as DBChatMessage
from core.database import Document as DBDocument, ModelEndpoint from core.database import Document as DBDocument, ModelEndpoint
from routes.research_routes import _resolve_research_endpoint from routes.research_routes import _resolve_research_endpoint
@@ -326,26 +326,14 @@ def setup_chat_routes(
# Check for research_pending BEFORE mode persist overwrites it # Check for research_pending BEFORE mode persist overwrites it
do_research = str(use_research).lower() == "true" do_research = str(use_research).lower() == "true"
if not do_research: if not do_research:
try: if get_session_mode(session) == 'research_pending':
_mode_db = SessionLocal()
_db_mode = _mode_db.query(DBSession.mode).filter(DBSession.id == session).scalar()
_mode_db.close()
if _db_mode == 'research_pending':
do_research = True do_research = True
logger.info(f"Session {session} in research_pending — auto-triggering research") logger.info(f"Session {session} in research_pending — auto-triggering research")
except Exception:
pass
# Persist session mode (research > agent > chat) # Persist session mode (research > agent > chat)
_effective_mode = 'research' if do_research else (chat_mode or 'chat') _effective_mode = 'research' if do_research else (chat_mode or 'chat')
if _effective_mode in ('agent', 'research', 'chat'): if _effective_mode in ('agent', 'research', 'chat'):
try: set_session_mode(session, _effective_mode)
_mdb = SessionLocal()
_mdb.query(DBSession).filter(DBSession.id == session).update({"mode": _effective_mode})
_mdb.commit()
_mdb.close()
except Exception as _me:
logger.warning("Failed to persist session mode: %s", _me)
att_ids = [] att_ids = []
if body and isinstance(body.get("attachments"), list): if body and isinstance(body.get("attachments"), list):
@@ -547,13 +535,7 @@ def setup_chat_routes(
logger.info(f"First research message — asking clarifying questions for: {message[:60]}") logger.info(f"First research message — asking clarifying questions for: {message[:60]}")
yield f'data: {json.dumps({"type": "model_info", "model": sess.model, "suffix": "Research"})}\n\n' yield f'data: {json.dumps({"type": "model_info", "model": sess.model, "suffix": "Research"})}\n\n'
# Set DB mode to research_pending so the NEXT message auto-triggers research # Set DB mode to research_pending so the NEXT message auto-triggers research
try: set_session_mode(session, "research_pending")
_pdb = SessionLocal()
_pdb.query(DBSession).filter(DBSession.id == session).update({"mode": "research_pending"})
_pdb.commit()
_pdb.close()
except Exception as _pe:
logger.warning(f"Failed to set research_pending: {_pe}")
ctx.messages.insert(0, {"role": "system", "content": ctx.messages.insert(0, {"role": "system", "content":
"The user wants to start deep web research. Before searching, ask 2-3 brief " "The user wants to start deep web research. Before searching, ask 2-3 brief "
"clarifying questions to understand exactly what they want to know. For example: " "clarifying questions to understand exactly what they want to know. For example: "
+61
View File
@@ -0,0 +1,61 @@
"""Pin the leak-safety of the session-mode DB helpers.
chat_routes.py persists a session's "mode" in three best-effort spots (read
current mode, persist the effective mode, set research_pending). Those spots
previously hand-rolled `SessionLocal()` with `.close()` as the LAST statement
inside a try/except — so any error before close() (e.g. a SQLite "database is
locked" under concurrent streams) leaked the connection. With the default
QueuePool for file SQLite (5 + 10 overflow), accumulated leaks exhaust the
pool and the app can no longer obtain a DB session until restart.
The logic now lives in core.database.{get,set}_session_mode, which route
through get_db_session() (commit/rollback + guaranteed close). These tests pin
that a mid-operation DB error neither raises out of the helper nor leaks the
connection. The error-path cases fail against the old close()-inside-try
pattern.
"""
import os
os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")
from unittest.mock import MagicMock
from core import database as db
def _mock_session(monkeypatch):
"""Make get_db_session() hand out a MagicMock session (no real DB)."""
sess = MagicMock()
monkeypatch.setattr(db, "SessionLocal", lambda: sess)
return sess
def test_set_session_mode_commits_and_closes_on_success(monkeypatch):
sess = _mock_session(monkeypatch)
assert db.set_session_mode("s1", "agent") is True
sess.query.return_value.filter.return_value.update.assert_called_once_with({"mode": "agent"})
sess.commit.assert_called_once()
sess.close.assert_called_once()
def test_set_session_mode_does_not_leak_on_error(monkeypatch):
sess = _mock_session(monkeypatch)
sess.query.return_value.filter.return_value.update.side_effect = RuntimeError("database is locked")
# Best-effort: the error is swallowed and False returned...
assert db.set_session_mode("s1", "agent") is False
# ...and crucially the connection is still returned to the pool.
sess.rollback.assert_called_once()
sess.close.assert_called_once()
def test_get_session_mode_reads_and_closes(monkeypatch):
sess = _mock_session(monkeypatch)
sess.query.return_value.filter.return_value.scalar.return_value = "research_pending"
assert db.get_session_mode("s1") == "research_pending"
sess.close.assert_called_once()
def test_get_session_mode_does_not_leak_on_error(monkeypatch):
sess = _mock_session(monkeypatch)
sess.query.return_value.filter.return_value.scalar.side_effect = RuntimeError("database is locked")
assert db.get_session_mode("s1") is None
sess.close.assert_called_once()