diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index 60198194a..880b4a5eb 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -23,6 +23,22 @@ from fastapi import HTTPException logger = logging.getLogger(__name__) +# Strong references to in-flight fire-and-forget tasks scheduled from this +# module. asyncio only keeps weak references to tasks created via +# create_task, so without this the GC can collect a task mid-execution and +# the background work (extraction, auto-naming) silently never runs. +# Mirrors WebhookManager._spawn_tracked from src/webhook_manager.py. +_BG_TASKS: set[asyncio.Task] = set() + + +def _spawn_bg(coro) -> asyncio.Task: + """Schedule a background task and hold a strong reference until it finishes.""" + task = asyncio.create_task(coro) + _BG_TASKS.add(task) + task.add_done_callback(_BG_TASKS.discard) + return task + + # ── Data containers ────────────────────────────────────────────────────── # @dataclass @@ -1105,7 +1121,7 @@ def run_post_response_tasks( ))) if _extraction_jobs: - asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs)) + _spawn_bg(_run_extraction_jobs_sequentially(session_id, _extraction_jobs)) # Token accumulation if last_metrics: @@ -1120,4 +1136,4 @@ def run_post_response_tasks( # Auto-name if needs_auto_name(sess.name): - asyncio.create_task(auto_name_session(session_manager, sess)) + _spawn_bg(auto_name_session(session_manager, sess)) diff --git a/tests/test_chat_helpers_bg_tasks_tracked.py b/tests/test_chat_helpers_bg_tasks_tracked.py new file mode 100644 index 000000000..1f5992e91 --- /dev/null +++ b/tests/test_chat_helpers_bg_tasks_tracked.py @@ -0,0 +1,71 @@ +"""Guard: chat hot-path background tasks must go through _spawn_bg. + +asyncio only holds a weak reference to a bare create_task() result, so the +GC can collect the outer task before its body runs and the background work +(memory/skill extraction, session auto-naming) silently never happens. +routes/chat_helpers.py owns these schedules via _spawn_bg(), which adds the +task to _BG_TASKS and discards it via a done-callback. This guard catches a +regression where a copy-paste re-introduces a bare asyncio.create_task. + +This is the routes/chat_helpers.py-scoped sibling of the webhook-emitter +guard added in #4336 (tests/test_webhook_emitters_use_manager.py). +""" +import ast +from pathlib import Path + +CHAT_HELPERS = ( + Path(__file__).resolve().parent.parent / "routes" / "chat_helpers.py" +) + + +def _untracked_create_task_calls(tree: ast.AST) -> list[tuple[int, str]]: + """(lineno, snippet) for any bare asyncio.create_task(...). + + A call is "bare" when its return value is dropped — i.e. it is the direct + expression of an ast.Expr statement. Captured forms (`x = asyncio.create_task(...)`, + `[asyncio.create_task(...), ...]`, `await asyncio.create_task(...)`) are fine + because something else holds the reference. + + The helper itself (_spawn_bg) is exempt: it calls asyncio.create_task once + and registers the task in _BG_TASKS before returning. + """ + hits: list[tuple[int, str]] = [] + + def _is_create_task(call: ast.Call) -> bool: + f = call.func + return ( + isinstance(f, ast.Attribute) + and f.attr == "create_task" + and isinstance(f.value, ast.Name) + and f.value.id == "asyncio" + ) + + spawn_helper_lines: set[int] = set() + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name == "_spawn_bg": + for n in ast.walk(node): + if hasattr(n, "lineno"): + spawn_helper_lines.add(n.lineno) + + for node in ast.walk(tree): + if not isinstance(node, ast.Expr): + continue + if not isinstance(node.value, ast.Call): + continue + if not _is_create_task(node.value): + continue + if node.lineno in spawn_helper_lines: + continue + hits.append((node.lineno, ast.unparse(node.value))) + return hits + + +def test_no_untracked_create_task_in_chat_helpers(): + tree = ast.parse(CHAT_HELPERS.read_text(), filename=str(CHAT_HELPERS)) + offenders = _untracked_create_task_calls(tree) + assert not offenders, ( + "Background tasks scheduled from routes/chat_helpers.py must go through " + "_spawn_bg(coro) so the task is registered in _BG_TASKS and survives until " + "it finishes. Found bare asyncio.create_task(...) call(s):\n " + + "\n ".join(f"chat_helpers.py:{ln}: {snip}" for ln, snip in offenders) + )