fix(chat): track chat hot-path background tasks for strong references (#4443) (#4444)

Two background tasks scheduled on every chat completion in
routes/chat_helpers.py — the memory/skill extraction dispatch and the
session auto-namer — are created via bare asyncio.create_task(...).
asyncio only holds a weak reference to the outer task, so the GC can
collect it mid-execution and the work silently never runs.

Add a module-private _BG_TASKS set and a _spawn_bg() helper that mirrors
WebhookManager._spawn_tracked (the pattern #3964 / #4336 established for
the webhook emitters two lines apart in the same function). Route both
call sites through it so the lifecycle owner is explicit.

Adds an AST-level guard test that fails on any bare
asyncio.create_task(...) statement in routes/chat_helpers.py to prevent
a regression — same shape as test_webhook_emitters_use_manager.py from
#4336.

The same bare pattern exists in routes/email_routes.py and
routes/cookbook_routes.py; left out of this PR per CONTRIBUTING.md's
"one fix per PR" and tracked in #4443's "Additional Information" for a
follow-up.
This commit is contained in:
Wei Hong
2026-06-19 03:26:11 +08:00
committed by GitHub
parent e7ffc69729
commit 7475779b7c
2 changed files with 89 additions and 2 deletions
+18 -2
View File
@@ -23,6 +23,22 @@ from fastapi import HTTPException
logger = logging.getLogger(__name__)
# Strong references to in-flight fire-and-forget tasks scheduled from this
# module. asyncio only keeps weak references to tasks created via
# create_task, so without this the GC can collect a task mid-execution and
# the background work (extraction, auto-naming) silently never runs.
# Mirrors WebhookManager._spawn_tracked from src/webhook_manager.py.
_BG_TASKS: set[asyncio.Task] = set()
def _spawn_bg(coro) -> asyncio.Task:
"""Schedule a background task and hold a strong reference until it finishes."""
task = asyncio.create_task(coro)
_BG_TASKS.add(task)
task.add_done_callback(_BG_TASKS.discard)
return task
# ── Data containers ────────────────────────────────────────────────────── #
@dataclass
@@ -1105,7 +1121,7 @@ def run_post_response_tasks(
)))
if _extraction_jobs:
asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
_spawn_bg(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
# Token accumulation
if last_metrics:
@@ -1120,4 +1136,4 @@ def run_post_response_tasks(
# Auto-name
if needs_auto_name(sess.name):
asyncio.create_task(auto_name_session(session_manager, sess))
_spawn_bg(auto_name_session(session_manager, sess))
@@ -0,0 +1,71 @@
"""Guard: chat hot-path background tasks must go through _spawn_bg.
asyncio only holds a weak reference to a bare create_task() result, so the
GC can collect the outer task before its body runs and the background work
(memory/skill extraction, session auto-naming) silently never happens.
routes/chat_helpers.py owns these schedules via _spawn_bg(), which adds the
task to _BG_TASKS and discards it via a done-callback. This guard catches a
regression where a copy-paste re-introduces a bare asyncio.create_task.
This is the routes/chat_helpers.py-scoped sibling of the webhook-emitter
guard added in #4336 (tests/test_webhook_emitters_use_manager.py).
"""
import ast
from pathlib import Path
CHAT_HELPERS = (
Path(__file__).resolve().parent.parent / "routes" / "chat_helpers.py"
)
def _untracked_create_task_calls(tree: ast.AST) -> list[tuple[int, str]]:
"""(lineno, snippet) for any bare asyncio.create_task(...).
A call is "bare" when its return value is dropped — i.e. it is the direct
expression of an ast.Expr statement. Captured forms (`x = asyncio.create_task(...)`,
`[asyncio.create_task(...), ...]`, `await asyncio.create_task(...)`) are fine
because something else holds the reference.
The helper itself (_spawn_bg) is exempt: it calls asyncio.create_task once
and registers the task in _BG_TASKS before returning.
"""
hits: list[tuple[int, str]] = []
def _is_create_task(call: ast.Call) -> bool:
f = call.func
return (
isinstance(f, ast.Attribute)
and f.attr == "create_task"
and isinstance(f.value, ast.Name)
and f.value.id == "asyncio"
)
spawn_helper_lines: set[int] = set()
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name == "_spawn_bg":
for n in ast.walk(node):
if hasattr(n, "lineno"):
spawn_helper_lines.add(n.lineno)
for node in ast.walk(tree):
if not isinstance(node, ast.Expr):
continue
if not isinstance(node.value, ast.Call):
continue
if not _is_create_task(node.value):
continue
if node.lineno in spawn_helper_lines:
continue
hits.append((node.lineno, ast.unparse(node.value)))
return hits
def test_no_untracked_create_task_in_chat_helpers():
tree = ast.parse(CHAT_HELPERS.read_text(), filename=str(CHAT_HELPERS))
offenders = _untracked_create_task_calls(tree)
assert not offenders, (
"Background tasks scheduled from routes/chat_helpers.py must go through "
"_spawn_bg(coro) so the task is registered in _BG_TASKS and survives until "
"it finishes. Found bare asyncio.create_task(...) call(s):\n "
+ "\n ".join(f"chat_helpers.py:{ln}: {snip}" for ln, snip in offenders)
)