mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-18 18:55:28 -04:00
Two background tasks scheduled on every chat completion in routes/chat_helpers.py — the memory/skill extraction dispatch and the session auto-namer — are created via bare asyncio.create_task(...). asyncio only holds a weak reference to the outer task, so the GC can collect it mid-execution and the work silently never runs. Add a module-private _BG_TASKS set and a _spawn_bg() helper that mirrors WebhookManager._spawn_tracked (the pattern #3964 / #4336 established for the webhook emitters two lines apart in the same function). Route both call sites through it so the lifecycle owner is explicit. Adds an AST-level guard test that fails on any bare asyncio.create_task(...) statement in routes/chat_helpers.py to prevent a regression — same shape as test_webhook_emitters_use_manager.py from #4336. The same bare pattern exists in routes/email_routes.py and routes/cookbook_routes.py; left out of this PR per CONTRIBUTING.md's "one fix per PR" and tracked in #4443's "Additional Information" for a follow-up.
This commit is contained in:
+18
-2
@@ -23,6 +23,22 @@ from fastapi import HTTPException
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Strong references to in-flight fire-and-forget tasks scheduled from this
|
||||
# module. asyncio only keeps weak references to tasks created via
|
||||
# create_task, so without this the GC can collect a task mid-execution and
|
||||
# the background work (extraction, auto-naming) silently never runs.
|
||||
# Mirrors WebhookManager._spawn_tracked from src/webhook_manager.py.
|
||||
_BG_TASKS: set[asyncio.Task] = set()
|
||||
|
||||
|
||||
def _spawn_bg(coro) -> asyncio.Task:
|
||||
"""Schedule a background task and hold a strong reference until it finishes."""
|
||||
task = asyncio.create_task(coro)
|
||||
_BG_TASKS.add(task)
|
||||
task.add_done_callback(_BG_TASKS.discard)
|
||||
return task
|
||||
|
||||
|
||||
# ── Data containers ────────────────────────────────────────────────────── #
|
||||
|
||||
@dataclass
|
||||
@@ -1105,7 +1121,7 @@ def run_post_response_tasks(
|
||||
)))
|
||||
|
||||
if _extraction_jobs:
|
||||
asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
|
||||
_spawn_bg(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
|
||||
|
||||
# Token accumulation
|
||||
if last_metrics:
|
||||
@@ -1120,4 +1136,4 @@ def run_post_response_tasks(
|
||||
|
||||
# Auto-name
|
||||
if needs_auto_name(sess.name):
|
||||
asyncio.create_task(auto_name_session(session_manager, sess))
|
||||
_spawn_bg(auto_name_session(session_manager, sess))
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
"""Guard: chat hot-path background tasks must go through _spawn_bg.
|
||||
|
||||
asyncio only holds a weak reference to a bare create_task() result, so the
|
||||
GC can collect the outer task before its body runs and the background work
|
||||
(memory/skill extraction, session auto-naming) silently never happens.
|
||||
routes/chat_helpers.py owns these schedules via _spawn_bg(), which adds the
|
||||
task to _BG_TASKS and discards it via a done-callback. This guard catches a
|
||||
regression where a copy-paste re-introduces a bare asyncio.create_task.
|
||||
|
||||
This is the routes/chat_helpers.py-scoped sibling of the webhook-emitter
|
||||
guard added in #4336 (tests/test_webhook_emitters_use_manager.py).
|
||||
"""
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
CHAT_HELPERS = (
|
||||
Path(__file__).resolve().parent.parent / "routes" / "chat_helpers.py"
|
||||
)
|
||||
|
||||
|
||||
def _untracked_create_task_calls(tree: ast.AST) -> list[tuple[int, str]]:
|
||||
"""(lineno, snippet) for any bare asyncio.create_task(...).
|
||||
|
||||
A call is "bare" when its return value is dropped — i.e. it is the direct
|
||||
expression of an ast.Expr statement. Captured forms (`x = asyncio.create_task(...)`,
|
||||
`[asyncio.create_task(...), ...]`, `await asyncio.create_task(...)`) are fine
|
||||
because something else holds the reference.
|
||||
|
||||
The helper itself (_spawn_bg) is exempt: it calls asyncio.create_task once
|
||||
and registers the task in _BG_TASKS before returning.
|
||||
"""
|
||||
hits: list[tuple[int, str]] = []
|
||||
|
||||
def _is_create_task(call: ast.Call) -> bool:
|
||||
f = call.func
|
||||
return (
|
||||
isinstance(f, ast.Attribute)
|
||||
and f.attr == "create_task"
|
||||
and isinstance(f.value, ast.Name)
|
||||
and f.value.id == "asyncio"
|
||||
)
|
||||
|
||||
spawn_helper_lines: set[int] = set()
|
||||
for node in ast.walk(tree):
|
||||
if isinstance(node, ast.FunctionDef) and node.name == "_spawn_bg":
|
||||
for n in ast.walk(node):
|
||||
if hasattr(n, "lineno"):
|
||||
spawn_helper_lines.add(n.lineno)
|
||||
|
||||
for node in ast.walk(tree):
|
||||
if not isinstance(node, ast.Expr):
|
||||
continue
|
||||
if not isinstance(node.value, ast.Call):
|
||||
continue
|
||||
if not _is_create_task(node.value):
|
||||
continue
|
||||
if node.lineno in spawn_helper_lines:
|
||||
continue
|
||||
hits.append((node.lineno, ast.unparse(node.value)))
|
||||
return hits
|
||||
|
||||
|
||||
def test_no_untracked_create_task_in_chat_helpers():
|
||||
tree = ast.parse(CHAT_HELPERS.read_text(), filename=str(CHAT_HELPERS))
|
||||
offenders = _untracked_create_task_calls(tree)
|
||||
assert not offenders, (
|
||||
"Background tasks scheduled from routes/chat_helpers.py must go through "
|
||||
"_spawn_bg(coro) so the task is registered in _BG_TASKS and survives until "
|
||||
"it finishes. Found bare asyncio.create_task(...) call(s):\n "
|
||||
+ "\n ".join(f"chat_helpers.py:{ln}: {snip}" for ln, snip in offenders)
|
||||
)
|
||||
Reference in New Issue
Block a user