mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 07:35:27 -04:00
fix(mcp): retain builtin startup tasks and reap npx probe
Keep strong references to builtin MCP startup tasks until completion and kill/reap the npx probe subprocess when cancellation interrupts the probe. Includes focused regression coverage for both lifecycle paths.
This commit is contained in:
+26
-2
@@ -89,6 +89,21 @@ _BUILTIN_NPX_SERVERS = {
|
||||
MCP_DISABLED = os.environ.get("ODYSSEUS_DISABLE_MCP", "").lower() in ("1", "true", "yes")
|
||||
|
||||
|
||||
# Strong references to the fire-and-forget startup tasks scheduled below.
|
||||
# asyncio only keeps weak references to tasks created via create_task, so
|
||||
# without this the GC can collect a task mid-execution and the server
|
||||
# registration silently never runs. Mirrors _spawn_bg in routes/chat_helpers.py.
|
||||
_BG_TASKS: set[asyncio.Task] = set()
|
||||
|
||||
|
||||
def _spawn_bg(coro) -> asyncio.Task:
|
||||
"""Schedule a background task and hold a strong reference until it finishes."""
|
||||
task = asyncio.create_task(coro)
|
||||
_BG_TASKS.add(task)
|
||||
task.add_done_callback(_BG_TASKS.discard)
|
||||
return task
|
||||
|
||||
|
||||
async def register_builtin_servers(mcp_manager):
|
||||
"""Connect all built-in MCP servers to the manager."""
|
||||
if MCP_DISABLED:
|
||||
@@ -123,7 +138,7 @@ async def register_builtin_servers(mcp_manager):
|
||||
if not os.path.exists(script_path):
|
||||
logger.warning(f"Built-in MCP server script not found: {script_path}")
|
||||
continue
|
||||
asyncio.create_task(_connect_python_server(server_id, script_path, name))
|
||||
_spawn_bg(_connect_python_server(server_id, script_path, name))
|
||||
|
||||
# Register NPX-based servers in the background (they take longer to start)
|
||||
npx_path = _find_npx()
|
||||
@@ -175,7 +190,7 @@ async def register_builtin_servers(mcp_manager):
|
||||
except BaseException as e:
|
||||
logger.warning(f"Built-in NPX server {cfg['name']} error: {type(e).__name__}: {e}")
|
||||
|
||||
asyncio.create_task(_start_npx_servers())
|
||||
_spawn_bg(_start_npx_servers())
|
||||
|
||||
|
||||
def _npx_package_from_args(args):
|
||||
@@ -233,6 +248,15 @@ async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
except asyncio.CancelledError:
|
||||
# The probe was cancelled (e.g. app shutdown). Reap the child so it
|
||||
# isn't orphaned, then propagate the cancellation.
|
||||
try:
|
||||
proc.kill()
|
||||
await proc.wait()
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
return proc.returncode == 0 and bool(stdout.strip())
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
"""Issue #4592 — built-in MCP startup must not leak tasks or subprocesses.
|
||||
|
||||
Two defects in src/builtin_mcp.py:
|
||||
* `register_builtin_servers` scheduled its python/npx connect coroutines with
|
||||
a bare `asyncio.create_task(...)` whose return value was dropped. asyncio
|
||||
keeps only a weak reference to such tasks, so the GC can collect one
|
||||
mid-flight and the server silently never registers.
|
||||
* `_is_npx_package_cached` killed its `npx --version` probe subprocess on
|
||||
`TimeoutError` but not on `CancelledError`, so a cancellation (e.g. app
|
||||
shutdown) orphaned the child.
|
||||
|
||||
Both are exercised here with the module loaded in isolation (the same loader
|
||||
the existing npx-cache tests use), so no real servers or npx are involved.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def _load_builtin_mcp(monkeypatch):
|
||||
core = types.ModuleType("core")
|
||||
core.__path__ = []
|
||||
platform_compat = types.ModuleType("core.platform_compat")
|
||||
platform_compat.IS_WINDOWS = False
|
||||
platform_compat.which_tool = lambda name: None
|
||||
monkeypatch.setitem(sys.modules, "core", core)
|
||||
monkeypatch.setitem(sys.modules, "core.platform_compat", platform_compat)
|
||||
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"builtin_mcp_under_test",
|
||||
ROOT / "src" / "builtin_mcp.py",
|
||||
)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader is not None
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
async def test_spawn_bg_holds_strong_ref_until_task_finishes(monkeypatch):
|
||||
builtin_mcp = _load_builtin_mcp(monkeypatch)
|
||||
|
||||
started = asyncio.Event()
|
||||
release = asyncio.Event()
|
||||
|
||||
async def work():
|
||||
started.set()
|
||||
await release.wait()
|
||||
|
||||
task = builtin_mcp._spawn_bg(work())
|
||||
await started.wait()
|
||||
# While the task is in flight it must be reachable from the module-level
|
||||
# set — that strong reference is what keeps the GC from collecting it.
|
||||
assert task in builtin_mcp._BG_TASKS
|
||||
|
||||
release.set()
|
||||
await task
|
||||
await asyncio.sleep(0) # let the done-callback run
|
||||
# Once finished it is discarded so the set doesn't grow without bound.
|
||||
assert task not in builtin_mcp._BG_TASKS
|
||||
|
||||
|
||||
async def test_npx_probe_reaps_subprocess_on_cancel(monkeypatch):
|
||||
builtin_mcp = _load_builtin_mcp(monkeypatch)
|
||||
|
||||
# Force the code past the fast cache hit so it spawns the probe subprocess.
|
||||
monkeypatch.setattr(builtin_mcp, "_is_package_in_npx_cache", lambda spec: False)
|
||||
|
||||
state = {"killed": False, "waited": False}
|
||||
started = asyncio.Event()
|
||||
|
||||
class FakeProc:
|
||||
returncode = None
|
||||
|
||||
async def communicate(self):
|
||||
started.set()
|
||||
await asyncio.sleep(3600) # block until the probe is cancelled
|
||||
|
||||
def kill(self):
|
||||
state["killed"] = True
|
||||
|
||||
async def wait(self):
|
||||
state["waited"] = True
|
||||
|
||||
async def fake_create(*args, **kwargs):
|
||||
return FakeProc()
|
||||
|
||||
monkeypatch.setattr(builtin_mcp.asyncio, "create_subprocess_exec", fake_create)
|
||||
|
||||
task = asyncio.create_task(
|
||||
builtin_mcp._is_npx_package_cached("npx", "some-pkg@1.0.0", timeout_s=3600)
|
||||
)
|
||||
await started.wait()
|
||||
task.cancel()
|
||||
|
||||
with pytest.raises(asyncio.CancelledError):
|
||||
await task
|
||||
|
||||
# The child was killed and reaped rather than orphaned.
|
||||
assert state["killed"] is True
|
||||
assert state["waited"] is True
|
||||
Reference in New Issue
Block a user