mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 07:35:27 -04:00
fix(mcp): retain builtin startup tasks and reap npx probe
Keep strong references to builtin MCP startup tasks until completion and kill/reap the npx probe subprocess when cancellation interrupts the probe. Includes focused regression coverage for both lifecycle paths.
This commit is contained in:
+26
-2
@@ -89,6 +89,21 @@ _BUILTIN_NPX_SERVERS = {
|
|||||||
MCP_DISABLED = os.environ.get("ODYSSEUS_DISABLE_MCP", "").lower() in ("1", "true", "yes")
|
MCP_DISABLED = os.environ.get("ODYSSEUS_DISABLE_MCP", "").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
|
|
||||||
|
# Strong references to the fire-and-forget startup tasks scheduled below.
|
||||||
|
# asyncio only keeps weak references to tasks created via create_task, so
|
||||||
|
# without this the GC can collect a task mid-execution and the server
|
||||||
|
# registration silently never runs. Mirrors _spawn_bg in routes/chat_helpers.py.
|
||||||
|
_BG_TASKS: set[asyncio.Task] = set()
|
||||||
|
|
||||||
|
|
||||||
|
def _spawn_bg(coro) -> asyncio.Task:
|
||||||
|
"""Schedule a background task and hold a strong reference until it finishes."""
|
||||||
|
task = asyncio.create_task(coro)
|
||||||
|
_BG_TASKS.add(task)
|
||||||
|
task.add_done_callback(_BG_TASKS.discard)
|
||||||
|
return task
|
||||||
|
|
||||||
|
|
||||||
async def register_builtin_servers(mcp_manager):
|
async def register_builtin_servers(mcp_manager):
|
||||||
"""Connect all built-in MCP servers to the manager."""
|
"""Connect all built-in MCP servers to the manager."""
|
||||||
if MCP_DISABLED:
|
if MCP_DISABLED:
|
||||||
@@ -123,7 +138,7 @@ async def register_builtin_servers(mcp_manager):
|
|||||||
if not os.path.exists(script_path):
|
if not os.path.exists(script_path):
|
||||||
logger.warning(f"Built-in MCP server script not found: {script_path}")
|
logger.warning(f"Built-in MCP server script not found: {script_path}")
|
||||||
continue
|
continue
|
||||||
asyncio.create_task(_connect_python_server(server_id, script_path, name))
|
_spawn_bg(_connect_python_server(server_id, script_path, name))
|
||||||
|
|
||||||
# Register NPX-based servers in the background (they take longer to start)
|
# Register NPX-based servers in the background (they take longer to start)
|
||||||
npx_path = _find_npx()
|
npx_path = _find_npx()
|
||||||
@@ -175,7 +190,7 @@ async def register_builtin_servers(mcp_manager):
|
|||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
logger.warning(f"Built-in NPX server {cfg['name']} error: {type(e).__name__}: {e}")
|
logger.warning(f"Built-in NPX server {cfg['name']} error: {type(e).__name__}: {e}")
|
||||||
|
|
||||||
asyncio.create_task(_start_npx_servers())
|
_spawn_bg(_start_npx_servers())
|
||||||
|
|
||||||
|
|
||||||
def _npx_package_from_args(args):
|
def _npx_package_from_args(args):
|
||||||
@@ -233,6 +248,15 @@ async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return False
|
return False
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
# The probe was cancelled (e.g. app shutdown). Reap the child so it
|
||||||
|
# isn't orphaned, then propagate the cancellation.
|
||||||
|
try:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
return proc.returncode == 0 and bool(stdout.strip())
|
return proc.returncode == 0 and bool(stdout.strip())
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,108 @@
|
|||||||
|
"""Issue #4592 — built-in MCP startup must not leak tasks or subprocesses.
|
||||||
|
|
||||||
|
Two defects in src/builtin_mcp.py:
|
||||||
|
* `register_builtin_servers` scheduled its python/npx connect coroutines with
|
||||||
|
a bare `asyncio.create_task(...)` whose return value was dropped. asyncio
|
||||||
|
keeps only a weak reference to such tasks, so the GC can collect one
|
||||||
|
mid-flight and the server silently never registers.
|
||||||
|
* `_is_npx_package_cached` killed its `npx --version` probe subprocess on
|
||||||
|
`TimeoutError` but not on `CancelledError`, so a cancellation (e.g. app
|
||||||
|
shutdown) orphaned the child.
|
||||||
|
|
||||||
|
Both are exercised here with the module loaded in isolation (the same loader
|
||||||
|
the existing npx-cache tests use), so no real servers or npx are involved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import importlib.util
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
|
||||||
|
|
||||||
|
def _load_builtin_mcp(monkeypatch):
|
||||||
|
core = types.ModuleType("core")
|
||||||
|
core.__path__ = []
|
||||||
|
platform_compat = types.ModuleType("core.platform_compat")
|
||||||
|
platform_compat.IS_WINDOWS = False
|
||||||
|
platform_compat.which_tool = lambda name: None
|
||||||
|
monkeypatch.setitem(sys.modules, "core", core)
|
||||||
|
monkeypatch.setitem(sys.modules, "core.platform_compat", platform_compat)
|
||||||
|
|
||||||
|
spec = importlib.util.spec_from_file_location(
|
||||||
|
"builtin_mcp_under_test",
|
||||||
|
ROOT / "src" / "builtin_mcp.py",
|
||||||
|
)
|
||||||
|
module = importlib.util.module_from_spec(spec)
|
||||||
|
assert spec.loader is not None
|
||||||
|
spec.loader.exec_module(module)
|
||||||
|
return module
|
||||||
|
|
||||||
|
|
||||||
|
async def test_spawn_bg_holds_strong_ref_until_task_finishes(monkeypatch):
|
||||||
|
builtin_mcp = _load_builtin_mcp(monkeypatch)
|
||||||
|
|
||||||
|
started = asyncio.Event()
|
||||||
|
release = asyncio.Event()
|
||||||
|
|
||||||
|
async def work():
|
||||||
|
started.set()
|
||||||
|
await release.wait()
|
||||||
|
|
||||||
|
task = builtin_mcp._spawn_bg(work())
|
||||||
|
await started.wait()
|
||||||
|
# While the task is in flight it must be reachable from the module-level
|
||||||
|
# set — that strong reference is what keeps the GC from collecting it.
|
||||||
|
assert task in builtin_mcp._BG_TASKS
|
||||||
|
|
||||||
|
release.set()
|
||||||
|
await task
|
||||||
|
await asyncio.sleep(0) # let the done-callback run
|
||||||
|
# Once finished it is discarded so the set doesn't grow without bound.
|
||||||
|
assert task not in builtin_mcp._BG_TASKS
|
||||||
|
|
||||||
|
|
||||||
|
async def test_npx_probe_reaps_subprocess_on_cancel(monkeypatch):
|
||||||
|
builtin_mcp = _load_builtin_mcp(monkeypatch)
|
||||||
|
|
||||||
|
# Force the code past the fast cache hit so it spawns the probe subprocess.
|
||||||
|
monkeypatch.setattr(builtin_mcp, "_is_package_in_npx_cache", lambda spec: False)
|
||||||
|
|
||||||
|
state = {"killed": False, "waited": False}
|
||||||
|
started = asyncio.Event()
|
||||||
|
|
||||||
|
class FakeProc:
|
||||||
|
returncode = None
|
||||||
|
|
||||||
|
async def communicate(self):
|
||||||
|
started.set()
|
||||||
|
await asyncio.sleep(3600) # block until the probe is cancelled
|
||||||
|
|
||||||
|
def kill(self):
|
||||||
|
state["killed"] = True
|
||||||
|
|
||||||
|
async def wait(self):
|
||||||
|
state["waited"] = True
|
||||||
|
|
||||||
|
async def fake_create(*args, **kwargs):
|
||||||
|
return FakeProc()
|
||||||
|
|
||||||
|
monkeypatch.setattr(builtin_mcp.asyncio, "create_subprocess_exec", fake_create)
|
||||||
|
|
||||||
|
task = asyncio.create_task(
|
||||||
|
builtin_mcp._is_npx_package_cached("npx", "some-pkg@1.0.0", timeout_s=3600)
|
||||||
|
)
|
||||||
|
await started.wait()
|
||||||
|
task.cancel()
|
||||||
|
|
||||||
|
with pytest.raises(asyncio.CancelledError):
|
||||||
|
await task
|
||||||
|
|
||||||
|
# The child was killed and reaped rather than orphaned.
|
||||||
|
assert state["killed"] is True
|
||||||
|
assert state["waited"] is True
|
||||||
Reference in New Issue
Block a user