From 218b9ecbc8117990f612569e236eb0376ca7756d Mon Sep 17 00:00:00 2001 From: Mazen Tamer Salah <78306991+mazen-salah@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:21:45 +0300 Subject: [PATCH] fix(startup): ping real endpoints in warmup/keepalive (#3641) _warmup_endpoints called model_discovery.get_endpoints(), which does not exist on ModelDiscovery. It raised AttributeError on every startup and on every 60s keepalive tick, was swallowed by the outer except, and pinged nothing, so the cold-start prevention the loop exists for never ran. Add ModelDiscovery.warmup_ping_urls(), which resolves the /models probe URLs from the real discover_models() output, and call it from the warmup loop via asyncio.to_thread (discovery does a blocking port scan, so keep it off the event loop). Adds tests/test_warmup_ping_urls.py: resolves /models URLs from discovered items, honors the limit, degrades to [] on discovery failure, and documents that get_endpoints never existed. --- app.py | 25 ++++++++++-------- src/model_discovery.py | 19 ++++++++++++++ tests/test_warmup_ping_urls.py | 47 ++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 tests/test_warmup_ping_urls.py diff --git a/app.py b/app.py index 7cec8b0f1..2e1677ca2 100644 --- a/app.py +++ b/app.py @@ -946,16 +946,21 @@ async def _startup_event(): async def _warmup_endpoints(): try: import httpx - endpoints = model_discovery.get_endpoints() if model_discovery else [] - for ep in endpoints[:5]: - url = ep.get("url", "").replace("/chat/completions", "/models") - if url: - try: - async with httpx.AsyncClient(timeout=5.0) as client: - await client.get(url) - logger.info(f"Warmup ping OK: {url}") - except Exception as e: - logger.debug(f"Warmup ping failed for endpoint: {e}") + # model_discovery has no get_endpoints(); that call raised + # AttributeError every run and silently disabled warmup/keepalive. + # Resolve the /models probe URLs via the real discovery API, off the + # event loop since discovery does a blocking port scan. + urls = ( + await asyncio.to_thread(model_discovery.warmup_ping_urls) + if model_discovery else [] + ) + for url in urls: + try: + async with httpx.AsyncClient(timeout=5.0) as client: + await client.get(url) + logger.info(f"Warmup ping OK: {url}") + except Exception as e: + logger.debug(f"Warmup ping failed for endpoint: {e}") except Exception as e: logger.debug(f"Warmup ping skipped: {e}") diff --git a/src/model_discovery.py b/src/model_discovery.py index 68b402d25..506fcb6c4 100644 --- a/src/model_discovery.py +++ b/src/model_discovery.py @@ -223,6 +223,25 @@ class ModelDiscovery: ) return {"hosts": hosts, "items": items} + def warmup_ping_urls(self, limit: int = 5) -> List[str]: + """The ``/models`` URLs of up to ``limit`` discovered endpoints. + + Used by the startup warmup / keepalive loop to prime connections. Each + discovered item already carries a ``/v1/chat/completions`` url; swap the + suffix for the cheap ``/models`` probe. Failures degrade to an empty list + so warmup never crashes the caller. + """ + try: + items = (self.discover_models() or {}).get("items", []) + except Exception: + return [] + urls: List[str] = [] + for ep in items[:limit]: + url = (ep.get("url") or "").replace("/chat/completions", "/models") + if url: + urls.append(url) + return urls + def get_providers(self) -> Dict[str, Any]: """Get all available providers""" discovery = self.discover_models() diff --git a/tests/test_warmup_ping_urls.py b/tests/test_warmup_ping_urls.py new file mode 100644 index 000000000..7b5961831 --- /dev/null +++ b/tests/test_warmup_ping_urls.py @@ -0,0 +1,47 @@ +"""Startup warmup must resolve real endpoint URLs. + +The warmup/keepalive loop called `model_discovery.get_endpoints()`, which does +not exist on ModelDiscovery, so it raised AttributeError every run and pinged +nothing. `ModelDiscovery.warmup_ping_urls()` resolves the /models probe URLs +from the real discovery API. +""" +from src.model_discovery import ModelDiscovery + + +def _md(): + return ModelDiscovery.__new__(ModelDiscovery) + + +def test_old_method_never_existed(): + # Documents why the old warmup was a silent no-op. + assert not hasattr(ModelDiscovery, "get_endpoints") + + +def test_resolves_models_urls_from_discovered_items(): + md = _md() + md.discover_models = lambda: {"items": [ + {"url": "http://host:8000/v1/chat/completions", "models": ["a"]}, + {"url": "http://host:1234/v1/chat/completions", "models": ["b"]}, + ]} + assert md.warmup_ping_urls() == [ + "http://host:8000/v1/models", + "http://host:1234/v1/models", + ] + + +def test_limit_caps_results(): + md = _md() + md.discover_models = lambda: {"items": [ + {"url": f"http://h:{8000 + i}/v1/chat/completions"} for i in range(10) + ]} + assert len(md.warmup_ping_urls(limit=3)) == 3 + + +def test_discovery_failure_degrades_to_empty(): + md = _md() + + def boom(): + raise RuntimeError("port scan failed") + + md.discover_models = boom + assert md.warmup_ping_urls() == []