mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(startup): ping real endpoints in warmup/keepalive (#3641)
_warmup_endpoints called model_discovery.get_endpoints(), which does not exist on ModelDiscovery. It raised AttributeError on every startup and on every 60s keepalive tick, was swallowed by the outer except, and pinged nothing, so the cold-start prevention the loop exists for never ran. Add ModelDiscovery.warmup_ping_urls(), which resolves the /models probe URLs from the real discover_models() output, and call it from the warmup loop via asyncio.to_thread (discovery does a blocking port scan, so keep it off the event loop). Adds tests/test_warmup_ping_urls.py: resolves /models URLs from discovered items, honors the limit, degrades to [] on discovery failure, and documents that get_endpoints never existed.
This commit is contained in:
committed by
GitHub
parent
d9a4b99046
commit
218b9ecbc8
@@ -946,16 +946,21 @@ async def _startup_event():
|
||||
async def _warmup_endpoints():
|
||||
try:
|
||||
import httpx
|
||||
endpoints = model_discovery.get_endpoints() if model_discovery else []
|
||||
for ep in endpoints[:5]:
|
||||
url = ep.get("url", "").replace("/chat/completions", "/models")
|
||||
if url:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
await client.get(url)
|
||||
logger.info(f"Warmup ping OK: {url}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Warmup ping failed for endpoint: {e}")
|
||||
# model_discovery has no get_endpoints(); that call raised
|
||||
# AttributeError every run and silently disabled warmup/keepalive.
|
||||
# Resolve the /models probe URLs via the real discovery API, off the
|
||||
# event loop since discovery does a blocking port scan.
|
||||
urls = (
|
||||
await asyncio.to_thread(model_discovery.warmup_ping_urls)
|
||||
if model_discovery else []
|
||||
)
|
||||
for url in urls:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
await client.get(url)
|
||||
logger.info(f"Warmup ping OK: {url}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Warmup ping failed for endpoint: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Warmup ping skipped: {e}")
|
||||
|
||||
|
||||
@@ -223,6 +223,25 @@ class ModelDiscovery:
|
||||
)
|
||||
return {"hosts": hosts, "items": items}
|
||||
|
||||
def warmup_ping_urls(self, limit: int = 5) -> List[str]:
|
||||
"""The ``/models`` URLs of up to ``limit`` discovered endpoints.
|
||||
|
||||
Used by the startup warmup / keepalive loop to prime connections. Each
|
||||
discovered item already carries a ``/v1/chat/completions`` url; swap the
|
||||
suffix for the cheap ``/models`` probe. Failures degrade to an empty list
|
||||
so warmup never crashes the caller.
|
||||
"""
|
||||
try:
|
||||
items = (self.discover_models() or {}).get("items", [])
|
||||
except Exception:
|
||||
return []
|
||||
urls: List[str] = []
|
||||
for ep in items[:limit]:
|
||||
url = (ep.get("url") or "").replace("/chat/completions", "/models")
|
||||
if url:
|
||||
urls.append(url)
|
||||
return urls
|
||||
|
||||
def get_providers(self) -> Dict[str, Any]:
|
||||
"""Get all available providers"""
|
||||
discovery = self.discover_models()
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
"""Startup warmup must resolve real endpoint URLs.
|
||||
|
||||
The warmup/keepalive loop called `model_discovery.get_endpoints()`, which does
|
||||
not exist on ModelDiscovery, so it raised AttributeError every run and pinged
|
||||
nothing. `ModelDiscovery.warmup_ping_urls()` resolves the /models probe URLs
|
||||
from the real discovery API.
|
||||
"""
|
||||
from src.model_discovery import ModelDiscovery
|
||||
|
||||
|
||||
def _md():
|
||||
return ModelDiscovery.__new__(ModelDiscovery)
|
||||
|
||||
|
||||
def test_old_method_never_existed():
|
||||
# Documents why the old warmup was a silent no-op.
|
||||
assert not hasattr(ModelDiscovery, "get_endpoints")
|
||||
|
||||
|
||||
def test_resolves_models_urls_from_discovered_items():
|
||||
md = _md()
|
||||
md.discover_models = lambda: {"items": [
|
||||
{"url": "http://host:8000/v1/chat/completions", "models": ["a"]},
|
||||
{"url": "http://host:1234/v1/chat/completions", "models": ["b"]},
|
||||
]}
|
||||
assert md.warmup_ping_urls() == [
|
||||
"http://host:8000/v1/models",
|
||||
"http://host:1234/v1/models",
|
||||
]
|
||||
|
||||
|
||||
def test_limit_caps_results():
|
||||
md = _md()
|
||||
md.discover_models = lambda: {"items": [
|
||||
{"url": f"http://h:{8000 + i}/v1/chat/completions"} for i in range(10)
|
||||
]}
|
||||
assert len(md.warmup_ping_urls(limit=3)) == 3
|
||||
|
||||
|
||||
def test_discovery_failure_degrades_to_empty():
|
||||
md = _md()
|
||||
|
||||
def boom():
|
||||
raise RuntimeError("port scan failed")
|
||||
|
||||
md.discover_models = boom
|
||||
assert md.warmup_ping_urls() == []
|
||||
Reference in New Issue
Block a user