mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
218b9ecbc8
_warmup_endpoints called model_discovery.get_endpoints(), which does not exist on ModelDiscovery. It raised AttributeError on every startup and on every 60s keepalive tick, was swallowed by the outer except, and pinged nothing, so the cold-start prevention the loop exists for never ran. Add ModelDiscovery.warmup_ping_urls(), which resolves the /models probe URLs from the real discover_models() output, and call it from the warmup loop via asyncio.to_thread (discovery does a blocking port scan, so keep it off the event loop). Adds tests/test_warmup_ping_urls.py: resolves /models URLs from discovered items, honors the limit, degrades to [] on discovery failure, and documents that get_endpoints never existed.
48 lines
1.4 KiB
Python
48 lines
1.4 KiB
Python
"""Startup warmup must resolve real endpoint URLs.
|
|
|
|
The warmup/keepalive loop called `model_discovery.get_endpoints()`, which does
not exist on ModelDiscovery, so it raised AttributeError every run and pinged
nothing. `ModelDiscovery.warmup_ping_urls()` resolves the /models probe URLs
from the real discovery API.
|
|
"""
|
|
from src.model_discovery import ModelDiscovery
|
|
|
|
|
|
def _md():
    """Return a bare ModelDiscovery instance created without calling ``__init__``.

    Tests attach their own ``discover_models`` stub to the returned object.
    """
    cls = ModelDiscovery
    return cls.__new__(cls)
|
|
|
|
|
|
def test_old_method_never_existed():
    """ModelDiscovery exposes no ``get_endpoints`` attribute.

    Documents why the old warmup was a silent no-op.
    """
    has_legacy_method = hasattr(ModelDiscovery, "get_endpoints")
    assert not has_legacy_method
|
|
|
|
|
|
def test_resolves_models_urls_from_discovered_items():
    """Discovered chat-completions URLs yield the matching /models URLs, in order."""

    def fake_discovery():
        # Built on every call, so each invocation sees a fresh payload.
        return {"items": [
            {"url": "http://host:8000/v1/chat/completions", "models": ["a"]},
            {"url": "http://host:1234/v1/chat/completions", "models": ["b"]},
        ]}

    md = _md()
    md.discover_models = fake_discovery

    expected_urls = [
        "http://host:8000/v1/models",
        "http://host:1234/v1/models",
    ]
    assert md.warmup_ping_urls() == expected_urls
|
|
|
|
|
|
def test_limit_caps_results():
    """With ten discovered items, ``limit=3`` returns exactly three URLs."""

    def ten_endpoints():
        items = [
            {"url": f"http://h:{8000 + i}/v1/chat/completions"} for i in range(10)
        ]
        return {"items": items}

    md = _md()
    md.discover_models = ten_endpoints

    capped = md.warmup_ping_urls(limit=3)
    assert len(capped) == 3
|
|
|
|
|
|
def test_discovery_failure_degrades_to_empty():
    """When ``discover_models`` raises, ``warmup_ping_urls`` returns an empty list."""

    def failing_discovery():
        raise RuntimeError("port scan failed")

    md = _md()
    md.discover_models = failing_discovery

    resolved = md.warmup_ping_urls()
    assert resolved == []
|