Mirror of https://github.com/pewdiepie-archdaemon/odysseus.git
(synced 2026-06-17 10:15:27 -04:00)
fix(models): query v1 models for llama-server endpoints (#3380)
* fix(models): query v1 models for llama-server endpoints
* test(models): accept owner kwargs in llama-server regression
This commit is contained in:
@@ -857,7 +857,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
|||||||
and 400 <= result["status_code"] < 500
|
and 400 <= result["status_code"] < 500
|
||||||
and result["status_code"] not in (401, 403)
|
and result["status_code"] not in (401, 403)
|
||||||
):
|
):
|
||||||
models_url = base.rstrip("/") + "/models"
|
models_url = build_models_url(base)
|
||||||
try:
|
try:
|
||||||
r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
|
r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||||
result2 = _result_from_response(r2)
|
result2 = _result_from_response(r2)
|
||||||
|
|||||||
@@ -184,7 +184,7 @@ def build_chat_url(base: str) -> str:
|
|||||||
|
|
||||||
def build_models_url(base: str) -> Optional[str]:
|
def build_models_url(base: str) -> Optional[str]:
|
||||||
"""Return the provider-specific model-list endpoint URL for a base."""
|
"""Return the provider-specific model-list endpoint URL for a base."""
|
||||||
base = resolve_url(base)
|
base = normalize_base(resolve_url(base))
|
||||||
provider = _detect_provider(base)
|
provider = _detect_provider(base)
|
||||||
if provider == "anthropic":
|
if provider == "anthropic":
|
||||||
return _anthropic_api_root(base) + "/v1/models"
|
return _anthropic_api_root(base) + "/v1/models"
|
||||||
|
|||||||
+3
-1
@@ -1042,7 +1042,9 @@ def list_model_ids(
|
|||||||
if provider == "ollama":
|
if provider == "ollama":
|
||||||
models_url = _ollama_api_root(base_chat_url) + "/tags"
|
models_url = _ollama_api_root(base_chat_url) + "/tags"
|
||||||
else:
|
else:
|
||||||
models_url = base_chat_url.replace("/chat/completions", "/models")
|
from src.endpoint_resolver import build_models_url
|
||||||
|
|
||||||
|
models_url = build_models_url(base_chat_url)
|
||||||
r = httpx.get(models_url, headers=h, timeout=timeout)
|
r = httpx.get(models_url, headers=h, timeout=timeout)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
data = r.json()
|
data = r.json()
|
||||||
|
|||||||
@@ -297,7 +297,9 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
|||||||
logger.info(f"Using known context window for {model}: {known}")
|
logger.info(f"Using known context window for {model}: {known}")
|
||||||
return known or DEFAULT_CONTEXT
|
return known or DEFAULT_CONTEXT
|
||||||
|
|
||||||
models_url = endpoint_url.replace("/chat/completions", "/models")
|
from src.endpoint_resolver import build_models_url
|
||||||
|
|
||||||
|
models_url = build_models_url(endpoint_url)
|
||||||
try:
|
try:
|
||||||
r = httpx.get(models_url, timeout=REQUEST_TIMEOUT)
|
r = httpx.get(models_url, timeout=REQUEST_TIMEOUT)
|
||||||
if r.is_success:
|
if r.is_success:
|
||||||
|
|||||||
@@ -0,0 +1,58 @@
|
|||||||
|
"""Regression coverage for llama-server style /v1 model-list endpoints (#3330)."""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from src import endpoint_resolver, llm_core, model_context
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_models_url_accepts_v1_base_and_chat_url(monkeypatch):
|
||||||
|
monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
endpoint_resolver.build_models_url("http://127.0.0.1:8080/v1")
|
||||||
|
== "http://127.0.0.1:8080/v1/models"
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
endpoint_resolver.build_models_url("http://127.0.0.1:8080/v1/chat/completions")
|
||||||
|
== "http://127.0.0.1:8080/v1/models"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_core_list_model_ids_queries_models_for_v1_base(monkeypatch):
|
||||||
|
monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
|
||||||
|
monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
|
||||||
|
seen = []
|
||||||
|
|
||||||
|
def fake_get(url, headers=None, timeout=None):
|
||||||
|
seen.append(url)
|
||||||
|
request = httpx.Request("GET", url)
|
||||||
|
return httpx.Response(200, json={"data": [{"id": "qwen3"}]}, request=request)
|
||||||
|
|
||||||
|
monkeypatch.setattr(llm_core.httpx, "get", fake_get)
|
||||||
|
|
||||||
|
assert llm_core.list_model_ids("http://127.0.0.1:8080/v1", timeout=1) == ["qwen3"]
|
||||||
|
assert seen == ["http://127.0.0.1:8080/v1/models"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_model_context_queries_models_for_v1_base(monkeypatch):
|
||||||
|
monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
|
||||||
|
seen = []
|
||||||
|
|
||||||
|
def fake_get(url, timeout=None):
|
||||||
|
seen.append(url)
|
||||||
|
request = httpx.Request("GET", url)
|
||||||
|
if url.endswith("/slots"):
|
||||||
|
return httpx.Response(404, request=request)
|
||||||
|
return httpx.Response(
|
||||||
|
200,
|
||||||
|
json={"data": [{"id": "qwen3", "context_length": 32768}]},
|
||||||
|
request=request,
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setattr(model_context.httpx, "get", fake_get)
|
||||||
|
|
||||||
|
assert model_context._query_context_length("http://127.0.0.1:8080/v1", "qwen3") == 32768
|
||||||
|
assert seen == [
|
||||||
|
"http://127.0.0.1:8080/slots",
|
||||||
|
"http://127.0.0.1:8080/v1/models",
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user