mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(models): probe /v1/models for path-less LM Studio endpoints
Probe /v1/models for path-less OpenAI-compatible model endpoints and surface clearer LM Studio diagnostics with the actual probed URL.
This commit is contained in:
+40
-3
@@ -870,15 +870,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
|||||||
|
|
||||||
|
|
||||||
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
|
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
|
||||||
"""Return a provider-aware error message for failed endpoint probes."""
|
"""Return a provider-aware error message for failed endpoint probes.
|
||||||
|
|
||||||
|
Surfaces the URL we actually probed and, when the endpoint looks like
|
||||||
|
LM Studio (port 1234 or hostname match), adds a hint about loading a
|
||||||
|
model and confirming the Developer Server is running. The user previously
|
||||||
|
saw a generic "No models found for that provider/key" with no way to
|
||||||
|
tell whether the URL was wrong, the server was down, or the server was
|
||||||
|
reachable but had no model loaded (issue #25).
|
||||||
|
"""
|
||||||
ping = ping or {}
|
ping = ping or {}
|
||||||
error = ping.get("error")
|
error = ping.get("error")
|
||||||
|
from src.endpoint_resolver import build_models_url
|
||||||
|
try:
|
||||||
|
probed = build_models_url(base_url) or base_url
|
||||||
|
except Exception:
|
||||||
|
probed = base_url
|
||||||
parsed = urlparse(base_url)
|
parsed = urlparse(base_url)
|
||||||
host = (parsed.hostname or "").lower()
|
host = (parsed.hostname or "").lower()
|
||||||
is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
|
is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
|
||||||
|
is_lmstudio = (
|
||||||
|
parsed.port == 1234
|
||||||
|
or "lmstudio" in host
|
||||||
|
or "lm-studio" in host
|
||||||
|
or "lm_studio" in host
|
||||||
|
)
|
||||||
|
|
||||||
|
if is_lmstudio:
|
||||||
|
parts = [
|
||||||
|
"LM Studio is reachable, but no models were reported.",
|
||||||
|
f"Probed {probed}.",
|
||||||
|
]
|
||||||
|
if error:
|
||||||
|
parts.append(f"Last probe error: {error}.")
|
||||||
|
parts.append(
|
||||||
|
"Open LM Studio, load at least one model, and confirm the "
|
||||||
|
"Developer Server is running on port 1234."
|
||||||
|
)
|
||||||
|
parts.append(
|
||||||
|
"Base URL should be http://localhost:1234/v1 (native) or "
|
||||||
|
"http://host.docker.internal:1234/v1 (Docker)."
|
||||||
|
)
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
if is_ollama:
|
if is_ollama:
|
||||||
parts = ["No Ollama models found for that endpoint."]
|
parts = ["No Ollama models found for that endpoint."]
|
||||||
|
parts.append(f"Probed {probed}.")
|
||||||
if error:
|
if error:
|
||||||
parts.append(f"Last probe error: {error}.")
|
parts.append(f"Last probe error: {error}.")
|
||||||
parts.append("Check that Ollama is running and that the base URL is correct.")
|
parts.append("Check that Ollama is running and that the base URL is correct.")
|
||||||
@@ -888,9 +925,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
|
|||||||
return " ".join(parts)
|
return " ".join(parts)
|
||||||
|
|
||||||
if error:
|
if error:
|
||||||
return f"No models found for that provider/key. Last probe error: {error}."
|
return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."
|
||||||
|
|
||||||
return "No models found for that provider/key."
|
return f"No models found for that provider/key. Probed {probed}."
|
||||||
|
|
||||||
|
|
||||||
def _normalize_model_ids(value):
|
def _normalize_model_ids(value):
|
||||||
|
|||||||
@@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def build_models_url(base: str) -> Optional[str]:
|
def build_models_url(base: str) -> Optional[str]:
|
||||||
"""Return the provider-specific model-list endpoint URL for a base."""
|
"""Return the provider-specific model-list endpoint URL for a base.
|
||||||
|
|
||||||
|
For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM,
|
||||||
|
text-generation-webui, etc.) the model list is exposed at ``/v1/models``.
|
||||||
|
When the user-supplied base has no path — e.g. ``http://localhost:1234`` —
|
||||||
|
we still need to land on ``/v1/models`` (issue #25); insert the ``/v1``
|
||||||
|
segment only when the path is empty, leaving any explicit non-empty path
|
||||||
|
untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
|
||||||
|
their semantics).
|
||||||
|
"""
|
||||||
base = normalize_base(resolve_url(base))
|
base = normalize_base(resolve_url(base))
|
||||||
provider = _detect_provider(base)
|
provider = _detect_provider(base)
|
||||||
if provider == "anthropic":
|
if provider == "anthropic":
|
||||||
@@ -192,6 +201,12 @@ def build_models_url(base: str) -> Optional[str]:
|
|||||||
return _ollama_api_root(base) + "/tags"
|
return _ollama_api_root(base) + "/tags"
|
||||||
if provider == "chatgpt-subscription":
|
if provider == "chatgpt-subscription":
|
||||||
return None
|
return None
|
||||||
|
# Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
|
||||||
|
# when the user omitted a path entirely. If a non-empty path is already
|
||||||
|
# present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
|
||||||
|
# /models suffix is appended as-is and the caller's prefix is preserved.
|
||||||
|
if not urlparse(base).path:
|
||||||
|
base = base + "/v1"
|
||||||
return base + "/models"
|
return base + "/models"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,160 @@
|
|||||||
|
"""Regression coverage for LM Studio /v1 model-list endpoints (issue #25).
|
||||||
|
|
||||||
|
LM Studio's OpenAI-compatible surface exposes its model list at
|
||||||
|
``/v1/models`` (just like llama-server, vLLM, text-generation-webui). Two
|
||||||
|
distinct failure modes were reported by users:
|
||||||
|
|
||||||
|
1. Pasting ``http://localhost:1234`` (no ``/v1``) — ``build_models_url``
|
||||||
|
used to return ``http://localhost:1234/models``, which LM Studio does
|
||||||
|
not expose, so the user got a generic "No models found" error even
|
||||||
|
though the server was running and reachable.
|
||||||
|
2. Pasting ``http://localhost:1234/v1`` (with ``/v1``) — the model list
|
||||||
|
fetch was correct, but the error message gave the user no way to tell
|
||||||
|
whether the URL was wrong, the server was down, or the server was
|
||||||
|
reachable but had no model loaded.
|
||||||
|
|
||||||
|
This module pins both behaviors so future refactors don't regress them.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from src import endpoint_resolver, llm_core
|
||||||
|
|
||||||
|
|
||||||
|
def _neutralize_provider_detection(monkeypatch):
    """Stub ``llm_core._is_ollama_native_url`` so it always returns ``False``.

    NOTE(review): that check is understood to match any localhost host with
    an empty path, which would send ``http://localhost:1234`` (LM Studio)
    down the Ollama branch and probe ``/api/tags`` instead of ``/v1/models``
    -- confirm against ``llm_core``. With the check disabled, the tests
    exercise the OpenAI-compatible path the user actually intends.
    """

    def _never_ollama(url):
        return False

    monkeypatch.setattr(llm_core, "_is_ollama_native_url", _never_ollama)
|
||||||
|
|
||||||
|
|
||||||
|
# ── build_models_url: handle LM Studio base shapes ────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_models_url_inserts_v1_for_bare_host_port(monkeypatch):
    """A bare ``http://localhost:1234`` base must resolve to ``/v1/models``."""
    # Replace resolve_url with an identity function so only the builder's own
    # path handling is under test.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://localhost:1234")

    assert models_url == "http://localhost:1234/v1/models"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_models_url_accepts_v1_base(monkeypatch):
    """A base already ending in ``/v1`` maps to ``/v1/models`` (no doubled ``/v1``)."""
    # Replace resolve_url with an identity function so only the builder's own
    # path handling is under test.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://localhost:1234/v1")

    assert models_url == "http://localhost:1234/v1/models"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_models_url_idempotent_for_explicit_models(monkeypatch):
    """A base that already ends in ``/v1/models`` must still probe ``/v1/models``.

    NOTE(review): this relies on ``normalize_base`` stripping the trailing
    ``/models`` before the builder re-appends it -- confirm in
    ``endpoint_resolver``.
    """
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://localhost:1234/v1/models")

    assert models_url == "http://localhost:1234/v1/models"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_models_url_strips_chat_completions(monkeypatch):
    """A ``/v1/chat/completions`` base must collapse to ``/v1/models`` (parity with #3330)."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url(
        "http://localhost:1234/v1/chat/completions"
    )

    assert models_url == "http://localhost:1234/v1/models"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_models_url_preserves_explicit_non_v1_path(monkeypatch):
    """An explicit non-empty path such as ``/openai`` must be kept as-is.

    ``/v1`` is only inserted when the base has no path at all; a custom
    prefix is treated as the caller's intent and only ``/models`` is appended.
    """
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://proxy.example.com/openai")

    assert models_url == "http://proxy.example.com/openai/models"
|
||||||
|
|
||||||
|
|
||||||
|
# ── list_model_ids: parse LM Studio's response ─────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_core_list_model_ids_queries_v1_models_for_lmstudio(monkeypatch):
    """Issue #25: listing models for ``http://localhost:1234/v1`` must GET ``/v1/models``."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    # Stub the cache helper to return an empty list (presumably so the HTTP
    # probe is what supplies the IDs -- confirm against llm_core).
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    requested_urls = []
    payload = {
        "object": "list",
        "data": [
            {"id": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF"},
            {"id": "qwen2.5-7b-instruct"},
        ],
    }

    def fake_get(url, headers=None, timeout=None):
        # Record every URL the code under test requests, then answer with a
        # canned OpenAI-style model list.
        requested_urls.append(url)
        return httpx.Response(200, json=payload, request=httpx.Request("GET", url))

    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    model_ids = llm_core.list_model_ids("http://localhost:1234/v1", timeout=1)

    assert model_ids == [
        "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
        "qwen2.5-7b-instruct",
    ]
    assert requested_urls == ["http://localhost:1234/v1/models"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_core_list_model_ids_queries_v1_models_for_bare_lmstudio(monkeypatch):
    """Issue #25: listing models for ``http://localhost:1234`` (no /v1) must GET ``/v1/models``."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    # NOTE(review): localhost with an empty path is understood to be
    # classified as Ollama by llm_core._is_ollama_native_url; disable that
    # check so the LM Studio path is the one exercised.
    monkeypatch.setattr(llm_core, "_is_ollama_native_url", lambda url: False)
    requested_urls = []

    def fake_get(url, headers=None, timeout=None):
        # Record the requested URL and reply with a single-model list.
        requested_urls.append(url)
        return httpx.Response(
            200,
            json={"data": [{"id": "model-a"}]},
            request=httpx.Request("GET", url),
        )

    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    model_ids = llm_core.list_model_ids("http://localhost:1234", timeout=1)

    assert model_ids == ["model-a"]
    assert requested_urls == ["http://localhost:1234/v1/models"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_core_list_model_ids_handles_empty_lmstudio_list(monkeypatch):
    """An empty ``{"object":"list","data":[]}`` reply must yield ``[]``.

    LM Studio answers this way when no model is loaded. Returning a clean
    empty list lets the caller surface a clear error (issue #25: the empty
    case used to be indistinguishable from a connection failure).
    """
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])

    def fake_get(url, headers=None, timeout=None):
        # Reply 200 with an empty model list, whatever URL is requested.
        empty_listing = {"object": "list", "data": []}
        return httpx.Response(200, json=empty_listing, request=httpx.Request("GET", url))

    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    model_ids = llm_core.list_model_ids("http://localhost:1234/v1", timeout=1)

    assert model_ids == []
|
||||||
@@ -625,7 +625,39 @@ def test_generic_endpoint_error_message_preserves_probe_error():
|
|||||||
{"error": "HTTP 401"},
|
{"error": "HTTP 401"},
|
||||||
)
|
)
|
||||||
|
|
||||||
assert msg == "No models found for that provider/key. Last probe error: HTTP 401."
|
# Issue #25: the message must include the probed URL so the user can
|
||||||
|
# self-diagnose (was opaque "No models found for that provider/key").
|
||||||
|
assert "No models found for that provider/key" in msg
|
||||||
|
assert "HTTP 401" in msg
|
||||||
|
assert "https://api.example.com/v1/models" in msg
|
||||||
|
|
||||||
|
|
||||||
|
def test_lmstudio_endpoint_error_message_includes_hint_and_probed_url():
    # Issue #25: for an LM Studio URL the message must carry a port-aware hint
    # and the URL that was actually probed, not just the bare base URL.
    base_url = "http://localhost:1234/v1"
    ping = {"error": "HTTP 200"}  # 200-with-empty-list is the LM Studio trap

    msg = model_routes._model_endpoint_error_message(base_url, ping)

    expected_fragments = (
        "LM Studio",
        "port 1234",
        "http://localhost:1234/v1/models",
        "Developer Server",
    )
    for fragment in expected_fragments:
        assert fragment in msg
|
||||||
|
|
||||||
|
|
||||||
|
def test_lmstudio_error_for_bare_host_port_probes_v1_models(monkeypatch):
    # Regression: for a path-less LM Studio URL the reported probe URL must be
    # /v1/models (build_models_url inserts /v1 on the OpenAI-compatible branch).
    # NOTE(review): _is_ollama_native_url is understood to match localhost with
    # an empty path and route to /api/tags, which would mask the LM Studio URL
    # asserted below -- hence the stub.
    monkeypatch.setattr("src.llm_core._is_ollama_native_url", lambda url: False)
    ping = {"error": "HTTP 200"}

    msg = model_routes._model_endpoint_error_message("http://localhost:1234", ping)

    for fragment in ("LM Studio", "http://localhost:1234/v1/models"):
        assert fragment in msg
|
||||||
|
|
||||||
|
|
||||||
# ── _rewrite_loopback_for_docker (issue #25: LM Studio on host loopback) ──
|
# ── _rewrite_loopback_for_docker (issue #25: LM Studio on host loopback) ──
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ PROVIDER_CASES = [
|
|||||||
"https://api.x.ai/v1/models"),
|
"https://api.x.ai/v1/models"),
|
||||||
("deepseek", "https://api.deepseek.com",
|
("deepseek", "https://api.deepseek.com",
|
||||||
"https://api.deepseek.com/chat/completions",
|
"https://api.deepseek.com/chat/completions",
|
||||||
"https://api.deepseek.com/models"),
|
"https://api.deepseek.com/v1/models"),
|
||||||
# Gemini's OpenAI-compatible surface — treated as a generic OpenAI endpoint.
|
# Gemini's OpenAI-compatible surface — treated as a generic OpenAI endpoint.
|
||||||
("gemini_openai", "https://generativelanguage.googleapis.com/v1beta/openai",
|
("gemini_openai", "https://generativelanguage.googleapis.com/v1beta/openai",
|
||||||
"https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
|
"https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
|
||||||
|
|||||||
Reference in New Issue
Block a user