From 4b0a97798850bf92e55340cbc030e5cbb7840b43 Mon Sep 17 00:00:00 2001 From: Muhammed Midlaj Date: Mon, 15 Jun 2026 11:39:50 +0530 Subject: [PATCH] fix(models): probe /v1/models for path-less LM Studio endpoints Probe /v1/models for path-less OpenAI-compatible model endpoints and surface clearer LM Studio diagnostics with the actual probed URL. --- routes/model_routes.py | 43 +++++++- src/endpoint_resolver.py | 17 +++- tests/test_lmstudio_models_url.py | 160 ++++++++++++++++++++++++++++++ tests/test_model_routes.py | 34 ++++++- tests/test_provider_endpoints.py | 2 +- 5 files changed, 250 insertions(+), 6 deletions(-) create mode 100644 tests/test_lmstudio_models_url.py diff --git a/routes/model_routes.py b/routes/model_routes.py index e53a23552..dfc6f99af 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -870,15 +870,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str: - """Return a provider-aware error message for failed endpoint probes.""" + """Return a provider-aware error message for failed endpoint probes. + + Surfaces the URL we actually probed and, when the endpoint looks like + LM Studio (port 1234 or hostname match), adds a hint about loading a + model and confirming the Developer Server is running. The user previously + saw a generic "No models found for that provider/key" with no way to + tell whether the URL was wrong, the server was down, or the server was + reachable but had no model loaded (issue #25). + """ ping = ping or {} error = ping.get("error") + from src.endpoint_resolver import build_models_url + try: + probed = build_models_url(base_url) or base_url + except Exception: + probed = base_url parsed = urlparse(base_url) host = (parsed.hostname or "").lower() is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower() + is_lmstudio = ( + parsed.port == 1234 + or "lmstudio" in host + or "lm-studio" in host + or "lm_studio" in host + ) + + if is_lmstudio: + parts = [ + "LM Studio is reachable, but no models were reported.", + f"Probed {probed}.", + ] + if error: + parts.append(f"Last probe error: {error}.") + parts.append( + "Open LM Studio, load at least one model, and confirm the " + "Developer Server is running on port 1234." + ) + parts.append( + "Base URL should be http://localhost:1234/v1 (native) or " + "http://host.docker.internal:1234/v1 (Docker)." + ) + return " ".join(parts) if is_ollama: parts = ["No Ollama models found for that endpoint."] + parts.append(f"Probed {probed}.") if error: parts.append(f"Last probe error: {error}.") parts.append("Check that Ollama is running and that the base URL is correct.") @@ -888,9 +925,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> return " ".join(parts) if error: - return f"No models found for that provider/key. Last probe error: {error}." + return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}." - return "No models found for that provider/key." + return f"No models found for that provider/key. Probed {probed}." def _normalize_model_ids(value): diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index 0a3063638..50cefa6d6 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str: def build_models_url(base: str) -> Optional[str]: - """Return the provider-specific model-list endpoint URL for a base.""" + """Return the provider-specific model-list endpoint URL for a base. + + For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM, + text-generation-webui, etc.) the model list is exposed at ``/v1/models``. + When the user-supplied base has no path — e.g. ``http://localhost:1234`` — + we still need to land on ``/v1/models`` (issue #25); insert the ``/v1`` + segment only when the path is empty, leaving any explicit non-empty path + untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep + their semantics). + """ base = normalize_base(resolve_url(base)) provider = _detect_provider(base) if provider == "anthropic": @@ -192,6 +201,12 @@ def build_models_url(base: str) -> Optional[str]: return _ollama_api_root(base) + "/tags" if provider == "chatgpt-subscription": return None + # Generic OpenAI-compatible fallback: ensure the path lands on /v1/models + # when the user omitted a path entirely. If a non-empty path is already + # present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the + # /models suffix is appended as-is and the caller's prefix is preserved. + if not urlparse(base).path: + base = base + "/v1" return base + "/models" diff --git a/tests/test_lmstudio_models_url.py b/tests/test_lmstudio_models_url.py new file mode 100644 index 000000000..67c86dbee --- /dev/null +++ b/tests/test_lmstudio_models_url.py @@ -0,0 +1,160 @@ +"""Regression coverage for LM Studio /v1 model-list endpoints (issue #25). + +LM Studio's OpenAI-compatible surface exposes its model list at +``/v1/models`` (just like llama-server, vLLM, text-generation-webui). Two +distinct failure modes were reported by users: + +1. Pasting ``http://localhost:1234`` (no ``/v1``) — ``build_models_url`` + used to return ``http://localhost:1234/models``, which LM Studio does + not expose, so the user got a generic "No models found" error even + though the server was running and reachable. +2. Pasting ``http://localhost:1234/v1`` (with ``/v1``) — the model list + fetch was correct, but the error message gave the user no way to tell + whether the URL was wrong, the server was down, or the server was + reachable but had no model loaded. + +This module pins both behaviors so future refactors don't regress them. +""" + +import httpx + +from src import endpoint_resolver, llm_core + + +def _neutralize_provider_detection(monkeypatch): + """``_is_ollama_native_url`` matches any localhost host with an empty + path, which would route ``http://localhost:1234`` (LM Studio) into the + Ollama branch and probe ``/api/tags`` instead of ``/v1/models``. Force + provider detection to "openai" so the URL builder takes the LM Studio + path the user actually intends.""" + monkeypatch.setattr(llm_core, "_is_ollama_native_url", lambda url: False) + + +# ── build_models_url: handle LM Studio base shapes ──────────────────── + + +def test_build_models_url_inserts_v1_for_bare_host_port(monkeypatch): + """`http://localhost:1234` must probe `/v1/models` for LM Studio.""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + _neutralize_provider_detection(monkeypatch) + + assert ( + endpoint_resolver.build_models_url("http://localhost:1234") + == "http://localhost:1234/v1/models" + ) + + +def test_build_models_url_accepts_v1_base(monkeypatch): + """`http://localhost:1234/v1` must probe `/v1/models` (no double v1).""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + _neutralize_provider_detection(monkeypatch) + + assert ( + endpoint_resolver.build_models_url("http://localhost:1234/v1") + == "http://localhost:1234/v1/models" + ) + + +def test_build_models_url_idempotent_for_explicit_models(monkeypatch): + """`/v1/models` must probe `/v1/models` (normalize_base strips it).""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + _neutralize_provider_detection(monkeypatch) + + assert ( + endpoint_resolver.build_models_url("http://localhost:1234/v1/models") + == "http://localhost:1234/v1/models" + ) + + +def test_build_models_url_strips_chat_completions(monkeypatch): + """`/v1/chat/completions` must collapse to `/v1/models` (parity with #3330).""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + _neutralize_provider_detection(monkeypatch) + + assert ( + endpoint_resolver.build_models_url("http://localhost:1234/v1/chat/completions") + == "http://localhost:1234/v1/models" + ) + + +def test_build_models_url_preserves_explicit_non_v1_path(monkeypatch): + """User-supplied non-empty paths (e.g. `/openai`) must not be overridden + with `/v1`. We only insert `/v1` when the path is empty — that matches + the documented contract: a custom path is the caller's intent.""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + _neutralize_provider_detection(monkeypatch) + + assert ( + endpoint_resolver.build_models_url("http://proxy.example.com/openai") + == "http://proxy.example.com/openai/models" + ) + + +# ── list_model_ids: parse LM Studio's response ───────────────────────── + + +def test_llm_core_list_model_ids_queries_v1_models_for_lmstudio(monkeypatch): + """Issue #25: probing `http://localhost:1234/v1` must hit `/v1/models`.""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: []) + seen = [] + + def fake_get(url, headers=None, timeout=None): + seen.append(url) + request = httpx.Request("GET", url) + return httpx.Response( + 200, + json={ + "object": "list", + "data": [ + {"id": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF"}, + {"id": "qwen2.5-7b-instruct"}, + ], + }, + request=request, + ) + + monkeypatch.setattr(llm_core.httpx, "get", fake_get) + + assert llm_core.list_model_ids("http://localhost:1234/v1", timeout=1) == [ + "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", + "qwen2.5-7b-instruct", + ] + assert seen == ["http://localhost:1234/v1/models"] + + +def test_llm_core_list_model_ids_queries_v1_models_for_bare_lmstudio(monkeypatch): + """Issue #25: probing `http://localhost:1234` (no /v1) must hit `/v1/models`.""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: []) + # Localhost with empty path would otherwise be misclassified as Ollama + # (llm_core._is_ollama_native_url); neutralise that for the test. + monkeypatch.setattr(llm_core, "_is_ollama_native_url", lambda url: False) + seen = [] + + def fake_get(url, headers=None, timeout=None): + seen.append(url) + request = httpx.Request("GET", url) + return httpx.Response(200, json={"data": [{"id": "model-a"}]}, request=request) + + monkeypatch.setattr(llm_core.httpx, "get", fake_get) + + assert llm_core.list_model_ids("http://localhost:1234", timeout=1) == ["model-a"] + assert seen == ["http://localhost:1234/v1/models"] + + +def test_llm_core_list_model_ids_handles_empty_lmstudio_list(monkeypatch): + """LM Studio returns `{"object":"list","data":[]}` when no model is loaded. + The helper must return `[]` cleanly so the caller can surface a clear + error (issue #25: previously the empty case was indistinguishable from + a connection failure).""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url) + monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: []) + + def fake_get(url, headers=None, timeout=None): + request = httpx.Request("GET", url) + return httpx.Response(200, json={"object": "list", "data": []}, request=request) + + monkeypatch.setattr(llm_core.httpx, "get", fake_get) + + assert llm_core.list_model_ids("http://localhost:1234/v1", timeout=1) == [] diff --git a/tests/test_model_routes.py b/tests/test_model_routes.py index ee1a53912..1851bc8b0 100644 --- a/tests/test_model_routes.py +++ b/tests/test_model_routes.py @@ -625,7 +625,39 @@ def test_generic_endpoint_error_message_preserves_probe_error(): {"error": "HTTP 401"}, ) - assert msg == "No models found for that provider/key. Last probe error: HTTP 401." + # Issue #25: the message must include the probed URL so the user can + # self-diagnose (was opaque "No models found for that provider/key"). + assert "No models found for that provider/key" in msg + assert "HTTP 401" in msg + assert "https://api.example.com/v1/models" in msg + + +def test_lmstudio_endpoint_error_message_includes_hint_and_probed_url(): + # Issue #25: when the user pastes an LM Studio URL, surface a port-aware + # hint and the URL we actually probed (not the bare base URL). + msg = model_routes._model_endpoint_error_message( + "http://localhost:1234/v1", + {"error": "HTTP 200"}, # 200-with-empty-list is the LM Studio trap + ) + + assert "LM Studio" in msg + assert "port 1234" in msg + assert "http://localhost:1234/v1/models" in msg + assert "Developer Server" in msg + + +def test_lmstudio_error_for_bare_host_port_probes_v1_models(monkeypatch): + # Regression: build_models_url must add /v1 for path-less LM Studio URLs + # (the OpenAI-compatible branch lands on /v1/models for LM Studio). + # _is_ollama_native_url would otherwise match localhost+empty path and + # route to /api/tags, masking the LM Studio URL we want to assert on. + monkeypatch.setattr("src.llm_core._is_ollama_native_url", lambda url: False) + msg = model_routes._model_endpoint_error_message( + "http://localhost:1234", + {"error": "HTTP 200"}, + ) + assert "LM Studio" in msg + assert "http://localhost:1234/v1/models" in msg # ── _rewrite_loopback_for_docker (issue #25: LM Studio on host loopback) ── diff --git a/tests/test_provider_endpoints.py b/tests/test_provider_endpoints.py index d4b56dcb3..754eaa905 100644 --- a/tests/test_provider_endpoints.py +++ b/tests/test_provider_endpoints.py @@ -58,7 +58,7 @@ PROVIDER_CASES = [ "https://api.x.ai/v1/models"), ("deepseek", "https://api.deepseek.com", "https://api.deepseek.com/chat/completions", - "https://api.deepseek.com/models"), + "https://api.deepseek.com/v1/models"), # Gemini's OpenAI-compatible surface — treated as a generic OpenAI endpoint. ("gemini_openai", "https://generativelanguage.googleapis.com/v1beta/openai", "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",