fix(models): probe /v1/models for path-less LM Studio endpoints

Probe /v1/models for path-less OpenAI-compatible model endpoints and surface clearer LM Studio diagnostics with the actual probed URL.
2026-06-15 17:25:26 -04:00 · 2026-06-15 11:39:50 +05:30
parent 29180c4731
commit 4b0a977988
5 changed files with 250 additions and 6 deletions
@@ -870,15 +870,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
 def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
-    """Return a provider-aware error message for failed endpoint probes."""
+    """Return a provider-aware error message for failed endpoint probes.
    Surfaces the URL we actually probed and, when the endpoint looks like
    LM Studio (port 1234 or hostname match), adds a hint about loading a
    model and confirming the Developer Server is running. The user previously
    saw a generic "No models found for that provider/key" with no way to
    tell whether the URL was wrong, the server was down, or the server was
    reachable but had no model loaded (issue #25).
    """
    ping = ping or {}
    error = ping.get("error")
    from src.endpoint_resolver import build_models_url
    try:
        probed = build_models_url(base_url) or base_url
    except Exception:
        probed = base_url
    parsed = urlparse(base_url)
    host = (parsed.hostname or "").lower()
    is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
    is_lmstudio = (
        parsed.port == 1234
        or "lmstudio" in host
        or "lm-studio" in host
        or "lm_studio" in host
    )
    if is_lmstudio:
        parts = [
            "LM Studio is reachable, but no models were reported.",
            f"Probed {probed}.",
        ]
        if error:
            parts.append(f"Last probe error: {error}.")
        parts.append(
            "Open LM Studio, load at least one model, and confirm the "
            "Developer Server is running on port 1234."
        )
        parts.append(
            "Base URL should be http://localhost:1234/v1 (native) or "
            "http://host.docker.internal:1234/v1 (Docker)."
        )
        return " ".join(parts)
    if is_ollama:
        parts = ["No Ollama models found for that endpoint."]
        parts.append(f"Probed {probed}.")
        if error:
            parts.append(f"Last probe error: {error}.")
        parts.append("Check that Ollama is running and that the base URL is correct.")
@@ -888,9 +925,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
        return " ".join(parts)
    if error:
-        return f"No models found for that provider/key. Last probe error: {error}."
+        return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."
-    return "No models found for that provider/key."
+    return f"No models found for that provider/key. Probed {probed}."
 def _normalize_model_ids(value):
@@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str:
 def build_models_url(base: str) -> Optional[str]:
-    """Return the provider-specific model-list endpoint URL for a base."""
+    """Return the provider-specific model-list endpoint URL for a base.
    For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM,
    text-generation-webui, etc.) the model list is exposed at ``/v1/models``.
    When the user-supplied base has no path — e.g. ``http://localhost:1234`` —
    we still need to land on ``/v1/models`` (issue #25); insert the ``/v1``
    segment only when the path is empty, leaving any explicit non-empty path
    untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
    their semantics).
    """
    base = normalize_base(resolve_url(base))
    provider = _detect_provider(base)
    if provider == "anthropic":
@@ -192,6 +201,12 @@ def build_models_url(base: str) -> Optional[str]:
        return _ollama_api_root(base) + "/tags"
    if provider == "chatgpt-subscription":
        return None
    # Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
    # when the user omitted a path entirely. If a non-empty path is already
    # present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
    # /models suffix is appended as-is and the caller's prefix is preserved.
    if not urlparse(base).path:
        base = base + "/v1"
    return base + "/models"
@@ -0,0 +1,160 @@
 """Regression coverage for LM Studio /v1 model-list endpoints (issue #25).
 LM Studio's OpenAI-compatible surface exposes its model list at
 ``/v1/models`` (just like llama-server, vLLM, text-generation-webui). Two
 distinct failure modes were reported by users:
 1. Pasting ``http://localhost:1234`` (no ``/v1``) — ``build_models_url``
   used to return ``http://localhost:1234/models``, which LM Studio does
   not expose, so the user got a generic "No models found" error even
   though the server was running and reachable.
 2. Pasting ``http://localhost:1234/v1`` (with ``/v1``) — the model list
   fetch was correct, but the error message gave the user no way to tell
   whether the URL was wrong, the server was down, or the server was
   reachable but had no model loaded.
 This module pins both behaviors so future refactors don't regress them.
 """
 import httpx
 from src import endpoint_resolver, llm_core
 def _neutralize_provider_detection(monkeypatch):
    """``_is_ollama_native_url`` matches any localhost host with an empty
    path, which would route ``http://localhost:1234`` (LM Studio) into the
    Ollama branch and probe ``/api/tags`` instead of ``/v1/models``. Force
    provider detection to "openai" so the URL builder takes the LM Studio
    path the user actually intends."""
    monkeypatch.setattr(llm_core, "_is_ollama_native_url", lambda url: False)
 # ── build_models_url: handle LM Studio base shapes ────────────────────
 def test_build_models_url_inserts_v1_for_bare_host_port(monkeypatch):
    """`http://localhost:1234` must probe `/v1/models` for LM Studio."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)
    assert (
        endpoint_resolver.build_models_url("http://localhost:1234")
        == "http://localhost:1234/v1/models"
    )
 def test_build_models_url_accepts_v1_base(monkeypatch):
    """`http://localhost:1234/v1` must probe `/v1/models` (no double v1)."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)
    assert (
        endpoint_resolver.build_models_url("http://localhost:1234/v1")
        == "http://localhost:1234/v1/models"
    )
 def test_build_models_url_idempotent_for_explicit_models(monkeypatch):
    """`/v1/models` must probe `/v1/models` (normalize_base strips it)."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)
    assert (
        endpoint_resolver.build_models_url("http://localhost:1234/v1/models")
        == "http://localhost:1234/v1/models"
    )
 def test_build_models_url_strips_chat_completions(monkeypatch):
    """`/v1/chat/completions` must collapse to `/v1/models` (parity with #3330)."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)
    assert (
        endpoint_resolver.build_models_url("http://localhost:1234/v1/chat/completions")
        == "http://localhost:1234/v1/models"
    )
 def test_build_models_url_preserves_explicit_non_v1_path(monkeypatch):
    """User-supplied non-empty paths (e.g. `/openai`) must not be overridden
    with `/v1`. We only insert `/v1` when the path is empty — that matches
    the documented contract: a custom path is the caller's intent."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)
    assert (
        endpoint_resolver.build_models_url("http://proxy.example.com/openai")
        == "http://proxy.example.com/openai/models"
    )
 # ── list_model_ids: parse LM Studio's response ─────────────────────────
 def test_llm_core_list_model_ids_queries_v1_models_for_lmstudio(monkeypatch):
    """Issue #25: probing `http://localhost:1234/v1` must hit `/v1/models`."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    seen = []
    def fake_get(url, headers=None, timeout=None):
        seen.append(url)
        request = httpx.Request("GET", url)
        return httpx.Response(
            200,
            json={
                "object": "list",
                "data": [
                    {"id": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF"},
                    {"id": "qwen2.5-7b-instruct"},
                ],
            },
            request=request,
        )
    monkeypatch.setattr(llm_core.httpx, "get", fake_get)
    assert llm_core.list_model_ids("http://localhost:1234/v1", timeout=1) == [
        "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
        "qwen2.5-7b-instruct",
    ]
    assert seen == ["http://localhost:1234/v1/models"]
 def test_llm_core_list_model_ids_queries_v1_models_for_bare_lmstudio(monkeypatch):
    """Issue #25: probing `http://localhost:1234` (no /v1) must hit `/v1/models`."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    # Localhost with empty path would otherwise be misclassified as Ollama
    # (llm_core._is_ollama_native_url); neutralise that for the test.
    monkeypatch.setattr(llm_core, "_is_ollama_native_url", lambda url: False)
    seen = []
    def fake_get(url, headers=None, timeout=None):
        seen.append(url)
        request = httpx.Request("GET", url)
        return httpx.Response(200, json={"data": [{"id": "model-a"}]}, request=request)
    monkeypatch.setattr(llm_core.httpx, "get", fake_get)
    assert llm_core.list_model_ids("http://localhost:1234", timeout=1) == ["model-a"]
    assert seen == ["http://localhost:1234/v1/models"]
 def test_llm_core_list_model_ids_handles_empty_lmstudio_list(monkeypatch):
    """LM Studio returns `{"object":"list","data":[]}` when no model is loaded.
    The helper must return `[]` cleanly so the caller can surface a clear
    error (issue #25: previously the empty case was indistinguishable from
    a connection failure)."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    def fake_get(url, headers=None, timeout=None):
        request = httpx.Request("GET", url)
        return httpx.Response(200, json={"object": "list", "data": []}, request=request)
    monkeypatch.setattr(llm_core.httpx, "get", fake_get)
    assert llm_core.list_model_ids("http://localhost:1234/v1", timeout=1) == []
@@ -625,7 +625,39 @@ def test_generic_endpoint_error_message_preserves_probe_error():
        {"error": "HTTP 401"},
    )
-    assert msg == "No models found for that provider/key. Last probe error: HTTP 401."
+    # Issue #25: the message must include the probed URL so the user can
    # self-diagnose (was opaque "No models found for that provider/key").
    assert "No models found for that provider/key" in msg
    assert "HTTP 401" in msg
    assert "https://api.example.com/v1/models" in msg
 def test_lmstudio_endpoint_error_message_includes_hint_and_probed_url():
    # Issue #25: when the user pastes an LM Studio URL, surface a port-aware
    # hint and the URL we actually probed (not the bare base URL).
    msg = model_routes._model_endpoint_error_message(
        "http://localhost:1234/v1",
        {"error": "HTTP 200"},  # 200-with-empty-list is the LM Studio trap
    )
    assert "LM Studio" in msg
    assert "port 1234" in msg
    assert "http://localhost:1234/v1/models" in msg
    assert "Developer Server" in msg
 def test_lmstudio_error_for_bare_host_port_probes_v1_models(monkeypatch):
    # Regression: build_models_url must add /v1 for path-less LM Studio URLs
    # (the OpenAI-compatible branch lands on /v1/models for LM Studio).
    # _is_ollama_native_url would otherwise match localhost+empty path and
    # route to /api/tags, masking the LM Studio URL we want to assert on.
    monkeypatch.setattr("src.llm_core._is_ollama_native_url", lambda url: False)
    msg = model_routes._model_endpoint_error_message(
        "http://localhost:1234",
        {"error": "HTTP 200"},
    )
    assert "LM Studio" in msg
    assert "http://localhost:1234/v1/models" in msg
 # ── _rewrite_loopback_for_docker (issue #25: LM Studio on host loopback) ──
@@ -58,7 +58,7 @@ PROVIDER_CASES = [
     "https://api.x.ai/v1/models"),
    ("deepseek", "https://api.deepseek.com",
     "https://api.deepseek.com/chat/completions",
-     "https://api.deepseek.com/models"),
+     "https://api.deepseek.com/v1/models"),
    # Gemini's OpenAI-compatible surface — treated as a generic OpenAI endpoint.
    ("gemini_openai", "https://generativelanguage.googleapis.com/v1beta/openai",
     "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",