fix(models): probe /v1/models for path-less LM Studio endpoints

Probe /v1/models for path-less OpenAI-compatible model endpoints and surface clearer LM Studio diagnostics with the actual probed URL.
This commit is contained in:
Muhammed Midlaj
2026-06-15 11:39:50 +05:30
committed by GitHub
parent 29180c4731
commit 4b0a977988
5 changed files with 250 additions and 6 deletions
+40 -3
View File
@@ -870,15 +870,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
"""Return a provider-aware error message for failed endpoint probes."""
"""Return a provider-aware error message for failed endpoint probes.
Surfaces the URL we actually probed and, when the endpoint looks like
LM Studio (port 1234 or hostname match), adds a hint about loading a
model and confirming the Developer Server is running. The user previously
saw a generic "No models found for that provider/key" with no way to
tell whether the URL was wrong, the server was down, or the server was
reachable but had no model loaded (issue #25).
"""
ping = ping or {}
error = ping.get("error")
from src.endpoint_resolver import build_models_url
try:
probed = build_models_url(base_url) or base_url
except Exception:
probed = base_url
parsed = urlparse(base_url)
host = (parsed.hostname or "").lower()
is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
is_lmstudio = (
parsed.port == 1234
or "lmstudio" in host
or "lm-studio" in host
or "lm_studio" in host
)
if is_lmstudio:
parts = [
"LM Studio is reachable, but no models were reported.",
f"Probed {probed}.",
]
if error:
parts.append(f"Last probe error: {error}.")
parts.append(
"Open LM Studio, load at least one model, and confirm the "
"Developer Server is running on port 1234."
)
parts.append(
"Base URL should be http://localhost:1234/v1 (native) or "
"http://host.docker.internal:1234/v1 (Docker)."
)
return " ".join(parts)
if is_ollama:
parts = ["No Ollama models found for that endpoint."]
parts.append(f"Probed {probed}.")
if error:
parts.append(f"Last probe error: {error}.")
parts.append("Check that Ollama is running and that the base URL is correct.")
@@ -888,9 +925,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
return " ".join(parts)
if error:
return f"No models found for that provider/key. Last probe error: {error}."
return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."
return "No models found for that provider/key."
return f"No models found for that provider/key. Probed {probed}."
def _normalize_model_ids(value):
+16 -1
View File
@@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str:
def build_models_url(base: str) -> Optional[str]:
"""Return the provider-specific model-list endpoint URL for a base."""
"""Return the provider-specific model-list endpoint URL for a base.
For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM,
text-generation-webui, etc.) the model list is exposed at ``/v1/models``.
When the user-supplied base has no path — e.g. ``http://localhost:1234`` —
we still need to land on ``/v1/models`` (issue #25); insert the ``/v1``
segment only when the path is empty, leaving any explicit non-empty path
untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
their semantics).
"""
base = normalize_base(resolve_url(base))
provider = _detect_provider(base)
if provider == "anthropic":
@@ -192,6 +201,12 @@ def build_models_url(base: str) -> Optional[str]:
return _ollama_api_root(base) + "/tags"
if provider == "chatgpt-subscription":
return None
# Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
# when the user omitted a path entirely. If a non-empty path is already
# present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
# /models suffix is appended as-is and the caller's prefix is preserved.
if not urlparse(base).path:
base = base + "/v1"
return base + "/models"
+160
View File
@@ -0,0 +1,160 @@
"""Regression coverage for LM Studio /v1 model-list endpoints (issue #25).
LM Studio's OpenAI-compatible surface exposes its model list at
``/v1/models`` (just like llama-server, vLLM, text-generation-webui). Two
distinct failure modes were reported by users:
1. Pasting ``http://localhost:1234`` (no ``/v1``) — ``build_models_url``
used to return ``http://localhost:1234/models``, which LM Studio does
not expose, so the user got a generic "No models found" error even
though the server was running and reachable.
2. Pasting ``http://localhost:1234/v1`` (with ``/v1``) — the model list
fetch was correct, but the error message gave the user no way to tell
whether the URL was wrong, the server was down, or the server was
reachable but had no model loaded.
This module pins both behaviors so future refactors don't regress them.
"""
import httpx
from src import endpoint_resolver, llm_core
def _neutralize_provider_detection(monkeypatch):
    """Stub ``llm_core._is_ollama_native_url`` so it always answers False.

    Rationale (carried over from the original note): that predicate matches
    any localhost host with an empty path, which would send
    ``http://localhost:1234`` (LM Studio) down the Ollama branch and probe
    ``/api/tags`` instead of ``/v1/models``. Disabling it lets the URL
    builder follow the generic OpenAI-compatible path the user intends.
    """

    def _never_ollama(url):
        return False

    monkeypatch.setattr(llm_core, "_is_ollama_native_url", _never_ollama)
# ── build_models_url: handle LM Studio base shapes ────────────────────
def test_build_models_url_inserts_v1_for_bare_host_port(monkeypatch):
    """A path-less `http://localhost:1234` base must resolve to `/v1/models`."""
    # Keep URL resolution an identity mapping so only the builder is exercised.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://localhost:1234")

    assert models_url == "http://localhost:1234/v1/models"
def test_build_models_url_accepts_v1_base(monkeypatch):
    """A base already ending in `/v1` must yield `/v1/models`, not `/v1/v1/models`."""
    # Keep URL resolution an identity mapping so only the builder is exercised.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://localhost:1234/v1")

    assert models_url == "http://localhost:1234/v1/models"
def test_build_models_url_idempotent_for_explicit_models(monkeypatch):
    """A base that already ends in `/v1/models` must come back unchanged."""
    # Keep URL resolution an identity mapping so only the builder is exercised.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://localhost:1234/v1/models")

    assert models_url == "http://localhost:1234/v1/models"
def test_build_models_url_strips_chat_completions(monkeypatch):
    """A `/v1/chat/completions` base must collapse to `/v1/models` (parity with #3330)."""
    # Keep URL resolution an identity mapping so only the builder is exercised.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url(
        "http://localhost:1234/v1/chat/completions"
    )

    assert models_url == "http://localhost:1234/v1/models"
def test_build_models_url_preserves_explicit_non_v1_path(monkeypatch):
    """An explicit non-empty path such as `/openai` must be kept as-is.

    `/v1` is only inserted when the supplied base has no path at all; a
    custom path is treated as the caller's intent and merely gets `/models`
    appended.
    """
    # Keep URL resolution an identity mapping so only the builder is exercised.
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    _neutralize_provider_detection(monkeypatch)

    models_url = endpoint_resolver.build_models_url("http://proxy.example.com/openai")

    assert models_url == "http://proxy.example.com/openai/models"
# ── list_model_ids: parse LM Studio's response ─────────────────────────
def test_llm_core_list_model_ids_queries_v1_models_for_lmstudio(monkeypatch):
    """Issue #25: listing models for `http://localhost:1234/v1` must hit `/v1/models`."""
    expected_ids = [
        "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
        "qwen2.5-7b-instruct",
    ]
    requested_urls = []

    def fake_get(url, headers=None, timeout=None):
        # Record the probed URL and answer with an OpenAI-style model list.
        requested_urls.append(url)
        payload = {
            "object": "list",
            "data": [{"id": model_id} for model_id in expected_ids],
        }
        return httpx.Response(200, json=payload, request=httpx.Request("GET", url))

    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    listed = llm_core.list_model_ids("http://localhost:1234/v1", timeout=1)

    assert listed == expected_ids
    assert requested_urls == ["http://localhost:1234/v1/models"]
def test_llm_core_list_model_ids_queries_v1_models_for_bare_lmstudio(monkeypatch):
    """Issue #25: probing `http://localhost:1234` (no /v1) must hit `/v1/models`."""
    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    # Use the module's shared helper (it stubs llm_core._is_ollama_native_url
    # to return False) rather than repeating the patch inline, so every test
    # in this file disables the Ollama check in one and the same way.
    _neutralize_provider_detection(monkeypatch)

    seen = []

    def fake_get(url, headers=None, timeout=None):
        # Record the probed URL and answer with a single-model list.
        seen.append(url)
        request = httpx.Request("GET", url)
        return httpx.Response(200, json={"data": [{"id": "model-a"}]}, request=request)

    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    assert llm_core.list_model_ids("http://localhost:1234", timeout=1) == ["model-a"]
    assert seen == ["http://localhost:1234/v1/models"]
def test_llm_core_list_model_ids_handles_empty_lmstudio_list(monkeypatch):
    """An empty model list from the server must come back as a plain `[]`.

    LM Studio returns `{"object":"list","data":[]}` when no model is loaded.
    Returning `[]` cleanly lets the caller surface a clear error (issue #25:
    previously the empty case was indistinguishable from a connection
    failure).
    """

    def fake_get(url, headers=None, timeout=None):
        # Successful HTTP response whose model list is empty.
        empty_listing = {"object": "list", "data": []}
        return httpx.Response(200, json=empty_listing, request=httpx.Request("GET", url))

    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url)
    monkeypatch.setattr(llm_core, "_configured_cached_model_ids", lambda url, **kwargs: [])
    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    listed = llm_core.list_model_ids("http://localhost:1234/v1", timeout=1)

    assert listed == []
+33 -1
View File
@@ -625,7 +625,39 @@ def test_generic_endpoint_error_message_preserves_probe_error():
{"error": "HTTP 401"},
)
assert msg == "No models found for that provider/key. Last probe error: HTTP 401."
# Issue #25: the message must include the probed URL so the user can
# self-diagnose (was opaque "No models found for that provider/key").
assert "No models found for that provider/key" in msg
assert "HTTP 401" in msg
assert "https://api.example.com/v1/models" in msg
def test_lmstudio_endpoint_error_message_includes_hint_and_probed_url():
    # Issue #25: for an LM Studio URL the message must carry a port-aware
    # hint plus the URL that was actually probed (not the bare base URL).
    ping = {"error": "HTTP 200"}  # 200-with-empty-list is the LM Studio trap
    message = model_routes._model_endpoint_error_message("http://localhost:1234/v1", ping)

    required_fragments = (
        "LM Studio",
        "port 1234",
        "http://localhost:1234/v1/models",
        "Developer Server",
    )
    for fragment in required_fragments:
        assert fragment in message
def test_lmstudio_error_for_bare_host_port_probes_v1_models(monkeypatch):
    # Regression: for a path-less LM Studio URL the reported probe target must
    # be /v1/models (build_models_url adds the /v1 segment on the
    # OpenAI-compatible branch).
    # The Ollama check is stubbed out because, per the original note, it would
    # otherwise match localhost + empty path and route to /api/tags, masking
    # the LM Studio URL asserted on below.
    monkeypatch.setattr("src.llm_core._is_ollama_native_url", lambda url: False)

    ping = {"error": "HTTP 200"}
    message = model_routes._model_endpoint_error_message("http://localhost:1234", ping)

    for fragment in ("LM Studio", "http://localhost:1234/v1/models"):
        assert fragment in message
# ── _rewrite_loopback_for_docker (issue #25: LM Studio on host loopback) ──
+1 -1
View File
@@ -58,7 +58,7 @@ PROVIDER_CASES = [
"https://api.x.ai/v1/models"),
("deepseek", "https://api.deepseek.com",
"https://api.deepseek.com/chat/completions",
"https://api.deepseek.com/models"),
"https://api.deepseek.com/v1/models"),
# Gemini's OpenAI-compatible surface — treated as a generic OpenAI endpoint.
("gemini_openai", "https://generativelanguage.googleapis.com/v1beta/openai",
"https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",