fix: fall back to /models probe when base URL returns 404 (#3205)

_ping_endpoint() probes the bare base URL for non-Ollama endpoints.
OpenAI-compatible servers like llama-swap return 404 on the /v1 prefix
but 200 on /v1/models, causing endpoints to appear offline despite being
fully functional.

Add a /models fallback when the base URL returns a non-auth 4xx.
Auth failures (401/403) are treated as definitive — probing /models
would just repeat the same rejection.

Fixes #3181

Co-authored-by: michaelxer <michaelxer@users.noreply.github.com>
This commit is contained in:
michaelxer
2026-06-07 21:09:33 +07:00
committed by GitHub
parent 5d3e3c7053
commit bdf4ec8b24
2 changed files with 62 additions and 1 deletions
+20 -1
View File
@@ -755,7 +755,26 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
try:
r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
return _result_from_response(r)
result = _result_from_response(r)
# If the bare base URL returns a non-auth 4xx (e.g. 404), try /models
# as a fallback. OpenAI-compatible servers like llama-swap return 404
# on the base /v1 prefix but 200 on /v1/models. Auth failures (401/403)
# are definitive — probing /models would just repeat the same rejection.
if (
not result["reachable"]
and result.get("status_code") is not None
and 400 <= result["status_code"] < 500
and result["status_code"] not in (401, 403)
):
models_url = base.rstrip("/") + "/models"
try:
r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
result2 = _result_from_response(r2)
if result2["reachable"]:
return result2
except Exception:
pass
return result
except Exception as e:
last_error = str(e)[:120]
+42
View File
@@ -360,6 +360,48 @@ class TestClassifyEndpoint:
assert seen == [("GET", "http://100.117.136.97:34521/v1")]
assert all(not url.endswith("/models") for _, url in seen)
def test_ping_endpoint_falls_back_to_models_on_404(self, monkeypatch):
    """A 404 on the bare base URL is followed by a probe of ``/models``.

    Models the llama-swap behaviour: 404 on ``/v1`` but 200 on
    ``/v1/models``. The second response is the one that must be reported.
    """
    requested_urls = []

    def stub_get(url, headers=None, timeout=None, verify=None, **kwargs):
        # Record every probed URL so the exact call order can be asserted.
        requested_urls.append(url)
        status = 200 if url.endswith("/models") else 404
        return httpx.Response(status, request=httpx.Request("GET", url))

    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url, raising=False)
    monkeypatch.setattr(model_routes.httpx, "get", stub_get)

    outcome = _ping_endpoint("http://172.17.0.1:8081/v1", timeout=1)

    assert outcome["reachable"] is True
    assert outcome["status_code"] == 200
    # Base URL first, then exactly one fallback request to /models.
    assert requested_urls == [
        "http://172.17.0.1:8081/v1",
        "http://172.17.0.1:8081/v1/models",
    ]
def test_ping_endpoint_no_models_fallback_on_auth_failure(self, monkeypatch):
    """An auth rejection is final: no follow-up ``/models`` probe is made.

    The stub answers every request with 401, so any extra request would
    show up in the recorded URL list.
    """
    requested_urls = []

    def stub_get(url, headers=None, timeout=None, verify=None, **kwargs):
        # Every URL is rejected with 401, regardless of path.
        requested_urls.append(url)
        return httpx.Response(401, request=httpx.Request("GET", url))

    monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url, raising=False)
    monkeypatch.setattr(model_routes.httpx, "get", stub_get)

    outcome = _ping_endpoint("http://10.0.0.1:8080/v1", "bad-key", timeout=1)

    assert outcome["reachable"] is False
    assert outcome["status_code"] == 401
    # Only the base URL may have been requested; 401 must not trigger /models.
    assert len(requested_urls) == 1
# ── setup probing ──