# Review notes. These lines sit ahead of the first "diff --git" header and are
# not part of any hunk.
#
# Scope: a two-file patch.
#   * routes/model_routes.py, hunk inside _ping_endpoint: after the GET on
#     `base`, the parsed result is no longer returned immediately. When that
#     result is not reachable and carries a status_code in 400..499 other than
#     401 or 403, one extra GET is sent to base.rstrip("/") + "/models" with
#     the same headers, timeout and verify setting. The second result is
#     returned only if it is reachable; in every other case, including any
#     exception raised by the second request, the first result is returned.
#   * tests/test_model_routes.py, class TestClassifyEndpoint: two tests are
#     added. One stubs httpx.get to answer 404 on the base URL and 200 on
#     ".../models" and expects a reachable 200 result after exactly two
#     requests. The other answers 401 everywhere and expects an unreachable
#     401 result after exactly one request.
#
# NOTE(review): in this copy the patch's line breaks and indentation appear to
#   have been flattened to single spaces, so hunk lines are no longer one per
#   line. The first hunk header declares 7 old / 26 new lines; restore the
#   original line structure before trying to apply it.
# NOTE(review): `except Exception: pass` around the /models probe discards the
#   probe's error on purpose (best-effort fallback). Consider whether that
#   error should be logged. TODO confirm with the author.
# NOTE(review): if `base` can already end in "/models", the fallback would
#   request ".../models/models". Verify against the callers of _ping_endpoint.
# NOTE(review): no test in this patch covers the path where the /models probe
#   also returns an unreachable result or raises.
diff --git a/routes/model_routes.py b/routes/model_routes.py index 14d1b94e6..07d674e81 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -755,7 +755,26 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> try: r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify()) - return _result_from_response(r) + result = _result_from_response(r) + # If the bare base URL returns a non-auth 4xx (e.g. 404), try /models + # as a fallback. OpenAI-compatible servers like llama-swap return 404 + # on the base /v1 prefix but 200 on /v1/models. Auth failures (401/403) + # are definitive — probing /models would just repeat the same rejection. + if ( + not result["reachable"] + and result.get("status_code") is not None + and 400 <= result["status_code"] < 500 + and result["status_code"] not in (401, 403) + ): + models_url = base.rstrip("/") + "/models" + try: + r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify()) + result2 = _result_from_response(r2) + if result2["reachable"]: + return result2 + except Exception: + pass + return result except Exception as e: last_error = str(e)[:120] diff --git a/tests/test_model_routes.py b/tests/test_model_routes.py index f3475c30a..54a0b4125 100644 --- a/tests/test_model_routes.py +++ b/tests/test_model_routes.py @@ -360,6 +360,48 @@ class TestClassifyEndpoint: assert seen == [("GET", "http://100.117.136.97:34521/v1")] assert all(not url.endswith("/models") for _, url in seen) + def test_ping_endpoint_falls_back_to_models_on_404(self, monkeypatch): + """llama-swap returns 404 on /v1 but 200 on /v1/models.""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url, raising=False) + seen = [] + + def fake_get(url, headers=None, timeout=None, verify=None, **kwargs): + seen.append(url) + request = httpx.Request("GET", url) + if url.endswith("/models"): + return httpx.Response(200, request=request) + return httpx.Response(404, request=request) + 
+ monkeypatch.setattr(model_routes.httpx, "get", fake_get) + + result = _ping_endpoint("http://172.17.0.1:8081/v1", timeout=1) + + assert result["reachable"] is True + assert result["status_code"] == 200 + assert seen == [ + "http://172.17.0.1:8081/v1", + "http://172.17.0.1:8081/v1/models", + ] + + def test_ping_endpoint_no_models_fallback_on_auth_failure(self, monkeypatch): + """401/403 are definitive — don't probe /models.""" + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url, raising=False) + seen = [] + + def fake_get(url, headers=None, timeout=None, verify=None, **kwargs): + seen.append(url) + request = httpx.Request("GET", url) + return httpx.Response(401, request=request) + + monkeypatch.setattr(model_routes.httpx, "get", fake_get) + + result = _ping_endpoint("http://10.0.0.1:8080/v1", "bad-key", timeout=1) + + assert result["reachable"] is False + assert result["status_code"] == 401 + # Should NOT have tried /models — 401 is definitive + assert len(seen) == 1 + # ── setup probing ──