fix(model-routes): harden _probe_endpoint against malformed model-list responses (#4789)

* fix(model-routes): harden _probe_endpoint against malformed model-list responses _probe_endpoint parsed model lists with data.get(...) at four sites without checking that data is a dict, and built the list with a truthiness-only filter. A /models (or /api/tags) endpoint returning HTTP 200 with valid but non-dict JSON ([], "x", null, 123) made data.get(...) raise AttributeError, and a non-string id like 123 passed the filter and then hit .startswith() / .lower() in the Z.AI/Kimi curated merge and _is_chat_model(). Both errors are swallowed by the broad except Exception, but the comprehension dies mid-list so the ENTIRE probed model list is discarded and the endpoint silently degrades — masking a misconfigured/non-compliant upstream as "no models". - Guard each data.get(...) with isinstance(data, dict) so a non-dict body falls through the existing `or []` default. - Restrict the OpenAI and Ollama model-list comprehensions to non-empty str values, protecting the .startswith() merges and both _is_chat_model calls. - Add an isinstance guard at the top of _is_chat_model (defense in depth for all four call sites). No behavior change for well-formed {"data":[...]} / {"models":[...]} responses. Adds regression tests (non-dict body via caplog, mixed/all non-string ids, _is_chat_model boundary) that fail before the fix and pass after. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * refactor(model-routes): extract _openai_model_ids / _ollama_model_names helpers Per review on #4789: the malformed-response guards were inlined four times in _probe_endpoint (two OpenAI-id comprehensions, two Ollama-name comprehensions). Pull each into a small, directly-testable helper so the security-relevant parsing lives in one place and a future malformed-shape fix doesn't have to be applied in four spots (CONTRIBUTING flags repeated logic for this reason). Behavior is unchanged. Adds direct unit tests for both helpers (non-dict body, non-string ids, non-dict entries, name>model precedence). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 14:45:33 -04:00 · 2026-06-24 22:35:31 +05:30
parent 4e46e415ea
commit 22379fe736
3 changed files with 108 additions and 4 deletions
@@ -523,6 +523,10 @@ _NON_CHAT_EXACT_PREFIXES = (

 def _is_chat_model(model_id: str) -> bool:
    """Return True if the model ID looks like a chat/completions-capable model."""
+    if not isinstance(model_id, str):
+        # Non-compliant upstreams can return non-string IDs (e.g. int/None);
+        # treat them as chat-capable rather than crashing on .lower().
+        return True
    mid = model_id.lower()
    for prefix in _NON_CHAT_PREFIXES:
        if mid.startswith(prefix):
@@ -726,6 +730,34 @@ def _is_loading_model_response(resp: Any) -> bool:



+def _openai_model_ids(data: Any) -> List[str]:
+    """Extract OpenAI-style model IDs (``{"data": [{"id": ...}]}``).
+
+    Tolerates a non-dict body and non-string IDs from non-compliant upstreams,
+    returning only non-empty string IDs.
+    """
+    items = data.get("data") if isinstance(data, dict) else None
+    return [m["id"] for m in (items or [])
+            if isinstance(m, dict) and isinstance(m.get("id"), str) and m["id"]]
+
+
+def _ollama_model_names(data: Any) -> List[str]:
+    """Extract native-Ollama model names (``{"models": [{"name"|"model": ...}]}``).
+
+    Same tolerance as :func:`_openai_model_ids`: a non-dict body or non-string
+    value is skipped rather than crashing, preserving name-then-model precedence.
+    """
+    items = data.get("models") if isinstance(data, dict) else None
+    out: List[str] = []
+    for m in (items or []):
+        if not isinstance(m, dict):
+            continue
+        v = m.get("name") or m.get("model")
+        if isinstance(v, str) and v:
+            out.append(v)
+    return out
+
+
 def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]:
    """Probe a base URL's /models endpoint and return list of model IDs.
    For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
@@ -748,7 +780,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
            r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
            r.raise_for_status()
            data = r.json()
-            models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
+            models = _openai_model_ids(data)
            if models:
                return models
        except httpx.HTTPStatusError as e:
@@ -770,10 +802,10 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
        r.raise_for_status()
        data = r.json()
        # OpenAI format: {"data": [{"id": "model-name"}]}
-        models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
+        models = _openai_model_ids(data)
        # Ollama format: {"models": [{"name": "model-name"}]}
        if not models:
-            models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
+            models = _ollama_model_names(data)
        if models:
            # Z.AI coding plan omits some working models from /models;
            # append curated-only entries for that endpoint only.
@@ -812,7 +844,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
            r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify())
            r.raise_for_status()
            data = r.json()
-            models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
+            models = _ollama_model_names(data)
            if models:
                return [m for m in models if _is_chat_model(m)]
    except Exception as e:
@@ -53,6 +53,8 @@ with preserve_import_state("core.database", "src.database", "core.session_manage
        _resolve_probe_key,
        _classify_endpoint,
        _rewrite_loopback_for_docker,
+        _openai_model_ids,
+        _ollama_model_names,
        _PROVIDER_CURATED,
    )

@@ -74,6 +76,33 @@ def _resp(status, *, json=None, headers=None, url="https://api.example.com/v1/mo
    return httpx.Response(status, **kwargs)


+# ── _openai_model_ids / _ollama_model_names: parsing helpers ──
+
+class TestModelListHelpers:
+    @pytest.mark.parametrize("data,expected", [
+        ({"data": [{"id": "gpt-4o"}, {"id": "gpt-4o-mini"}]}, ["gpt-4o", "gpt-4o-mini"]),
+        ({"data": [{"id": None}, {"id": 123}, {"id": "gpt-4o"}]}, ["gpt-4o"]),  # non-string ids dropped
+        ({"data": ["x", {"id": "ok"}]}, ["ok"]),                                # non-dict entries dropped
+        ({"data": []}, []),
+        ({"data": "oops"}, []),                                                 # non-list "data"
+        ([], []), ("nope", []), (None, []), (123, []),                          # non-dict body
+    ])
+    def test_openai_model_ids(self, data, expected):
+        assert _openai_model_ids(data) == expected
+
+    @pytest.mark.parametrize("data,expected", [
+        ({"models": [{"name": "llama3:8b"}, {"model": "qwen3:4b"}]}, ["llama3:8b", "qwen3:4b"]),
+        ({"models": [{"name": "a", "model": "b"}]}, ["a"]),                      # name precedence over model
+        ({"models": [{"name": 123}, {"model": None}, {"name": "ok"}]}, ["ok"]),  # non-string values dropped
+        ({"models": ["x", {"name": "ok"}]}, ["ok"]),                            # non-dict entries dropped
+        ({"models": []}, []),
+        ({"models": "oops"}, []),
+        ([], []), (None, []), (42, []),                                         # non-dict body
+    ])
+    def test_ollama_model_names(self, data, expected):
+        assert _ollama_model_names(data) == expected
+
+
 # ── _probe_endpoint: model-list parsing ──

 class TestProbeEndpointParsing:
@@ -121,6 +150,43 @@ class TestProbeEndpointParsing:
        )
        assert _probe_endpoint("https://api.example.com/v1") == []

+    @pytest.mark.parametrize("body", [[], "invalid", 123, True])
+    def test_non_dict_json_body_degrades_to_empty(self, monkeypatch, caplog, body):
+        # HTTP 200 with valid-but-non-dict JSON must not crash the probe with an
+        # AttributeError (data.get(...) on a list/str/int); it should fall through
+        # to the empty/curated path. caplog gives this test teeth: pre-fix the
+        # swallowed AttributeError logs "Failed to probe"; post-fix it does not.
+        _patch_resolve(monkeypatch)
+        monkeypatch.setattr(
+            model_routes.httpx, "get",
+            lambda url, headers=None, timeout=None, verify=None, **kwargs: _resp(200, json=body),
+        )
+        with caplog.at_level("WARNING", logger="routes.model_routes"):
+            assert _probe_endpoint("https://api.example.com/v1") == []
+        assert "Failed to probe" not in caplog.text
+
+    def test_skips_non_string_model_ids(self, monkeypatch):
+        # A non-compliant upstream returns int/None IDs alongside a valid one.
+        # The probe must not crash on .lower()/.startswith and must still surface
+        # the valid string model.
+        _patch_resolve(monkeypatch)
+        monkeypatch.setattr(
+            model_routes.httpx, "get",
+            lambda url, headers=None, timeout=None, verify=None, **kwargs: _resp(
+                200, json={"data": [{"id": None}, {"id": 123}, {"id": "gpt-4o"}]}),
+        )
+        assert _probe_endpoint("https://api.example.com/v1", "key") == ["gpt-4o"]
+
+    def test_all_non_string_ids_returns_empty(self, monkeypatch):
+        # Every id is non-string -> empty result, no exception, no curated leak.
+        _patch_resolve(monkeypatch)
+        monkeypatch.setattr(
+            model_routes.httpx, "get",
+            lambda url, headers=None, timeout=None, verify=None, **kwargs: _resp(
+                200, json={"data": [{"id": 123}, {"id": None}]}),
+        )
+        assert _probe_endpoint("https://api.example.com/v1") == []
+
    def test_chatgpt_subscription_probe_uses_discovery_only(self, monkeypatch):
        _patch_resolve(monkeypatch)
        calls = []
@@ -403,6 +403,12 @@ class TestIsChatModel:
    def test_legacy_openai_instruct_is_not_chat(self):
        assert _is_chat_model("gpt-3.5-turbo-instruct") is False

+    @pytest.mark.parametrize("bad", [None, 123, 4.5, ["x"], {"a": 1}])
+    def test_non_string_id_is_treated_as_chat(self, bad):
+        # Defensive boundary: a non-compliant upstream can yield a non-string
+        # model id; it must not crash on .lower() (treated as chat-capable).
+        assert _is_chat_model(bad) is True
+

 # ── _classify_endpoint ──