fix(kimi): resolve Kimi Code API 403 errors and User-Agent restrictions (#3549)

* fix(kimi): resolve Kimi Code API 403 errors and User-Agent restrictions Kimi Code subscription keys require a whitelisted coding-agent User-Agent to avoid access_terminated_error 403s. This adds User-Agent probing and caching for Kimi Code endpoints. Co-authored-by: Cursor <cursoragent@cursor.com> * fix(kimi): omit temperature for kimi-for-coding API calls Kimi Code rejects any non-default temperature with HTTP 400, which broke deep research probes and low-temp LLM rounds. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-17 10:15:27 -04:00 · 2026-06-15 02:56:54 -04:00
parent 674457384a
commit 955455b797
10 changed files with 289 additions and 9 deletions
@@ -248,6 +248,9 @@ _PROVIDER_CURATED = {
    "zai-coding": [
        "glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
    ],
+    "kimi-code": [
+        "kimi-for-coding",
+    ],
    "deepseek": [
        "deepseek-chat", "deepseek-reasoner",
    ],
@@ -315,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str:
    parsed = urlparse(base_url)
    if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
        return "zai-coding"
+    if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""):
+        return "kimi-code"
    for domain, key in _HOST_TO_CURATED:
        if _host_match(base_url, domain):
            return key
@@ -703,6 +708,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    """Probe a base URL's /models endpoint and return list of model IDs.
    For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
    from src.endpoint_resolver import resolve_url
+    from src.llm_core import httpx_get_kimi_aware
    base = resolve_url(_normalize_base(base_url))
    provider = _safe_detect_provider(base)
    if provider == "chatgpt-subscription":
@@ -738,7 +744,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    url = _safe_build_models_url(base)
    headers = _safe_build_headers(api_key, base)
    try:
-        r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
+        r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify())
        r.raise_for_status()
        data = r.json()
        # OpenAI format: {"data": [{"id": "model-name"}]}
@@ -754,6 +760,11 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
                for _e in _PROVIDER_CURATED.get(_ck, []):
                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
                        models.append(_e)
+            if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""):
+                _ck = _match_provider_curated(base, None)
+                for _e in _PROVIDER_CURATED.get(_ck, []):
+                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
+                        models.append(_e)
            return [m for m in models if _is_chat_model(m)]
    except httpx.HTTPStatusError as e:
        if api_key:
@@ -198,6 +198,8 @@ def setup_webhook_routes(
        "opencode-go": "https://opencode.ai/zen/go/v1",
        "fireworks": "https://api.fireworks.ai/inference/v1",
        "venice": "https://api.venice.ai/api/v1",
+        "kimi-code": "https://api.kimi.com/coding/v1",
+        "kimicode": "https://api.kimi.com/coding/v1",
    }

    # Model prefix → provider mapping for auto-detection
@@ -210,6 +212,8 @@ def setup_webhook_routes(
        "mistral": "mistral",
        "llama": "groq",
        "mixtral": "groq",
+        "kimi-for-coding": "kimi-code",
+        "kimi": "kimi-code",
    }

    def _resolve_base_url(model: Optional[str], provider: Optional[str]) -> Optional[str]: