From 955455b797daaf9327a1c8cf5665a97607788101 Mon Sep 17 00:00:00 2001 From: KYDNO Date: Mon, 15 Jun 2026 02:56:54 -0400 Subject: [PATCH] fix(kimi): resolve Kimi Code API 403 errors and User-Agent restrictions (#3549) * fix(kimi): resolve Kimi Code API 403 errors and User-Agent restrictions Kimi Code subscription keys require a whitelisted coding-agent User-Agent to avoid access_terminated_error 403s. This adds User-Agent probing and caching for Kimi Code endpoints. Co-authored-by: Cursor * fix(kimi): omit temperature for kimi-for-coding API calls Kimi Code rejects any non-default temperature with HTTP 400, which broke deep research probes and low-temp LLM rounds. Co-authored-by: Cursor --------- Co-authored-by: Cursor --- routes/model_routes.py | 13 ++- routes/webhook_routes.py | 4 + src/agent_loop.py | 2 +- src/endpoint_resolver.py | 4 +- src/llm_core.py | 155 ++++++++++++++++++++++++++++- src/teacher_escalation.py | 2 +- tests/test_kimi_code_hosts.py | 32 ++++++ tests/test_kimi_code_user_agent.py | 69 +++++++++++++ tests/test_llm_core_temperature.py | 8 +- tests/test_model_routes.py | 9 ++ 10 files changed, 289 insertions(+), 9 deletions(-) create mode 100644 tests/test_kimi_code_hosts.py create mode 100644 tests/test_kimi_code_user_agent.py diff --git a/routes/model_routes.py b/routes/model_routes.py index dfc6f99af..b5bd6ead8 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -248,6 +248,9 @@ _PROVIDER_CURATED = { "zai-coding": [ "glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air", ], + "kimi-code": [ + "kimi-for-coding", + ], "deepseek": [ "deepseek-chat", "deepseek-reasoner", ], @@ -315,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str: parsed = urlparse(base_url) if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""): return "zai-coding" + if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""): + return "kimi-code" for domain, key in _HOST_TO_CURATED: if _host_match(base_url, domain): return key @@ -703,6 +708,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis """Probe a base URL's /models endpoint and return list of model IDs. For Anthropic, queries their /v1/models API, falling back to hardcoded list.""" from src.endpoint_resolver import resolve_url + from src.llm_core import httpx_get_kimi_aware base = resolve_url(_normalize_base(base_url)) provider = _safe_detect_provider(base) if provider == "chatgpt-subscription": @@ -738,7 +744,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis url = _safe_build_models_url(base) headers = _safe_build_headers(api_key, base) try: - r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify()) + r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify()) r.raise_for_status() data = r.json() # OpenAI format: {"data": [{"id": "model-name"}]} @@ -754,6 +760,11 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis for _e in _PROVIDER_CURATED.get(_ck, []): if _e not in set(models) and not any(m.startswith(_e) for m in models): models.append(_e) + if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""): + _ck = _match_provider_curated(base, None) + for _e in _PROVIDER_CURATED.get(_ck, []): + if _e not in set(models) and not any(m.startswith(_e) for m in models): + models.append(_e) return [m for m in models if _is_chat_model(m)] except httpx.HTTPStatusError as e: if api_key: diff --git a/routes/webhook_routes.py b/routes/webhook_routes.py index da6288e7a..77902c24b 100644 --- a/routes/webhook_routes.py +++ b/routes/webhook_routes.py @@ -198,6 +198,8 @@ def setup_webhook_routes( "opencode-go": "https://opencode.ai/zen/go/v1", "fireworks": "https://api.fireworks.ai/inference/v1", "venice": "https://api.venice.ai/api/v1", + "kimi-code": "https://api.kimi.com/coding/v1", + "kimicode": "https://api.kimi.com/coding/v1", } # Model prefix → provider mapping for auto-detection @@ -210,6 +212,8 @@ def setup_webhook_routes( "mistral": "mistral", "llama": "groq", "mixtral": "groq", + "kimi-for-coding": "kimi-code", + "kimi": "kimi-code", } def _resolve_base_url(model: Optional[str], provider: Optional[str]) -> Optional[str]: diff --git a/src/agent_loop.py b/src/agent_loop.py index 39463ae7d..a42ec4b2e 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -606,7 +606,7 @@ _API_HOSTS = frozenset([ "api.deepseek.com", "deepseek.com", "api.together.xyz", "api.fireworks.ai", "api.perplexity.ai", "api.x.ai", - "ollama.com", "api.venice.ai", + "ollama.com", "api.venice.ai", "api.kimi.com", "api.githubcopilot.com", # Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.). # Without these, `_is_api_model` falls back to keyword sniffing on the diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index 79702ec1c..f3783cdfa 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -12,7 +12,7 @@ from typing import Optional, Tuple, Dict from urllib.parse import urlparse, urlunparse from core.database import SessionLocal, ModelEndpoint -from src.llm_core import _detect_provider, _host_match, _ollama_api_root +from src.llm_core import _detect_provider, _host_match, _is_kimi_code_url, KIMI_CODE_USER_AGENT, _ollama_api_root logger = logging.getLogger(__name__) @@ -230,6 +230,8 @@ def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]: if provider == "openrouter": headers.setdefault("HTTP-Referer", "https://github.com/pewdiepie-archdaemon/odysseus") headers.setdefault("X-OpenRouter-Title", "Odysseus") + if _is_kimi_code_url(base): + headers.setdefault("User-Agent", KIMI_CODE_USER_AGENT) return headers diff --git a/src/llm_core.py b/src/llm_core.py index 3b7369153..9dfade2cd 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -442,6 +442,146 @@ def _host_match(url: str, *domains: str) -> bool: return any(host == d or host.endswith("." + d) for d in domains) +# Kimi Code subscription keys (api.kimi.com/coding/v1) require a whitelisted +# coding-agent User-Agent; otherwise the API returns 403 access_terminated_error. +# Tried in order; first success is cached per base URL for later requests. +KIMI_CODE_USER_AGENTS: tuple[str, ...] = ( + "claude-code/0.1.0", + "claude-code/1.0.0", + "KimiCLI/1.0", + "Kilo-Code/1.0", + "Roo-Code/1.0", + "Cursor/1.0", +) +KIMI_CODE_USER_AGENT = KIMI_CODE_USER_AGENTS[0] +_kimi_code_ua_cache: dict[str, str] = {} + + +def _is_kimi_code_url(url: str) -> bool: + if not url or not _host_match(url, "kimi.com"): + return False + try: + return "/coding" in (urlparse(url).path or "") + except Exception: + return False + + +def _kimi_code_base_key(url: str) -> str: + """Normalize a Kimi Code chat/models URL to its OpenAI base (.../coding/v1).""" + parsed = urlparse(url) + path = (parsed.path or "").rstrip("/") + for suffix in ("/chat/completions", "/models", "/completions"): + if path.endswith(suffix): + path = path[: -len(suffix)] + path = path.rstrip("/") or "/coding/v1" + return f"{parsed.scheme}://{parsed.netloc}{path}" + + +def _is_kimi_code_access_denied(status: int, body: bytes | str) -> bool: + if status != 403: + return False + text = body.decode("utf-8", errors="replace") if isinstance(body, bytes) else (body or "") + lower = text.lower() + return ( + "access_terminated_error" in lower + or "coding agents" in lower + or "only available for coding" in lower + ) + + +def _kimi_code_ua_candidates(url: str) -> list[str]: + if not _is_kimi_code_url(url): + return [] + base_key = _kimi_code_base_key(url) + cached = _kimi_code_ua_cache.get(base_key) + if cached: + return [cached] + [ua for ua in KIMI_CODE_USER_AGENTS if ua != cached] + return list(KIMI_CODE_USER_AGENTS) + + +def _remember_kimi_code_user_agent(url: str, user_agent: str) -> None: + _kimi_code_ua_cache[_kimi_code_base_key(url)] = user_agent + + +def apply_kimi_code_headers(headers: Optional[Dict], url: str) -> Dict[str, str]: + """Pick a Kimi Code User-Agent (cached probe when possible).""" + h = dict(headers or {}) + if not _is_kimi_code_url(url): + return h + base_key = _kimi_code_base_key(url) + cached = _kimi_code_ua_cache.get(base_key) + if cached: + h["User-Agent"] = cached + return h + models_url = base_key.rstrip("/") + "/models" + from src.tls_overrides import llm_verify + for ua in KIMI_CODE_USER_AGENTS: + trial = dict(h) + trial["User-Agent"] = ua + try: + r = httpx.get(models_url, headers=trial, timeout=8, verify=llm_verify()) + except Exception: + continue + if _is_kimi_code_access_denied(r.status_code, r.content): + logger.debug("Kimi Code rejected User-Agent %s (403), trying next", ua) + continue + if r.status_code < 400: + _remember_kimi_code_user_agent(url, ua) + h["User-Agent"] = ua + return h + break + h.setdefault("User-Agent", KIMI_CODE_USER_AGENT) + return h + + +def httpx_get_kimi_aware(url: str, headers: Optional[Dict], **kwargs): + h = apply_kimi_code_headers(headers, url) + if not _is_kimi_code_url(url): + return httpx.get(url, headers=h, **kwargs) + last = None + for ua in _kimi_code_ua_candidates(url): + trial = dict(h) + trial["User-Agent"] = ua + last = httpx.get(url, headers=trial, **kwargs) + if not _is_kimi_code_access_denied(last.status_code, last.content): + if last.status_code < 400: + _remember_kimi_code_user_agent(url, ua) + return last + return last + + +def httpx_post_kimi_aware(url: str, headers: Optional[Dict], **kwargs): + h = apply_kimi_code_headers(headers, url) + if not _is_kimi_code_url(url): + return httpx.post(url, headers=h, **kwargs) + last = None + for ua in _kimi_code_ua_candidates(url): + trial = dict(h) + trial["User-Agent"] = ua + last = httpx.post(url, headers=trial, **kwargs) + if not _is_kimi_code_access_denied(last.status_code, last.content): + if last.status_code < 400: + _remember_kimi_code_user_agent(url, ua) + return last + return last + + +async def httpx_post_kimi_aware_async(client, url: str, headers: Optional[Dict], **kwargs): + h = apply_kimi_code_headers(headers, url) + if not _is_kimi_code_url(url): + return await client.post(url, headers=h, **kwargs) + last = None + for ua in _kimi_code_ua_candidates(url): + trial = dict(h) + trial["User-Agent"] = ua + last = await client.post(url, headers=trial, **kwargs) + if not _is_kimi_code_access_denied(last.status_code, last.content): + if last.status_code < 400: + _remember_kimi_code_user_agent(url, ua) + return last + return last + + def _detect_provider(url: str) -> str: """Detect the API provider from a configured endpoint URL. @@ -561,6 +701,12 @@ def _provider_label(url: str) -> str: if _host_match(url, "googleapis.com"): return "Google" if _host_match(url, "together.xyz", "together.ai"): return "Together" if _host_match(url, "fireworks.ai"): return "Fireworks" + if _host_match(url, "kimi.com"): + try: + if "/coding" in (urlparse(url).path or ""): + return "Kimi Code" + except Exception: + pass if _is_ollama_native_url(url): return "Ollama" try: host = (urlparse(url).hostname or "").lower() @@ -701,7 +847,7 @@ def _uses_max_completion_tokens(model: str) -> bool: # perfectly good model as failing. For these models we omit the field and let # the API use its required default. (gpt-4.5 is intentionally excluded — it is # not a reasoning model and accepts temperature normally.) -_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5") +_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5", "kimi-for-coding") def _restricts_temperature(model: str) -> bool: """Check if a model rejects any non-default temperature.""" @@ -1157,7 +1303,7 @@ def list_model_ids( from src.endpoint_resolver import build_models_url models_url = build_models_url(base_chat_url) - r = httpx.get(models_url, headers=h, timeout=timeout) + r = httpx_get_kimi_aware(models_url, h, timeout=timeout) r.raise_for_status() data = r.json() model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")] @@ -1265,7 +1411,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL payload[tok_key] = max_tokens try: note_model_activity(target_url, model) - r = httpx.post(target_url, headers=h, json=payload, timeout=timeout) + r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout) except Exception as e: raise HTTPException(502, f"POST {target_url} failed: {e}") if not r.is_success: @@ -1473,7 +1619,7 @@ async def llm_call_async( try: note_model_activity(target_url, model) client = _get_http_client() - r = await client.post(target_url, headers=h, json=payload, timeout=call_timeout) + r = await httpx_post_kimi_aware_async(client, target_url, h, json=payload, timeout=call_timeout) duration = time.time() - start if not r.is_success: friendly = _format_upstream_error(r.status_code, r.text, target_url) @@ -1870,6 +2016,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl events.append(_stream_delta_event(part)) return events + h = apply_kimi_code_headers(h, target_url) try: client = _get_http_client() async with client.stream('POST', target_url, json=payload, headers=h, timeout=stream_timeout) as r: diff --git a/src/teacher_escalation.py b/src/teacher_escalation.py index 29dabd076..62cb68ced 100644 --- a/src/teacher_escalation.py +++ b/src/teacher_escalation.py @@ -42,7 +42,7 @@ _SOTA_HOSTS = frozenset({ "api.together.xyz", "api.fireworks.ai", "api.perplexity.ai", "api.x.ai", "generativelanguage.googleapis.com", "api.groq.com", - "openrouter.ai", "ollama.com", "api.venice.ai", + "openrouter.ai", "ollama.com", "api.venice.ai", "api.kimi.com", }) diff --git a/tests/test_kimi_code_hosts.py b/tests/test_kimi_code_hosts.py new file mode 100644 index 000000000..9d4272292 --- /dev/null +++ b/tests/test_kimi_code_hosts.py @@ -0,0 +1,32 @@ +"""Kimi Code host-allowlist behavior (follow-up to provider support). + +Kimi Code (https://api.kimi.com/coding/v1) is a subscription, OpenAI-compatible +cloud API with native tool-calling. These tests pin the three host-list integrations: + - agent loop sends native tool schemas to Kimi Code (not fenced-block parsing), + - teacher escalation treats Kimi Code as SOTA (loop OFF, no added latency). +""" +from src import agent_loop, teacher_escalation + + +class TestAgentToolHosts: + def test_kimi_code_in_api_hosts(self): + assert "api.kimi.com" in agent_loop._API_HOSTS + + def test_kimi_code_url_matches_api_host(self): + url = "https://api.kimi.com/coding/v1/chat/completions" + assert any(h in url for h in agent_loop._API_HOSTS) + + def test_unknown_host_not_matched(self): + url = "https://example.invalid/v1/chat/completions" + assert not any(h in url for h in agent_loop._API_HOSTS) + + +class TestTeacherEscalationSota: + def test_kimi_code_is_sota_not_self_hosted(self): + assert teacher_escalation.is_self_hosted("https://api.kimi.com/coding/v1/chat/completions") is False + + def test_known_cloud_still_sota(self): + assert teacher_escalation.is_self_hosted("https://api.openai.com/v1") is False + + def test_local_endpoint_still_self_hosted(self): + assert teacher_escalation.is_self_hosted("http://localhost:8000/v1") is True diff --git a/tests/test_kimi_code_user_agent.py b/tests/test_kimi_code_user_agent.py new file mode 100644 index 000000000..0d9f1cb01 --- /dev/null +++ b/tests/test_kimi_code_user_agent.py @@ -0,0 +1,69 @@ +"""Kimi Code User-Agent fallback list and 403 detection.""" +from src.llm_core import ( + KIMI_CODE_USER_AGENTS, + KIMI_CODE_USER_AGENT, + _is_kimi_code_access_denied, + _is_kimi_code_url, + _kimi_code_base_key, + _kimi_code_ua_cache, + _kimi_code_ua_candidates, + _remember_kimi_code_user_agent, + httpx_post_kimi_aware, +) + + +class TestKimiCodeUserAgents: + def test_default_is_first_fallback(self): + assert KIMI_CODE_USER_AGENT == KIMI_CODE_USER_AGENTS[0] + + def test_multiple_fallbacks_configured(self): + assert len(KIMI_CODE_USER_AGENTS) >= 3 + assert "KimiCLI/1.0" in KIMI_CODE_USER_AGENTS + + def test_detects_coding_agent_403(self): + body = '{"error":{"message":"only available for Coding Agents","type":"access_terminated_error"}}' + assert _is_kimi_code_access_denied(403, body) is True + + def test_non_403_not_access_denied(self): + assert _is_kimi_code_access_denied(401, "unauthorized") is False + + def test_ua_candidates_prefers_cache(self): + _kimi_code_ua_cache.clear() + url = "https://api.kimi.com/coding/v1/chat/completions" + _remember_kimi_code_user_agent(url, "Kilo-Code/1.0") + candidates = _kimi_code_ua_candidates(url) + assert candidates[0] == "Kilo-Code/1.0" + assert len(candidates) == len(KIMI_CODE_USER_AGENTS) + _kimi_code_ua_cache.clear() + + def test_non_kimi_url_has_no_candidates(self): + assert _kimi_code_ua_candidates("https://api.openai.com/v1") == [] + + def test_base_key_normalizes_chat_url(self): + assert _kimi_code_base_key("https://api.kimi.com/coding/v1/chat/completions") == ( + "https://api.kimi.com/coding/v1" + ) + + def test_post_retries_next_user_agent_on_403(self, monkeypatch): + _kimi_code_ua_cache.clear() + calls = [] + + class _Resp: + def __init__(self, status, text=""): + self.status_code = status + self.content = text.encode() + self.text = text + + def fake_post(url, headers=None, **kwargs): + calls.append(headers.get("User-Agent")) + if headers.get("User-Agent") == KIMI_CODE_USER_AGENTS[0]: + return _Resp(403, '{"error":{"type":"access_terminated_error"}}') + return _Resp(200, "{}") + + monkeypatch.setattr("src.llm_core.httpx.post", fake_post) + url = "https://api.kimi.com/coding/v1/chat/completions" + r = httpx_post_kimi_aware(url, {"Authorization": "Bearer x"}, json={}) + assert r.status_code == 200 + assert calls[0] == KIMI_CODE_USER_AGENTS[0] + assert calls[1] == KIMI_CODE_USER_AGENTS[1] + _kimi_code_ua_cache.clear() diff --git a/tests/test_llm_core_temperature.py b/tests/test_llm_core_temperature.py index 121a7ff4b..685313011 100644 --- a/tests/test_llm_core_temperature.py +++ b/tests/test_llm_core_temperature.py @@ -14,7 +14,7 @@ from src import llm_core @pytest.mark.parametrize( "model", ["o1", "o1-mini", "o3", "o3-mini", "o4-mini", "gpt-5", "gpt-5-mini", - "openrouter/openai/o3-mini", "OpenAI/GPT-5"], + "openrouter/openai/o3-mini", "OpenAI/GPT-5", "kimi-for-coding"], ) def test_reasoning_models_restrict_temperature(model): assert llm_core._restricts_temperature(model) is True @@ -62,6 +62,12 @@ def test_reasoning_model_payload_omits_temperature(monkeypatch): assert payload["max_completion_tokens"] == 5 +def test_kimi_for_coding_payload_omits_temperature(monkeypatch): + payload = _capture_openai_payload(monkeypatch, "kimi-for-coding", 0.1) + assert "temperature" not in payload + assert payload["max_tokens"] == 5 + + def test_normal_model_payload_keeps_temperature(monkeypatch): payload = _capture_openai_payload(monkeypatch, "gpt-4o", 0.2) assert payload["temperature"] == 0.2 diff --git a/tests/test_model_routes.py b/tests/test_model_routes.py index 1851bc8b0..bceb6c11f 100644 --- a/tests/test_model_routes.py +++ b/tests/test_model_routes.py @@ -205,6 +205,9 @@ class TestMatchProviderCurated: def test_ollama_url(self): assert _match_provider_curated("https://ollama.com/api", "openai") == "ollama" + def test_kimi_code_url(self): + assert _match_provider_curated("https://api.kimi.com/coding/v1", "openai") == "kimi-code" + def test_no_url_match_returns_provider(self): assert _match_provider_curated("https://localhost:1234", "openai") == "openai" @@ -312,6 +315,12 @@ class TestCurateModels: assert curated == models assert extra == [] + def test_kimi_code_partitions(self): + models = ["kimi-for-coding", "other-model"] + curated, extra = _curate_models(models, "kimi-code") + assert "kimi-for-coding" in curated + assert "other-model" in extra + def test_curated_sorted_by_priority(self): models = ["gpt-4o-mini", "gpt-4o", "o3"] curated, _ = _curate_models(models, "openai")