fix(kimi): resolve Kimi Code API 403 errors and User-Agent restrictions (#3549)

* fix(kimi): resolve Kimi Code API 403 errors and User-Agent restrictions

Kimi Code subscription keys require a whitelisted coding-agent User-Agent to avoid access_terminated_error 403s. This adds User-Agent probing and caching for Kimi Code endpoints.

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(kimi): omit temperature for kimi-for-coding API calls

Kimi Code rejects any non-default temperature with HTTP 400, which broke deep research probes and low-temp LLM rounds.

Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-17 02:05:22 -04:00 · 2026-06-15 02:56:54 -04:00
parent 674457384a
commit 955455b797
10 changed files with 289 additions and 9 deletions
@@ -14,7 +14,7 @@ from src import llm_core
@pytest.mark.parametrize(
    "model",
    ["o1", "o1-mini", "o3", "o3-mini", "o4-mini", "gpt-5", "gpt-5-mini",
-     "openrouter/openai/o3-mini", "OpenAI/GPT-5"],
+     "openrouter/openai/o3-mini", "OpenAI/GPT-5", "kimi-for-coding"],
 )
 def test_reasoning_models_restrict_temperature(model):
    assert llm_core._restricts_temperature(model) is True
@@ -62,6 +62,12 @@ def test_reasoning_model_payload_omits_temperature(monkeypatch):
    assert payload["max_completion_tokens"] == 5


+def test_kimi_for_coding_payload_omits_temperature(monkeypatch):
+    payload = _capture_openai_payload(monkeypatch, "kimi-for-coding", 0.1)
+    assert "temperature" not in payload
+    assert payload["max_tokens"] == 5
+
+
 def test_normal_model_payload_keeps_temperature(monkeypatch):
    payload = _capture_openai_payload(monkeypatch, "gpt-4o", 0.2)
    assert payload["temperature"] == 0.2