From f5d3e5098a03de77b538e2ba18151c6760ded262 Mon Sep 17 00:00:00 2001
From: Josh Patra <joshpatra12@gmail.com>
Date: Mon, 15 Jun 2026 07:29:22 -0400
Subject: [PATCH] fix(llm): omit temperature for Kimi K2.5 and K2.6 (#3960)

---
 src/llm_core.py                    | 30 +++++++++++--
 tests/test_llm_core_temperature.py | 67 +++++++++++++++++++++++++++++-
 2 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/src/llm_core.py b/src/llm_core.py
index 9dfade2cd..1338ef91a 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -605,6 +605,8 @@ def _detect_provider(url: str) -> str:
         return "groq"
     if _host_match(url, "nvidia.com"):
         return "nvidia"
+    if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
+        return "moonshot"
     from src.chatgpt_subscription import is_chatgpt_subscription_base
     if is_chatgpt_subscription_base(url):
         return "chatgpt-subscription"
@@ -856,6 +858,28 @@ def _restricts_temperature(model: str) -> bool:
     m = model.lower()
     return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
 
+
+# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
+# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
+# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
+# control, so omit temperature and let Moonshot use its default thinking mode.
+# Keep the gate provider-specific: self-hosted Kimi deployments may accept
+# custom sampling values, and older Moonshot models have different defaults.
+def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
+    """True if the official Moonshot API pins temperature for this model."""
+    if isinstance(model, str) and provider == "moonshot":
+        leaf = model.lower().rpartition("/")[2]  # drop any "vendor/" prefix
+        return re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", leaf) is not None
+    return False
+
+
+def _omit_temperature(provider: str, model: str) -> bool:
+    """Decide whether to drop temperature and use the provider's default."""
+    if _restricts_temperature(model):
+        return True
+    return _moonshot_rejects_custom_temperature(provider, model)
+
+
 # Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
 # with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
 # even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
@@ -1404,7 +1428,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
             "messages": messages_copy,
             "temperature": temperature,
         }
-        if _restricts_temperature(model):
+        if _omit_temperature(provider, model):
             payload.pop("temperature", None)
         if max_tokens and max_tokens > 0:
             tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
@@ -1598,7 +1622,7 @@ async def llm_call_async(
             "messages": messages_copy,
             "temperature": temperature,
         }
-        if _restricts_temperature(model):
+        if _omit_temperature(provider, model):
             payload.pop("temperature", None)
         if max_tokens and max_tokens > 0:
             tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
@@ -1715,7 +1739,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
             "temperature": temperature,
             "stream": True,
         }
-        if _restricts_temperature(model):
+        if _omit_temperature(provider, model):
             payload.pop("temperature", None)
         if provider not in {"openrouter", "groq"}:
             payload["stream_options"] = {"include_usage": True}
diff --git a/tests/test_llm_core_temperature.py b/tests/test_llm_core_temperature.py
index 685313011..ab6334f36 100644
--- a/tests/test_llm_core_temperature.py
+++ b/tests/test_llm_core_temperature.py
@@ -29,7 +29,12 @@ def test_normal_models_allow_temperature(model):
     assert llm_core._restricts_temperature(model) is False
 
 
-def _capture_openai_payload(monkeypatch, model, temperature):
+def _capture_openai_payload(
+    monkeypatch,
+    model,
+    temperature,
+    url="https://api.openai.com/v1/chat/completions",
+):
     """Run a synchronous OpenAI-compatible call and return the posted JSON body."""
     llm_core._response_cache.clear()
     seen = {}
@@ -45,7 +50,7 @@ def _capture_openai_payload(monkeypatch, model, temperature):
 
     monkeypatch.setattr(llm_core.httpx, "post", fake_post)
     result = llm_core.llm_call(
-        "https://api.openai.com/v1/chat/completions",
+        url,
         model,
         [{"role": "user", "content": "Say OK"}],
         temperature=temperature,
@@ -131,3 +136,61 @@ def test_anthropic_payload_clamps_negative():
 def test_anthropic_payload_none_temperature_does_not_crash():
     payload = _anthropic_payload(None)
     assert payload["temperature"] is None
+
+
+@pytest.mark.parametrize(
+    "model",
+    ["kimi-k2.5", "kimi-k2.6", "moonshot/kimi-k2.6", "kimi-k2.6-preview"],
+)
+def test_moonshot_k2_5_plus_uses_fixed_temperature(model):
+    """Kimi K2.5/K2.6 ids are flagged when the provider is "moonshot".
+
+    Covers bare ids, a namespaced id, and a suffixed variant.
+    """
+    flagged = llm_core._moonshot_rejects_custom_temperature("moonshot", model)
+    assert flagged
+
+
+@pytest.mark.parametrize(
+    "provider,model",
+    [
+        ("openai", "kimi-k2.6"),  # provider is not "moonshot"
+        ("moonshot", "kimi-k2-0905-preview"),  # no ".5"/".6" after "k2"
+        ("moonshot", "kimi-k2-thinking"),  # no ".5"/".6" after "k2"
+        ("moonshot", "kimi-k2.50"),  # "5" is followed by another digit
+        ("moonshot", None),  # non-string model id
+    ],
+)
+def test_other_models_keep_temperature(provider, model):
+    assert not llm_core._moonshot_rejects_custom_temperature(provider, model)
+
+
+@pytest.mark.parametrize(
+    "url",
+    [
+        "https://api.moonshot.ai/v1/chat/completions",  # moonshot.ai host
+        "https://api.moonshot.cn/v1/chat/completions",  # moonshot.cn host
+    ],
+)
+def test_moonshot_provider_detection(url):
+    assert llm_core._detect_provider(url) == "moonshot"  # one shared provider id
+
+
+def test_moonshot_k2_6_payload_omits_temperature(monkeypatch):
+    """Kimi K2.6 on the official Moonshot host sends no temperature.
+
+    The caller still passes 0.7; the posted JSON body must not carry it.
+    """
+    moonshot_url = "https://api.moonshot.ai/v1/chat/completions"
+    body = _capture_openai_payload(monkeypatch, "kimi-k2.6", 0.7, url=moonshot_url)
+    assert "temperature" not in body
+
+
+def test_self_hosted_kimi_k2_6_payload_keeps_temperature(monkeypatch):
+    """Kimi K2.6 behind a localhost URL keeps the caller's temperature.
+
+    The omission gate is provider-specific, so 0.7 must pass through as-is.
+    """
+    local_url = "http://localhost:8000/v1/chat/completions"
+    body = _capture_openai_payload(monkeypatch, "kimi-k2.6", 0.7, url=local_url)
+    assert body["temperature"] == 0.7