mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
fix(llm): omit temperature for Kimi K2.5 and K2.6 (#3960)
This commit is contained in:
+27
-3
@@ -605,6 +605,8 @@ def _detect_provider(url: str) -> str:
|
|||||||
return "groq"
|
return "groq"
|
||||||
if _host_match(url, "nvidia.com"):
|
if _host_match(url, "nvidia.com"):
|
||||||
return "nvidia"
|
return "nvidia"
|
||||||
|
if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
|
||||||
|
return "moonshot"
|
||||||
from src.chatgpt_subscription import is_chatgpt_subscription_base
|
from src.chatgpt_subscription import is_chatgpt_subscription_base
|
||||||
if is_chatgpt_subscription_base(url):
|
if is_chatgpt_subscription_base(url):
|
||||||
return "chatgpt-subscription"
|
return "chatgpt-subscription"
|
||||||
@@ -856,6 +858,28 @@ def _restricts_temperature(model: str) -> bool:
|
|||||||
m = model.lower()
|
m = model.lower()
|
||||||
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
|
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
|
||||||
|
|
||||||
|
|
||||||
|
# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
|
||||||
|
# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
|
||||||
|
# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
|
||||||
|
# control, so omit temperature and let Moonshot use its default thinking mode.
|
||||||
|
# Keep the gate provider-specific: self-hosted Kimi deployments may accept
|
||||||
|
# custom sampling values, and older Moonshot models have different defaults.
|
||||||
|
def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
|
||||||
|
"""Check if the official Moonshot API fixes temperature for this model."""
|
||||||
|
if provider != "moonshot" or not isinstance(model, str):
|
||||||
|
return False
|
||||||
|
model_id = model.lower().rsplit("/", 1)[-1]
|
||||||
|
return bool(re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", model_id))
|
||||||
|
|
||||||
|
|
||||||
|
def _omit_temperature(provider: str, model: str) -> bool:
    """Check if a request should use the provider's default temperature.

    Args:
        provider: Provider key for the request.
        model: Model identifier for the request.

    Returns:
        True when ``_restricts_temperature(model)`` is true, or when
        ``_moonshot_rejects_custom_temperature(provider, model)`` is true;
        False otherwise.
    """
    return _restricts_temperature(model) or _moonshot_rejects_custom_temperature(
        provider, model
    )
|
||||||
|
|
||||||
|
|
||||||
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
|
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
|
||||||
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
|
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
|
||||||
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
|
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
|
||||||
@@ -1404,7 +1428,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
|
|||||||
"messages": messages_copy,
|
"messages": messages_copy,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
}
|
}
|
||||||
if _restricts_temperature(model):
|
if _omit_temperature(provider, model):
|
||||||
payload.pop("temperature", None)
|
payload.pop("temperature", None)
|
||||||
if max_tokens and max_tokens > 0:
|
if max_tokens and max_tokens > 0:
|
||||||
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
||||||
@@ -1598,7 +1622,7 @@ async def llm_call_async(
|
|||||||
"messages": messages_copy,
|
"messages": messages_copy,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
}
|
}
|
||||||
if _restricts_temperature(model):
|
if _omit_temperature(provider, model):
|
||||||
payload.pop("temperature", None)
|
payload.pop("temperature", None)
|
||||||
if max_tokens and max_tokens > 0:
|
if max_tokens and max_tokens > 0:
|
||||||
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
||||||
@@ -1715,7 +1739,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
|||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
}
|
}
|
||||||
if _restricts_temperature(model):
|
if _omit_temperature(provider, model):
|
||||||
payload.pop("temperature", None)
|
payload.pop("temperature", None)
|
||||||
if provider not in {"openrouter", "groq"}:
|
if provider not in {"openrouter", "groq"}:
|
||||||
payload["stream_options"] = {"include_usage": True}
|
payload["stream_options"] = {"include_usage": True}
|
||||||
|
|||||||
@@ -29,7 +29,12 @@ def test_normal_models_allow_temperature(model):
|
|||||||
assert llm_core._restricts_temperature(model) is False
|
assert llm_core._restricts_temperature(model) is False
|
||||||
|
|
||||||
|
|
||||||
def _capture_openai_payload(monkeypatch, model, temperature):
|
def _capture_openai_payload(
|
||||||
|
monkeypatch,
|
||||||
|
model,
|
||||||
|
temperature,
|
||||||
|
url="https://api.openai.com/v1/chat/completions",
|
||||||
|
):
|
||||||
"""Run a synchronous OpenAI-compatible call and return the posted JSON body."""
|
"""Run a synchronous OpenAI-compatible call and return the posted JSON body."""
|
||||||
llm_core._response_cache.clear()
|
llm_core._response_cache.clear()
|
||||||
seen = {}
|
seen = {}
|
||||||
@@ -45,7 +50,7 @@ def _capture_openai_payload(monkeypatch, model, temperature):
|
|||||||
|
|
||||||
monkeypatch.setattr(llm_core.httpx, "post", fake_post)
|
monkeypatch.setattr(llm_core.httpx, "post", fake_post)
|
||||||
result = llm_core.llm_call(
|
result = llm_core.llm_call(
|
||||||
"https://api.openai.com/v1/chat/completions",
|
url,
|
||||||
model,
|
model,
|
||||||
[{"role": "user", "content": "Say OK"}],
|
[{"role": "user", "content": "Say OK"}],
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
@@ -131,3 +136,61 @@ def test_anthropic_payload_clamps_negative():
|
|||||||
def test_anthropic_payload_none_temperature_does_not_crash():
|
def test_anthropic_payload_none_temperature_does_not_crash():
|
||||||
payload = _anthropic_payload(None)
|
payload = _anthropic_payload(None)
|
||||||
assert payload["temperature"] is None
|
assert payload["temperature"] is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "model",
    [
        "kimi-k2.5",
        "kimi-k2.6",
        "moonshot/kimi-k2.6",
        "kimi-k2.6-preview",
    ],
)
def test_moonshot_k2_5_plus_uses_fixed_temperature(model):
    """Kimi K2.5/K2.6 IDs on the moonshot provider are flagged as fixed-temperature."""
    assert llm_core._moonshot_rejects_custom_temperature("moonshot", model)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "provider,model",
    [
        ("openai", "kimi-k2.6"),
        ("moonshot", "kimi-k2-0905-preview"),
        ("moonshot", "kimi-k2-thinking"),
        ("moonshot", "kimi-k2.50"),
        ("moonshot", None),
    ],
)
def test_other_models_keep_temperature(provider, model):
    """Other providers, other Kimi IDs, and non-string models are not flagged."""
    assert not llm_core._moonshot_rejects_custom_temperature(provider, model)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "url",
    [
        "https://api.moonshot.ai/v1/chat/completions",
        "https://api.moonshot.cn/v1/chat/completions",
    ],
)
def test_moonshot_provider_detection(url):
    """Both the moonshot.ai and moonshot.cn API hosts resolve to the moonshot provider."""
    assert llm_core._detect_provider(url) == "moonshot"
|
||||||
|
|
||||||
|
|
||||||
|
def test_moonshot_k2_6_payload_omits_temperature(monkeypatch):
    """A kimi-k2.6 call to the Moonshot API host posts no temperature field."""
    payload = _capture_openai_payload(
        monkeypatch,
        "kimi-k2.6",
        0.7,
        url="https://api.moonshot.ai/v1/chat/completions",
    )
    assert "temperature" not in payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_self_hosted_kimi_k2_6_payload_keeps_temperature(monkeypatch):
    """A kimi-k2.6 call to a localhost endpoint still posts the caller's temperature."""
    payload = _capture_openai_payload(
        monkeypatch,
        "kimi-k2.6",
        0.7,
        url="http://localhost:8000/v1/chat/completions",
    )
    assert payload["temperature"] == 0.7
|
||||||
|
|||||||
Reference in New Issue
Block a user