mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix: omit temperature for Opus 4.7+ on native Anthropic path (#3117)
Anthropic removed the sampling parameters (temperature, top_p, top_k) starting with Claude Opus 4.7 — sending temperature at all, even 0.0, returns HTTP 400. _build_anthropic_payload sent it unconditionally, so every native-Anthropic request to Opus 4.7/4.8 failed: the research probe (ResearchHandler._probe_endpoint, temperature=0) aborted runs before they started, and all DeepResearcher._llm calls 400'd.

Add _anthropic_rejects_temperature (version-gates opus-N-M >= (4,7)) and omit temperature in the Anthropic builder for those models. Older Claude models (Opus 4.6 and below, Sonnet/Haiku) keep temperature and the existing [0,1] clamp.

The version gate is hardened against real-world model id shapes:
- a word-boundary anchor so a substring like `octopus-4-8` is not read as Opus and stripped of temperature;
- a 1-2 digit minor cap so a dated id such as `claude-opus-4-20250514` (Opus 4.0, listed in ANTHROPIC_MODELS) parses as major-only and keeps temperature, while dated 4.7+ snapshots still match;
- a non-string guard so a non-string model can't raise AttributeError (the previous builder never called .lower() on it).

Adds regression tests covering 4.7/4.8 omission, older/dated/legacy retention, the substring overmatch, and non-string inputs.

Fixes #3065

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+25
-1
@@ -681,6 +681,27 @@ def _restricts_temperature(model: str) -> bool:
|
||||
m = model.lower()
|
||||
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
|
||||
|
||||
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
|
||||
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
|
||||
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
|
||||
# Sonnet/Haiku) still accept temperature in [0.0, 1.0], so the omission must be
|
||||
# version-gated rather than applied to all `claude-*` models.
|
||||
def _anthropic_rejects_temperature(model: str) -> bool:
|
||||
"""Check if a native-Anthropic model rejects the temperature field (Opus 4.7+)."""
|
||||
if not isinstance(model, str) or not model:
|
||||
return False
|
||||
# `(?<![a-z])` anchors "opus" to a word boundary so a substring match like
|
||||
# `oct-opus`/`octopus-4-8` can't be read as Opus (it would otherwise strip
|
||||
# temperature). Cap the minor at 1-2 digits and forbid a trailing digit so a
|
||||
# dated id like `claude-opus-4-20250514` (Opus 4.0) parses as major-only (no
|
||||
# minor match, kept) instead of reading the date `20250514` as a giant minor
|
||||
# that would falsely test >= 4.7. Dated 4.7+ snapshots (`claude-opus-4-7-
|
||||
# 20260201`) keep their explicit minor and are still matched.
|
||||
match = re.search(r"(?<![a-z])opus[-_]?(\d+)[-_.](\d{1,2})(?!\d)", model.lower())
|
||||
if not match:
|
||||
return False
|
||||
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
|
||||
|
||||
# Models that support structured thinking — may output </think> without opening tag
|
||||
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
|
||||
|
||||
@@ -784,8 +805,11 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa
|
||||
"model": model,
|
||||
"messages": chat_messages,
|
||||
"max_tokens": max_tokens if max_tokens and max_tokens > 0 else 4096,
|
||||
"temperature": temperature,
|
||||
}
|
||||
# Opus 4.7+ removed the sampling parameters — sending `temperature` (even 0.0)
|
||||
# returns HTTP 400. Omit it for those models; older Claude models still take it.
|
||||
if not _anthropic_rejects_temperature(model):
|
||||
payload["temperature"] = temperature
|
||||
if system_parts:
|
||||
system_text = "\n\n".join(system_parts)
|
||||
# Send `system` as a structured text block so we can attach a prompt-cache
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Regression guard: Opus 4.7+ rejects the temperature field entirely.
|
||||
|
||||
Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
|
||||
with Claude Opus 4.7 — sending `temperature` at all, even 0.0, returns HTTP 400.
|
||||
This broke every native-Anthropic call to Opus 4.7/4.8, including the research
|
||||
endpoint probe (temperature=0) and all DeepResearcher LLM calls, because
|
||||
_build_anthropic_payload sent `temperature` unconditionally.
|
||||
|
||||
Earlier Claude models (Opus 4.6 and below, every Sonnet/Haiku) still accept
|
||||
temperature in [0.0, 1.0], so the omission is version-gated — the clamp-to-[0,1]
|
||||
behavior for those models (test_llm_core_anthropic_temp_clamp.py) is unchanged.
|
||||
"""
|
||||
import os
|
||||
|
||||
os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")
|
||||
|
||||
import pytest
|
||||
|
||||
from src.llm_core import _anthropic_rejects_temperature, _build_anthropic_payload
|
||||
|
||||
|
||||
# Opus ids at or above 4.7; each must be classified as rejecting temperature.
_OPUS_47_PLUS_IDS = (
    "claude-opus-4-7",
    "claude-opus-4-8",
    "claude-opus-4-8-20260101",  # dated snapshot suffix is tolerated
    "claude-opus-4-7-20260201",  # dated 4.7 snapshot: explicit minor, still >= 4.7
    "anthropic/claude-opus-4-7",  # provider-prefixed id is tolerated
    "claude-opus-4-10",  # later minor, still >= 4.7
    "claude-opus-5-0",  # later major
)


@pytest.mark.parametrize("model", _OPUS_47_PLUS_IDS)
def test_opus_47_plus_rejects_temperature(model):
    """Every Opus 4.7+ id shape is reported as rejecting temperature."""
    verdict = _anthropic_rejects_temperature(model)
    assert verdict is True
|
||||
|
||||
|
||||
# Ids that must NOT be classified as rejecting temperature.
_TEMPERATURE_KEEPING_IDS = (
    "claude-opus-4-6",
    "claude-opus-4-5",
    "claude-opus-4-1",
    "claude-opus-4-0",
    "claude-opus-4",  # bare major with no minor
    "claude-opus-4-20250514",  # Opus 4.0 dated id: the date is not a 4.7+ minor
    "claude-opus-4-1-20250805",  # Opus 4.1 dated id: explicit minor before the date
    "claude-opus-4-6-20251201",  # dated 4.6 snapshot: older, keeps temperature
    "claude-sonnet-4-6",
    "claude-3-5-sonnet",
    "claude-3-opus-20240229",  # legacy Claude 3 Opus: no opus-N-M pattern
    "claude-haiku-4-5",
    "claude-x",
    "octopus-4-8",  # "opus" only as a substring of another word
    "myproxy/octopus-4-8",  # same substring case behind a provider prefix
    "",
    None,
)


@pytest.mark.parametrize("model", _TEMPERATURE_KEEPING_IDS)
def test_older_claude_models_keep_temperature(model):
    """Older, non-Opus, look-alike and empty ids are all reported as keeping temperature."""
    verdict = _anthropic_rejects_temperature(model)
    assert verdict is False
|
||||
|
||||
|
||||
# Truthy values of the wrong type; none of them is a model id string.
_NON_STRING_MODELS = (123, 1.5, ["claude-opus-4-8"], {"a": 1}, object())


@pytest.mark.parametrize("model", _NON_STRING_MODELS)
def test_non_string_model_is_handled_without_crashing(model):
    """The gate returns False for a non-string model instead of raising.

    Defensive check: the previous builder never called .lower() on the model,
    so the gate must not introduce a crash for non-string inputs.
    """
    verdict = _anthropic_rejects_temperature(model)
    assert verdict is False
|
||||
|
||||
|
||||
def _payload(model, temperature=0.0):
    """Build an Anthropic payload for a single "hi" user message with max_tokens=100."""
    conversation = [{"role": "user", "content": "hi"}]
    return _build_anthropic_payload(model, conversation, temperature, 100)
|
||||
|
||||
|
||||
def test_payload_omits_temperature_for_opus_47_plus():
    """An Opus 4.7+ payload carries no temperature key, even for temperature=0.

    The endpoint probe sends temperature=0, so a zero value must be dropped too.
    """
    assert "temperature" not in _payload("claude-opus-4-8", 0.0)
|
||||
|
||||
|
||||
def test_payload_keeps_temperature_for_older_models():
    """Pre-4.7 models still receive temperature, clamped into [0, 1]."""
    older_opus = _payload("claude-opus-4-6", 0.3)
    assert older_opus["temperature"] == 0.3

    # The [0,1] clamp is retained for older models (Nietzsche preset at 1.2 -> 1.0).
    over_range = _payload("claude-3-5-sonnet", 1.2)
    assert over_range["temperature"] == 1.0
|
||||
|
||||
|
||||
def test_payload_keeps_temperature_for_dated_opus_4_0():
    """The dated Opus 4.0 id keeps the caller's temperature in the payload.

    `claude-opus-4-20250514` is in this repo's ANTHROPIC_MODELS list. If its date
    were misread as a >= 4.7 minor, the user's temperature would be silently
    dropped on a model that accepts it.
    """
    dated_opus_4_0 = _payload("claude-opus-4-20250514", 0.5)
    assert dated_opus_4_0["temperature"] == 0.5
|
||||
Reference in New Issue
Block a user