fix: omit temperature for Opus 4.7+ on native Anthropic path (#3117)

Anthropic removed the sampling parameters (temperature, top_p, top_k)
starting with Claude Opus 4.7 — sending temperature at all, even 0.0,
returns HTTP 400. _build_anthropic_payload sent it unconditionally, so
every native-Anthropic request to Opus 4.7/4.8 failed: the research probe
(ResearchHandler._probe_endpoint, temperature=0) aborted runs before they
started, and all DeepResearcher._llm calls 400'd.

Add _anthropic_rejects_temperature (version-gates opus-N-M >= (4,7)) and
omit temperature in the Anthropic builder for those models. Older Claude
models (Opus 4.6 and below, Sonnet/Haiku) keep temperature and the
existing [0,1] clamp.

The version gate is hardened against real-world model id shapes:
- a word-boundary anchor so a substring like `octopus-4-8` is not read
  as Opus and stripped of temperature;
- a 1-2 digit minor cap so a dated id such as `claude-opus-4-20250514`
  (Opus 4.0, listed in ANTHROPIC_MODELS) parses as major-only and keeps
  temperature, while dated 4.7+ snapshots still match;
- a non-string guard so a non-string model can't raise AttributeError
  (the previous builder never called .lower() on it).

Adds regression tests covering 4.7/4.8 omission, older/dated/legacy
retention, the substring overmatch, and non-string inputs.

Fixes #3065

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
George Lawton
2026-06-11 23:27:40 +10:00
committed by GitHub
parent af61b2d4e6
commit 4f48cfa9ae
2 changed files with 119 additions and 1 deletions
+25 -1
View File
@@ -681,6 +681,27 @@ def _restricts_temperature(model: str) -> bool:
m = model.lower()
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
# Sonnet/Haiku) still accept temperature in [0.0, 1.0], so the omission must be
# version-gated rather than applied to all `claude-*` models.
def _anthropic_rejects_temperature(model: str) -> bool:
"""Check if a native-Anthropic model rejects the temperature field (Opus 4.7+)."""
if not isinstance(model, str) or not model:
return False
# `(?<![a-z])` anchors "opus" to a word boundary so a substring match like
# `oct-opus`/`octopus-4-8` can't be read as Opus (it would otherwise strip
# temperature). Cap the minor at 1-2 digits and forbid a trailing digit so a
# dated id like `claude-opus-4-20250514` (Opus 4.0) parses as major-only (no
# minor match, kept) instead of reading the date `20250514` as a giant minor
# that would falsely test >= 4.7. Dated 4.7+ snapshots (`claude-opus-4-7-
# 20260201`) keep their explicit minor and are still matched.
match = re.search(r"(?<![a-z])opus[-_]?(\d+)[-_.](\d{1,2})(?!\d)", model.lower())
if not match:
return False
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
# Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
@@ -784,8 +805,11 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa
"model": model,
"messages": chat_messages,
"max_tokens": max_tokens if max_tokens and max_tokens > 0 else 4096,
"temperature": temperature,
}
# Opus 4.7+ removed the sampling parameters — sending `temperature` (even 0.0)
# returns HTTP 400. Omit it for those models; older Claude models still take it.
if not _anthropic_rejects_temperature(model):
payload["temperature"] = temperature
if system_parts:
system_text = "\n\n".join(system_parts)
# Send `system` as a structured text block so we can attach a prompt-cache
@@ -0,0 +1,94 @@
"""Regression guard: Opus 4.7+ rejects the temperature field entirely.
Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
with Claude Opus 4.7 — sending `temperature` at all, even 0.0, returns HTTP 400.
This broke every native-Anthropic call to Opus 4.7/4.8, including the research
endpoint probe (temperature=0) and all DeepResearcher LLM calls, because
_build_anthropic_payload sent `temperature` unconditionally.
Earlier Claude models (Opus 4.6 and below, every Sonnet/Haiku) still accept
temperature in [0.0, 1.0], so the omission is version-gated — the clamp-to-[0,1]
behavior for those models (test_llm_core_anthropic_temp_clamp.py) is unchanged.
"""
import os
os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")
import pytest
from src.llm_core import _anthropic_rejects_temperature, _build_anthropic_payload
@pytest.mark.parametrize(
    "model",
    [
        # Plain Opus ids at or above the 4.7 cut-off.
        "claude-opus-4-7",
        "claude-opus-4-8",
        # Dated snapshot suffixes must not hide the explicit minor.
        "claude-opus-4-8-20260101",
        "claude-opus-4-7-20260201",
        # A provider prefix in front of the id is tolerated.
        "anthropic/claude-opus-4-7",
        # Two-digit minor and a later major both compare >= (4, 7).
        "claude-opus-4-10",
        "claude-opus-5-0",
    ],
)
def test_opus_47_plus_rejects_temperature(model):
    """Every Opus 4.7+ id shape must be classified as rejecting temperature."""
    verdict = _anthropic_rejects_temperature(model)
    assert verdict is True
@pytest.mark.parametrize(
    "model",
    [
        # Opus releases below 4.7, with an explicit minor.
        "claude-opus-4-6",
        "claude-opus-4-5",
        "claude-opus-4-1",
        "claude-opus-4-0",
        # Bare major (no minor) — kept.
        "claude-opus-4",
        # Opus 4.0 dated id — the date must NOT read as a 4.7+ minor.
        "claude-opus-4-20250514",
        # Opus 4.1 dated id — explicit minor before the date.
        "claude-opus-4-1-20250805",
        # Dated 4.6 snapshot — older, still keeps temperature.
        "claude-opus-4-6-20251201",
        # Non-Opus families.
        "claude-sonnet-4-6",
        "claude-3-5-sonnet",
        # Legacy Claude 3 Opus — no opus-N-M pattern, kept.
        "claude-3-opus-20240229",
        "claude-haiku-4-5",
        "claude-x",
        # "opus" only as a substring of another word — must not match,
        # with or without a provider prefix.
        "octopus-4-8",
        "myproxy/octopus-4-8",
        # Empty and missing ids.
        "",
        None,
    ],
)
def test_older_claude_models_keep_temperature(model):
    """Ids outside the Opus 4.7+ range must be classified as keeping temperature."""
    verdict = _anthropic_rejects_temperature(model)
    assert verdict is False
@pytest.mark.parametrize(
    "model",
    [
        123,
        1.5,
        ["claude-opus-4-8"],
        {"a": 1},
        object(),
    ],
)
def test_non_string_model_is_handled_without_crashing(model):
    """The gate must classify truthy non-string inputs as False, not raise.

    Defensive check: the old builder never called .lower() on the model, so
    the gate must not start raising on a non-string value.
    """
    verdict = _anthropic_rejects_temperature(model)
    assert verdict is False
def _payload(model, temperature=0.0):
    """Build an Anthropic payload for a one-message chat with max_tokens=100."""
    single_turn = [{"role": "user", "content": "hi"}]
    return _build_anthropic_payload(model, single_turn, temperature, 100)
def test_payload_omits_temperature_for_opus_47_plus():
    """On Opus 4.7+ the built payload must not carry a temperature key.

    The endpoint probe sends temperature=0, so even a zero value has to be
    dropped for these models.
    """
    built = _payload("claude-opus-4-8", 0.0)
    assert "temperature" not in built
def test_payload_keeps_temperature_for_older_models():
    """Older models still receive temperature, including the [0,1] clamp."""
    opus_46 = _payload("claude-opus-4-6", 0.3)
    assert opus_46["temperature"] == 0.3
    # Older models retain the [0,1] clamp (Nietzsche preset at 1.2 -> 1.0).
    clamped = _payload("claude-3-5-sonnet", 1.2)
    assert clamped["temperature"] == 1.0
def test_payload_keeps_temperature_for_dated_opus_4_0():
    """The dated Opus 4.0 id must keep the caller's temperature.

    Anthropic's dated id for Opus 4.0 (claude-opus-4-20250514) is in this
    repo's ANTHROPIC_MODELS list. The date must not be misread as a >= 4.7
    minor, or the user's temperature would be silently dropped on a model
    that accepts it.
    """
    built = _payload("claude-opus-4-20250514", 0.5)
    assert built["temperature"] == 0.5