Files
odysseus/tests/test_llm_core_temperature.py
T
stocky789 1e0d9b92af feat: add ChatGPT Subscription provider (#2876)
* feat: Add ChatGPT Subscription support and related features

- Introduced a new provider option for ChatGPT Subscription in the endpoint selection UI.
- Implemented OAuth flow for ChatGPT Subscription sign-in, including polling for authorization status.
- Updated admin interface to handle ChatGPT Subscription, including disabling API key input and providing user guidance.
- Enhanced cost tracking logic to differentiate between subscription and non-subscription endpoints.
- Added new slash commands for managing skills, including listing, searching, and invoking skills.
- Implemented caching for skill catalog to optimize performance.
- Updated tests to cover new ChatGPT Subscription functionality and ensure proper endpoint probing.
- Refactored existing code to accommodate new features and improve maintainability.

* refactor: share provider device-flow setup

- reuse one device-flow backend for Copilot and ChatGPT Subscription
- add one frontend device-flow helper for Settings and /setup
- put GitHub Copilot back into Add Models, now as a dropdown option
- make provider selection just select; clicking Add starts sign-in
- stop ChatGPT Subscription setup from opening auth tabs automatically
- make /setup copilot and /setup chatgpt-subscription work from chat
- show ChatGPT Subscription in the /setup suggestions
- show the real error message when setup fails
- add focused tests for the shared flow and setup UI

* feat(chatgpt-subscription): harden credential lifecycle and streamline auth UX

Backend:
- Resolve runtime bearer for provider-auth endpoints at probe time via a
  shared _resolve_probe_key() that delegates to resolve_endpoint_runtime,
  applied across all probe/refresh call sites.
- Skip live completion probes and health pings for discovery-only providers
  (centralized behind _is_discovery_only_provider) — the Codex/Responses API
  has no such endpoints, so status is derived from cached models.
- Never persist the short-lived ChatGPT bearer to the plaintext sessions
  table; proactively clear any stale bearer left by an earlier code path.
- Revoke orphaned ProviderAuthSession credentials when the last endpoint
  backing them is deleted (_delete_orphaned_provider_auth), surfaced via
  cleared_provider_auth in the delete response.

Frontend (admin.js):
- Auto-start the device-auth flow on provider selection so the authorization
  panel (code + Authorize) shows immediately instead of behind a "Sign in" click.
- Remove the redundant top button for device-auth providers and move retry
  into the panel via an inline "Try again".
- Drop the self-evident hint text and add an execCommand clipboard fallback so
  Copy works in non-secure (HTTP/LAN) contexts.

* fix: harden chatgpt subscription provider

* chore: remove PR media from branch

* Fix chatgpt subscription recovery and token handling

---------

Co-authored-by: 5p00kyy <admin@5p00ky.dev>
2026-06-08 10:19:18 +02:00

125 lines
3.9 KiB
Python

"""Regression tests: OpenAI reasoning models reject a non-default temperature.
o1/o3/o4/gpt-5 only accept the default temperature (1); sending an explicit
value — even 0.0 — returns HTTP 400 "Only the default (1) value is supported".
The OpenAI-compatible payload builders must omit the temperature field for these
models so chat (with a non-default preset) and endpoint probing don't break.
"""
import httpx
import pytest
from src import llm_core
@pytest.mark.parametrize(
    "model",
    [
        "o1",
        "o1-mini",
        "o3",
        "o3-mini",
        "o4-mini",
        "gpt-5",
        "gpt-5-mini",
        "openrouter/openai/o3-mini",
        "OpenAI/GPT-5",
    ],
)
def test_reasoning_models_restrict_temperature(model):
    """Reasoning-model names (bare, provider-prefixed, mixed-case) are restricted."""
    restricted = llm_core._restricts_temperature(model)
    assert restricted is True
@pytest.mark.parametrize(
    "model",
    [
        "gpt-4o",
        "gpt-4.1",
        "gpt-3.5-turbo",
        "gpt-4.5-preview",
        "claude-3-5-sonnet",
        "llama3.1",
        "",
        None,
    ],
)
def test_normal_models_allow_temperature(model):
    """Non-reasoning names, the empty string and None are not restricted."""
    restricted = llm_core._restricts_temperature(model)
    assert restricted is False
def _capture_openai_payload(monkeypatch, model, temperature):
    """Make one patched synchronous ``llm_call`` and return the JSON body it posted.

    ``llm_core.httpx.post`` is swapped for a stub that records the outgoing
    ``json`` argument and replies with a minimal chat-completion response whose
    message content is ``"OK"``.
    """
    # NOTE(review): presumably cleared so a cached reply cannot bypass the
    # patched POST — confirm against llm_core.
    llm_core._response_cache.clear()
    captured = {}

    def _recording_post(url, headers=None, json=None, timeout=None):
        captured["json"] = json
        canned_body = {"choices": [{"message": {"content": "OK"}}]}
        return httpx.Response(
            200,
            request=httpx.Request("POST", url),
            json=canned_body,
        )

    monkeypatch.setattr(llm_core.httpx, "post", _recording_post)

    messages = [{"role": "user", "content": "Say OK"}]
    reply = llm_core.llm_call(
        "https://api.openai.com/v1/chat/completions",
        model,
        messages,
        temperature=temperature,
        max_tokens=5,
    )
    assert reply == "OK"
    return captured["json"]
def test_reasoning_model_payload_omits_temperature(monkeypatch):
    """An explicit 0.0 temperature is left out of an o3-mini request body."""
    body = _capture_openai_payload(monkeypatch, "o3-mini", 0.0)
    assert "temperature" not in body
    # The token limit is sent as max_completion_tokens for reasoning models
    # and has to remain in the body.
    token_limit = body["max_completion_tokens"]
    assert token_limit == 5
def test_normal_model_payload_keeps_temperature(monkeypatch):
    """A gpt-4o request carries the caller's temperature and max_tokens unchanged."""
    body = _capture_openai_payload(monkeypatch, "gpt-4o", 0.2)
    sent_temperature = body["temperature"]
    assert sent_temperature == 0.2
    sent_limit = body["max_tokens"]
    assert sent_limit == 5
def test_normal_model_payload_keeps_temperature_above_one(monkeypatch):
    """A temperature above 1.0 reaches a gpt-4o request untouched."""
    # Temperatures over 1.0 can be valid for OpenAI/local providers; clamping
    # applies only to Anthropic and must leave this path alone.
    body = _capture_openai_payload(monkeypatch, "gpt-4o", 1.2)
    sent_temperature = body["temperature"]
    assert sent_temperature == 1.2
def test_chatgpt_subscription_payload_uses_max_output_tokens():
    """A positive max_tokens appears in the payload as max_output_tokens."""
    messages = [{"role": "user", "content": "Say OK"}]
    body = llm_core._build_chatgpt_responses_payload(
        "gpt-5.1-codex",
        messages,
        temperature=0.2,
        max_tokens=37,
    )
    output_limit = body["max_output_tokens"]
    assert output_limit == 37
def test_chatgpt_subscription_payload_omits_empty_max_output_tokens():
    """A max_tokens of 0 yields a payload without a max_output_tokens key."""
    messages = [{"role": "user", "content": "Say OK"}]
    body = llm_core._build_chatgpt_responses_payload(
        "gpt-5.1-codex",
        messages,
        temperature=0.2,
        max_tokens=0,
    )
    assert "max_output_tokens" not in body
def _anthropic_payload(temperature):
    """Build a claude-3-5-sonnet payload for a one-message chat at *temperature*."""
    messages = [{"role": "user", "content": "Hi"}]
    return llm_core._build_anthropic_payload(
        "claude-3-5-sonnet",
        messages,
        temperature,
        max_tokens=5,
    )
def test_anthropic_payload_clamps_above_one():
    """A temperature of 1.2 is lowered to 1.0 in the Anthropic payload."""
    # Anthropic rejects temperature > 1.0 (e.g. the Nietzsche preset's 1.2).
    sent_temperature = _anthropic_payload(1.2)["temperature"]
    assert sent_temperature == 1.0
def test_anthropic_payload_keeps_in_range():
    """A temperature of 0.7 is sent unchanged in the Anthropic payload."""
    sent_temperature = _anthropic_payload(0.7)["temperature"]
    assert sent_temperature == 0.7
def test_anthropic_payload_clamps_negative():
    """A negative temperature is raised to 0.0 in the Anthropic payload."""
    sent_temperature = _anthropic_payload(-0.5)["temperature"]
    assert sent_temperature == 0.0
def test_anthropic_payload_none_temperature_does_not_crash():
    """Building with temperature=None succeeds and the payload keeps None."""
    body = _anthropic_payload(None)
    sent_temperature = body["temperature"]
    assert sent_temperature is None