From 9e74a327f86ac04474a0bf6b303eae41032db89a Mon Sep 17 00:00:00 2001 From: Sid Date: Tue, 9 Jun 2026 21:12:12 +0530 Subject: [PATCH] fix(llm): remove max_output_tokens from ChatGPT Subscription payload (#3656) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ChatGPT's Codex API rejects any request that includes max_output_tokens, returning HTTP 400 "Unsupported parameter: max_output_tokens". This caused Deep Research to always fail during the endpoint probe when a ChatGPT Subscription model was selected. Remove the conditional that set payload["max_output_tokens"] in _build_chatgpt_responses_payload(). The parameter is simply not sent. Also update the two affected tests: - Rename test_chatgpt_subscription_payload_uses_max_output_tokens → test_chatgpt_subscription_payload_omits_max_output_tokens - Rename test_chatgpt_subscription_payload_omits_empty_max_output_tokens → test_chatgpt_subscription_payload_omits_max_output_tokens_when_zero - Assert max_output_tokens is absent rather than present Fixes #3650 --- src/llm_core.py | 5 +++-- tests/test_llm_core_temperature.py | 9 ++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/llm_core.py b/src/llm_core.py index b012638fa..8da2c46e0 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -563,8 +563,9 @@ def _build_chatgpt_responses_payload( } if not _restricts_temperature(model): payload["temperature"] = temperature - if max_tokens and max_tokens > 0: - payload["max_output_tokens"] = max_tokens + # ChatGPT Subscription Codex API does not support max_output_tokens — + # passing it returns HTTP 400 "Unsupported parameter: max_output_tokens". + # Do not include it in the payload. return payload diff --git a/tests/test_llm_core_temperature.py b/tests/test_llm_core_temperature.py index f49d3dba0..121a7ff4b 100644 --- a/tests/test_llm_core_temperature.py +++ b/tests/test_llm_core_temperature.py @@ -75,7 +75,10 @@ def test_normal_model_payload_keeps_temperature_above_one(monkeypatch): assert payload["temperature"] == 1.2 -def test_chatgpt_subscription_payload_uses_max_output_tokens(): +def test_chatgpt_subscription_payload_omits_max_output_tokens(): + # ChatGPT Subscription Codex API does not support max_output_tokens — + # passing it returns HTTP 400 "Unsupported parameter: max_output_tokens". + # The payload should NOT include max_output_tokens regardless of max_tokens. payload = llm_core._build_chatgpt_responses_payload( "gpt-5.1-codex", [{"role": "user", "content": "Say OK"}], @@ -83,10 +86,10 @@ def test_chatgpt_subscription_payload_uses_max_output_tokens(): max_tokens=37, ) - assert payload["max_output_tokens"] == 37 + assert "max_output_tokens" not in payload -def test_chatgpt_subscription_payload_omits_empty_max_output_tokens(): +def test_chatgpt_subscription_payload_omits_max_output_tokens_when_zero(): payload = llm_core._build_chatgpt_responses_payload( "gpt-5.1-codex", [{"role": "user", "content": "Say OK"}],