From 9e74a327f86ac04474a0bf6b303eae41032db89a Mon Sep 17 00:00:00 2001
From: Sid <kushwahasiddhartha31@gmail.com>
Date: Tue, 9 Jun 2026 21:12:12 +0530
Subject: [PATCH] fix(llm): remove max_output_tokens from ChatGPT Subscription
 payload (#3656)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ChatGPT's Codex API rejects any request that includes max_output_tokens,
returning HTTP 400 "Unsupported parameter: max_output_tokens". This caused
Deep Research to always fail during the endpoint probe when a ChatGPT
Subscription model was selected.

Remove the conditional that set payload["max_output_tokens"] in
_build_chatgpt_responses_payload(). The parameter is simply not sent.

Also update the two affected tests:
- Rename test_chatgpt_subscription_payload_uses_max_output_tokens →
  test_chatgpt_subscription_payload_omits_max_output_tokens
- Rename test_chatgpt_subscription_payload_omits_empty_max_output_tokens →
  test_chatgpt_subscription_payload_omits_max_output_tokens_when_zero
- In the first test, assert max_output_tokens is absent rather than present
  (the second test already asserted absence and is only renamed)

Fixes #3650
---
 src/llm_core.py                    | 5 +++--
 tests/test_llm_core_temperature.py | 9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/llm_core.py b/src/llm_core.py
index b012638fa..8da2c46e0 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -563,8 +563,9 @@ def _build_chatgpt_responses_payload(
     }
     if not _restricts_temperature(model):
         payload["temperature"] = temperature
-    if max_tokens and max_tokens > 0:
-        payload["max_output_tokens"] = max_tokens
+    # The ChatGPT Subscription Codex API rejects max_output_tokens with
+    # HTTP 400 "Unsupported parameter: max_output_tokens", so max_tokens is
+    # intentionally not forwarded in the payload.
     return payload
 
 
diff --git a/tests/test_llm_core_temperature.py b/tests/test_llm_core_temperature.py
index f49d3dba0..121a7ff4b 100644
--- a/tests/test_llm_core_temperature.py
+++ b/tests/test_llm_core_temperature.py
@@ -75,7 +75,10 @@ def test_normal_model_payload_keeps_temperature_above_one(monkeypatch):
     assert payload["temperature"] == 1.2
 
 
-def test_chatgpt_subscription_payload_uses_max_output_tokens():
+def test_chatgpt_subscription_payload_omits_max_output_tokens():
+    # ChatGPT Subscription Codex API does not support max_output_tokens —
+    # passing it returns HTTP 400 "Unsupported parameter: max_output_tokens".
+    # The payload should NOT include max_output_tokens regardless of max_tokens.
     payload = llm_core._build_chatgpt_responses_payload(
         "gpt-5.1-codex",
         [{"role": "user", "content": "Say OK"}],
@@ -83,10 +86,10 @@ def test_chatgpt_subscription_payload_uses_max_output_tokens():
         max_tokens=37,
     )
 
-    assert payload["max_output_tokens"] == 37
+    assert "max_output_tokens" not in payload
 
 
-def test_chatgpt_subscription_payload_omits_empty_max_output_tokens():
+def test_chatgpt_subscription_payload_omits_max_output_tokens_when_zero():
     payload = llm_core._build_chatgpt_responses_payload(
         "gpt-5.1-codex",
         [{"role": "user", "content": "Say OK"}],