fix ask-user choices across reloads (#4669)

This commit is contained in:
Gabriel Peña
2026-06-22 14:49:49 -04:00
committed by GitHub
parent 4c82e4a172
commit 91bba117c1
7 changed files with 308 additions and 151 deletions
+24 -6
View File
@@ -3215,9 +3215,12 @@ async def stream_agent_loop(
f'data: {json.dumps({"type": "ui_control", "data": result})}\n\n'
)
# ask_user: the agent posed a multiple-choice question. Emit it so the
# frontend renders clickable options, then end the turn (below) and
# wait — the user's pick becomes the next message.
# ask_user: remember the payload now, but emit the interactive event
# only *after* tool_output below. Emitting it before tool_output let
# the subsequent tool-card rewrite/scroll push the choices out of
# view. The payload is also copied into the persisted tool event so
# history reload can reconstruct an unanswered card.
_pending_ask_user_event = None
if "ask_user" in result:
# The question lives in the tool args. ChatMessage.to_dict()
# replays only role+content to the model next turn — tool_event
@@ -3232,9 +3235,7 @@ async def stream_agent_loop(
_auq_delta = ("\n\n" if full_response.strip() else "") + _auq_q
full_response += _auq_delta
yield 'data: ' + json.dumps({"delta": _auq_delta}) + '\n\n'
yield (
f'data: {json.dumps({"type": "ask_user", "data": result["ask_user"]})}\n\n'
)
_pending_ask_user_event = _auq
_awaiting_user = True
# update_plan: agent wrote back to the plan (ticked a step / revised).
@@ -3289,6 +3290,10 @@ async def stream_agent_loop(
# Emit tool_output (include ui_event data if present)
tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
if _pending_ask_user_event:
# Keep enough state in the streamed tool result for alternate
# clients to render the prompt without depending on event order.
tool_output_data["ask_user"] = _pending_ask_user_event
if "ui_event" in result:
tool_output_data["ui_event"] = result["ui_event"]
for k in (
@@ -3319,6 +3324,14 @@ async def stream_agent_loop(
tool_output_data["diff"] = result["diff"]
yield f'data: {json.dumps(tool_output_data)}\n\n'
# This must be the final UI event for ask_user: the frontend appends
# the card below the now-settled tool node and cancels any between-
# round spinner. The turn ends after the current tool batch.
if _pending_ask_user_event:
yield (
f'data: {json.dumps({"type": "ask_user", "data": _pending_ask_user_event})}\n\n'
)
# Native document tools open in the editor + carry the REAL doc id.
# Emit a doc_update so the frontend opens/activates it and sends it
# back as active_doc_id next turn (otherwise the agent can't "see"
@@ -3376,6 +3389,11 @@ async def stream_agent_loop(
# this the diff shows live but vanishes from saved history.
if result.get("diff"):
tool_event["diff"] = result["diff"]
if _pending_ask_user_event:
# Persist the structured question with the tool event. On a
# reload, chatRenderer can restore the card; a later user
# message removes it as answered.
tool_event["ask_user"] = _pending_ask_user_event
tool_events.append(tool_event)
if block.tool_type in _VERIFIER_EFFECTFUL_TOOLS:
_effectful_used = True
+1 -1
View File
@@ -103,7 +103,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
"list_sessions": "List all chats with their metadata (the UI calls these 'chats'). Use for 'list my chats', 'rename all my chats' (list first, then manage_session to rename each).",
"send_to_session": "Send a message to another chat. Cross-chat communication.",
"search_chats": "Search past session transcripts across chats.",
"ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
"ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Omit `multi`/keep it false unless the question explicitly permits choosing multiple options. Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
"update_plan": "Write back to the ACTIVE PLAN while executing an approved plan: mark steps done or revise them. After finishing a step call this with the full checklist and that step marked done; when the user asks to change the plan call it with the revised checklist. Always pass the COMPLETE markdown checklist (`- [ ]` / `- [x]`), not a diff. The user's docked plan window updates live. No effect when there is no active plan.",
"ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. To pre-fill the reply body in one shot (USE THIS whenever the user told you what to say — opening an empty draft when they asked you to write is wrong), append the body after the mode: `open_email_reply <uid> <folder> reply <body text>`. Body can continue on subsequent lines for multi-line replies. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
"list_email_accounts": "List configured email accounts and default status. Use before reading or sending mail when the user mentions Gmail, work mail, custom domain mail, another mailbox, or asks to compare/check multiple inboxes.",
+8 -2
View File
@@ -467,7 +467,7 @@ FUNCTION_TOOL_SCHEMAS = [
"question": {"type": "string", "description": "The question to ask. Be specific and self-contained."},
"options": {
"type": "array",
"description": "2-6 mutually exclusive choices. Each is an object with a short `label` and an optional `description` explaining the trade-off.",
"description": "2-6 choices. Each is an object with a short `label` and an optional `description` explaining the trade-off.",
"items": {
"type": "object",
"properties": {
@@ -477,7 +477,7 @@ FUNCTION_TOOL_SCHEMAS = [
"required": ["label"]
}
},
"multi": {"type": "boolean", "description": "Set true to let the user select multiple options instead of one. Default false."}
"multi": {"type": "boolean", "description": "Set true ONLY when the question explicitly allows choosing more than one option. Otherwise omit it or set false. Default false."}
},
"required": ["question", "options"]
}
@@ -1406,6 +1406,12 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
content = json.dumps(args)
elif tool_type == "ask_teacher":
content = args.get("model", "auto") + "\n" + args.get("problem", "")
elif tool_type == "ask_user":
# Keep user-facing labels readable in the tool trace. The outer SSE
# JSON encoder will escape them for transport and JSON.parse restores
# them once; pre-escaping here caused literal ``\u00f1`` sequences to
# remain visible in the debug panel.
content = json.dumps(args, ensure_ascii=False)
else:
content = json.dumps(args)