Add ask_user tool: agent-posed multiple-choice questions (#2111)

Let the agent pause and ask the user a multiple-choice question when a
task is genuinely ambiguous and the answer changes what it does next —
choosing between approaches, confirming an assumption, picking a target —
instead of guessing.

Modeled on the existing `ui_control` marker pattern: the `ask_user` tool
returns an `ask_user` payload that the agent loop emits as an SSE event
and then ends the turn. The frontend renders the question with clickable
option buttons, a free-text "Other" input, and an x to dismiss; the user's
choice is sent as the next message and the agent resumes with it in
context.

- src/tool_execution.py: `ask_user` handler — pure UI marker, no I/O.
  Validates a non-empty question + 2..6 options, normalizes string/object
  options, returns the payload.
- src/agent_loop.py: emit the `ask_user` event and break the round loop so
  the turn ends and waits for the user's selection. Stream the question as
  assistant text so it persists/replays (prevents a re-ask loop).
- Registration: TOOL_TAGS, ALWAYS_AVAILABLE, BUILTIN_TOOL_DESCRIPTIONS,
  FUNCTION_TOOL_SCHEMAS, the system-prompt blurb. Not admin-gated (any
  user can be asked); the structured args serialize via the default
  json.dumps path.
- routes/chat_routes.py: relay the `ask_user` event to the client.
- static/js/chat.js + static/style.css: render the question card (options +
  free-text Other + dismiss x; removed once answered). Reuses CSS vars and
  the .modal-close button; emoji go through the monochrome-SVG pipeline.
  Bump chat.js cache pin.
- tests/test_ask_user_tool.py: payload, multi flag, string options, option
  cap, validation errors, serializer round-trip, registration.
This commit is contained in:
Kenny Van de Maele
2026-06-05 11:49:11 +02:00
committed by GitHub
parent 621885ac06
commit 0a2adc9c96
9 changed files with 432 additions and 1 deletions
+147
View File
@@ -12,6 +12,7 @@ import chatRenderer from './chatRenderer.js';
import chatStream from './chatStream.js';
import { addAITTSButton } from './tts-ai.js';
import markdownModule from './markdown.js';
import { svgifyEmoji } from './markdown.js';
import spinnerModule from './spinner.js';
import presetsModule from './presets.js';
import fileHandlerModule from './fileHandler.js';
@@ -2261,6 +2262,152 @@ import createResearchSynapse from './researchSynapse.js';
if (_isBg) continue;
chatStream.handleUIControl(json.data || {});
} else if (json.type === 'ask_user') {
if (_isBg) continue;
// The agent posed a multiple-choice question; the turn has ended.
// Render clickable options at the bottom of the history. The
// user's pick is sent as the next message and the agent resumes.
_cancelThinkingTimer();
_removeThinkingSpinner();
const _aq = json.data || {};
const _opts = Array.isArray(_aq.options) ? _aq.options : [];
if (_aq.question && _opts.length) {
const chatBox = document.getElementById('chat-history');
// Drop any prior unanswered card so only the latest shows.
chatBox.querySelectorAll('.ask-user-card').forEach(n => n.remove());
const card = document.createElement('div');
card.className = 'ask-user-card';
const multi = !!_aq.multi;
// Group the choices for assistive tech and label the group with
// the question (set below); make the card focusable so it can be
// moved to when it appears.
card.setAttribute('role', 'group');
card.tabIndex = -1;
// Render any emoji in agent-supplied text through the app's
// pipeline: escape, then svgify to monochrome theme-tinted
// glyphs (project rule: never colorful emoji; respects the
// "Text-only Emojis" setting like the rest of the chat).
const _emo = (s) => svgifyEmoji(uiModule.esc(String(s)));
// Header row holds the close (×) to dismiss the affordances and
// just type a reply instead.
const head = document.createElement('div');
head.className = 'ask-user-head';
const closeBtn = document.createElement('button');
closeBtn.type = 'button';
closeBtn.className = 'modal-close ask-user-close';
closeBtn.setAttribute('aria-label', 'Dismiss question');
closeBtn.textContent = '×';
closeBtn.addEventListener('click', () => {
card.remove();
const mi = uiModule.el('message');
if (mi) mi.focus();
});
head.appendChild(closeBtn);
card.appendChild(head);
// Render the question inside the card so it's self-contained:
// some models call ask_user without first narrating the question
// as assistant text, in which case the card would otherwise show
// bare options with no prompt.
if (_aq.question) {
const q = document.createElement('div');
q.className = 'ask-user-question';
q.id = `ask-user-q-${Date.now()}-${Math.floor(Math.random() * 1e4)}`;
q.innerHTML = _emo(_aq.question);
card.appendChild(q);
// Label the choice group with the question for screen readers.
card.setAttribute('aria-labelledby', q.id);
} else {
card.setAttribute('aria-label', 'Question from the assistant');
}
const list = document.createElement('div');
list.className = 'ask-user-options';
card.appendChild(list);
const _send = (text) => {
if (!text) return;
// Remove the card once answered — the choice is sent as a
// normal user message (and the question persists as the
// assistant text above), so the affordances are spent.
card.remove();
const mi = uiModule.el('message');
if (mi) mi.value = text;
const sb = document.querySelector('.send-btn');
if (sb) sb.click();
};
_opts.forEach((opt, i) => {
const label = (opt && opt.label) ? String(opt.label) : String(opt || '');
if (!label) return;
const descr = (opt && opt.description) ? String(opt.description) : '';
const row = document.createElement(multi ? 'label' : 'button');
row.className = 'ask-user-option';
if (multi) {
const cb = document.createElement('input');
cb.type = 'checkbox';
cb.value = label;
row.appendChild(cb);
}
const txt = document.createElement('span');
txt.className = 'ask-user-option-label';
txt.innerHTML = _emo(label);
row.appendChild(txt);
if (descr) {
const d = document.createElement('span');
d.className = 'ask-user-option-desc';
d.innerHTML = _emo(descr);
row.appendChild(d);
}
if (!multi) {
row.type = 'button';
row.addEventListener('click', () => _send(label));
}
list.appendChild(row);
});
// Free-text "Other" — type a custom answer + send (Enter or →).
const other = document.createElement('div');
other.className = 'ask-user-other';
const otherInput = document.createElement('input');
otherInput.type = 'text';
otherInput.className = 'styled-prompt-input ask-user-other-input';
otherInput.placeholder = multi ? 'Other (added to selection)…' : 'Other… (type your own answer)';
otherInput.setAttribute('aria-label', multi ? 'Add a custom option' : 'Type a custom answer');
const otherSend = document.createElement('button');
otherSend.type = 'button';
otherSend.className = 'confirm-btn confirm-btn-primary ask-user-other-send';
otherSend.setAttribute('aria-label', 'Send answer');
otherSend.textContent = multi ? 'Send selection' : 'Send';
const _submit = () => {
const free = otherInput.value.trim();
if (multi) {
const picked = Array.from(card.querySelectorAll('.ask-user-option input:checked')).map(c => c.value);
if (free) picked.push(free);
if (picked.length) _send(picked.join(', '));
} else if (free) {
_send(free);
}
};
otherSend.addEventListener('click', _submit);
otherInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !e.shiftKey && !e.isComposing) {
e.preventDefault();
_submit();
}
});
other.appendChild(otherInput);
other.appendChild(otherSend);
card.appendChild(other);
chatBox.appendChild(card);
card.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
// Move focus to the card so keyboard/screen-reader users land on
// the question + choices when it appears.
try { card.focus(); } catch (_) {}
}
} else if (json.type === 'agent_step') {
if (_isBg) continue;
_cancelThinkingTimer();