diff --git a/static/js/chat.js b/static/js/chat.js index c9b73a8f1..50d296dcc 100644 --- a/static/js/chat.js +++ b/static/js/chat.js @@ -571,6 +571,24 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer let timeoutId = null; let responseTimeoutCleared = false; let clearResponseTimeout = () => {}; + let firstTokenWaitTimers = []; + const clearFirstTokenWaitTimers = () => { + firstTokenWaitTimers.forEach(t => { try { clearTimeout(t); } catch (_) {} }); + firstTokenWaitTimers = []; + }; + const scheduleFirstTokenWaitMessages = () => { + clearFirstTokenWaitTimers(); + const steps = [ + [20000, 'Still waiting for first token'], + [60000, 'Large local model is pre-filling context'], + [120000, 'Still working - no tokens yet from the model'], + ]; + firstTokenWaitTimers = steps.map(([ms, text]) => setTimeout(() => { + if (!accumulated && spinner && spinner.element && !(currentAbort && currentAbort.signal.aborted)) { + spinner.updateMessage(text); + } + }, ms)); + }; const clearProcessingProbe = () => { if (processingProbeTimer) { clearTimeout(processingProbeTimer); @@ -921,56 +939,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer setTimeout(() => spinner.updateMessage('Analyzing sources'), 1500); } else { spinner.updateMessage('Processing request'); - const endpointUrlForProbe = sessionModule.getCurrentEndpointUrl ? sessionModule.getCurrentEndpointUrl() : null; - if (endpointUrlForProbe && modelName) { - processingProbeTimer = setTimeout(async () => { - processingProbeTimer = null; - if (accumulated || !spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted)) return; - processingProbeAbort = new AbortController(); - try { - spinner.updateMessage('Checking model endpoint'); - const status = await _probeCurrentEndpointStatus(endpointUrlForProbe, processingProbeAbort.signal); - if (accumulated || !spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted)) return; - if (!status) { - spinner.updateMessage('Still waiting for model'); - } else if (status.alive) { - const latency = status.latency_ms ? ` (${status.latency_ms}ms)` : ''; - spinner.updateMessage(`Endpoint online${latency}; waiting for first token`); - } else { - // Probe confirms the endpoint isn't responding. Don't - // sit on a hung fetch — give the user 5s to read the - // status, then auto-abort with reason='offline' so the - // catch handler shows a clean "switch model" message - // instead of leaving the spinner spinning forever. - if (status.error) console.warn('Model endpoint probe failed:', status.error); - let _countdown = 5; - spinner.updateMessage(`Endpoint offline — cancelling in ${_countdown}s`); - const _tick = setInterval(() => { - _countdown--; - if (!spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted) || accumulated) { - clearInterval(_tick); - return; - } - if (_countdown > 0) { - spinner.updateMessage(`Endpoint offline — cancelling in ${_countdown}s`); - } else { - clearInterval(_tick); - if (currentAbort && !currentAbort.signal.aborted) { - currentAbort._reason = 'offline'; - currentAbort.abort(); - } - } - }, 1000); - } - } catch (e) { - if (e && e.name !== 'AbortError' && spinner && spinner.element && !accumulated) { - spinner.updateMessage('Still waiting for model'); - } - } finally { - processingProbeAbort = null; - } - }, 10000); - } + scheduleFirstTokenWaitMessages(); } const researchBtn = el('research-toggle-btn'); @@ -1150,6 +1119,11 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer uiModule.scrollHistory(); } + function _replaceThinkingSpinner(label) { + _removeThinkingSpinner(); + _showThinkingSpinner(label); + } + // Auto-show thinking spinner after text stops streaming let _textPauseTimer = null; function _scheduleThinkingSpinner() { @@ -1173,9 +1147,22 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer let _liveThinkHeader = null; let _liveThinkSpinnerSlot = null; let _liveThinkTimerEl = null; + let _liveThinkTokenCount = 0; let _liveThinkToggle = null; let _liveThinkDomId = null; + function _estimateThinkingTokens(text) { + const clean = (text || '').trim(); + if (!clean) return 0; + return Math.max(1, Math.ceil(clean.length / 4)); + } + + function _formatThinkStats(seconds, tokenCount) { + const time = seconds ? seconds + 's' : ''; + const tokens = tokenCount ? tokenCount + ' tok' : ''; + return time && tokens ? time + ' · ' + tokens : (time || tokens); + } + function _replyAfterClosedThinking(text) { const closeRe = /<\/(?:think(?:ing)?|thought)>|/gi; let match = null; @@ -1277,6 +1264,12 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer let _nextIsError = false; let _streamSawDone = false; + let _firstVisibleOutputSeen = false; + const markFirstVisibleOutput = () => { + if (_firstVisibleOutputSeen) return; + _firstVisibleOutputSeen = true; + clearFirstTokenWaitTimers(); + }; while (true) { const { done, value } = await reader.read(); @@ -1296,6 +1289,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer } if (line.startsWith('data: ')) { const data = line.slice(6); + if (data && data !== '[DONE]') markFirstVisibleOutput(); // (thinking spinner removal is handled in agent_step / tool_start / content handlers) @@ -1357,7 +1351,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer if (_liveThinkHeader) _liveThinkHeader.textContent = 'View thinking process'; if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove(); if (_liveThinkTimerEl && _elapsedDone) { - _liveThinkTimerEl.textContent = _elapsedDone + 's'; + _liveThinkTimerEl.textContent = _formatThinkStats(_elapsedDone, _liveThinkTokenCount); _liveThinkTimerEl.style.marginLeft = 'auto'; _liveThinkTimerEl.style.marginRight = '5px'; var _hdrDone = _liveThinkTimerEl.closest('.thinking-header'); @@ -1399,9 +1393,17 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer typewriterInto(roundHolder.querySelector('.body'), errMsg); break; } - if (json.delta || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') { + if (json.delta || json.type === 'agent_prep' || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') { clearResponseTimeout(); clearProcessingProbe(); + clearFirstTokenWaitTimers(); + } + if (json.type === 'agent_prep') { + if (!_isBg) { + _cancelThinkingTimer(); + _replaceThinkingSpinner('Preparing agent'); + } + continue; } if (json.delta) { _cancelThinkingTimer(); @@ -1554,7 +1556,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer function _tickThinkTimer() { if (!_liveThinkTimerEl || !_liveThinkTimerEl.isConnected) return; var s = ((Date.now() - _thinkTimerStart) / 1000).toFixed(1); - _liveThinkTimerEl.textContent = s + 's'; + _liveThinkTimerEl.textContent = _formatThinkStats(s, _liveThinkTokenCount); _thinkTimerRAF = requestAnimationFrame(_tickThinkTimer); } _thinkTimerRAF = requestAnimationFrame(_tickThinkTimer); @@ -1576,7 +1578,12 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer .replace(/<\|channel>response\s*\n?/gi, '') .replace(//gi, ''); thinkText = thinkText.replace(/^\s*Thinking(?:\s+Process)?:\s*/i, ''); + _liveThinkTokenCount = _estimateThinkingTokens(thinkText); _liveThinkInner.innerHTML = markdownModule.mdToHtml(thinkText); + if (_liveThinkTimerEl) { + var _elapsedLive = thinkingStartTime ? ((Date.now() - thinkingStartTime) / 1000).toFixed(1) : ''; + _liveThinkTimerEl.textContent = _formatThinkStats(_elapsedLive, _liveThinkTokenCount); + } // Keep thinking box scrolled to bottom, but let user scroll up var thinkBox = _liveThinkInner.closest('.thinking-content'); if (thinkBox) { @@ -1600,6 +1607,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer _liveThinkHeader = null; _liveThinkSpinnerSlot = null; _liveThinkTimerEl = null; + _liveThinkTokenCount = 0; _liveThinkToggle = null; _liveThinkDomId = null; // Fall through to normal streaming @@ -1622,7 +1630,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove(); // Move timer to right side of header if (_liveThinkTimerEl && elapsed) { - _liveThinkTimerEl.textContent = elapsed + 's'; + _liveThinkTimerEl.textContent = _formatThinkStats(elapsed, _liveThinkTokenCount); _liveThinkTimerEl.style.marginLeft = 'auto'; _liveThinkTimerEl.style.marginRight = '5px'; var _hdrRow = _liveThinkTimerEl.closest('.thinking-header'); @@ -2023,7 +2031,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer cancelAnimationFrame(_thinkTimerRAF); var _elapsed2 = thinkingStartTime ? ((Date.now() - thinkingStartTime) / 1000).toFixed(1) : null; if (_liveThinkHeader) _liveThinkHeader.textContent = 'View thinking process'; - if (_liveThinkTimerEl) _liveThinkTimerEl.textContent = _elapsed2 ? _elapsed2 + 's' : ''; + if (_liveThinkTimerEl) _liveThinkTimerEl.textContent = _elapsed2 ? _formatThinkStats(_elapsed2, _liveThinkTokenCount) : ''; if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove(); // Assign stable IDs var _thinkId2 = 'think-' + Date.now(); @@ -3018,6 +3026,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer } finally { clearResponseTimeout(); clearProcessingProbe(); + clearFirstTokenWaitTimers(); // Streaming done — let screen readers announce the settled response. const _chatLogDone = document.getElementById('chat-history'); if (_chatLogDone) _chatLogDone.setAttribute('aria-busy', 'false');