Chat: first-token wait timer cleanup so per-pane timeouts dont leak when a response finishes mid-wait

2026-06-22 04:35:29 -04:00 · 2026-06-19 00:34:47 +00:00
parent 20c6ba8f32
commit 8cc76b53a2
1 changed files with 64 additions and 55 deletions
@@ -571,6 +571,24 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
    let timeoutId = null;
    let responseTimeoutCleared = false;
    let clearResponseTimeout = () => {};
+    let firstTokenWaitTimers = [];
+    const clearFirstTokenWaitTimers = () => {
+      firstTokenWaitTimers.forEach(t => { try { clearTimeout(t); } catch (_) {} });
+      firstTokenWaitTimers = [];
+    };
+    const scheduleFirstTokenWaitMessages = () => {
+      clearFirstTokenWaitTimers();
+      const steps = [
+        [20000, 'Still waiting for first token'],
+        [60000, 'Large local model is pre-filling context'],
+        [120000, 'Still working - no tokens yet from the model'],
+      ];
+      firstTokenWaitTimers = steps.map(([ms, text]) => setTimeout(() => {
+        if (!accumulated && spinner && spinner.element && !(currentAbort && currentAbort.signal.aborted)) {
+          spinner.updateMessage(text);
+        }
+      }, ms));
+    };
    const clearProcessingProbe = () => {
      if (processingProbeTimer) {
        clearTimeout(processingProbeTimer);
@@ -921,56 +939,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
        setTimeout(() => spinner.updateMessage('Analyzing sources'), 1500);
      } else {
        spinner.updateMessage('Processing request');
-        const endpointUrlForProbe = sessionModule.getCurrentEndpointUrl ? sessionModule.getCurrentEndpointUrl() : null;
-        if (endpointUrlForProbe && modelName) {
-          processingProbeTimer = setTimeout(async () => {
-            processingProbeTimer = null;
-            if (accumulated || !spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted)) return;
-            processingProbeAbort = new AbortController();
-            try {
-              spinner.updateMessage('Checking model endpoint');
-              const status = await _probeCurrentEndpointStatus(endpointUrlForProbe, processingProbeAbort.signal);
-              if (accumulated || !spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted)) return;
-              if (!status) {
-                spinner.updateMessage('Still waiting for model');
-              } else if (status.alive) {
-                const latency = status.latency_ms ? ` (${status.latency_ms}ms)` : '';
-                spinner.updateMessage(`Endpoint online${latency}; waiting for first token`);
-              } else {
-                // Probe confirms the endpoint isn't responding. Don't
-                // sit on a hung fetch — give the user 5s to read the
-                // status, then auto-abort with reason='offline' so the
-                // catch handler shows a clean "switch model" message
-                // instead of leaving the spinner spinning forever.
-                if (status.error) console.warn('Model endpoint probe failed:', status.error);
-                let _countdown = 5;
-                spinner.updateMessage(`Endpoint offline — cancelling in ${_countdown}s`);
-                const _tick = setInterval(() => {
-                  _countdown--;
-                  if (!spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted) || accumulated) {
-                    clearInterval(_tick);
-                    return;
-                  }
-                  if (_countdown > 0) {
-                    spinner.updateMessage(`Endpoint offline — cancelling in ${_countdown}s`);
-                  } else {
-                    clearInterval(_tick);
-                    if (currentAbort && !currentAbort.signal.aborted) {
-                      currentAbort._reason = 'offline';
-                      currentAbort.abort();
-                    }
-                  }
-                }, 1000);
-              }
-            } catch (e) {
-              if (e && e.name !== 'AbortError' && spinner && spinner.element && !accumulated) {
-                spinner.updateMessage('Still waiting for model');
-              }
-            } finally {
-              processingProbeAbort = null;
-            }
-          }, 10000);
-        }
+        scheduleFirstTokenWaitMessages();
      }
      
      const researchBtn = el('research-toggle-btn');
@@ -1150,6 +1119,11 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
        uiModule.scrollHistory();
      }

+      function _replaceThinkingSpinner(label) {
+        _removeThinkingSpinner();
+        _showThinkingSpinner(label);
+      }
+
      // Auto-show thinking spinner after text stops streaming
      let _textPauseTimer = null;
      function _scheduleThinkingSpinner() {
@@ -1173,9 +1147,22 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
      let _liveThinkHeader = null;
      let _liveThinkSpinnerSlot = null;
      let _liveThinkTimerEl = null;
+      let _liveThinkTokenCount = 0;
      let _liveThinkToggle = null;
      let _liveThinkDomId = null;

+      function _estimateThinkingTokens(text) {
+        const clean = (text || '').trim();
+        if (!clean) return 0;
+        return Math.max(1, Math.ceil(clean.length / 4));
+      }
+
+      function _formatThinkStats(seconds, tokenCount) {
+        const time = seconds ? seconds + 's' : '';
+        const tokens = tokenCount ? tokenCount + ' tok' : '';
+        return time && tokens ? time + ' · ' + tokens : (time || tokens);
+      }
+
      function _replyAfterClosedThinking(text) {
        const closeRe = /<\/(?:think(?:ing)?|thought)>|<channel\|>/gi;
        let match = null;
@@ -1277,6 +1264,12 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer

      let _nextIsError = false;
      let _streamSawDone = false;
+      let _firstVisibleOutputSeen = false;
+      const markFirstVisibleOutput = () => {
+        if (_firstVisibleOutputSeen) return;
+        _firstVisibleOutputSeen = true;
+        clearFirstTokenWaitTimers();
+      };

      while (true) {
        const { done, value } = await reader.read();
@@ -1296,6 +1289,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
          }
          if (line.startsWith('data: ')) {
            const data = line.slice(6);
+            if (data && data !== '[DONE]') markFirstVisibleOutput();

            // (thinking spinner removal is handled in agent_step / tool_start / content handlers)

@@ -1357,7 +1351,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                if (_liveThinkHeader) _liveThinkHeader.textContent = 'View thinking process';
                if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove();
                if (_liveThinkTimerEl && _elapsedDone) {
-                  _liveThinkTimerEl.textContent = _elapsedDone + 's';
+                  _liveThinkTimerEl.textContent = _formatThinkStats(_elapsedDone, _liveThinkTokenCount);
                  _liveThinkTimerEl.style.marginLeft = 'auto';
                  _liveThinkTimerEl.style.marginRight = '5px';
                  var _hdrDone = _liveThinkTimerEl.closest('.thinking-header');
@@ -1399,9 +1393,17 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                typewriterInto(roundHolder.querySelector('.body'), errMsg);
                break;
              }
-              if (json.delta || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') {
+              if (json.delta || json.type === 'agent_prep' || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') {
                clearResponseTimeout();
                clearProcessingProbe();
+                clearFirstTokenWaitTimers();
+              }
+              if (json.type === 'agent_prep') {
+                if (!_isBg) {
+                  _cancelThinkingTimer();
+                  _replaceThinkingSpinner('Preparing agent');
+                }
+                continue;
              }
              if (json.delta) {
                _cancelThinkingTimer();
@@ -1554,7 +1556,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                  function _tickThinkTimer() {
                    if (!_liveThinkTimerEl || !_liveThinkTimerEl.isConnected) return;
                    var s = ((Date.now() - _thinkTimerStart) / 1000).toFixed(1);
-                    _liveThinkTimerEl.textContent = s + 's';
+                    _liveThinkTimerEl.textContent = _formatThinkStats(s, _liveThinkTokenCount);
                    _thinkTimerRAF = requestAnimationFrame(_tickThinkTimer);
                  }
                  _thinkTimerRAF = requestAnimationFrame(_tickThinkTimer);
@@ -1576,7 +1578,12 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                      .replace(/<\|channel>response\s*\n?/gi, '')
                      .replace(/<channel\|>/gi, '');
                    thinkText = thinkText.replace(/^\s*Thinking(?:\s+Process)?:\s*/i, '');
+                    _liveThinkTokenCount = _estimateThinkingTokens(thinkText);
                    _liveThinkInner.innerHTML = markdownModule.mdToHtml(thinkText);
+                    if (_liveThinkTimerEl) {
+                      var _elapsedLive = thinkingStartTime ? ((Date.now() - thinkingStartTime) / 1000).toFixed(1) : '';
+                      _liveThinkTimerEl.textContent = _formatThinkStats(_elapsedLive, _liveThinkTokenCount);
+                    }
                    // Keep thinking box scrolled to bottom, but let user scroll up
                    var thinkBox = _liveThinkInner.closest('.thinking-content');
                    if (thinkBox) {
@@ -1600,6 +1607,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                    _liveThinkHeader = null;
                    _liveThinkSpinnerSlot = null;
                    _liveThinkTimerEl = null;
+                    _liveThinkTokenCount = 0;
                    _liveThinkToggle = null;
                    _liveThinkDomId = null;
                    // Fall through to normal streaming
@@ -1622,7 +1630,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                  if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove();
                  // Move timer to right side of header
                  if (_liveThinkTimerEl && elapsed) {
-                    _liveThinkTimerEl.textContent = elapsed + 's';
+                    _liveThinkTimerEl.textContent = _formatThinkStats(elapsed, _liveThinkTokenCount);
                    _liveThinkTimerEl.style.marginLeft = 'auto';
                    _liveThinkTimerEl.style.marginRight = '5px';
                    var _hdrRow = _liveThinkTimerEl.closest('.thinking-header');
@@ -2023,7 +2031,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                  cancelAnimationFrame(_thinkTimerRAF);
                  var _elapsed2 = thinkingStartTime ? ((Date.now() - thinkingStartTime) / 1000).toFixed(1) : null;
                  if (_liveThinkHeader) _liveThinkHeader.textContent = 'View thinking process';
-                  if (_liveThinkTimerEl) _liveThinkTimerEl.textContent = _elapsed2 ? _elapsed2 + 's' : '';
+                  if (_liveThinkTimerEl) _liveThinkTimerEl.textContent = _elapsed2 ? _formatThinkStats(_elapsed2, _liveThinkTokenCount) : '';
                  if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove();
                  // Assign stable IDs
                  var _thinkId2 = 'think-' + Date.now();
@@ -3018,6 +3026,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
    } finally {
      clearResponseTimeout();
      clearProcessingProbe();
+      clearFirstTokenWaitTimers();
      // Streaming done — let screen readers announce the settled response.
      const _chatLogDone = document.getElementById('chat-history');
      if (_chatLogDone) _chatLogDone.setAttribute('aria-busy', 'false');