fix(chat): stop code-block button flicker during streaming (#3023)

Render streamed markdown incrementally (freeze finalized blocks, re-render only the growing tail) instead of re-rendering the whole message every token, which recreated every <pre> and dropped CSS :hover.
2026-06-15 17:25:26 -04:00 · 2026-06-06 16:08:54 +06:00
parent fb9c7cf3da
commit 2e37d72155
8 changed files with 719 additions and 63 deletions
@@ -23,6 +23,7 @@ import * as emailInbox from './emailInbox.js';
 import codeRunnerModule from './codeRunner.js';
 import slashCommands, { initSlashCommands, isCommand, handleSlashCommand, handleSetupInput, handleSetupWizard, typewriterInto } from './slashCommands.js';
 import createResearchSynapse from './researchSynapse.js';
+import { createStreamRenderer } from './streamingRenderer.js';
  const RESEARCH_TIMEOUT_MS = 360000;
  const DEFAULT_TIMEOUT_MS = 120000;
  const RESEARCH_SVG = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><circle cx="11" cy="11" r="8"/><path d="M21 21l-4.35-4.35"/></svg>';
@@ -1167,9 +1168,6 @@ import createResearchSynapse from './researchSynapse.js';
      let _liveThinkToggle = null;
      let _liveThinkDomId = null;

-      // Offscreen measurement div — reused across renders
-      let _measureDiv = null;
-
      function _replyAfterClosedThinking(text) {
        const closeRe = /<\/(?:think(?:ing)?|thought)>|<channel\|>/gi;
        let match = null;
@@ -1224,19 +1222,18 @@ import createResearchSynapse from './researchSynapse.js';
            }
          }
          if (replyTrimmed) {
-            const replyHtml = markdownModule.mdToHtml(markdownModule.squashOutsideCode(replyTrimmed));
-            const prevLen = liveReply._prevTextLen || 0;
-            liveReply.innerHTML = replyHtml;
-            _fadeNewTokens(liveReply, prevLen);
-            liveReply._prevTextLen = liveReply.textContent.length;
-            if (window.hljs) liveReply.querySelectorAll('pre code').forEach((b) => window.hljs.highlightElement(b));
+            const r = liveReply._streamRenderer ||
+              (liveReply._streamRenderer = createStreamRenderer(liveReply, {
+                render: (t) => markdownModule.mdToHtml(markdownModule.squashOutsideCode(t)),
+                hljs: window.hljs,
+              }));
+            r.update(replyTrimmed);
          }
          // Reply empty or not — preserve thinking bar, don't fall through to full re-render
          uiModule.scrollHistory();
          return;
        }

-        const prevLen = contentEl._prevTextLen || 0;
        // If thinking is still streaming (unclosed <think>), show indicator instead of raw text
        if (markdownModule.hasUnclosedThinkTag && markdownModule.hasUnclosedThinkTag(dt)) {
          const thinkStart = dt.search(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i);
@@ -1250,66 +1247,26 @@ import createResearchSynapse from './researchSynapse.js';
          contentEl.innerHTML =
            '<div class="thinking-section"><div class="thinking-header"><div class="thinking-header-left">Thinking' +
            (lines > 1 ? ` (${lines} lines)` : '') + '</div></div></div>';
-          contentEl._prevTextLen = 0;
+          // The stream renderer self-heals when it next sees this overwritten
+          // container (streamingRenderer.js), so no explicit reset is needed here.
          uiModule.scrollHistory();
          return;
        }
-        const html = markdownModule.processWithThinking(markdownModule.squashOutsideCode(dt));

-        // Smooth expand only for regular chat text (not thinking/agent blocks)
-        const _hasThinking = html.includes('thinking-section');
-        const _isAgentRound = roundHolder !== holder;
-        if (!_hasThinking && !_isAgentRound) {
-          // Render into offscreen clone to measure new height before swapping
-          if (!_measureDiv) {
-            _measureDiv = document.createElement('div');
-            _measureDiv.style.cssText = 'position:absolute;visibility:hidden;pointer-events:none;z-index:-1;';
-          }
-          _measureDiv.style.width = contentEl.offsetWidth + 'px';
-          _measureDiv.className = contentEl.className;
-          _measureDiv.innerHTML = html;
-          contentEl.parentNode.appendChild(_measureDiv);
-          const measuredH = _measureDiv.offsetHeight;
-          _measureDiv.remove();
-          const curMin = parseFloat(contentEl.style.minHeight) || 0;
-          contentEl.style.minHeight = Math.max(curMin, measuredH) + 'px';
-        } else {
-          contentEl.style.minHeight = '';
-        }
-
-        contentEl.innerHTML = html;
-        _fadeNewTokens(contentEl, prevLen);
-        contentEl._prevTextLen = contentEl.textContent.length;
-        if (window.hljs) contentEl.querySelectorAll('pre code').forEach((b) => window.hljs.highlightElement(b));
+        // Incremental streaming render: freeze finalized blocks, re-render only the
+        // growing tail, and highlight each code block once on completion. This is
+        // what keeps code-block hover buttons from flickering and avoids the O(N^2)
+        // re-parse/re-highlight of the whole message on every token.
+        // See streamingRenderer.js / streamingSegmenter.js.
+        const renderer = contentEl._streamRenderer ||
+          (contentEl._streamRenderer = createStreamRenderer(contentEl, {
+            render: (t) => markdownModule.processWithThinking(markdownModule.squashOutsideCode(t)),
+            hljs: window.hljs,
+          }));
+        renderer.update(dt);
        uiModule.scrollHistory();
      };

-      // Walk text nodes, skip past `prevLen` characters of old text,
-      // wrap everything after that in <span class="token-new"> for fade-in
-      function _fadeNewTokens(container, prevLen) {
-        if (!prevLen) return; // First chunk — skip, whole msg already has entrance anim
-        const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
-        let charCount = 0;
-        const toWrap = [];
-        while (walker.nextNode()) {
-          const node = walker.currentNode;
-          const len = node.textContent.length;
-          if (charCount + len <= prevLen) { charCount += len; continue; }
-          const splitAt = charCount < prevLen ? prevLen - charCount : 0;
-          toWrap.push({ node, splitAt });
-          charCount += len;
-        }
-        for (const { node, splitAt } of toWrap) {
-          const parent = node.parentNode;
-          if (!parent || parent.closest('pre, .think-content')) continue;
-          const target = splitAt > 0 ? node.splitText(splitAt) : node;
-          const span = document.createElement('span');
-          span.className = 'token-new';
-          parent.replaceChild(span, target);
-          span.appendChild(target);
-        }
-      }
-
      let _nextIsError = false;
      let _streamSawDone = false;

@@ -0,0 +1,206 @@
+// streamingRenderer.js
+//
+// The DOM shell for incremental streaming markdown rendering. One instance owns
+// the DOM of one streaming assistant message and is the only thing that writes to
+// it while it streams.
+//
+// It keeps the message as two regions, separated by an invisible comment marker so
+// the rendered blocks are direct children of the container (no wrapper elements to
+// disturb CSS):
+//
+//     [ finalized block, frozen ][ finalized block, frozen ] <!--tail--> [ live tail ]
+//
+//   - Finalized blocks are rendered once and never touched again — so code-block
+//     hover buttons can't flicker and code is highlighted exactly once.
+//   - The live tail (the still-growing trailing block) is re-rendered each token,
+//     except an open code fence, which streams in append-mode (text appended to a
+//     stable <pre>, highlighted once when it closes).
+//
+// All the "is this safe to freeze?" logic lives in the pure segmenter; this file
+// is deliberately mechanical. If anything throws, it latches into a full-re-render
+// fallback so a bug can never produce broken output — only today's behavior.
+
+import { splitFinalized, describeOpenFence } from './streamingSegmenter.js';
+
+// Compile-time escape hatch: set to false to force the plain full-re-render path.
+// (The per-instance try/catch `degraded` fallback below is the runtime safety net.)
+const ENABLED = true;
+
+export function createStreamRenderer(contentEl, { render, hljs } = {}) {
+  let started = false;
+  let tailMarker = null; // finalized nodes precede it; live-tail nodes follow it
+  let committedLen = 0; // chars of source already frozen
+  let lastText = ''; // most recent full text (for finalize)
+  let tailShownLen = 0; // rendered-text length of the live tail (drives token fade)
+  let appendMode = null; // { codeText: Text, appendedLen } while an open fence streams
+  let degraded = !ENABLED; // true once we fall back to full re-render
+
+  // Reset the container so it holds nothing but a fresh tail-marker comment, and
+  // mark the renderer as started. Any existing children of contentEl are discarded.
+  function start() {
+    contentEl.textContent = '';
+    tailMarker = document.createComment('tail');
+    contentEl.appendChild(tailMarker);
+    started = true;
+  }
+
+  // Run hljs.highlightElement over every `pre code` element under `root`.
+  // Does nothing when no `hljs` was passed to createStreamRenderer.
+  function highlight(root) {
+    if (hljs) root.querySelectorAll('pre code').forEach((b) => hljs.highlightElement(b));
+  }
+
+  // Remove every node that follows the tail marker, i.e. the current live tail.
+  // Requires a non-null tailMarker (start() must have run).
+  function clearTail() {
+    while (tailMarker.nextSibling) tailMarker.nextSibling.remove();
+  }
+
+  // Render `src` and freeze the nodes before the tail marker. Highlighting happens
+  // here, once, on the detached fragment before the nodes are ever shown.
+  function freeze(src) {
+    const holder = document.createElement('div');
+    holder.innerHTML = render(src);
+    highlight(holder);
+    while (holder.firstChild) contentEl.insertBefore(holder.firstChild, tailMarker);
+  }
+
+  // Re-render the live tail. An open trailing fence streams in append-mode.
+  function renderTail(tailText) {
+    const fence = tailText ? describeOpenFence(tailText) : null;
+    if (fence) {
+      appendOpenFence(tailText, fence);
+      return;
+    }
+    appendMode = null;
+    clearTail();
+    if (!tailText) {
+      tailShownLen = 0;
+      return;
+    }
+    const holder = document.createElement('div');
+    holder.innerHTML = render(tailText);
+    fadeNewText(holder, tailShownLen);
+    tailShownLen = holder.textContent.length;
+    while (holder.firstChild) contentEl.appendChild(holder.firstChild);
+  }
+
+  // Stream the body of an unterminated code fence by appending only the new
+  // characters to a stable <pre><code> text node — no re-parse, no re-highlight.
+  function appendOpenFence(tailText, fence) {
+    if (!appendMode) {
+      clearTail();
+      const pre = document.createElement('pre');
+      const code = document.createElement('code');
+      if (fence.lang) code.className = `language-${fence.lang}`;
+      const textNode = document.createTextNode('');
+      code.appendChild(textNode);
+      pre.appendChild(code);
+      contentEl.appendChild(pre);
+      appendMode = { codeText: textNode, appendedLen: 0 };
+      tailShownLen = 0; // code is never faded; prose after the fence fades fresh
+    }
+    const code = tailText.slice(fence.contentStart);
+    if (code.length > appendMode.appendedLen) {
+      appendMode.codeText.appendData(code.slice(appendMode.appendedLen));
+      appendMode.appendedLen = code.length;
+    }
+  }
+
+  // Wrap tail text past `prevLen` characters in <span class="token-new"> for the
+  // streaming fade-in. Skips code (<pre>) and thinking blocks (.thinking-content).
+  // Note: the original chat.js helper checked `.think-content`, a class that exists
+  // nowhere in the app, so thinking text used to fade; matching the real
+  // `.thinking-content` corrects that. Operates on the detached fragment before insertion.
+  function fadeNewText(container, prevLen) {
+    if (!prevLen) return;
+    const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
+    let count = 0;
+    const toWrap = [];
+    while (walker.nextNode()) {
+      const node = walker.currentNode;
+      const len = node.textContent.length;
+      if (count + len <= prevLen) {
+        count += len;
+        continue;
+      }
+      toWrap.push({ node, splitAt: count < prevLen ? prevLen - count : 0 });
+      count += len;
+    }
+    for (const { node, splitAt } of toWrap) {
+      const parent = node.parentNode;
+      if (!parent || parent.closest('pre, .thinking-content')) continue;
+      const target = splitAt > 0 ? node.splitText(splitAt) : node;
+      const span = document.createElement('span');
+      span.className = 'token-new';
+      parent.replaceChild(span, target);
+      span.appendChild(target);
+    }
+  }
+
+  // Fallback path: replace the whole container with a single render of `fullText`
+  // and highlight its code blocks. Used once the renderer is `degraded`.
+  function fullRender(fullText) {
+    contentEl.innerHTML = render(fullText);
+    highlight(contentEl);
+  }
+
+  // Render the latest full source text.
+  //
+  // PRECONDITION: callers must pass append-only text — each call's `fullText` must
+  // extend the previous one with the already-seen prefix UNCHANGED. Finalized
+  // blocks are frozen and never re-rendered, so a feed that rewrites earlier text
+  // would leave stale frozen blocks (corrected only by the next full re-render).
+  // chat.js satisfies this: its stripToolBlocks output only strips not-yet-finalized
+  // trailing tool syntax, never text that has already been frozen.
+  function update(fullText) {
+    lastText = fullText;
+    if (degraded) {
+      fullRender(fullText);
+      return;
+    }
+    try {
+      // Self-heal: if our DOM was replaced out from under us — chat.js writes
+      // contentEl.innerHTML directly for thinking indicators and tool blocks, and
+      // finalize() removes the marker — our tail marker is no longer a child of the
+      // container. Rebuild from scratch so we never append onto foreign content or
+      // touch a detached marker.
+      if (started && (!tailMarker || tailMarker.parentNode !== contentEl)) {
+        started = false;
+        committedLen = 0;
+        tailShownLen = 0;
+        appendMode = null;
+      }
+      if (!started) start();
+      const next = splitFinalized(fullText, render, committedLen);
+      if (next > committedLen) {
+        freeze(fullText.slice(committedLen, next));
+        committedLen = next;
+        appendMode = null; // whatever was streaming is now frozen
+        tailShownLen = 0;
+      }
+      renderTail(fullText.slice(committedLen));
+    } catch (err) {
+      degraded = true;
+      console.error('streamingRenderer: falling back to full render', err);
+      fullRender(fullText);
+    }
+  }
+
+  // Stream finished: freeze whatever is left canonically and flatten away the
+  // marker so the container holds exactly what a single full render would produce.
+  // chat.js currently re-renders the finished message from source for its own
+  // reasons and so doesn't call this, but it completes the renderer's lifecycle and
+  // is exercised by the tests.
+  //
+  // Safe to call more than once: a repeat call with no intervening update() is a
+  // no-op. If the container was overwritten externally since the last update()
+  // (the marker is no longer its child), the whole of `lastText` is rebuilt into a
+  // clean container — mirroring update()'s self-heal — instead of leaving foreign
+  // content in place or latching the degraded fallback.
+  function finalize() {
+    if (degraded) return;
+    try {
+      // Already finalized: the previous call removed the marker, and update()
+      // would have recreated it had more text streamed in since.
+      if (started && !tailMarker) return;
+      // Marker detached from the container: restart from scratch so nothing is
+      // inserted relative to a node that is no longer a child of contentEl.
+      if (started && tailMarker.parentNode !== contentEl) {
+        started = false;
+        committedLen = 0;
+      }
+      if (!started) start();
+      clearTail();
+      appendMode = null;
+      tailShownLen = 0;
+      const rest = lastText.slice(committedLen);
+      if (rest.trim()) freeze(rest);
+      tailMarker.remove();
+      tailMarker = null;
+      committedLen = lastText.length;
+    } catch (err) {
+      degraded = true;
+      console.error('streamingRenderer: falling back to full render', err);
+      fullRender(lastText);
+    }
+  }
+
+  return { update, finalize };
+}
@@ -0,0 +1,190 @@
+// streamingSegmenter.js
+//
+// Pure logic for incremental ("block-at-a-time") streaming markdown rendering.
+//
+// While an assistant message streams in, re-rendering the whole accumulated
+// markdown on every token is wasteful (O(N^2)) and recreates DOM nodes, which
+// makes code-block hover buttons flicker. The fix is to FREEZE the leading part
+// of the message that can no longer change, and only re-render the growing tail.
+//
+// This module answers the one hard question that makes freezing safe:
+//
+//     Given the full markdown received so far, how many leading characters can
+//     be finalized without changing the rendered output?
+//
+// The contract callers rely on (`render` is the canonical markdown renderer):
+//
+//     const n = splitFinalized(text, render);
+//     render(text.slice(0, n)) + render(text.slice(n))  ===  render(text)
+//
+// The module is intentionally DOM-free and renderer-agnostic so it can be unit
+// tested in isolation and reused for any markdown renderer with no long-range
+// cross-block dependencies (no reference-style links / footnotes).
+//
+// Known limitations (both bounded by the same mitigation):
+//   - cutIsRenderSafe proves only PRESENT-tense equivalence. If the renderer pairs
+//     an inline delimiter across a blank line (e.g. markdown.js will turn
+//     `*a\n\nb*` into emphasis spanning two paragraphs), a block frozen before the
+//     closing delimiter arrives can disagree with the final full render.
+//   - afterClosedFence boundaries are trusted without the equivalence check, so a
+//     fence the real renderer parses differently (e.g. a stray 4-backtick line) can
+//     be mis-detected as a close.
+//   Both only occur for input the renderer itself handles oddly, and both are
+//   transient: chat.js re-renders the finished message from source, so the settled
+//   output is always canonical.
+
+// A fenced-code delimiter line: up to 3 leading spaces, then >=3 backticks or
+// tildes, then an optional info string. The info-string group is [^\n]* rather
+// than `.*` because `.` does not match `\r`: lines are split on `\n` only, so with
+// CRLF input every line keeps a trailing `\r` and `(.*)$` would fail to match any
+// fence line at all, while describeOpenFence (which uses [^\n]* and \s*) would
+// still recognise the same fence.
+const FENCE_RE = /^ {0,3}(`{3,}|~{3,})([^\n]*)$/;
+
+/**
+ * Scan `text` starting at `fromOffset` — which MUST be at top level (callers only
+ * ever advance to a finalized boundary, never into a fence) — and collect the
+ * candidate cut points.
+ *
+ * @returns {{ boundaries: Array<{offset:number, afterClosedFence:boolean}>, inFence:boolean }}
+ *   - A blank-line run at top level yields a boundary at the start of the next
+ *     non-blank line (`afterClosedFence: false`).
+ *   - A fence close yields a boundary just past the closing fence line
+ *     (`afterClosedFence: true`) — such a cut is unconditionally safe, since
+ *     nothing can ever merge into a completed code block.
+ */
+function findBoundaries(text, fromOffset) {
+  const boundaries = [];
+  const n = text.length;
+  let inFence = false;
+  let fenceMarker = '';
+  let i = fromOffset;
+
+  while (i < n) {
+    const nl = text.indexOf('\n', i);
+    const lineEnd = nl === -1 ? n : nl;
+    const afterNl = nl === -1 ? n : nl + 1;
+    const line = text.slice(i, lineEnd);
+    const fence = line.match(FENCE_RE);
+
+    if (fence) {
+      const marker = fence[1];
+      if (!inFence) {
+        inFence = true;
+        fenceMarker = marker;
+      } else if (
+        marker[0] === fenceMarker[0] &&
+        marker.length >= fenceMarker.length &&
+        fence[2].trim() === '' // a closing fence carries no info string
+      ) {
+        inFence = false;
+        fenceMarker = '';
+        boundaries.push({ offset: afterNl, afterClosedFence: true });
+      }
+      i = afterNl;
+    } else if (!inFence && line.trim() === '') {
+      // Consume the entire run of blank lines; the boundary is the start of the
+      // next non-blank line so the finalized side owns the separator and the tail
+      // starts clean.
+      let j = afterNl;
+      while (j < n) {
+        const nl2 = text.indexOf('\n', j);
+        const lineEnd2 = nl2 === -1 ? n : nl2;
+        if (text.slice(j, lineEnd2).trim() !== '') break;
+        if (nl2 === -1) {
+          j = n;
+          break;
+        }
+        j = nl2 + 1;
+      }
+      boundaries.push({ offset: j, afterClosedFence: false });
+      i = j;
+    } else {
+      i = afterNl;
+    }
+  }
+
+  return { boundaries, inFence };
+}
+
+/**
+ * Does cutting between `before` and `after` leave the rendered output unchanged?
+ * This is the self-verifying safety check: it directly compares rendering the two
+ * sides separately against rendering them joined, so constructs that span the cut
+ * (loose lists, setext headings, lazy blockquote continuations, tables) are caught
+ * with no hand-coded grammar rules.
+ *
+ * Renderer non-determinism (e.g. mermaid ids seeded with Date.now()) can only make
+ * this return a false negative, never a false positive — so the bias is always
+ * toward under-finalizing, which is the safe direction.
+ */
+function cutIsRenderSafe(before, after, render) {
+  return render(before) + render(after) === render(before + after);
+}
+
+/**
+ * Return how many leading characters of `text` can be safely finalized, scanning
+ * forward from `committedLen` (the amount already finalized).
+ *
+ * Guarantees `render(text.slice(0, n)) + render(text.slice(n)) === render(text)`,
+ * and `committedLen <= n <= text.length`.
+ *
+ * @param {string} text       Full markdown accumulated so far.
+ * @param {(src:string)=>string} render  Canonical markdown renderer.
+ * @param {number} [committedLen=0]  Characters already finalized (always a prior boundary).
+ * @returns {number}
+ */
+export function splitFinalized(text, render, committedLen = 0) {
+  const { boundaries } = findBoundaries(text, committedLen);
+
+  let best = committedLen;
+  let segStart = committedLen;
+
+  for (let k = 0; k < boundaries.length; k++) {
+    const { offset, afterClosedFence } = boundaries[k];
+
+    if (afterClosedFence) {
+      // A completed code block — always safe to freeze through here.
+      best = offset;
+    } else {
+      // A prose/list/table boundary. We need a following block to compare
+      // against (the last block must stay live, it can still grow), and the cut
+      // must be render-equivalent locally.
+      const nextOffset = k + 1 < boundaries.length ? boundaries[k + 1].offset : text.length;
+      const before = text.slice(segStart, offset);
+      const after = text.slice(offset, nextOffset);
+      if (after.trim() !== '' && cutIsRenderSafe(before, after, render)) {
+        best = offset;
+      }
+    }
+    segStart = offset;
+  }
+
+  return best;
+}
+
+/**
+ * If `text` begins with a fenced-code opener whose fence never closes, describe it
+ * so the renderer can stream the code in append-mode instead of re-rendering it.
+ * Returns `{ lang, contentStart }` (contentStart = offset of the first code char),
+ * or null when `text` does not start with a still-open fence.
+ *
+ * The opener line must be complete (terminated by a newline) so the info string /
+ * language is known before append-mode begins.
+ */
+export function describeOpenFence(text) {
+  const open = text.match(/^( {0,3})(`{3,}|~{3,})([^\n]*)\n/);
+  if (!open) return null;
+  const marker = open[2];
+  const contentStart = open[0].length;
+
+  for (let i = contentStart; i < text.length; ) {
+    const nl = text.indexOf('\n', i);
+    const line = text.slice(i, nl === -1 ? text.length : nl);
+    const close = line.match(/^ {0,3}(`{3,}|~{3,})\s*$/);
+    if (close && close[1][0] === marker[0] && close[1].length >= marker.length) {
+      return null; // the fence closes — let the normal finalize path handle it
+    }
+    if (nl === -1) break;
+    i = nl + 1;
+  }
+
+  const lang = (open[3] || '').trim().split(/\s+/)[0] || '';
+  return { lang, contentStart };
+}
@@ -0,0 +1,27 @@
+// A spread of markdown samples exercising the constructs the renderer supports.
+// Used by the streaming-invariant fuzz test (fed token-by-token) and the renderer
+// integration test. Keep samples small but structurally varied — the fuzz test
+// runs every prefix of every sample, so cost is quadratic in sample length.
+export const CORPUS = [
+  ['plain paragraph', 'Just a single sentence of text.'],
+  ['two paragraphs', 'First paragraph here.\n\nSecond paragraph here.'],
+  ['three paragraphs', 'Alpha block.\n\nBravo block.\n\nCharlie block.'],
+  ['atx headings', '# Title\n\nIntro line.\n\n## Section\n\nBody text.'],
+  ['setext heading', 'The Title\n=========\n\nA paragraph under it.'],
+  ['inline formatting', 'Some **bold**, *italic*, `code`, and a [link](https://x.com).'],
+  ['tight unordered list', '- one\n- two\n- three\n\ndone'],
+  ['ordered list then text', 'Before\n\n1. first\n2. second\n3. third\n\nAfter'],
+  ['loose list then paragraph', '- a\n\n- b\n\n- c\n\nClosing paragraph.'],
+  ['nested list', '- top\n  - nested one\n  - nested two\n- back to top\n\nend'],
+  ['blockquote', '> quoted line one\n> quoted line two\n\nplain after'],
+  ['thematic break', 'above the line\n\n---\n\nbelow the line'],
+  ['python code fence', 'Run this:\n\n```python\nprint("hi")\nfor i in range(3):\n    print(i)\n```\n\nThat prints numbers.'],
+  ['fence with blank lines inside', '```js\nconst a = 1;\n\nconst b = 2;\n```\n\nafter the code'],
+  ['two consecutive fences', '```\nfirst block\n```\n\n```\nsecond block\n```\n\ntail'],
+  ['mermaid diagram', 'Diagram:\n\n```mermaid\ngraph TD\nA-->B\n```\n\nafter diagram'],
+  ['gfm table', 'Data:\n\n| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |\n\nafter table'],
+  [
+    'mixed document',
+    '# Report\n\nIntro paragraph with a `symbol`.\n\n```python\nx = 1\n```\n\n- bullet one\n- bullet two\n\n> a quote\n\nFinal words.',
+  ],
+];
@@ -0,0 +1,107 @@
+// The centerpiece correctness test: stream every corpus sample in token-by-token,
+// driving the segmenter exactly as the renderer will, and assert the freeze/tail
+// split stays render-equivalent to a single full render at EVERY step.
+//
+//   finalized-html (accumulated from committed deltas) + render(live tail)  ===  render(prefix)
+//
+// This is run with no DOM and no safety net, so any segmenter bug fails here
+// rather than reaching the browser.
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+import { CORPUS } from './corpus.mjs';
+
+const md = await loadMarkdown();
+const render = (t) => md.mdToHtml(t);
+
+// The two render pipelines chat.js actually feeds streamed text through. BOTH wrap
+// the source in squashOutsideCode; the main path additionally runs
+// processWithThinking (which floats <think> blocks to the top — a non-local
+// transform). Fuzzing the corpus through these — not just bare mdToHtml — closes
+// the gap where a squashOutsideCode whitespace/fence edge could break the split.
+const renderLiveReply = (t) => md.mdToHtml(md.squashOutsideCode(t)); // chat.js live-reply path
+const renderMain = (t) => md.processWithThinking(md.squashOutsideCode(t)); // chat.js main path
+
+// Reproduce the renderer's exact use of the segmenter over a sequence of prefixes.
+function simulate(text, prefixLengths, renderFn = render) {
+  let committed = 0;
+  let finalizedHtml = '';
+  for (const len of prefixLengths) {
+    const prefix = text.slice(0, len);
+    const next = splitFinalized(prefix, renderFn, committed);
+
+    assert.ok(
+      next >= committed && next <= prefix.length,
+      `committed must stay monotonic and in range (${committed} -> ${next} at length ${len})`,
+    );
+    if (next > committed) {
+      // The renderer renders each finalized delta once and never touches it again.
+      finalizedHtml += renderFn(prefix.slice(committed, next));
+      committed = next;
+    }
+
+    const got = normalizeRender(finalizedHtml + renderFn(prefix.slice(committed)));
+    const want = normalizeRender(renderFn(prefix));
+    assert.equal(got, want, `invariant broke at prefix length ${len} of ${JSON.stringify(text)}`);
+  }
+}
+
+const everyPrefix = (t) => Array.from({ length: t.length + 1 }, (_, i) => i);
+// Prefix lengths that end just after each whitespace character, plus the full
+// length — simulating a stream that arrives in whitespace-delimited chunks.
+// Falls back to [t.length] when nothing qualifies (only for the empty string).
+function chunkAtWhitespace(t) {
+  const lens = [];
+  for (let i = 1; i <= t.length; i++) {
+    if (i === t.length || /\s/.test(t[i - 1])) lens.push(i);
+  }
+  return lens.length ? lens : [t.length];
+}
+
+const RENDERERS = [
+  ['mdToHtml', render],
+  ['mdToHtml∘squashOutsideCode (live-reply path)', renderLiveReply],
+  ['processWithThinking∘squashOutsideCode (main path)', renderMain],
+];
+
+for (const [rname, renderFn] of RENDERERS) {
+  for (const [name, text] of CORPUS) {
+    test(`invariant — ${rname} — char-by-char — ${name}`, () => {
+      simulate(text, everyPrefix(text), renderFn);
+    });
+    test(`invariant — ${rname} — whitespace-chunked — ${name}`, () => {
+      simulate(text, chunkAtWhitespace(text), renderFn);
+    });
+  }
+}
+
+// These samples carry <think> blocks (the corpus above is think-free), so they
+// specifically exercise the self-verifying local check refusing to finalize inside
+// or across a think block that processWithThinking floats to the top.
+const THINKING_CORPUS = [
+  ['leading think then answer', '<think>Let me reason about it.</think>\n\nThe answer is 42.'],
+  ['think with internal blank lines', '<think>Step one.\n\nStep two.\n\nStep three.</think>\n\nDone — the result follows.'],
+  ['think then several paragraphs', '<thinking>analyzing the request</thinking>\n\nFirst point made here.\n\nSecond point made here.\n\nThird and final point.'],
+  ['think then code block', '<think>I should show code.</think>\n\nHere:\n\n```python\nprint("hi")\n```\n\nThat is the snippet.'],
+];
+for (const [name, text] of THINKING_CORPUS) {
+  test(`invariant (processWithThinking) — char-by-char — ${name}`, () => {
+    simulate(text, everyPrefix(text), renderMain);
+  });
+}
+
+// A final-output check independent of chunking: streaming to completion must equal
+// a single full render.
+test('streamed-to-completion output equals full render for whole corpus', () => {
+  for (const [name, text] of CORPUS) {
+    let committed = 0;
+    let html = '';
+    for (let len = 1; len <= text.length; len++) {
+      const next = splitFinalized(text.slice(0, len), render, committed);
+      if (next > committed) {
+        html += render(text.slice(committed, next));
+        committed = next;
+      }
+    }
+    html += render(text.slice(committed));
+    assert.equal(normalizeRender(html), normalizeRender(render(text)), `final mismatch for ${name}`);
+  }
+});
@@ -0,0 +1,66 @@
+// Loads the real browser markdown renderer (static/js/markdown.js) under Node by
+// mocking the minimal browser globals it touches and stubbing its sibling imports.
+// This mirrors the loader in tests/test_markdown_rendering_js.py so the streaming
+// tests exercise the exact same renderer the browser runs.
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const REPO = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
+
+export async function loadMarkdown() {
+  globalThis.window = { location: { origin: 'http://localhost' }, katex: null };
+  globalThis.document = {
+    readyState: 'loading',
+    addEventListener() {},
+    createElement(tag) {
+      if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
+      return {
+        _html: '',
+        content: { querySelectorAll() { return []; } },
+        set innerHTML(v) { this._html = v; },
+        get innerHTML() { return this._html; },
+      };
+    },
+  };
+  globalThis.MutationObserver = class { observe() {} };
+
+  let src = fs.readFileSync(path.join(REPO, 'static/js/markdown.js'), 'utf8');
+  src = src.replace(/import uiModule from ['"]\.\/ui\.js['"];/, '');
+  src = src.replace(
+    /import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
+    () => `function splitTableRow(row){return (row||'').replace(/^\\s*\\|/,'').replace(/\\|\\s*$/,'').split('|').map((c)=>c.trim());}`,
+  );
+  const emoji = fs
+    .readFileSync(path.join(REPO, 'static/js/emojiShortcodes.js'), 'utf8')
+    .replace(/^export default .*$/m, '')
+    .replace(/export const /g, 'const ')
+    .replace(/export function /g, 'function ');
+  src = src.replace(
+    /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
+    () => emoji,
+  );
+  src = src.replace(
+    /var escapeHtml = uiModule\.esc;/,
+    () =>
+      `var escapeHtml = (v) => String(v ?? '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');`,
+  );
+  const url = 'data:text/javascript;base64,' + Buffer.from(src).toString('base64');
+  return import(url);
+}
+
+// Canonicalize rendered HTML so two renders that produce the SAME DOM compare
+// equal. Collapses only newline-bearing whitespace BETWEEN tags (`>\n\n<` ->
+// `><`): it is insignificant in rendered HTML, and incremental finalization
+// legitimately emits `\n\n` between two blocks where a single full render emits
+// `\n`. Code whitespace is safe because code is HTML-escaped, so significant
+// newlines live inside <code> as text (never between a `>` and a `<`). Inline
+// single spaces between tags are left alone. Structural differences (two <ul> vs
+// one, <ol> vs <ul>) survive normalization and still fail, as they must.
+// Mermaid ids embed Date.now(), so they are normalized too.
+export function normalizeRender(html) {
+  return String(html)
+    .replace(/>\s*\n\s*</g, '><')
+    .trim()
+    .replace(/(mermaid|thinking)-\d+-\d+/g, '$1-X');
+}
@@ -0,0 +1,65 @@
+// Tests for the pure streaming-markdown segmenter.
+//
+// The segmenter's one job: given the full accumulated markdown text so far,
+// report how many leading characters are SAFE to finalize — i.e. freeze and
+// never re-render. "Safe" means: rendering the finalized prefix and the live
+// tail separately produces the same DOM as rendering the whole text at once.
+//
+// Invariant under test everywhere (compared after normalizeRender):  render(text[0:n]) + render(text[n:]) === render(text)
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+
+const md = await loadMarkdown(); // top-level await: the renderer module is loaded once, before any test runs
+const render = (t) => md.mdToHtml(t); // thin wrapper so tests and splitFinalized share one render function
+const splitOk = (text, n) => // true when rendering prefix and tail separately matches the full render, after normalization
+  normalizeRender(render(text.slice(0, n)) + render(text.slice(n))) === normalizeRender(render(text));
+
+test('harness loads the real renderer', () => {
+  assert.match(render('hi'), /<p>hi<\/p>/); // sanity check: plain text comes back wrapped in <p>
+});
+
+test('nothing is finalized while a single block is still streaming', () => {
+  assert.equal(splitFinalized('an incomplete paragra', render), 0); // 0 = no leading characters are frozen
+});
+
+test('finalizes the first of two blank-line-separated paragraphs', () => {
+  const text = 'para one\n\npara two';
+  const n = splitFinalized(text, render);
+  assert.equal(n, 'para one\n\n'.length); // the boundary sits just after the blank line
+  assert.ok(splitOk(text, n), 'split must be render-equivalent');
+});
+
+test('never finalizes the last (still-growing) block', () => {
+  // The trailing paragraph could still gain more characters, so it stays live.
+  const text = 'done\n\nstill going';
+  const n = splitFinalized(text, render);
+  assert.ok(n <= 'done\n\n'.length); // upper bound only: finalizing less (even 0) also passes
+  assert.ok(splitOk(text, n));
+});
+
+test('a closed code fence is finalized immediately, even as the last block', () => {
+  // This is the original flicker scenario: a completed code block must freeze
+  // so its hover buttons stop being recreated on every later token.
+  const text = 'Here:\n\n```python\nprint(1)\n```';
+  const n = splitFinalized(text, render);
+  assert.ok(n >= text.length - 1, `expected the whole closed fence finalized, got ${n} of ${text.length}`); // a boundary one char short of the end is tolerated
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT finalize across an OPEN code fence', () => {
+  const text = 'intro\n\n```python\nprint(1)\nprint(2)'; // the fence is opened but never closed
+  const n = splitFinalized(text, render);
+  // "intro" may finalize, but nothing inside the still-open fence may.
+  assert.ok(n <= 'intro\n\n'.length, `must not finalize into an open fence, got ${n}`);
+  assert.ok(splitOk(text, n)); // whatever n is chosen, the split must stay render-equivalent
+});
+
+test('does NOT split a loose list (blank line between items is not a boundary)', () => {
+  const text = '- a\n\n- b\n\nafter'; // two list items separated by a blank line, then a paragraph
+  const n = splitFinalized(text, render);
+  assert.ok(splitOk(text, n), 'a wrong split here would turn one <ul> into two');
+  // The list must not be cut in the middle: either nothing or the whole list.
+  assert.ok(n === 0 || n >= '- a\n\n- b\n\n'.length, `loose list was cut at ${n}`);
+});
@@ -0,0 +1,38 @@
+"""Runs the Node-based streaming-render segmenter suite (tests/streaming/*.test.mjs).
+
+Covers the pure incremental-render segmenter (static/js/streamingSegmenter.js):
+unit boundaries plus a streaming-invariant fuzz that feeds a markdown corpus in
+token-by-token and asserts the freeze/tail split always matches a single full
+render. Pure JS — no DOM, no extra dependencies. Skipped when node is
+unavailable, mirroring tests/test_markdown_rendering_js.py.
+
+The renderer's DOM behavior (streamingRenderer.js) is exercised against a running
+app, not here, consistent with how this project tests browser-coupled code.
+"""
+
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+_REPO = Path(__file__).resolve().parent.parent  # directory above this file's folder; used below as the repo root and cwd
+_HAS_NODE = shutil.which("node") is not None  # True when a `node` executable is found on PATH
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_streaming_segmenter_suite():
+    test_files = sorted(str(p) for p in (_REPO / "tests" / "streaming").glob("*.test.mjs"))  # sorted for a stable run order
+    assert test_files, "no streaming test files found"  # an empty glob must fail instead of passing vacuously
+
+    result = subprocess.run(
+        ["node", "--test", *test_files],  # Node's built-in test runner, one invocation for all files
+        cwd=_REPO,
+        capture_output=True,
+        timeout=180,  # seconds; subprocess.run raises TimeoutExpired beyond this
+        text=True,
+    )
+    if result.returncode != 0:
+        raise AssertionError(  # surface both streams so the failing JS test shows up in pytest output
+            f"node --test failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
+        )