, highlighted once when it closes).
+//
+// All the "is this safe to freeze?" logic lives in the pure segmenter; this file
+// is deliberately mechanical. If anything throws, it latches into a full-re-render
+// fallback so a bug can never produce broken output — only today's behavior.
+
+import { splitFinalized, describeOpenFence } from './streamingSegmenter.js';
+
+// Compile-time escape hatch: set to false to force the plain full-re-render path.
+// (The per-instance try/catch `degraded` fallback below is the runtime safety net.)
+const ENABLED = true;
+
+/**
+ * Create an incremental streaming renderer bound to `contentEl`.
+ *
+ * Finalized blocks are rendered once and kept in front of a comment-node marker;
+ * only the live tail behind the marker is rebuilt on each update.
+ *
+ * @param {Element} contentEl Container whose children this renderer manages.
+ * @param {{render: function(string): string, hljs: (Object|undefined)}} [deps]
+ *   `render` maps markdown source to an HTML string. `hljs` is optional; when
+ *   present, `hljs.highlightElement` is called on each `pre code` element.
+ * @returns {{update: function(string): void, finalize: function(): void}}
+ */
+export function createStreamRenderer(contentEl, { render, hljs } = {}) {
+  let started = false;
+  let tailMarker = null; // finalized nodes precede it; live-tail nodes follow it
+  let committedLen = 0; // chars of source already frozen
+  let lastText = ''; // most recent full text (for finalize)
+  let tailShownLen = 0; // rendered-text length of the live tail (drives token fade)
+  let appendMode = null; // { codeText: Text, appendedLen } while an open fence streams
+  let degraded = !ENABLED; // true once we fall back to full re-render
+
+  // Empty the container and plant the marker separating frozen from live nodes.
+  function start() {
+    contentEl.textContent = '';
+    tailMarker = document.createComment('tail');
+    contentEl.appendChild(tailMarker);
+    started = true;
+  }
+
+  // Syntax-highlight every code block under `root`; a no-op without `hljs`.
+  function highlight(root) {
+    if (hljs) root.querySelectorAll('pre code').forEach((b) => hljs.highlightElement(b));
+  }
+
+  // Remove every live-tail node, i.e. everything after the marker.
+  function clearTail() {
+    while (tailMarker.nextSibling) tailMarker.nextSibling.remove();
+  }
+
+  // Forget all incremental state when the marker is gone (finalize() removed it)
+  // or is no longer a child of the container (something replaced our DOM), so the
+  // next start() rebuilds from scratch instead of touching a detached marker.
+  function resetIfMarkerLost() {
+    if (!started) return;
+    if (tailMarker && tailMarker.parentNode === contentEl) return;
+    started = false;
+    committedLen = 0;
+    tailShownLen = 0;
+    appendMode = null;
+  }
+
+  // Render `src` and freeze the nodes before the tail marker. Highlighting happens
+  // here, once, on the detached fragment before the nodes are ever shown.
+  function freeze(src) {
+    const holder = document.createElement('div');
+    holder.innerHTML = render(src);
+    highlight(holder);
+    while (holder.firstChild) contentEl.insertBefore(holder.firstChild, tailMarker);
+  }
+
+  // Re-render the live tail. An open trailing fence streams in append-mode.
+  function renderTail(tailText) {
+    const fence = tailText ? describeOpenFence(tailText) : null;
+    if (fence) {
+      appendOpenFence(tailText, fence);
+      return;
+    }
+    appendMode = null;
+    clearTail();
+    if (!tailText) {
+      tailShownLen = 0;
+      return;
+    }
+    const holder = document.createElement('div');
+    holder.innerHTML = render(tailText);
+    fadeNewText(holder, tailShownLen);
+    tailShownLen = holder.textContent.length;
+    while (holder.firstChild) contentEl.appendChild(holder.firstChild);
+  }
+
+  // Stream the body of an unterminated code fence by appending only the new
+  // characters to a stable text node — no re-parse, no re-highlight.
+  function appendOpenFence(tailText, fence) {
+    if (!appendMode) {
+      clearTail();
+      const pre = document.createElement('pre');
+      const code = document.createElement('code');
+      if (fence.lang) code.className = `language-${fence.lang}`;
+      const textNode = document.createTextNode('');
+      code.appendChild(textNode);
+      pre.appendChild(code);
+      contentEl.appendChild(pre);
+      appendMode = { codeText: textNode, appendedLen: 0 };
+      tailShownLen = 0; // code is never faded; prose after the fence fades fresh
+    }
+    const code = tailText.slice(fence.contentStart);
+    if (code.length > appendMode.appendedLen) {
+      appendMode.codeText.appendData(code.slice(appendMode.appendedLen));
+      appendMode.appendedLen = code.length;
+    }
+  }
+
+  // Wrap tail text past `prevLen` characters in <span class="token-new"> for the
+  // streaming fade-in. Skips code (<pre>) and thinking blocks (.thinking-content).
+  // Note: the original chat.js helper checked `.think-content`, a class that exists
+  // nowhere in the app, so thinking text used to fade; matching the real
+  // `.thinking-content` corrects that. Operates on the detached fragment before insertion.
+  function fadeNewText(container, prevLen) {
+    if (!prevLen) return;
+    const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
+    let count = 0;
+    const toWrap = [];
+    while (walker.nextNode()) {
+      const node = walker.currentNode;
+      const len = node.textContent.length;
+      if (count + len <= prevLen) {
+        count += len;
+        continue;
+      }
+      // A node straddling `prevLen` is split there; later nodes are wrapped whole.
+      toWrap.push({ node, splitAt: count < prevLen ? prevLen - count : 0 });
+      count += len;
+    }
+    for (const { node, splitAt } of toWrap) {
+      const parent = node.parentNode;
+      if (!parent || parent.closest('pre, .thinking-content')) continue;
+      const target = splitAt > 0 ? node.splitText(splitAt) : node;
+      const span = document.createElement('span');
+      span.className = 'token-new';
+      parent.replaceChild(span, target);
+      span.appendChild(target);
+    }
+  }
+
+  // Replace the whole container with one canonical render of `fullText`.
+  function fullRender(fullText) {
+    contentEl.innerHTML = render(fullText);
+    highlight(contentEl);
+  }
+
+  // Render the latest full source text.
+  //
+  // PRECONDITION: callers must pass append-only text — each call's `fullText` must
+  // extend the previous one with the already-seen prefix UNCHANGED. Finalized
+  // blocks are frozen and never re-rendered, so a feed that rewrites earlier text
+  // would leave stale frozen blocks (corrected only by the next full re-render).
+  // chat.js satisfies this: its stripToolBlocks output only strips not-yet-finalized
+  // trailing tool syntax, never text that has already been frozen.
+  function update(fullText) {
+    lastText = fullText;
+    if (degraded) {
+      fullRender(fullText);
+      return;
+    }
+    try {
+      // Self-heal: if our DOM was replaced out from under us — chat.js writes
+      // contentEl.innerHTML directly for thinking indicators and tool blocks, and
+      // finalize() removes the marker — our tail marker is no longer a child of the
+      // container. Rebuild from scratch so we never append onto foreign content or
+      // touch a detached marker.
+      resetIfMarkerLost();
+      if (!started) start();
+      const next = splitFinalized(fullText, render, committedLen);
+      if (next > committedLen) {
+        freeze(fullText.slice(committedLen, next));
+        committedLen = next;
+        appendMode = null; // whatever was streaming is now frozen
+        tailShownLen = 0;
+      }
+      renderTail(fullText.slice(committedLen));
+    } catch (err) {
+      degraded = true;
+      console.error('streamingRenderer: falling back to full render', err);
+      fullRender(fullText);
+    }
+  }
+
+  // Stream finished: freeze whatever is left canonically and flatten away the
+  // marker so the container holds exactly what a single full render would produce.
+  // chat.js currently re-renders the finished message from source for its own
+  // reasons and so doesn't call this, but it completes the renderer's lifecycle and
+  // is exercised by the tests.
+  function finalize() {
+    if (degraded) return;
+    try {
+      // Same self-heal as update(): after an earlier finalize() the marker is
+      // null, and after a foreign innerHTML write it is detached. Either way,
+      // rebuild from source rather than throw on the missing marker and latch
+      // `degraded` for what is not actually a rendering bug.
+      resetIfMarkerLost();
+      if (!started) start();
+      clearTail();
+      appendMode = null;
+      const rest = lastText.slice(committedLen);
+      if (rest.trim()) freeze(rest);
+      tailMarker.remove();
+      tailMarker = null;
+      committedLen = lastText.length;
+    } catch (err) {
+      degraded = true;
+      console.error('streamingRenderer: falling back to full render', err);
+      fullRender(lastText);
+    }
+  }
+
+  return { update, finalize };
+}
diff --git a/static/js/streamingSegmenter.js b/static/js/streamingSegmenter.js
new file mode 100644
index 000000000..b501f21d5
--- /dev/null
+++ b/static/js/streamingSegmenter.js
@@ -0,0 +1,190 @@
+// streamingSegmenter.js
+//
+// Pure logic for incremental ("block-at-a-time") streaming markdown rendering.
+//
+// While an assistant message streams in, re-rendering the whole accumulated
+// markdown on every token is wasteful (O(N^2)) and recreates DOM nodes, which
+// makes code-block hover buttons flicker. The fix is to FREEZE the leading part
+// of the message that can no longer change, and only re-render the growing tail.
+//
+// This module answers the one hard question that makes freezing safe:
+//
+// Given the full markdown received so far, how many leading characters can
+// be finalized without changing the rendered output?
+//
+// The contract callers rely on (`render` is the canonical markdown renderer):
+//
+// const n = splitFinalized(text, render);
+// render(text.slice(0, n)) + render(text.slice(n)) === render(text)
+//
+// The module is intentionally DOM-free and renderer-agnostic so it can be unit
+// tested in isolation and reused for any markdown renderer with no long-range
+// cross-block dependencies (no reference-style links / footnotes).
+//
+// Known limitations (both bounded by the same mitigation):
+// - cutIsRenderSafe proves only PRESENT-tense equivalence. If the renderer pairs
+// an inline delimiter across a blank line (e.g. markdown.js will turn
+// `*a\n\nb*` into emphasis spanning two paragraphs), a block frozen before the
+// closing delimiter arrives can disagree with the final full render.
+// - afterClosedFence boundaries are trusted without the equivalence check, so a
+// fence the real renderer parses differently (e.g. a stray 4-backtick line) can
+// be mis-detected as a close.
+// Both only occur for input the renderer itself handles oddly, and both are
+// transient: chat.js re-renders the finished message from source, so the settled
+// output is always canonical.
+
+// A fenced-code delimiter line: up to 3 leading spaces, then >=3 backticks or
+// tildes, then an optional info string. The info string is captured with
+// `[^\n]*` rather than `.*`: `.` refuses every line terminator, including the
+// `\r` left at the end of each line when CRLF input is split on `\n`, so with
+// `.*` no fence line of a CRLF document would ever match.
+const FENCE_RE = /^ {0,3}(`{3,}|~{3,})([^\n]*)$/;
+
+/**
+ * Scan `text` starting at `fromOffset` — which MUST be at top level (callers only
+ * ever advance to a finalized boundary, never into a fence) — and collect the
+ * candidate cut points.
+ *
+ * @param {string} text Full markdown accumulated so far.
+ * @param {number} fromOffset Offset to start scanning from.
+ * @returns {{ boundaries: Array<{offset:number, afterClosedFence:boolean}>, inFence:boolean }}
+ *   - A blank-line run at top level yields a boundary at the start of the next
+ *     non-blank line (`afterClosedFence: false`).
+ *   - A fence close yields a boundary just past the closing fence line
+ *     (`afterClosedFence: true`) — such a cut is unconditionally safe, since
+ *     nothing can ever merge into a completed code block.
+ *   - `inFence` is true when the scan ended inside a fence that never closed.
+ */
+function findBoundaries(text, fromOffset) {
+  const boundaries = [];
+  const n = text.length;
+  let inFence = false;
+  let fenceMarker = '';
+  let i = fromOffset;
+
+  while (i < n) {
+    const nl = text.indexOf('\n', i);
+    const lineEnd = nl === -1 ? n : nl;
+    const afterNl = nl === -1 ? n : nl + 1;
+    const line = text.slice(i, lineEnd);
+    const fence = line.match(FENCE_RE);
+
+    if (fence) {
+      const marker = fence[1];
+      if (!inFence) {
+        inFence = true;
+        fenceMarker = marker;
+      } else if (
+        marker[0] === fenceMarker[0] &&
+        marker.length >= fenceMarker.length &&
+        fence[2].trim() === '' // a closing fence carries no info string
+      ) {
+        inFence = false;
+        fenceMarker = '';
+        boundaries.push({ offset: afterNl, afterClosedFence: true });
+      }
+      // Any other fence-looking line inside an open fence is just code.
+      i = afterNl;
+    } else if (!inFence && line.trim() === '') {
+      // Consume the entire run of blank lines; the boundary is the start of the
+      // next non-blank line so the finalized side owns the separator and the tail
+      // starts clean.
+      let j = afterNl;
+      while (j < n) {
+        const nl2 = text.indexOf('\n', j);
+        const lineEnd2 = nl2 === -1 ? n : nl2;
+        if (text.slice(j, lineEnd2).trim() !== '') break;
+        if (nl2 === -1) {
+          // Trailing blank line with no newline: the run reaches end of text.
+          j = n;
+          break;
+        }
+        j = nl2 + 1;
+      }
+      boundaries.push({ offset: j, afterClosedFence: false });
+      i = j;
+    } else {
+      i = afterNl;
+    }
+  }
+
+  return { boundaries, inFence };
+}
+
+/**
+ * Report whether cutting between `before` and `after` leaves the rendered output
+ * unchanged. This is the self-verifying safety check: the two sides are rendered
+ * separately and compared with rendering them joined, so constructs that span the
+ * cut (loose lists, setext headings, lazy blockquote continuations, tables) are
+ * caught with no hand-coded grammar rules.
+ *
+ * Renderer non-determinism (e.g. mermaid ids seeded with Date.now()) can only make
+ * this return a false negative, never a false positive — so the bias is always
+ * toward under-finalizing, which is the safe direction.
+ *
+ * @param {string} before Source on the finalized side of the cut.
+ * @param {string} after Source on the live side of the cut.
+ * @param {(src:string)=>string} render Canonical markdown renderer.
+ * @returns {boolean} True when both renderings are identical strings.
+ */
+function cutIsRenderSafe(before, after, render) {
+  const renderedApart = render(before) + render(after);
+  const renderedJoined = render(before + after);
+  return renderedApart === renderedJoined;
+}
+
+/**
+ * Compute how many leading characters of `text` can be safely finalized, scanning
+ * forward from `committedLen` (the amount already finalized).
+ *
+ * Guarantees `render(text.slice(0, n)) + render(text.slice(n)) === render(text)`,
+ * and `committedLen <= n <= text.length`.
+ *
+ * @param {string} text Full markdown accumulated so far.
+ * @param {(src:string)=>string} render Canonical markdown renderer.
+ * @param {number} [committedLen=0] Characters already finalized (always a prior boundary).
+ * @returns {number}
+ */
+export function splitFinalized(text, render, committedLen = 0) {
+  const { boundaries } = findBoundaries(text, committedLen);
+
+  let finalizedUpTo = committedLen;
+  let blockStart = committedLen;
+
+  boundaries.forEach(({ offset, afterClosedFence }, index) => {
+    if (afterClosedFence) {
+      // A completed code block — always safe to freeze through here.
+      finalizedUpTo = offset;
+    } else {
+      // A prose/list/table boundary. It only counts when a non-empty block follows
+      // it (the last block must stay live, it can still grow) and the cut is
+      // render-equivalent for the two neighbouring blocks.
+      const following = boundaries[index + 1];
+      const blockEnd = following ? following.offset : text.length;
+      const previousBlock = text.slice(blockStart, offset);
+      const nextBlock = text.slice(offset, blockEnd);
+      if (nextBlock.trim() !== '' && cutIsRenderSafe(previousBlock, nextBlock, render)) {
+        finalizedUpTo = offset;
+      }
+    }
+    blockStart = offset;
+  });
+
+  return finalizedUpTo;
+}
+
+/**
+ * Describe the unterminated code fence that `text` starts with, so the renderer
+ * can stream the code in append-mode instead of re-rendering it.
+ *
+ * The opener line must be complete (terminated by a newline) so the info string /
+ * language is known before append-mode begins.
+ *
+ * @param {string} text Source that may begin with a fenced-code opener.
+ * @returns {{lang: string, contentStart: number}|null} `lang` is the first word of
+ *   the info string ('' when absent) and `contentStart` the offset of the first
+ *   code character; null when `text` does not start with a still-open fence.
+ */
+export function describeOpenFence(text) {
+  const opener = /^( {0,3})(`{3,}|~{3,})([^\n]*)\n/.exec(text);
+  if (opener === null) return null;
+  const [openerLine, , marker, info] = opener;
+  const contentStart = openerLine.length;
+
+  // A closing line uses the same fence character, is at least as long as the
+  // opener, and carries nothing but whitespace after the marker.
+  const closesFence = (line) => {
+    const candidate = /^ {0,3}(`{3,}|~{3,})\s*$/.exec(line);
+    return (
+      candidate !== null &&
+      candidate[1][0] === marker[0] &&
+      candidate[1].length >= marker.length
+    );
+  };
+  // The fence closes — let the normal finalize path handle it.
+  if (text.slice(contentStart).split('\n').some(closesFence)) return null;
+
+  const lang = (info || '').trim().split(/\s+/)[0] || '';
+  return { lang, contentStart };
+}
diff --git a/tests/streaming/corpus.mjs b/tests/streaming/corpus.mjs
new file mode 100644
index 000000000..d66768ea1
--- /dev/null
+++ b/tests/streaming/corpus.mjs
@@ -0,0 +1,27 @@
+// A spread of markdown samples exercising the constructs the renderer supports.
+// Used by the streaming-invariant fuzz test (fed token-by-token) and the renderer
+// integration test. Keep samples small but structurally varied — the fuzz test
+// runs every prefix of every sample, so cost is quadratic in sample length.
+// Each entry is a [name, markdown] pair.
+export const CORPUS = [
+ ['plain paragraph', 'Just a single sentence of text.'],
+ ['two paragraphs', 'First paragraph here.\n\nSecond paragraph here.'],
+ ['three paragraphs', 'Alpha block.\n\nBravo block.\n\nCharlie block.'],
+ ['atx headings', '# Title\n\nIntro line.\n\n## Section\n\nBody text.'],
+ ['setext heading', 'The Title\n=========\n\nA paragraph under it.'],
+ ['inline formatting', 'Some **bold**, *italic*, `code`, and a [link](https://x.com).'],
+ ['tight unordered list', '- one\n- two\n- three\n\ndone'],
+ ['ordered list then text', 'Before\n\n1. first\n2. second\n3. third\n\nAfter'],
+ ['loose list then paragraph', '- a\n\n- b\n\n- c\n\nClosing paragraph.'],
+ ['nested list', '- top\n - nested one\n - nested two\n- back to top\n\nend'],
+ ['blockquote', '> quoted line one\n> quoted line two\n\nplain after'],
+ ['thematic break', 'above the line\n\n---\n\nbelow the line'],
+ ['python code fence', 'Run this:\n\n```python\nprint("hi")\nfor i in range(3):\n print(i)\n```\n\nThat prints numbers.'],
+ ['fence with blank lines inside', '```js\nconst a = 1;\n\nconst b = 2;\n```\n\nafter the code'],
+ ['two consecutive fences', '```\nfirst block\n```\n\n```\nsecond block\n```\n\ntail'],
+ ['mermaid diagram', 'Diagram:\n\n```mermaid\ngraph TD\nA-->B\n```\n\nafter diagram'],
+ ['gfm table', 'Data:\n\n| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |\n\nafter table'],
+ [
+ 'mixed document',
+ '# Report\n\nIntro paragraph with a `symbol`.\n\n```python\nx = 1\n```\n\n- bullet one\n- bullet two\n\n> a quote\n\nFinal words.',
+ ],
+];
diff --git a/tests/streaming/invariant.test.mjs b/tests/streaming/invariant.test.mjs
new file mode 100644
index 000000000..f74cc1c7d
--- /dev/null
+++ b/tests/streaming/invariant.test.mjs
@@ -0,0 +1,107 @@
+// The centerpiece correctness test: stream every corpus sample in token-by-token,
+// driving the segmenter exactly as the renderer will, and assert the freeze/tail
+// split stays render-equivalent to a single full render at EVERY step.
+//
+// finalized-html (accumulated from committed deltas) + render(live tail) === render(prefix)
+//
+// This is run with no DOM and no safety net, so any segmenter bug fails here
+// rather than reaching the browser.
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+import { CORPUS } from './corpus.mjs';
+
+const md = await loadMarkdown();
+// Baseline renderer: bare mdToHtml with no pre-processing.
+const render = (t) => md.mdToHtml(t);
+
+// The two render pipelines chat.js actually feeds streamed text through. BOTH wrap
+// the source in squashOutsideCode; the main path additionally runs
+// processWithThinking (which floats think blocks to the top — a non-local
+// transform). Fuzzing the corpus through these — not just bare mdToHtml — closes
+// the gap where a squashOutsideCode whitespace/fence edge could break the split.
+const renderLiveReply = (t) => md.mdToHtml(md.squashOutsideCode(t)); // chat.js live-reply path
+const renderMain = (t) => md.processWithThinking(md.squashOutsideCode(t)); // chat.js main path
+
+// Drive the segmenter over successive prefixes exactly the way the renderer does,
+// asserting after every step that frozen HTML plus the rendered live tail equals a
+// single full render of the prefix.
+function simulate(text, prefixLengths, renderFn = render) {
+  let frozenLen = 0;
+  let frozenHtml = '';
+  prefixLengths.forEach((len) => {
+    const prefix = text.slice(0, len);
+    const cut = splitFinalized(prefix, renderFn, frozenLen);
+
+    const inRange = cut >= frozenLen && cut <= prefix.length;
+    assert.ok(inRange, `committed must stay monotonic and in range (${frozenLen} -> ${cut} at length ${len})`);
+    if (cut > frozenLen) {
+      // The renderer renders each finalized delta once and never touches it again.
+      frozenHtml += renderFn(prefix.slice(frozenLen, cut));
+      frozenLen = cut;
+    }
+
+    const streamed = normalizeRender(frozenHtml + renderFn(prefix.slice(frozenLen)));
+    const canonical = normalizeRender(renderFn(prefix));
+    assert.equal(streamed, canonical, `invariant broke at prefix length ${len} of ${JSON.stringify(text)}`);
+  });
+}
+
+// Every prefix length of `t`, from 0 through t.length inclusive.
+const everyPrefix = (t) => [...Array(t.length + 1).keys()];
+// Prefix lengths that end just after a whitespace character, plus the full length;
+// falls back to [t.length] when nothing qualifies (the empty string).
+function chunkAtWhitespace(t) {
+  const total = t.length;
+  const cuts = [];
+  for (let end = 1; end <= total; end += 1) {
+    const endsChunk = end === total || /\s/.test(t[end - 1]);
+    if (endsChunk) cuts.push(end);
+  }
+  return cuts.length > 0 ? cuts : [total];
+}
+
+// Each entry is a [label, render function] pair.
+const RENDERERS = [
+  ['mdToHtml', render],
+  ['mdToHtml∘squashOutsideCode (live-reply path)', renderLiveReply],
+  ['processWithThinking∘squashOutsideCode (main path)', renderMain],
+];
+
+// Register both chunking strategies for every (renderer, corpus sample) pair.
+RENDERERS.forEach(([rname, renderFn]) => {
+  CORPUS.forEach(([name, text]) => {
+    test(`invariant — ${rname} — char-by-char — ${name}`, () => {
+      simulate(text, everyPrefix(text), renderFn);
+    });
+    test(`invariant — ${rname} — whitespace-chunked — ${name}`, () => {
+      simulate(text, chunkAtWhitespace(text), renderFn);
+    });
+  });
+});
+
+// These samples carry think blocks (the corpus above is think-free), so they
+// specifically exercise the self-verifying local check refusing to finalize inside
+// or across a think block that processWithThinking floats to the top.
+// NOTE(review): the `<think>` / `</think>` tag spelling below is reconstructed —
+// confirm it matches what processWithThinking recognises.
+const THINKING_CORPUS = [
+  ['leading think then answer', '<think>Let me reason about it. </think>\n\nThe answer is 42.'],
+  ['think with internal blank lines', '<think>Step one.\n\nStep two.\n\nStep three. </think>\n\nDone — the result follows.'],
+  ['think then several paragraphs', '<think>analyzing the request </think>\n\nFirst point made here.\n\nSecond point made here.\n\nThird and final point.'],
+  ['think then code block', '<think>I should show code. </think>\n\nHere:\n\n```python\nprint("hi")\n```\n\nThat is the snippet.'],
+];
+for (const [name, text] of THINKING_CORPUS) {
+  test(`invariant (processWithThinking) — char-by-char — ${name}`, () => {
+    simulate(text, everyPrefix(text), renderMain);
+  });
+}
+
+// End-state check that does not depend on chunking: feeding a sample one
+// character at a time and then rendering the leftover tail must match one full
+// render of the sample.
+test('streamed-to-completion output equals full render for whole corpus', () => {
+  CORPUS.forEach(([name, text]) => {
+    let frozenLen = 0;
+    const pieces = [];
+    for (let len = 1; len <= text.length; len += 1) {
+      const cut = splitFinalized(text.slice(0, len), render, frozenLen);
+      if (cut <= frozenLen) continue;
+      pieces.push(render(text.slice(frozenLen, cut)));
+      frozenLen = cut;
+    }
+    pieces.push(render(text.slice(frozenLen)));
+    assert.equal(normalizeRender(pieces.join('')), normalizeRender(render(text)), `final mismatch for ${name}`);
+  });
+});
diff --git a/tests/streaming/markdownHarness.mjs b/tests/streaming/markdownHarness.mjs
new file mode 100644
index 000000000..03e12fa61
--- /dev/null
+++ b/tests/streaming/markdownHarness.mjs
@@ -0,0 +1,66 @@
+// Loads the real browser markdown renderer (static/js/markdown.js) under Node by
+// mocking the minimal browser globals it touches and stubbing its sibling imports.
+// This mirrors the loader in tests/test_markdown_rendering_js.py so the streaming
+// tests exercise the exact same renderer the browser runs.
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const REPO = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
+
+/**
+ * Load static/js/markdown.js under Node and return its module namespace.
+ *
+ * Installs stub `window`, `document` and `MutationObserver` globals, rewrites the
+ * file's sibling imports into inline equivalents, and imports the result from a
+ * base64 `data:` URL.
+ *
+ * @returns {Promise<object>} The namespace object of the rewritten module.
+ */
+export async function loadMarkdown() {
+  // Minimal browser globals that markdown.js touches.
+  globalThis.window = { location: { origin: 'http://localhost' }, katex: null };
+  globalThis.document = {
+    readyState: 'loading',
+    addEventListener() {},
+    createElement(tag) {
+      if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
+      return {
+        _html: '',
+        content: { querySelectorAll() { return []; } },
+        set innerHTML(v) { this._html = v; },
+        get innerHTML() { return this._html; },
+      };
+    },
+  };
+  globalThis.MutationObserver = class { observe() {} };
+
+  let src = fs.readFileSync(path.join(REPO, 'static/js/markdown.js'), 'utf8');
+  // ui.js is dropped entirely; the one thing taken from it (esc) is stubbed below.
+  src = src.replace(/import uiModule from ['"]\.\/ui\.js['"];/, '');
+  src = src.replace(
+    /import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
+    () => `function splitTableRow(row){return (row||'').replace(/^\\s*\\|/,'').replace(/\\|\\s*$/,'').split('|').map((c)=>c.trim());}`,
+  );
+  // Inline emojiShortcodes.js with its export keywords removed.
+  const emoji = fs
+    .readFileSync(path.join(REPO, 'static/js/emojiShortcodes.js'), 'utf8')
+    .replace(/^export default .*$/m, '')
+    .replace(/export const /g, 'const ')
+    .replace(/export function /g, 'function ');
+  src = src.replace(
+    /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
+    () => emoji,
+  );
+  // Stand-in HTML escaper. `&` is replaced first so the entities produced by the
+  // later replacements are not escaped a second time.
+  // NOTE(review): assumed to mirror uiModule.esc — confirm the entity set.
+  src = src.replace(
+    /var escapeHtml = uiModule\.esc;/,
+    () =>
+      `var escapeHtml = (v) => String(v ?? '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');`,
+  );
+  const url = 'data:text/javascript;base64,' + Buffer.from(src).toString('base64');
+  return import(url);
+}
+
+// Canonicalize rendered HTML so two renders that produce the SAME DOM compare
+// equal. Collapses only newline-bearing whitespace BETWEEN tags (`>\n\n<` ->
+// `><`): it is insignificant in rendered HTML, and incremental finalization
+// legitimately emits `\n\n` between two blocks where a single full render emits
+// `\n`. Code whitespace is safe because code is HTML-escaped, so significant
+// newlines live inside the code element as text (never between a `>` and a `<`).
+// Inline single spaces between tags are left alone. Structural differences (for
+// example two lists where there should be one) survive normalization and still
+// fail, as they must. Mermaid ids embed Date.now(), so they are normalized too.
+/**
+ * @param {*} html Rendered HTML; coerced with String().
+ * @returns {string} The canonical form used for equality comparison.
+ */
+export function normalizeRender(html) {
+  return String(html)
+    .replace(/>\s*\n\s*</g, '><')
+    .trim()
+    .replace(/(mermaid|thinking)-\d+-\d+/g, '$1-X');
+}
diff --git a/tests/streaming/segmenter.test.mjs b/tests/streaming/segmenter.test.mjs
new file mode 100644
index 000000000..ce4b6f563
--- /dev/null
+++ b/tests/streaming/segmenter.test.mjs
@@ -0,0 +1,65 @@
+// Tests for the pure streaming-markdown segmenter.
+//
+// The segmenter's one job: given the full accumulated markdown text so far,
+// report how many leading characters are SAFE to finalize — i.e. freeze and
+// never re-render. "Safe" means: rendering the finalized prefix and the live
+// tail separately produces the same DOM as rendering the whole text at once.
+//
+// Invariant under test everywhere: render(text[0:n]) + render(text[n:]) === render(text)
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+
+const md = await loadMarkdown();
+const render = (t) => md.mdToHtml(t);
+// True when rendering text[0:n] and text[n:] separately yields, after
+// normalization, the same HTML as rendering `text` in one piece.
+const splitOk = (text, n) =>
+ normalizeRender(render(text.slice(0, n)) + render(text.slice(n))) === normalizeRender(render(text));
+
+// Smoke test: the harness produced a working renderer at all.
+test('harness loads the real renderer', () => {
+ assert.match(render('hi'), /hi<\/p>/);
+});
+
+test('nothing is finalized while a single block is still streaming', () => {
+  const cut = splitFinalized('an incomplete paragra', render);
+  assert.equal(cut, 0);
+});
+
+test('finalizes the first of two blank-line-separated paragraphs', () => {
+  const firstBlock = 'para one\n\n';
+  const text = `${firstBlock}para two`;
+  const cut = splitFinalized(text, render);
+  assert.equal(cut, firstBlock.length);
+  assert.ok(splitOk(text, cut), 'split must be render-equivalent');
+});
+
+test('never finalizes the last (still-growing) block', () => {
+  // More characters may still arrive for the trailing paragraph, so the cut can
+  // never land beyond the end of the first block.
+  const firstBlock = 'done\n\n';
+  const text = `${firstBlock}still going`;
+  const cut = splitFinalized(text, render);
+  assert.ok(cut <= firstBlock.length);
+  assert.ok(splitOk(text, cut));
+});
+
+test('a closed code fence is finalized immediately, even as the last block', () => {
+  // The original flicker scenario: once a code block is complete it must freeze,
+  // otherwise its hover buttons are recreated on every later token.
+  const text = 'Here:\n\n```python\nprint(1)\n```';
+  const n = splitFinalized(text, render);
+  const wholeFenceFrozen = n >= text.length - 1;
+  assert.ok(wholeFenceFrozen, `expected the whole closed fence finalized, got ${n} of ${text.length}`);
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT finalize across an OPEN code fence', () => {
+  const intro = 'intro\n\n';
+  const text = `${intro}\`\`\`python\nprint(1)\nprint(2)`;
+  const n = splitFinalized(text, render);
+  // "intro" may finalize, but nothing inside the still-open fence may.
+  assert.ok(n <= intro.length, `must not finalize into an open fence, got ${n}`);
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT split a loose list (blank line between items is not a boundary)', () => {
+  const text = '- a\n\n- b\n\nafter';
+  const n = splitFinalized(text, render);
+  // Whatever cut is chosen must still render identically to the whole text.
+  assert.ok(splitOk(text, n), 'a wrong split here would turn one <ul> into two');
+  // The list must not be cut in the middle: either nothing or the whole list.
+  assert.ok(n === 0 || n >= '- a\n\n- b\n\n'.length, `loose list was cut at ${n}`);
+});
diff --git a/tests/test_streaming_segmenter_js.py b/tests/test_streaming_segmenter_js.py
new file mode 100644
index 000000000..05393430b
--- /dev/null
+++ b/tests/test_streaming_segmenter_js.py
@@ -0,0 +1,38 @@
+"""Runs the Node-based streaming-render segmenter suite (tests/streaming/*.test.mjs).
+
+Covers the pure incremental-render segmenter (static/js/streamingSegmenter.js):
+unit boundaries plus a streaming-invariant fuzz that feeds a markdown corpus in
+token-by-token and asserts the freeze/tail split always matches a single full
+render. Pure JS — no DOM, no extra dependencies. Skipped when node is
+unavailable, mirroring tests/test_markdown_rendering_js.py.
+
+The renderer's DOM behavior (streamingRenderer.js) is exercised against a running
+app, not here, consistent with how this project tests browser-coupled code.
+"""
+
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+# Repository root: two directory levels above this test module.
+_REPO = Path(__file__).resolve().parent.parent
+# Whether a `node` executable can be found on PATH.
+_HAS_NODE = shutil.which("node") is not None
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_streaming_segmenter_suite():
+    """Run every tests/streaming/*.test.mjs file under `node --test`.
+
+    Fails with the captured stdout and stderr when node exits non-zero.
+    """
+    suite_dir = _REPO / "tests" / "streaming"
+    test_files = sorted(str(path) for path in suite_dir.glob("*.test.mjs"))
+    assert test_files, "no streaming test files found"
+
+    command = ["node", "--test", *test_files]
+    result = subprocess.run(
+        command,
+        cwd=_REPO,
+        capture_output=True,
+        timeout=180,
+        text=True,
+    )
+    if result.returncode == 0:
+        return
+    raise AssertionError(
+        f"node --test failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
+    )