fix(chat): stop code-block button flicker during streaming (#3023)

Render streamed markdown incrementally (freeze finalized blocks, re-render only the growing tail) instead of re-rendering the whole message on every token, which recreated every <pre> and dropped its CSS :hover state.
2026-06-16 09:45:24 -04:00 · 2026-06-06 16:08:54 +06:00
parent fb9c7cf3da
commit 2e37d72155
8 changed files with 719 additions and 63 deletions
@@ -0,0 +1,27 @@
+// A spread of markdown samples exercising the constructs the renderer supports.
+// Used by the streaming-invariant fuzz test (fed token-by-token) and the renderer
+// integration test. Keep samples small but structurally varied — the fuzz test
+// runs every prefix of every sample, so cost is quadratic in sample length.
+export const CORPUS = [ // each entry is a [label, markdown source] pair
+  ['plain paragraph', 'Just a single sentence of text.'],
+  ['two paragraphs', 'First paragraph here.\n\nSecond paragraph here.'],
+  ['three paragraphs', 'Alpha block.\n\nBravo block.\n\nCharlie block.'],
+  ['atx headings', '# Title\n\nIntro line.\n\n## Section\n\nBody text.'],
+  ['setext heading', 'The Title\n=========\n\nA paragraph under it.'],
+  ['inline formatting', 'Some **bold**, *italic*, `code`, and a [link](https://x.com).'],
+  ['tight unordered list', '- one\n- two\n- three\n\ndone'],
+  ['ordered list then text', 'Before\n\n1. first\n2. second\n3. third\n\nAfter'],
+  ['loose list then paragraph', '- a\n\n- b\n\n- c\n\nClosing paragraph.'],
+  ['nested list', '- top\n  - nested one\n  - nested two\n- back to top\n\nend'],
+  ['blockquote', '> quoted line one\n> quoted line two\n\nplain after'],
+  ['thematic break', 'above the line\n\n---\n\nbelow the line'],
+  ['python code fence', 'Run this:\n\n```python\nprint("hi")\nfor i in range(3):\n    print(i)\n```\n\nThat prints numbers.'],
+  ['fence with blank lines inside', '```js\nconst a = 1;\n\nconst b = 2;\n```\n\nafter the code'],
+  ['two consecutive fences', '```\nfirst block\n```\n\n```\nsecond block\n```\n\ntail'],
+  ['mermaid diagram', 'Diagram:\n\n```mermaid\ngraph TD\nA-->B\n```\n\nafter diagram'],
+  ['gfm table', 'Data:\n\n| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |\n\nafter table'],
+  [
+    'mixed document',
+    '# Report\n\nIntro paragraph with a `symbol`.\n\n```python\nx = 1\n```\n\n- bullet one\n- bullet two\n\n> a quote\n\nFinal words.',
+  ],
+];
@@ -0,0 +1,107 @@
+// The centerpiece correctness test: stream every corpus sample in token-by-token,
+// driving the segmenter exactly as the renderer will, and assert the freeze/tail
+// split stays render-equivalent to a single full render at EVERY step.
+//
+//   finalized-html (accumulated from committed deltas) + render(live tail)  ===  render(prefix)
+//
+// This is run with no DOM and no safety net, so any segmenter bug fails here
+// rather than reaching the browser.
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+import { CORPUS } from './corpus.mjs';
+
+const md = await loadMarkdown(); // module namespace of static/js/markdown.js as loaded by the harness
+const render = (t) => md.mdToHtml(t); // bare mdToHtml, with no squashOutsideCode pre-pass
+
+// The two render pipelines chat.js actually feeds streamed text through. BOTH wrap
+// the source in squashOutsideCode; the main path additionally runs
+// processWithThinking (which floats <think> blocks to the top — a non-local
+// transform). Fuzzing the corpus through these — not just bare mdToHtml — closes
+// the gap where a squashOutsideCode whitespace/fence edge could break the split.
+const renderLiveReply = (t) => md.mdToHtml(md.squashOutsideCode(t)); // chat.js live-reply path
+const renderMain = (t) => md.processWithThinking(md.squashOutsideCode(t)); // chat.js main path
+
+// Reproduce the renderer's exact use of the segmenter over a sequence of prefixes, asserting the invariant at each one.
+function simulate(text, prefixLengths, renderFn = render) {
+  let committed = 0; // count of leading characters already finalized
+  let finalizedHtml = ''; // concatenated HTML of every finalized delta so far
+  for (const len of prefixLengths) {
+    const prefix = text.slice(0, len);
+    const next = splitFinalized(prefix, renderFn, committed); // finalized length reported for this prefix
+
+    assert.ok(
+      next >= committed && next <= prefix.length,
+      `committed must stay monotonic and in range (${committed} -> ${next} at length ${len})`,
+    );
+    if (next > committed) {
+      // The renderer renders each finalized delta once and never touches it again.
+      finalizedHtml += renderFn(prefix.slice(committed, next));
+      committed = next;
+    }
+
+    const got = normalizeRender(finalizedHtml + renderFn(prefix.slice(committed))); // frozen HTML + freshly rendered live tail
+    const want = normalizeRender(renderFn(prefix)); // one full render of the same prefix
+    assert.equal(got, want, `invariant broke at prefix length ${len} of ${JSON.stringify(text)}`);
+  }
+}
+
+const everyPrefix = (t) => Array.from({ length: t.length + 1 }, (_, i) => i); // every prefix length 0..t.length, empty prefix included
+function chunkAtWhitespace(t) { // prefix lengths that end right after a whitespace character, plus the full length
+  const lens = [];
+  for (let i = 1; i <= t.length; i++) {
+    if (i === t.length || /\s/.test(t[i - 1])) lens.push(i); // t[i - 1] is the last character of the length-i prefix
+  }
+  return lens.length ? lens : [t.length]; // lens is empty only when t is the empty string
+}
+
+const RENDERERS = [ // [label, render function] pairs; the whole corpus is run through each one
+  ['mdToHtml', render],
+  ['mdToHtml∘squashOutsideCode (live-reply path)', renderLiveReply],
+  ['processWithThinking∘squashOutsideCode (main path)', renderMain],
+];
+
+for (const [rname, renderFn] of RENDERERS) {
+  for (const [name, text] of CORPUS) {
+    test(`invariant — ${rname} — char-by-char — ${name}`, () => { // every prefix length, empty prefix included
+      simulate(text, everyPrefix(text), renderFn);
+    });
+    test(`invariant — ${rname} — whitespace-chunked — ${name}`, () => { // coarser steps that end on whitespace
+      simulate(text, chunkAtWhitespace(text), renderFn);
+    });
+  }
+}
+
+// These samples carry <think>/<thinking> blocks (the corpus above is think-free), so
+// they specifically exercise the self-verifying local check refusing to finalize
+// inside or across a think block that processWithThinking floats to the top.
+const THINKING_CORPUS = [
+  ['leading think then answer', '<think>Let me reason about it.</think>\n\nThe answer is 42.'],
+  ['think with internal blank lines', '<think>Step one.\n\nStep two.\n\nStep three.</think>\n\nDone — the result follows.'],
+  ['think then several paragraphs', '<thinking>analyzing the request</thinking>\n\nFirst point made here.\n\nSecond point made here.\n\nThird and final point.'],
+  ['think then code block', '<think>I should show code.</think>\n\nHere:\n\n```python\nprint("hi")\n```\n\nThat is the snippet.'],
+];
+for (const [name, text] of THINKING_CORPUS) {
+  test(`invariant (processWithThinking) — char-by-char — ${name}`, () => { // main-path renderer only, char-by-char only
+    simulate(text, everyPrefix(text), renderMain);
+  });
+}
+
+// A final-output check: streamed char-by-char to completion, the finalized HTML
+// plus the remaining tail must equal a single full render (bare mdToHtml only).
+test('streamed-to-completion output equals full render for whole corpus', () => {
+  for (const [name, text] of CORPUS) {
+    let committed = 0;
+    let html = '';
+    for (let len = 1; len <= text.length; len++) {
+      const next = splitFinalized(text.slice(0, len), render, committed);
+      if (next > committed) {
+        html += render(text.slice(committed, next));
+        committed = next;
+      }
+    }
+    html += render(text.slice(committed)); // flush whatever is still un-finalized once the stream ends
+    assert.equal(normalizeRender(html), normalizeRender(render(text)), `final mismatch for ${name}`);
+  }
+});
@@ -0,0 +1,66 @@
+// Loads the real browser markdown renderer (static/js/markdown.js) under Node by
+// mocking the minimal browser globals it touches and stubbing its sibling imports.
+// This mirrors the loader in tests/test_markdown_rendering_js.py so the streaming
+// tests exercise the exact same renderer the browser runs.
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const REPO = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..'); // two directories above this file; static/js paths resolve against it
+
+export async function loadMarkdown() { // resolves to the module namespace of a patched copy of static/js/markdown.js
+  globalThis.window = { location: { origin: 'http://localhost' }, katex: null }; // minimal window stub, installed globally
+  globalThis.document = {
+    readyState: 'loading',
+    addEventListener() {},
+    createElement(tag) {
+      if (tag !== 'template') throw new Error(`unsupported element: ${tag}`); // only <template> is stubbed; any other tag throws
+      return {
+        _html: '',
+        content: { querySelectorAll() { return []; } }, // stub content never reports any matching elements
+        set innerHTML(v) { this._html = v; },
+        get innerHTML() { return this._html; }, // reads back exactly the string that was assigned
+      };
+    },
+  };
+  globalThis.MutationObserver = class { observe() {} }; // no-op observer
+
+  let src = fs.readFileSync(path.join(REPO, 'static/js/markdown.js'), 'utf8'); // NOTE(review): the replace() calls below never check that their pattern matched
+  src = src.replace(/import uiModule from ['"]\.\/ui\.js['"];/, ''); // drop the ui.js import; uiModule.esc is re-bound below
+  src = src.replace(
+    /import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
+    () => `function splitTableRow(row){return (row||'').replace(/^\\s*\\|/,'').replace(/\\|\\s*$/,'').split('|').map((c)=>c.trim());}`, // inline stand-in; a function replacement is inserted literally (no $-pattern expansion)
+  );
+  const emoji = fs
+    .readFileSync(path.join(REPO, 'static/js/emojiShortcodes.js'), 'utf8')
+    .replace(/^export default .*$/m, '') // no g flag: removes only the first "export default ..." line
+    .replace(/export const /g, 'const ')
+    .replace(/export function /g, 'function '); // strip export keywords so the source can be pasted inline
+  src = src.replace(
+    /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
+    () => emoji, // splice the de-exported emoji source in place of its import
+  );
+  src = src.replace(
+    /var escapeHtml = uiModule\.esc;/,
+    () =>
+      `var escapeHtml = (v) => String(v ?? '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');`, // standalone escaper replacing the uiModule.esc binding
+  );
+  const url = 'data:text/javascript;base64,' + Buffer.from(src).toString('base64');
+  return import(url); // dynamic import of the patched source through a data: URL
+}
+
+// Canonicalize rendered HTML so two renders that produce the SAME DOM compare
+// equal. Collapses only newline-bearing whitespace BETWEEN tags (`>\n\n<` ->
+// `><`): it is insignificant in rendered HTML, and incremental finalization
+// legitimately emits `\n\n` between two blocks where a single full render emits
+// `\n`. Code whitespace is safe because code is HTML-escaped, so significant
+// newlines live inside <code> as text (never between a `>` and a `<`). Inline
+// single spaces between tags are left alone. Structural differences (two <ul> vs
+// one, <ol> vs <ul>) survive normalization and still fail, as they must.
+// Mermaid ids embed Date.now(), so `mermaid-N-N` (and `thinking-N-N`) ids collapse to `mermaid-X` / `thinking-X`.
+export function normalizeRender(html) {
+  return String(html) // coerce first, so the string methods below always apply
+    .replace(/>\s*\n\s*</g, '><')
+    .trim() // leading/trailing whitespace of the whole fragment is not compared
+    .replace(/(mermaid|thinking)-\d+-\d+/g, '$1-X');
+}
@@ -0,0 +1,65 @@
+// Tests for the pure streaming-markdown segmenter.
+//
+// The segmenter's one job: given the full accumulated markdown text so far,
+// report how many leading characters are SAFE to finalize — i.e. freeze and
+// never re-render. "Safe" means: rendering the finalized prefix and the live
+// tail separately produces the same DOM as rendering the whole text at once.
+//
+// Invariant under test everywhere:  render(text[0:n]) + render(text[n:]) === render(text)
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+
+const md = await loadMarkdown(); // module namespace of static/js/markdown.js as loaded by the harness
+const render = (t) => md.mdToHtml(t);
+const splitOk = (text, n) => // true when rendering text[0:n] and text[n:] separately matches one full render after normalization
+  normalizeRender(render(text.slice(0, n)) + render(text.slice(n))) === normalizeRender(render(text));
+
+test('harness loads the real renderer', () => {
+  assert.match(render('hi'), /<p>hi<\/p>/); // smoke check on the loaded renderer's paragraph output
+});
+
+test('nothing is finalized while a single block is still streaming', () => {
+  assert.equal(splitFinalized('an incomplete paragra', render), 0); // third (committed) argument is omitted here and in the tests below
+});
+
+test('finalizes the first of two blank-line-separated paragraphs', () => {
+  const text = 'para one\n\npara two';
+  const n = splitFinalized(text, render);
+  assert.equal(n, 'para one\n\n'.length); // split lands right after the blank line
+  assert.ok(splitOk(text, n), 'split must be render-equivalent');
+});
+
+test('never finalizes the last (still-growing) block', () => {
+  // The trailing paragraph could still gain more characters, so it stays live.
+  const text = 'done\n\nstill going';
+  const n = splitFinalized(text, render);
+  assert.ok(n <= 'done\n\n'.length); // at most the first paragraph and its blank line
+  assert.ok(splitOk(text, n));
+});
+
+test('a closed code fence is finalized immediately, even as the last block', () => {
+  // This is the original flicker scenario: a completed code block must freeze
+  // so its hover buttons stop being recreated on every later token.
+  const text = 'Here:\n\n```python\nprint(1)\n```';
+  const n = splitFinalized(text, render);
+  assert.ok(n >= text.length - 1, `expected the whole closed fence finalized, got ${n} of ${text.length}`); // tolerates a split one character short of the end
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT finalize across an OPEN code fence', () => {
+  const text = 'intro\n\n```python\nprint(1)\nprint(2)';
+  const n = splitFinalized(text, render);
+  // "intro" may finalize, but nothing inside the still-open fence may.
+  assert.ok(n <= 'intro\n\n'.length, `must not finalize into an open fence, got ${n}`);
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT split a loose list (blank line between items is not a boundary)', () => {
+  const text = '- a\n\n- b\n\nafter';
+  const n = splitFinalized(text, render);
+  assert.ok(splitOk(text, n), 'a wrong split here would turn one <ul> into two');
+  // The list must not be cut in the middle: either nothing or the whole list.
+  assert.ok(n === 0 || n >= '- a\n\n- b\n\n'.length, `loose list was cut at ${n}`);
+});