fix(chat): stop code-block button flicker during streaming (#3023)

Render streamed markdown incrementally (freeze finalized blocks,
re-render only the growing tail) instead of re-rendering the whole
message every token, which recreated every <pre> and dropped CSS :hover.
This commit is contained in:
Merajul Arefin
2026-06-06 16:08:54 +06:00
committed by GitHub
parent fb9c7cf3da
commit 2e37d72155
8 changed files with 719 additions and 63 deletions
+27
View File
@@ -0,0 +1,27 @@
// A spread of markdown samples exercising the constructs the renderer supports.
// Used by the streaming-invariant fuzz test (fed token-by-token) and the renderer
// integration test. Keep samples small but structurally varied — the fuzz test
// runs every prefix of every sample, so its cost grows quadratically with the
// length of a sample.
//
// Each entry is a [name, markdown] pair. The name only labels the sample; the
// markdown string is the exact text that gets streamed, so every character
// (including the `\n` escapes) is significant.
export const CORPUS = [
// Paragraphs and headings.
['plain paragraph', 'Just a single sentence of text.'],
['two paragraphs', 'First paragraph here.\n\nSecond paragraph here.'],
['three paragraphs', 'Alpha block.\n\nBravo block.\n\nCharlie block.'],
['atx headings', '# Title\n\nIntro line.\n\n## Section\n\nBody text.'],
['setext heading', 'The Title\n=========\n\nA paragraph under it.'],
['inline formatting', 'Some **bold**, *italic*, `code`, and a [link](https://x.com).'],
// Lists, each followed by a trailing block so the list itself can complete.
['tight unordered list', '- one\n- two\n- three\n\ndone'],
['ordered list then text', 'Before\n\n1. first\n2. second\n3. third\n\nAfter'],
['loose list then paragraph', '- a\n\n- b\n\n- c\n\nClosing paragraph.'],
// NOTE(review): the "nested" items below are indented by a single space —
// confirm the renderer really treats that as nesting rather than as siblings.
['nested list', '- top\n - nested one\n - nested two\n- back to top\n\nend'],
// Other block-level constructs.
['blockquote', '> quoted line one\n> quoted line two\n\nplain after'],
['thematic break', 'above the line\n\n---\n\nbelow the line'],
// Fenced code, including a blank line inside a fence and back-to-back fences.
['python code fence', 'Run this:\n\n```python\nprint("hi")\nfor i in range(3):\n print(i)\n```\n\nThat prints numbers.'],
['fence with blank lines inside', '```js\nconst a = 1;\n\nconst b = 2;\n```\n\nafter the code'],
['two consecutive fences', '```\nfirst block\n```\n\n```\nsecond block\n```\n\ntail'],
['mermaid diagram', 'Diagram:\n\n```mermaid\ngraph TD\nA-->B\n```\n\nafter diagram'],
['gfm table', 'Data:\n\n| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |\n\nafter table'],
// One document mixing heading, inline code, fence, list and blockquote.
[
'mixed document',
'# Report\n\nIntro paragraph with a `symbol`.\n\n```python\nx = 1\n```\n\n- bullet one\n- bullet two\n\n> a quote\n\nFinal words.',
],
];
+107
View File
@@ -0,0 +1,107 @@
// The centerpiece correctness test: stream every corpus sample in token-by-token,
// driving the segmenter exactly as the renderer will, and assert the freeze/tail
// split stays render-equivalent to a single full render at EVERY step.
//
// finalized-html (accumulated from committed deltas) + render(live tail) === render(prefix)
//
// This is run with no DOM and no safety net, so any segmenter bug fails here
// rather than reaching the browser.
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
import { splitFinalized } from '../../static/js/streamingSegmenter.js';
import { CORPUS } from './corpus.mjs';
// The real browser renderer, loaded once for the whole test file.
const md = await loadMarkdown();

// Bare markdown -> HTML, with no pre-processing of the source.
const render = (source) => md.mdToHtml(source);

// chat.js pushes streamed text through two pipelines, and both of them first
// pass the source through squashOutsideCode. The main path then goes through
// processWithThinking, which floats <think> blocks to the top and is therefore
// not a local transform. The corpus is fuzzed through these as well as through
// bare mdToHtml, so a whitespace/fence edge case in squashOutsideCode that
// breaks the split is caught here too.

// chat.js live-reply path.
const renderLiveReply = (source) => {
  const squashed = md.squashOutsideCode(source);
  return md.mdToHtml(squashed);
};

// chat.js main path.
const renderMain = (source) => {
  const squashed = md.squashOutsideCode(source);
  return md.processWithThinking(squashed);
};
// Drive the segmenter the way the renderer does, one prefix at a time.
//
// text          - full sample; each step looks at text.slice(0, len)
// prefixLengths - iterable of prefix lengths, in the order they are "streamed"
// renderFn      - markdown -> HTML function used both for the split and for
//                 the comparison (defaults to the bare renderer)
//
// After every step it asserts that the split point is monotonic and in range,
// and that frozen HTML + rendered live tail equals a single full render of the
// prefix once both are normalized.
function simulate(text, prefixLengths, renderFn = render) {
  let frozenUpTo = 0;
  let frozenHtml = '';

  for (const len of prefixLengths) {
    const seen = text.slice(0, len);
    const cut = splitFinalized(seen, renderFn, frozenUpTo);

    const neverMovesBack = cut >= frozenUpTo;
    const staysInsidePrefix = cut <= seen.length;
    assert.ok(
      neverMovesBack && staysInsidePrefix,
      `committed must stay monotonic and in range (${frozenUpTo} -> ${cut} at length ${len})`,
    );

    if (cut > frozenUpTo) {
      // A finalized delta is rendered exactly once and then left untouched.
      const delta = seen.slice(frozenUpTo, cut);
      frozenHtml = frozenHtml + renderFn(delta);
      frozenUpTo = cut;
    }

    const liveTail = seen.slice(frozenUpTo);
    const actual = normalizeRender(frozenHtml + renderFn(liveTail));
    const expected = normalizeRender(renderFn(seen));
    assert.equal(actual, expected, `invariant broke at prefix length ${len} of ${JSON.stringify(text)}`);
  }
}
// Every prefix length of `t`, from 0 (empty prefix) up to and including t.length.
const everyPrefix = (t) => {
  const lengths = [];
  for (let end = 0; end <= t.length; end += 1) {
    lengths.push(end);
  }
  return lengths;
};
// Prefix lengths that end right after a whitespace character, plus the full
// length of `t`. An empty `t` yields [0] so there is always one step to run.
function chunkAtWhitespace(t) {
  const total = t.length;
  const cuts = [];
  let end = 0;
  while (end < total) {
    end += 1;
    const reachedEnd = end === total;
    if (reachedEnd || /\s/.test(t[end - 1])) {
      cuts.push(end);
    }
  }
  if (cuts.length > 0) {
    return cuts;
  }
  return [total];
}
// Render pipelines to fuzz, as [label, render function] pairs.
const RENDERERS = [
  ['mdToHtml', render],
  ['mdToHtml∘squashOutsideCode (live-reply path)', renderLiveReply],
  ['processWithThinking∘squashOutsideCode (main path)', renderMain],
];

// Ways of cutting a sample into streamed prefixes, as [label, function] pairs.
const CHUNKINGS = [
  ['char-by-char', everyPrefix],
  ['whitespace-chunked', chunkAtWhitespace],
];

// One test per renderer x sample x chunking, registered in that nesting order.
RENDERERS.forEach(([rname, renderFn]) => {
  CORPUS.forEach(([name, text]) => {
    CHUNKINGS.forEach(([mode, prefixLengthsOf]) => {
      test(`invariant — ${rname} — ${mode} — ${name}`, () => {
        simulate(text, prefixLengthsOf(text), renderFn);
      });
    });
  });
});
// Samples that contain <think>/<thinking> blocks; the main corpus has none.
// They target the self-verifying local check, which must refuse to finalize
// inside or across a think block that processWithThinking floats to the top.
const THINKING_CORPUS = [
  ['leading think then answer', '<think>Let me reason about it.</think>\n\nThe answer is 42.'],
  ['think with internal blank lines', '<think>Step one.\n\nStep two.\n\nStep three.</think>\n\nDone — the result follows.'],
  ['think then several paragraphs', '<thinking>analyzing the request</thinking>\n\nFirst point made here.\n\nSecond point made here.\n\nThird and final point.'],
  ['think then code block', '<think>I should show code.</think>\n\nHere:\n\n```python\nprint("hi")\n```\n\nThat is the snippet.'],
];

// Each think sample is streamed one character at a time through the main path.
THINKING_CORPUS.forEach(([name, text]) => {
  const title = `invariant (processWithThinking) — char-by-char — ${name}`;
  test(title, () => {
    simulate(text, everyPrefix(text), renderMain);
  });
});
// End-to-end check that does not depend on how the text was chunked: once the
// whole sample has been streamed, the assembled HTML must match one full render.
test('streamed-to-completion output equals full render for whole corpus', () => {
  // Streams `text` one character at a time and returns frozen HTML + final tail.
  const streamToCompletion = (text) => {
    let frozenUpTo = 0;
    let html = '';
    let end = 1;
    while (end <= text.length) {
      const cut = splitFinalized(text.slice(0, end), render, frozenUpTo);
      if (cut > frozenUpTo) {
        html += render(text.slice(frozenUpTo, cut));
        frozenUpTo = cut;
      }
      end += 1;
    }
    return html + render(text.slice(frozenUpTo));
  };

  CORPUS.forEach(([name, text]) => {
    const streamed = normalizeRender(streamToCompletion(text));
    const whole = normalizeRender(render(text));
    assert.equal(streamed, whole, `final mismatch for ${name}`);
  });
});
+66
View File
@@ -0,0 +1,66 @@
// Loads the real browser markdown renderer (static/js/markdown.js) under Node by
// mocking the minimal browser globals it touches and stubbing its sibling imports.
// This mirrors the loader in tests/test_markdown_rendering_js.py so the streaming
// tests exercise the exact same renderer the browser runs.
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Repository root: two directories above the directory that holds this file.
const REPO = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
// Load static/js/markdown.js as an importable module under Node.
//
// Steps: install stub `window`, `document` and `MutationObserver` globals,
// read the renderer source, rewrite the sibling imports handled below so the
// source is self-contained for those names, then import the result from a
// base64 `data:` URL.
//
// Returns the promise produced by the dynamic import (the module namespace).
// Side effect: the stub globals are assigned on globalThis and never restored.
export async function loadMarkdown() {
// Minimal `window`: only location.origin and a null `katex` are provided.
globalThis.window = { location: { origin: 'http://localhost' }, katex: null };
// Minimal `document`: reports readyState 'loading', ignores event listeners,
// and can create <template> elements only.
globalThis.document = {
readyState: 'loading',
addEventListener() {},
createElement(tag) {
// Any tag other than 'template' throws.
if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
// The stub stores innerHTML verbatim; its content never matches a selector
// (querySelectorAll always returns an empty array).
return {
_html: '',
content: { querySelectorAll() { return []; } },
set innerHTML(v) { this._html = v; },
get innerHTML() { return this._html; },
};
},
};
// Observer stub: observe() does nothing.
globalThis.MutationObserver = class { observe() {} };
let src = fs.readFileSync(path.join(REPO, 'static/js/markdown.js'), 'utf8');
// Drop the default import of ./ui.js; the escapeHtml assignment that reads
// uiModule.esc is replaced further down.
// NOTE(review): String.prototype.replace leaves `src` unchanged when a pattern
// does not match, so a reworded import in markdown.js would pass through here
// silently — confirm whether an explicit "pattern matched" check is wanted.
src = src.replace(/import uiModule from ['"]\.\/ui\.js['"];/, '');
// Swap the ./markdown/tableRow.js import for an inline splitTableRow that
// strips one leading and one trailing pipe, splits on '|' and trims each cell.
// The replacement is supplied as a function, so `$` sequences in the inserted
// text are taken literally instead of being read as replacement patterns.
// NOTE(review): presumably a simplified stand-in for the real helper — confirm
// it agrees with tableRow.js for the inputs these tests use.
src = src.replace(
/import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
() => `function splitTableRow(row){return (row||'').replace(/^\\s*\\|/,'').replace(/\\|\\s*$/,'').split('|').map((c)=>c.trim());}`,
);
// Turn emojiShortcodes.js into plain declarations: remove the first
// `export default` line and strip `export` from const/function declarations.
const emoji = fs
.readFileSync(path.join(REPO, 'static/js/emojiShortcodes.js'), 'utf8')
.replace(/^export default .*$/m, '')
.replace(/export const /g, 'const ')
.replace(/export function /g, 'function ');
// Inline that source where markdown.js imported the two emoji helpers.
src = src.replace(
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
() => emoji,
);
// Replace the uiModule-backed escapeHtml with a local one that escapes
// & < > " and '; null and undefined become the empty string.
src = src.replace(
/var escapeHtml = uiModule\.esc;/,
() =>
`var escapeHtml = (v) => String(v ?? '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');`,
);
// Import the rewritten source from a base64-encoded data: URL.
const url = 'data:text/javascript;base64,' + Buffer.from(src).toString('base64');
return import(url);
}
// Reduce rendered HTML to a canonical form so that two renders producing the
// SAME DOM compare equal as strings.
//
// 1. Whitespace runs BETWEEN tags that contain a newline (`>\n\n<`) collapse to
//    `><`. Such whitespace is insignificant in rendered HTML, and incremental
//    finalization legitimately emits `\n\n` between two blocks where one full
//    render emits `\n`. Code is HTML-escaped, so its significant newlines sit
//    inside <code> as text and never directly between a `>` and a `<`. Plain
//    single spaces between inline tags are left untouched.
// 2. Leading and trailing whitespace is trimmed.
// 3. Generated `mermaid-<n>-<n>` / `thinking-<n>-<n>` ids become `mermaid-X` /
//    `thinking-X`; mermaid ids embed Date.now(), so they differ between renders.
//
// Structural differences (two <ul> vs one, <ol> vs <ul>) are not normalized
// away and still make a comparison fail, as they must.
export function normalizeRender(html) {
  const interTagNewlines = />\s*\n\s*</g;
  const generatedId = /(mermaid|thinking)-\d+-\d+/g;

  const collapsed = String(html).replace(interTagNewlines, '><');
  const trimmed = collapsed.trim();
  return trimmed.replace(generatedId, '$1-X');
}
+65
View File
@@ -0,0 +1,65 @@
// Tests for the pure streaming-markdown segmenter.
//
// The segmenter's one job: given the full accumulated markdown text so far,
// report how many leading characters are SAFE to finalize — i.e. freeze and
// never re-render. "Safe" means: rendering the finalized prefix and the live
// tail separately produces the same DOM as rendering the whole text at once.
//
// Invariant under test everywhere: render(text[0:n]) + render(text[n:]) === render(text)
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
import { splitFinalized } from '../../static/js/streamingSegmenter.js';
// The real browser renderer, loaded once for the whole test file.
const md = await loadMarkdown();

// Bare markdown -> HTML.
const render = (source) => md.mdToHtml(source);

// True when cutting `text` at offset `n` is render-equivalent: the two halves
// rendered separately normalize to the same HTML as one render of the whole.
const splitOk = (text, n) => {
  const renderedHalves = render(text.slice(0, n)) + render(text.slice(n));
  return normalizeRender(renderedHalves) === normalizeRender(render(text));
};
// Sanity check that the harness produced a working renderer.
test('harness loads the real renderer', () => {
  const html = render('hi');
  assert.match(html, /<p>hi<\/p>/);
});

test('nothing is finalized while a single block is still streaming', () => {
  const cut = splitFinalized('an incomplete paragra', render);
  assert.equal(cut, 0);
});

test('finalizes the first of two blank-line-separated paragraphs', () => {
  const firstBlock = 'para one\n\n';
  const source = `${firstBlock}para two`;
  const cut = splitFinalized(source, render);
  assert.equal(cut, firstBlock.length);
  assert.ok(splitOk(source, cut), 'split must be render-equivalent');
});

test('never finalizes the last (still-growing) block', () => {
  // More characters may still arrive for the trailing paragraph, so it must
  // remain part of the live tail.
  const finishedBlock = 'done\n\n';
  const source = `${finishedBlock}still going`;
  const cut = splitFinalized(source, render);
  assert.ok(cut <= finishedBlock.length);
  assert.ok(splitOk(source, cut));
});

test('a closed code fence is finalized immediately, even as the last block', () => {
  // The original flicker scenario: once a code block is complete it has to be
  // frozen, otherwise its hover buttons are recreated on every later token.
  const source = 'Here:\n\n```python\nprint(1)\n```';
  const total = source.length;
  const cut = splitFinalized(source, render);
  assert.ok(cut >= total - 1, `expected the whole closed fence finalized, got ${cut} of ${total}`);
  assert.ok(splitOk(source, cut));
});

test('does NOT finalize across an OPEN code fence', () => {
  const source = 'intro\n\n```python\nprint(1)\nprint(2)';
  const introLength = 'intro\n\n'.length;
  const cut = splitFinalized(source, render);
  // Finalizing "intro" is fine; finalizing anything inside the open fence is not.
  assert.ok(cut <= introLength, `must not finalize into an open fence, got ${cut}`);
  assert.ok(splitOk(source, cut));
});

test('does NOT split a loose list (blank line between items is not a boundary)', () => {
  const looseList = '- a\n\n- b\n\n';
  const source = `${looseList}after`;
  const cut = splitFinalized(source, render);
  assert.ok(splitOk(source, cut), 'a wrong split here would turn one <ul> into two');
  // Cutting inside the list is forbidden: finalize nothing, or the whole list.
  const finalizedNothing = cut === 0;
  const finalizedWholeList = cut >= looseList.length;
  assert.ok(finalizedNothing || finalizedWholeList, `loose list was cut at ${cut}`);
});