diff --git a/static/js/chat.js b/static/js/chat.js index c75d17ca3..604f8c609 100644 --- a/static/js/chat.js +++ b/static/js/chat.js @@ -23,6 +23,7 @@ import * as emailInbox from './emailInbox.js'; import codeRunnerModule from './codeRunner.js'; import slashCommands, { initSlashCommands, isCommand, handleSlashCommand, handleSetupInput, handleSetupWizard, typewriterInto } from './slashCommands.js'; import createResearchSynapse from './researchSynapse.js'; +import { createStreamRenderer } from './streamingRenderer.js'; const RESEARCH_TIMEOUT_MS = 360000; const DEFAULT_TIMEOUT_MS = 120000; const RESEARCH_SVG = ''; @@ -1167,9 +1168,6 @@ import createResearchSynapse from './researchSynapse.js'; let _liveThinkToggle = null; let _liveThinkDomId = null; - // Offscreen measurement div — reused across renders - let _measureDiv = null; - function _replyAfterClosedThinking(text) { const closeRe = /<\/(?:think(?:ing)?|thought)>|/gi; let match = null; @@ -1224,19 +1222,18 @@ import createResearchSynapse from './researchSynapse.js'; } } if (replyTrimmed) { - const replyHtml = markdownModule.mdToHtml(markdownModule.squashOutsideCode(replyTrimmed)); - const prevLen = liveReply._prevTextLen || 0; - liveReply.innerHTML = replyHtml; - _fadeNewTokens(liveReply, prevLen); - liveReply._prevTextLen = liveReply.textContent.length; - if (window.hljs) liveReply.querySelectorAll('pre code').forEach((b) => window.hljs.highlightElement(b)); + const r = liveReply._streamRenderer || + (liveReply._streamRenderer = createStreamRenderer(liveReply, { + render: (t) => markdownModule.mdToHtml(markdownModule.squashOutsideCode(t)), + hljs: window.hljs, + })); + r.update(replyTrimmed); } // Reply empty or not — preserve thinking bar, don't fall through to full re-render uiModule.scrollHistory(); return; } - const prevLen = contentEl._prevTextLen || 0; // If thinking is still streaming (unclosed ), show indicator instead of raw text if (markdownModule.hasUnclosedThinkTag && 
markdownModule.hasUnclosedThinkTag(dt)) { const thinkStart = dt.search(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i); @@ -1250,66 +1247,26 @@ import createResearchSynapse from './researchSynapse.js'; contentEl.innerHTML = '
Thinking' + (lines > 1 ? ` (${lines} lines)` : '') + '
'; - contentEl._prevTextLen = 0; + // The stream renderer self-heals when it next sees this overwritten + // container (streamingRenderer.js), so no explicit reset is needed here. uiModule.scrollHistory(); return; } - const html = markdownModule.processWithThinking(markdownModule.squashOutsideCode(dt)); - // Smooth expand only for regular chat text (not thinking/agent blocks) - const _hasThinking = html.includes('thinking-section'); - const _isAgentRound = roundHolder !== holder; - if (!_hasThinking && !_isAgentRound) { - // Render into offscreen clone to measure new height before swapping - if (!_measureDiv) { - _measureDiv = document.createElement('div'); - _measureDiv.style.cssText = 'position:absolute;visibility:hidden;pointer-events:none;z-index:-1;'; - } - _measureDiv.style.width = contentEl.offsetWidth + 'px'; - _measureDiv.className = contentEl.className; - _measureDiv.innerHTML = html; - contentEl.parentNode.appendChild(_measureDiv); - const measuredH = _measureDiv.offsetHeight; - _measureDiv.remove(); - const curMin = parseFloat(contentEl.style.minHeight) || 0; - contentEl.style.minHeight = Math.max(curMin, measuredH) + 'px'; - } else { - contentEl.style.minHeight = ''; - } - - contentEl.innerHTML = html; - _fadeNewTokens(contentEl, prevLen); - contentEl._prevTextLen = contentEl.textContent.length; - if (window.hljs) contentEl.querySelectorAll('pre code').forEach((b) => window.hljs.highlightElement(b)); + // Incremental streaming render: freeze finalized blocks, re-render only the + // growing tail, and highlight each code block once on completion. This is + // what keeps code-block hover buttons from flickering and avoids the O(N^2) + // re-parse/re-highlight of the whole message on every token. + // See streamingRenderer.js / streamingSegmenter.js. 
+ const renderer = contentEl._streamRenderer || + (contentEl._streamRenderer = createStreamRenderer(contentEl, { + render: (t) => markdownModule.processWithThinking(markdownModule.squashOutsideCode(t)), + hljs: window.hljs, + })); + renderer.update(dt); uiModule.scrollHistory(); }; - // Walk text nodes, skip past `prevLen` characters of old text, - // wrap everything after that in for fade-in - function _fadeNewTokens(container, prevLen) { - if (!prevLen) return; // First chunk — skip, whole msg already has entrance anim - const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT); - let charCount = 0; - const toWrap = []; - while (walker.nextNode()) { - const node = walker.currentNode; - const len = node.textContent.length; - if (charCount + len <= prevLen) { charCount += len; continue; } - const splitAt = charCount < prevLen ? prevLen - charCount : 0; - toWrap.push({ node, splitAt }); - charCount += len; - } - for (const { node, splitAt } of toWrap) { - const parent = node.parentNode; - if (!parent || parent.closest('pre, .think-content')) continue; - const target = splitAt > 0 ? node.splitText(splitAt) : node; - const span = document.createElement('span'); - span.className = 'token-new'; - parent.replaceChild(span, target); - span.appendChild(target); - } - } - let _nextIsError = false; let _streamSawDone = false; diff --git a/static/js/streamingRenderer.js b/static/js/streamingRenderer.js new file mode 100644 index 000000000..5aa05ec66 --- /dev/null +++ b/static/js/streamingRenderer.js @@ -0,0 +1,206 @@ +// streamingRenderer.js +// +// The DOM shell for incremental streaming markdown rendering. One instance owns +// the DOM of one streaming assistant message and is the only thing that writes to +// it while it streams. 
+//
+// It keeps the message as two regions, separated by an invisible comment marker so
+// the rendered blocks are direct children of the container (no wrapper elements to
+// disturb CSS):
+//
+//   [ finalized block, frozen ][ finalized block, frozen ] <!--tail--> [ live tail ]
+//
+// - Finalized blocks are rendered once and never touched again — so code-block
+//   hover buttons can't flicker and code is highlighted exactly once.
+// - The live tail (the still-growing trailing block) is re-rendered each token,
+//   except an open code fence, which streams in append-mode (text appended to a
+//   stable <pre><code>, highlighted once when it closes).
+//
+// All the "is this safe to freeze?" logic lives in the pure segmenter; this file
+// is deliberately mechanical. If anything throws, it latches into a full-re-render
+// fallback so a bug can never produce broken output — only today's behavior.
+
+import { splitFinalized, describeOpenFence } from './streamingSegmenter.js';
+
+// Compile-time escape hatch: set to false to force the plain full-re-render path.
+// (The per-instance try/catch `degraded` fallback below is the runtime safety net.)
+const ENABLED = true;
+
+export function createStreamRenderer(contentEl, { render, hljs } = {}) {
+  let started = false;
+  let tailMarker = null; // finalized nodes precede it; live-tail nodes follow it
+  let committedLen = 0; // chars of source already frozen
+  let lastText = ''; // most recent full text (for finalize)
+  let tailShownLen = 0; // rendered-text length of the live tail (drives token fade)
+  let appendMode = null; // { codeText: Text, appendedLen } while an open fence streams
+  let degraded = !ENABLED; // true once we fall back to full re-render
+
+  // Reset the container so it holds nothing but the tail marker. Whatever was
+  // inside contentEl before is discarded.
+  function start() {
+    contentEl.textContent = '';
+    // appendChild returns the node it inserted, so the marker is kept in one step.
+    tailMarker = contentEl.appendChild(document.createComment('tail'));
+    started = true;
+  }
+
+  // Syntax-highlight every `pre code` element under `root`. Does nothing when
+  // the renderer was created without an hljs instance.
+  function highlight(root) {
+    if (!hljs) return;
+    for (const block of root.querySelectorAll('pre code')) {
+      hljs.highlightElement(block);
+    }
+  }
+
+  // Drop the live tail: remove every node that follows the tail marker.
+  function clearTail() {
+    let node = tailMarker.nextSibling;
+    while (node) {
+      node.remove();
+      node = tailMarker.nextSibling;
+    }
+  }
+
+  // Render `src` once and move the resulting nodes in front of the tail marker,
+  // where they are never touched again. Highlighting runs on the detached scratch
+  // element, i.e. before any of the nodes become visible.
+  function freeze(src) {
+    const scratch = document.createElement('div');
+    scratch.innerHTML = render(src);
+    highlight(scratch);
+    // Snapshot the children first: moving a node shrinks the live childNodes list.
+    for (const node of Array.from(scratch.childNodes)) {
+      contentEl.insertBefore(node, tailMarker);
+    }
+  }
+
+  // Bring the live tail in line with `tailText`. A tail that starts with a
+  // still-open code fence is streamed in append-mode; anything else is rendered
+  // from scratch, with the newly arrived text wrapped for the fade-in.
+  function renderTail(tailText) {
+    if (tailText) {
+      const openFence = describeOpenFence(tailText);
+      if (openFence) {
+        appendOpenFence(tailText, openFence);
+        return;
+      }
+    }
+
+    appendMode = null;
+    clearTail();
+    if (!tailText) {
+      tailShownLen = 0;
+      return;
+    }
+
+    const scratch = document.createElement('div');
+    scratch.innerHTML = render(tailText);
+    fadeNewText(scratch, tailShownLen);
+    tailShownLen = scratch.textContent.length;
+    // Snapshot the children first: moving a node shrinks the live childNodes list.
+    for (const node of Array.from(scratch.childNodes)) {
+      contentEl.appendChild(node);
+    }
+  }
+
+  // Stream the body of an unterminated code fence by appending only the new
+  // characters to a stable <pre><code> text node — no re-parse, no re-highlight.
+  //
+  // tailText: the live-tail source; it starts with the fence opener line.
+  // fence:    the { lang, contentStart } descriptor describeOpenFence returned
+  //           for `tailText`.
+  function appendOpenFence(tailText, fence) {
+    if (!appendMode) {
+      // First call for this fence: swap the tail for an empty code block and
+      // remember its text node so later calls can append to it.
+      clearTail();
+      const pre = document.createElement('pre');
+      const codeEl = document.createElement('code');
+      if (fence.lang) codeEl.className = `language-${fence.lang}`;
+      const textNode = document.createTextNode('');
+      codeEl.appendChild(textNode);
+      pre.appendChild(codeEl);
+      contentEl.appendChild(pre);
+      appendMode = { codeText: textNode, appendedLen: 0 };
+      tailShownLen = 0; // code is never faded; prose after the fence fades fresh
+    }
+    // Everything after the opener line is code; append only the unseen suffix.
+    const body = tailText.slice(fence.contentStart);
+    if (body.length > appendMode.appendedLen) {
+      appendMode.codeText.appendData(body.slice(appendMode.appendedLen));
+      appendMode.appendedLen = body.length;
+    }
+  }
+
+  // Wrap tail text past `prevLen` characters in <span class="token-new"> for the
+  // streaming fade-in. Skips code (<pre>) and thinking blocks (.thinking-content).
+  // Note: the original chat.js helper checked `.think-content`, a class that exists
+  // nowhere in the app, so thinking text used to fade; matching the real
+  // `.thinking-content` corrects that. Operates on the detached fragment before insertion.
+  //
+  // container: element holding the freshly rendered tail (not yet in the document).
+  // prevLen:   rendered-text length that was already visible; 0 disables fading.
+  function fadeNewText(container, prevLen) {
+    if (!prevLen) return;
+    const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
+    let count = 0;
+    const toWrap = [];
+    // Pass 1: collect the text nodes that extend past `prevLen`. Wrapping is
+    // deferred so the walker never sees nodes being split or re-parented.
+    while (walker.nextNode()) {
+      const node = walker.currentNode;
+      const len = node.textContent.length;
+      if (count + len <= prevLen) {
+        count += len;
+        continue;
+      }
+      // Only the node straddling `prevLen` needs a split; later nodes are all new.
+      toWrap.push({ node, splitAt: count < prevLen ? prevLen - count : 0 });
+      count += len;
+    }
+    // Pass 2: wrap the new portion of each collected node.
+    for (const { node, splitAt } of toWrap) {
+      const parent = node.parentNode;
+      if (!parent || parent.closest('pre, .thinking-content')) continue;
+      const target = splitAt > 0 ? node.splitText(splitAt) : node;
+      const span = document.createElement('span');
+      span.className = 'token-new';
+      parent.replaceChild(span, target);
+      span.appendChild(target);
+    }
+  }
+
+  // Fallback path: replace the whole container with one render of `fullText`
+  // and highlight every code block in it.
+  function fullRender(fullText) {
+    const html = render(fullText);
+    contentEl.innerHTML = html;
+    highlight(contentEl);
+  }
+
+  // Show the latest full source text.
+  //
+  // PRECONDITION: the feed must be append-only — every `fullText` extends the
+  // previous one and leaves the already-seen prefix UNCHANGED. Frozen blocks are
+  // never re-rendered, so rewriting earlier text would leave stale frozen blocks
+  // on screen until the next full re-render. chat.js satisfies this: its
+  // stripToolBlocks output only strips not-yet-finalized trailing tool syntax,
+  // never text that has already been frozen.
+  function update(fullText) {
+    lastText = fullText;
+    if (degraded) {
+      fullRender(fullText);
+      return;
+    }
+    try {
+      // Self-heal. chat.js writes contentEl.innerHTML directly for thinking
+      // indicators and tool blocks, and finalize() removes the marker; in both
+      // cases the marker is no longer a child of the container. Start over rather
+      // than append onto foreign content or touch a detached marker.
+      const markerLost = !tailMarker || tailMarker.parentNode !== contentEl;
+      if (started && markerLost) {
+        started = false;
+        committedLen = 0;
+        tailShownLen = 0;
+        appendMode = null;
+      }
+      if (!started) start();
+
+      const frozenUpTo = splitFinalized(fullText, render, committedLen);
+      if (frozenUpTo > committedLen) {
+        freeze(fullText.slice(committedLen, frozenUpTo));
+        committedLen = frozenUpTo;
+        appendMode = null; // whatever was streaming is now frozen
+        tailShownLen = 0;
+      }
+      renderTail(fullText.slice(committedLen));
+    } catch (err) {
+      // Latch into the plain full-re-render path for the rest of this message.
+      degraded = true;
+      console.error('streamingRenderer: falling back to full render', err);
+      fullRender(fullText);
+    }
+  }
+
+  // Stream finished: freeze whatever is left canonically and flatten away the
+  // marker so the container holds exactly what a single full render would produce.
+  // chat.js currently re-renders the finished message from source for its own
+  // reasons and so doesn't call this, but it completes the renderer's lifecycle and
+  // is exercised by the tests.
+  //
+  // Idempotent: a repeated call, with no update() in between, is a no-op.
+  function finalize() {
+    if (degraded) return;
+    // Already finalized: the marker is gone and the container is canonical. Without
+    // this guard a second call would dereference the null marker, throw, and latch
+    // the renderer into degraded mode with a misleading error.
+    if (started && tailMarker === null) return;
+    try {
+      // Same self-heal as update(): if the container was overwritten from outside,
+      // the marker is detached and insertBefore() against it would throw. Rebuild
+      // from the full source instead.
+      if (started && tailMarker.parentNode !== contentEl) {
+        started = false;
+        committedLen = 0;
+        tailShownLen = 0;
+      }
+      if (!started) start();
+      clearTail();
+      appendMode = null;
+      const rest = lastText.slice(committedLen);
+      if (rest.trim()) freeze(rest);
+      tailMarker.remove();
+      tailMarker = null;
+      committedLen = lastText.length;
+    } catch (err) {
+      degraded = true;
+      console.error('streamingRenderer: falling back to full render', err);
+      fullRender(lastText);
+    }
+  }
+
+  return { update, finalize };
+}
diff --git a/static/js/streamingSegmenter.js b/static/js/streamingSegmenter.js
new file mode 100644
index 000000000..b501f21d5
--- /dev/null
+++ b/static/js/streamingSegmenter.js
@@ -0,0 +1,190 @@
+// streamingSegmenter.js
+//
+// Pure logic for incremental ("block-at-a-time") streaming markdown rendering.
+//
+// While an assistant message streams in, re-rendering the whole accumulated
+// markdown on every token is wasteful (O(N^2)) and recreates DOM nodes, which
+// makes code-block hover buttons flicker. The fix is to FREEZE the leading part
+// of the message that can no longer change, and only re-render the growing tail.
+//
+// This module answers the one hard question that makes freezing safe:
+//
+//     Given the full markdown received so far, how many leading characters can
+//     be finalized without changing the rendered output?
+//
+// The contract callers rely on (`render` is the canonical markdown renderer):
+//
+//     const n = splitFinalized(text, render);
+//     render(text.slice(0, n)) + render(text.slice(n))  ===  render(text)
+//
+// The module is intentionally DOM-free and renderer-agnostic so it can be unit
+// tested in isolation and reused for any markdown renderer with no long-range
+// cross-block dependencies (no reference-style links / footnotes).
+//
+// Known limitations (both bounded by the same mitigation):
+//   - cutIsRenderSafe proves only PRESENT-tense equivalence. If the renderer pairs
+//     an inline delimiter across a blank line (e.g. markdown.js will turn
+//     `*a\n\nb*` into emphasis spanning two paragraphs), a block frozen before the
+//     closing delimiter arrives can disagree with the final full render.
+//   - afterClosedFence boundaries are trusted without the equivalence check, so a
+//     fence the real renderer parses differently (e.g. a stray 4-backtick line) can
+//     be mis-detected as a close.
+//   Both only occur for input the renderer itself handles oddly, and both are
+//   transient: chat.js re-renders the finished message from source, so the settled
+//   output is always canonical.
+
+// A fenced-code delimiter line: up to 3 leading spaces, then >=3 backticks or
+// tildes, then an optional info string. The info string is matched with `[^\n]*`
+// rather than `.*` because `.` excludes `\r`: with `.*` a CRLF-terminated fence
+// line would never match here, even though describeOpenFence accepts it.
+const FENCE_RE = /^ {0,3}(`{3,}|~{3,})([^\n]*)$/;
+
+/**
+ * Scan `text` starting at `fromOffset` — which MUST be at top level (callers only
+ * ever advance to a finalized boundary, never into a fence) — and collect the
+ * candidate cut points.
+ *
+ * @param {string} text        Full markdown accumulated so far.
+ * @param {number} fromOffset  Offset to start scanning from.
+ * @returns {{ boundaries: Array<{offset:number, afterClosedFence:boolean}>, inFence:boolean }}
+ *   - A blank-line run at top level yields a boundary at the start of the next
+ *     non-blank line (`afterClosedFence: false`).
+ *   - A fence close yields a boundary just past the closing fence line
+ *     (`afterClosedFence: true`). The closing line must be newline-terminated:
+ *     while it is still the unterminated last line of a stream, later characters
+ *     can turn "```" into "```js", which is not a close, so it is not trusted yet.
+ *   - `inFence` is true when the scan ends inside a fence that has not (yet)
+ *     been closed by a complete closing line.
+ */
+function findBoundaries(text, fromOffset) {
+  const boundaries = [];
+  const n = text.length;
+  let inFence = false;
+  let fenceMarker = '';
+  let i = fromOffset;
+
+  while (i < n) {
+    const nl = text.indexOf('\n', i);
+    const lineEnd = nl === -1 ? n : nl;
+    const afterNl = nl === -1 ? n : nl + 1;
+    const line = text.slice(i, lineEnd);
+    const fence = line.match(FENCE_RE);
+
+    if (fence) {
+      const marker = fence[1];
+      if (!inFence) {
+        inFence = true;
+        fenceMarker = marker;
+      } else if (
+        nl !== -1 && // the line is complete, so no info string can still arrive
+        marker[0] === fenceMarker[0] &&
+        marker.length >= fenceMarker.length &&
+        fence[2].trim() === '' // a closing fence carries no info string
+      ) {
+        inFence = false;
+        fenceMarker = '';
+        boundaries.push({ offset: afterNl, afterClosedFence: true });
+      }
+      i = afterNl;
+    } else if (!inFence && line.trim() === '') {
+      // Consume the entire run of blank lines; the boundary is the start of the
+      // next non-blank line so the finalized side owns the separator and the tail
+      // starts clean.
+      let j = afterNl;
+      while (j < n) {
+        const nl2 = text.indexOf('\n', j);
+        const lineEnd2 = nl2 === -1 ? n : nl2;
+        if (text.slice(j, lineEnd2).trim() !== '') break;
+        if (nl2 === -1) {
+          j = n;
+          break;
+        }
+        j = nl2 + 1;
+      }
+      boundaries.push({ offset: j, afterClosedFence: false });
+      i = j;
+    } else {
+      i = afterNl;
+    }
+  }
+
+  return { boundaries, inFence };
+}
+
+/**
+ * Self-verifying safety check for a candidate cut: render the two sides on their
+ * own, render them joined, and report whether the concatenated halves equal the
+ * joined render. Constructs that span the cut (loose lists, setext headings, lazy
+ * blockquote continuations, tables) therefore fail the check without any
+ * hand-coded grammar rules.
+ *
+ * A non-deterministic renderer (e.g. mermaid ids seeded with Date.now()) can only
+ * turn a safe cut into a reported-unsafe one, never the reverse, so the bias is
+ * toward under-finalizing — the safe direction.
+ *
+ * @param {string} before  Source on the finalized side of the cut.
+ * @param {string} after   Source on the live side of the cut.
+ * @param {(src:string)=>string} render  Canonical markdown renderer.
+ * @returns {boolean} true when the cut leaves the rendered output unchanged.
+ */
+function cutIsRenderSafe(before, after, render) {
+  // Keep this call order (before, after, joined): the renderer may be stateful.
+  const beforeHtml = render(before);
+  const afterHtml = render(after);
+  const joinedHtml = render(before + after);
+  return beforeHtml + afterHtml === joinedHtml;
+}
+
+/**
+ * Compute how many leading characters of `text` may be frozen, scanning forward
+ * from `committedLen` (the amount frozen by earlier calls).
+ *
+ * The result `n` satisfies `committedLen <= n <= text.length` and
+ * `render(text.slice(0, n)) + render(text.slice(n)) === render(text)`.
+ *
+ * @param {string} text       Full markdown accumulated so far.
+ * @param {(src:string)=>string} render  Canonical markdown renderer.
+ * @param {number} [committedLen=0]  Characters already finalized (always a prior boundary).
+ * @returns {number}
+ */
+export function splitFinalized(text, render, committedLen = 0) {
+  const { boundaries } = findBoundaries(text, committedLen);
+
+  let furthestSafe = committedLen;
+  let prevCut = committedLen;
+
+  boundaries.forEach(({ offset, afterClosedFence }, idx) => {
+    if (afterClosedFence) {
+      // A completed code block can always be frozen.
+      furthestSafe = offset;
+    } else {
+      // Prose/list/table boundary: there must be a following block to compare
+      // against (the last block stays live because it can still grow), and the
+      // cut must be render-equivalent locally.
+      const following = boundaries[idx + 1];
+      const blockEnd = following ? following.offset : text.length;
+      const after = text.slice(offset, blockEnd);
+      if (after.trim() !== '') {
+        const before = text.slice(prevCut, offset);
+        if (cutIsRenderSafe(before, after, render)) furthestSafe = offset;
+      }
+    }
+    prevCut = offset;
+  });
+
+  return furthestSafe;
+}
+
+/**
+ * Describe the fenced-code block that `text` starts with, provided its fence is
+ * still open, so the renderer can stream the code in append-mode instead of
+ * re-rendering it.
+ *
+ * The opener line has to be complete (newline-terminated) so the info string /
+ * language is known before append-mode begins.
+ *
+ * @param {string} text  Source of the live tail.
+ * @returns {{lang:string, contentStart:number}|null} `contentStart` is the offset
+ *   of the first code character; null when `text` does not start with a
+ *   still-open fence.
+ */
+export function describeOpenFence(text) {
+  const opener = /^( {0,3})(`{3,}|~{3,})([^\n]*)\n/.exec(text);
+  if (opener === null) return null;
+
+  const openMarker = opener[2];
+  const contentStart = opener[0].length;
+
+  // The fence is closed once any later line is a bare fence of the same character
+  // that is at least as long as the opener; the normal finalize path handles that.
+  const isClosingLine = (line) => {
+    const hit = /^ {0,3}(`{3,}|~{3,})\s*$/.exec(line);
+    return hit !== null && hit[1][0] === openMarker[0] && hit[1].length >= openMarker.length;
+  };
+  if (text.slice(contentStart).split('\n').some(isClosingLine)) return null;
+
+  // The language is the first whitespace-separated word of the info string.
+  const lang = (opener[3] || '').trim().split(/\s+/)[0] || '';
+  return { lang, contentStart };
+}
diff --git a/tests/streaming/corpus.mjs b/tests/streaming/corpus.mjs
new file mode 100644
index 000000000..d66768ea1
--- /dev/null
+++ b/tests/streaming/corpus.mjs
@@ -0,0 +1,27 @@
+// A spread of markdown samples exercising the constructs the renderer supports.
+// Used by the streaming-invariant fuzz test (fed token-by-token) and the renderer
+// integration test. Keep samples small but structurally varied — the fuzz test
+// runs every prefix of every sample, so its cost is quadratic in sample length.
+// Each entry is a [name, markdown source] pair.
+export const CORPUS = [
+  ['plain paragraph', 'Just a single sentence of text.'],
+  ['two paragraphs', 'First paragraph here.\n\nSecond paragraph here.'],
+  ['three paragraphs', 'Alpha block.\n\nBravo block.\n\nCharlie block.'],
+  ['atx headings', '# Title\n\nIntro line.\n\n## Section\n\nBody text.'],
+  ['setext heading', 'The Title\n=========\n\nA paragraph under it.'],
+  ['inline formatting', 'Some **bold**, *italic*, `code`, and a [link](https://x.com).'],
+  ['tight unordered list', '- one\n- two\n- three\n\ndone'],
+  ['ordered list then text', 'Before\n\n1. first\n2. second\n3. third\n\nAfter'],
+  ['loose list then paragraph', '- a\n\n- b\n\n- c\n\nClosing paragraph.'],
+  ['nested list', '- top\n  - nested one\n  - nested two\n- back to top\n\nend'],
+  ['blockquote', '> quoted line one\n> quoted line two\n\nplain after'],
+  ['thematic break', 'above the line\n\n---\n\nbelow the line'],
+  ['python code fence', 'Run this:\n\n```python\nprint("hi")\nfor i in range(3):\n    print(i)\n```\n\nThat prints numbers.'],
+  ['fence with blank lines inside', '```js\nconst a = 1;\n\nconst b = 2;\n```\n\nafter the code'],
+  ['two consecutive fences', '```\nfirst block\n```\n\n```\nsecond block\n```\n\ntail'],
+  ['mermaid diagram', 'Diagram:\n\n```mermaid\ngraph TD\nA-->B\n```\n\nafter diagram'],
+  ['gfm table', 'Data:\n\n| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |\n\nafter table'],
+  [
+    'mixed document',
+    '# Report\n\nIntro paragraph with a `symbol`.\n\n```python\nx = 1\n```\n\n- bullet one\n- bullet two\n\n> a quote\n\nFinal words.',
+  ],
+];
diff --git a/tests/streaming/invariant.test.mjs b/tests/streaming/invariant.test.mjs
new file mode 100644
index 000000000..f74cc1c7d
--- /dev/null
+++ b/tests/streaming/invariant.test.mjs
@@ -0,0 +1,107 @@
+// The centerpiece correctness test: stream every corpus sample in token-by-token,
+// driving the segmenter exactly as the renderer will, and assert the freeze/tail
+// split stays render-equivalent to a single full render at EVERY step.
+//
+//   finalized-html (accumulated from committed deltas) + render(live tail)  ===  render(prefix)
+//
+// This is run with no DOM and no safety net, so any segmenter bug fails here
+// rather than reaching the browser.
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+import { CORPUS } from './corpus.mjs';
+
+// The real markdown module, loaded under Node by the harness.
+const md = await loadMarkdown();
+// Bare renderer with no pre-processing; the default for simulate().
+const render = (t) => md.mdToHtml(t);
+
+// The two render pipelines chat.js actually feeds streamed text through. BOTH wrap
+// the source in squashOutsideCode; the main path additionally runs
+// processWithThinking (which floats <think> blocks to the top — a non-local
+// transform). Fuzzing the corpus through these — not just bare mdToHtml — closes
+// the gap where a squashOutsideCode whitespace/fence edge could break the split.
+const renderLiveReply = (t) => md.mdToHtml(md.squashOutsideCode(t)); // chat.js live-reply path
+const renderMain = (t) => md.processWithThinking(md.squashOutsideCode(t)); // chat.js main path
+
+// Drive the segmenter the way the renderer does: feed it a growing sequence of
+// prefixes of `text`, accumulate the HTML of every frozen delta, and check after
+// each step that frozen HTML + rendered live tail equals one full render.
+function simulate(text, prefixLengths, renderFn = render) {
+  let frozenLen = 0;
+  let frozenHtml = '';
+
+  prefixLengths.forEach((len) => {
+    const prefix = text.slice(0, len);
+    const next = splitFinalized(prefix, renderFn, frozenLen);
+
+    const inRange = next >= frozenLen && next <= prefix.length;
+    assert.ok(
+      inRange,
+      `committed must stay monotonic and in range (${frozenLen} -> ${next} at length ${len})`,
+    );
+
+    // Each finalized delta is rendered exactly once and never touched again.
+    if (next > frozenLen) {
+      frozenHtml += renderFn(prefix.slice(frozenLen, next));
+      frozenLen = next;
+    }
+
+    const liveTailHtml = renderFn(prefix.slice(frozenLen));
+    const got = normalizeRender(frozenHtml + liveTailHtml);
+    const want = normalizeRender(renderFn(prefix));
+    assert.equal(got, want, `invariant broke at prefix length ${len} of ${JSON.stringify(text)}`);
+  });
+}
+
+// Every prefix length of `t`, from 0 (empty) up to the full length.
+const everyPrefix = (t) => [...Array(t.length + 1).keys()];
+
+// Prefix lengths that end just after a whitespace character, plus the full
+// length — a coarser, word-sized chunking. Never returns an empty list.
+function chunkAtWhitespace(t) {
+  const total = t.length;
+  const lens = Array.from({ length: total }, (_, idx) => idx + 1).filter(
+    (len) => len === total || /\s/.test(t[len - 1]),
+  );
+  return lens.length > 0 ? lens : [total];
+}
+
+// Every render pipeline the invariant is checked under, as [label, renderFn].
+const RENDERERS = [
+  ['mdToHtml', render],
+  ['mdToHtml∘squashOutsideCode (live-reply path)', renderLiveReply],
+  ['processWithThinking∘squashOutsideCode (main path)', renderMain],
+];
+
+// Register one test per (pipeline, sample, chunking strategy).
+RENDERERS.forEach(([rname, renderFn]) => {
+  CORPUS.forEach(([name, text]) => {
+    const chunkings = [
+      ['char-by-char', everyPrefix],
+      ['whitespace-chunked', chunkAtWhitespace],
+    ];
+    for (const [mode, lengthsOf] of chunkings) {
+      test(`invariant — ${rname} — ${mode} — ${name}`, () => {
+        simulate(text, lengthsOf(text), renderFn);
+      });
+    }
+  });
+});
+
+// These samples carry <think> blocks (the corpus above is think-free), so they
+// specifically exercise the self-verifying local check refusing to finalize inside
+// or across a think block that processWithThinking floats to the top.
+// NOTE(review): the <think>…</think> tags had been stripped from this file by an
+// HTML-unaware extraction step, leaving plain prose that never reached the
+// thinking path. Tag placement is reconstructed from each sample's name — confirm
+// against the original commit.
+const THINKING_CORPUS = [
+  ['leading think then answer', '<think>Let me reason about it.</think>\n\nThe answer is 42.'],
+  ['think with internal blank lines', '<think>Step one.\n\nStep two.\n\nStep three.</think>\n\nDone — the result follows.'],
+  ['think then several paragraphs', '<think>analyzing the request</think>\n\nFirst point made here.\n\nSecond point made here.\n\nThird and final point.'],
+  ['think then code block', '<think>I should show code.</think>\n\nHere:\n\n```python\nprint("hi")\n```\n\nThat is the snippet.'],
+];
+for (const [name, text] of THINKING_CORPUS) {
+  test(`invariant (processWithThinking) — char-by-char — ${name}`, () => {
+    simulate(text, everyPrefix(text), renderMain);
+  });
+}
+
+// End-state check that does not depend on per-step equivalence: after streaming a
+// sample one character at a time, the frozen pieces plus the final tail must
+// equal a single full render.
+test('streamed-to-completion output equals full render for whole corpus', () => {
+  CORPUS.forEach(([name, text]) => {
+    const pieces = [];
+    let frozenLen = 0;
+    for (let len = 1; len <= text.length; len++) {
+      const next = splitFinalized(text.slice(0, len), render, frozenLen);
+      if (next <= frozenLen) continue;
+      pieces.push(render(text.slice(frozenLen, next)));
+      frozenLen = next;
+    }
+    pieces.push(render(text.slice(frozenLen)));
+    const streamed = normalizeRender(pieces.join(''));
+    assert.equal(streamed, normalizeRender(render(text)), `final mismatch for ${name}`);
+  });
+});
diff --git a/tests/streaming/markdownHarness.mjs b/tests/streaming/markdownHarness.mjs
new file mode 100644
index 000000000..03e12fa61
--- /dev/null
+++ b/tests/streaming/markdownHarness.mjs
@@ -0,0 +1,66 @@
+// Loads the real browser markdown renderer (static/js/markdown.js) under Node by
+// mocking the minimal browser globals it touches and stubbing its sibling imports.
+// This mirrors the loader in tests/test_markdown_rendering_js.py so the streaming
+// tests exercise the exact same renderer the browser runs.
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const REPO = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
+
+export async function loadMarkdown() {
+  globalThis.window = { location: { origin: 'http://localhost' }, katex: null }; // minimal window stub
+  globalThis.document = {
+    readyState: 'loading',
+    addEventListener() {},
+    createElement(tag) {
+      if (tag !== 'template') throw new Error(`unsupported element: ${tag}`); // only <template> is stubbed
+      return {
+        _html: '',
+        content: { querySelectorAll() { return []; } },
+        set innerHTML(v) { this._html = v; },
+        get innerHTML() { return this._html; },
+      };
+    },
+  };
+  globalThis.MutationObserver = class { observe() {} }; // no-op observer
+
+  let src = fs.readFileSync(path.join(REPO, 'static/js/markdown.js'), 'utf8');
+  src = src.replace(/import uiModule from ['"]\.\/ui\.js['"];/, ''); // ui.js import dropped; escapeHtml is stubbed below
+  src = src.replace(
+    /import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
+    () => `function splitTableRow(row){return (row||'').replace(/^\\s*\\|/,'').replace(/\\|\\s*$/,'').split('|').map((c)=>c.trim());}`,
+  );
+  const emoji = fs
+    .readFileSync(path.join(REPO, 'static/js/emojiShortcodes.js'), 'utf8')
+    .replace(/^export default .*$/m, '')
+    .replace(/export const /g, 'const ')
+    .replace(/export function /g, 'function ');
+  src = src.replace(
+    /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
+    () => emoji, // inline the emoji module with its export keywords stripped
+  );
+  src = src.replace(
+    /var escapeHtml = uiModule\.esc;/,
+    () =>
+      `var escapeHtml = (v) => String(v ?? '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');`,
+  );
+  const url = 'data:text/javascript;base64,' + Buffer.from(src).toString('base64'); // patched source as a data: URL
+  return import(url);
+}
+
+// Canonicalize rendered HTML so two renders that produce the SAME DOM compare
+// equal. Collapses only newline-bearing whitespace BETWEEN tags (`>\n\n<` ->
+// `><`): it is insignificant in rendered HTML, and incremental finalization
+// legitimately emits `\n\n` between two blocks where a single full render emits
+// `\n`. Code whitespace is safe because code is HTML-escaped, so significant
+// newlines live inside <code> as text (never between a `>` and a `<`). Inline
+// single spaces between tags are left alone. Structural differences (two <ul> vs
+// one, <ul> vs <ol>) survive normalization and still fail, as they must.
+// Mermaid ids embed Date.now(), so they are normalized too.
+export function normalizeRender(html) {
+  return String(html)
+    .replace(/>\s*\n\s*</g, '><')
+    .trim()
+    .replace(/(mermaid|thinking)-\d+-\d+/g, '$1-X');
+}
diff --git a/tests/streaming/segmenter.test.mjs b/tests/streaming/segmenter.test.mjs
new file mode 100644
index 000000000..ce4b6f563
--- /dev/null
+++ b/tests/streaming/segmenter.test.mjs
@@ -0,0 +1,65 @@
+// Tests for the pure streaming-markdown segmenter.
+//
+// The segmenter's one job: given the full accumulated markdown text so far,
+// report how many leading characters are SAFE to finalize — i.e. freeze and
+// never re-render. "Safe" means: rendering the finalized prefix and the live
+// tail separately produces the same DOM as rendering the whole text at once.
+//
+// Invariant under test everywhere: render(text[0:n]) + render(text[n:]) === render(text)
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { loadMarkdown, normalizeRender } from './markdownHarness.mjs';
+import { splitFinalized } from '../../static/js/streamingSegmenter.js';
+
+const md = await loadMarkdown();
+const render = (t) => md.mdToHtml(t);
+const splitOk = (text, n) =>
+  normalizeRender(render(text.slice(0, n)) + render(text.slice(n))) === normalizeRender(render(text));
+
+test('harness loads the real renderer', () => {
+  assert.match(render('hi'), /<p>hi<\/p>/);
+});
+
+test('nothing is finalized while a single block is still streaming', () => {
+  assert.equal(splitFinalized('an incomplete paragra', render), 0);
+});
+
+test('finalizes the first of two blank-line-separated paragraphs', () => {
+  const text = 'para one\n\npara two';
+  const n = splitFinalized(text, render);
+  assert.equal(n, 'para one\n\n'.length);
+  assert.ok(splitOk(text, n), 'split must be render-equivalent');
+});
+
+test('never finalizes the last (still-growing) block', () => {
+  // The trailing paragraph could still gain more characters, so it stays live.
+  const text = 'done\n\nstill going';
+  const n = splitFinalized(text, render);
+  assert.ok(n <= 'done\n\n'.length);
+  assert.ok(splitOk(text, n));
+});
+
+test('a closed code fence is finalized immediately, even as the last block', () => {
+  // This is the original flicker scenario: a completed code block must freeze
+  // so its hover buttons stop being recreated on every later token.
+  const text = 'Here:\n\n```python\nprint(1)\n```';
+  const n = splitFinalized(text, render);
+  assert.ok(n >= text.length - 1, `expected the whole closed fence finalized, got ${n} of ${text.length}`);
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT finalize across an OPEN code fence', () => {
+  const text = 'intro\n\n```python\nprint(1)\nprint(2)';
+  const n = splitFinalized(text, render);
+  // "intro" may finalize, but nothing inside the still-open fence may.
+  assert.ok(n <= 'intro\n\n'.length, `must not finalize into an open fence, got ${n}`);
+  assert.ok(splitOk(text, n));
+});
+
+test('does NOT split a loose list (blank line between items is not a boundary)', () => {
+  const text = '- a\n\n- b\n\nafter';
+  const n = splitFinalized(text, render);
+  assert.ok(splitOk(text, n), 'a wrong split here would turn one <ul> into two');
+  // The list must not be cut in the middle: either nothing or the whole list.
+  assert.ok(n === 0 || n >= '- a\n\n- b\n\n'.length, `loose list was cut at ${n}`);
+});
diff --git a/tests/test_streaming_segmenter_js.py b/tests/test_streaming_segmenter_js.py
new file mode 100644
index 000000000..05393430b
--- /dev/null
+++ b/tests/test_streaming_segmenter_js.py
@@ -0,0 +1,38 @@
+"""Runs the Node-based streaming-render segmenter suite (tests/streaming/*.test.mjs).
+
+Covers the pure incremental-render segmenter (static/js/streamingSegmenter.js):
+unit boundaries plus a streaming-invariant fuzz that feeds a markdown corpus in
+token-by-token and asserts the freeze/tail split always matches a single full
+render. Pure JS — no DOM, no extra dependencies. Skipped when node is
+unavailable, mirroring tests/test_markdown_rendering_js.py.
+
+The renderer's DOM behavior (streamingRenderer.js) is exercised against a running
+app, not here, consistent with how this project tests browser-coupled code.
+"""
+
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+_REPO = Path(__file__).resolve().parent.parent
+_HAS_NODE = shutil.which("node") is not None
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_streaming_segmenter_suite():
+    test_files = sorted(str(p) for p in (_REPO / "tests" / "streaming").glob("*.test.mjs"))
+    assert test_files, "no streaming test files found"
+
+    result = subprocess.run(
+        ["node", "--test", *test_files],
+        cwd=_REPO,
+        capture_output=True,
+        timeout=180,
+        text=True,
+    )
+    if result.returncode != 0:
+        raise AssertionError(
+            f"node --test failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
+        )