From e812a292331f6430d0fff2618656eb2fde52acdc Mon Sep 17 00:00:00 2001 From: Ashvin <76151462+ashvinctrl@users.noreply.github.com> Date: Mon, 22 Jun 2026 20:53:55 +0530 Subject: [PATCH] fix(markdown): preserve URLs inside inline code spans (#4681) Inline backtick spans were converted to <code> only at the end of mdToHtml, after the bare-URL autolink and <a>/allowed-HTML passes. A URL inside inline code is preceded by a space, so the autolink wrapped it in an <a> tag and swapped it for an ___ALLOWED_HTML_ placeholder, corrupting commands like `irm http://127.0.0.1:3000/x`. Extract inline code into placeholders before the link passes, mirroring the existing fenced-code-block handling, and restore them last so placeholders carried inside restored blocks resolve. Escape the code at extraction time since it now bypasses the global escape pass. --- static/js/markdown.js | 28 +++++++++++++++++++++------ tests/test_markdown_rendering_js.py | 30 +++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/static/js/markdown.js b/static/js/markdown.js index 0dde5ae3c..f2bb7f85f 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -483,6 +483,7 @@ export function processWithThinking(text) { export function mdToHtml(src, opts) { const allowedHtmlBlocks = []; const codeBlocks = []; + const inlineCodeBlocks = []; const mermaidBlocks = []; let s = (src ?? ''); @@ -521,6 +522,19 @@ export function mdToHtml(src, opts) { return placeholder; }); + // Extract inline code spans before the link/autolink/HTML passes, mirroring + // the fenced-block handling above. A URL inside `inline code` (e.g. + // `irm http://127.0.0.1:3000/x`) is preceded by a space, so the bare-URL + // autolink matches it, wraps it in an <a> tag, and swaps that for an + // ___ALLOWED_HTML_ placeholder — corrupting the command. The old inline-code + // pass ran after those passes, too late to protect it. 
+ s = s.replace(/`([^`]+?)`/g, (match, code) => { + if (code.startsWith('___CODE_BLOCK_') || code.startsWith('___MERMAID_BLOCK_')) return match; + const placeholder = `___INLINE_CODE_${inlineCodeBlocks.length}___`; + inlineCodeBlocks.push(`${escapeHtml(code)}`); + return placeholder; + }); + // Repair common ways the agent mangles the entity-anchor convention // (`[Name](#kind-)`). Models reliably get the single-link case // right but slip into other formats when listing many in a table. @@ -678,12 +692,6 @@ export function mdToHtml(src, opts) { return html; }); - // Inline code (but not placeholders) - s = s.replace(/`([^`]+?)`/g, (match, code) => { - if (code.startsWith('___CODE_BLOCK_') || code.startsWith('___ALLOWED_HTML_')) return match; - return `${code}`; - }); - // Horizontal rules (must come before bold/italic to avoid * conflicts) s = s.replace(/^(?:---|\*\*\*|___)\s*$/gm, '
'); @@ -756,6 +764,14 @@ export function mdToHtml(src, opts) { s = s.replace(`___CODE_BLOCK_${index}___`, block); }); + // Restore inline code spans last, so placeholders carried inside restored + //
/allowed-HTML blocks are resolved too. The function replacer keeps the + // escaped code literal — e.g. a shell snippet like `echo $1` is not treated + // as a regex back-reference. + inlineCodeBlocks.forEach((block, index) => { + s = s.replace(`___INLINE_CODE_${index}___`, () => block); + }); + return _useSvgEmoji() ? svgifyEmoji(s, opts) : s; } diff --git a/tests/test_markdown_rendering_js.py b/tests/test_markdown_rendering_js.py index 70c7d3b81..e0f493eff 100644 --- a/tests/test_markdown_rendering_js.py +++ b/tests/test_markdown_rendering_js.py @@ -170,6 +170,36 @@ def test_extract_thinking_blocks_handles_thought_tag(node_available): assert result["content"] == "Final answer." +def test_url_inside_inline_code_is_not_autolinked(node_available): + # A URL inside a backtick span is preceded by a space, so the bare-URL + # autolink used to wrap it in an <a> tag (then swap it for an + # ___ALLOWED_HTML_ placeholder), corrupting the command shown to the user. + html = _run_markdown_case("Run `$j = irm http://127.0.0.1:3000/x` to fetch.") + + assert "$j = irm http://127.0.0.1:3000/x" in html + assert "___ALLOWED_HTML_" not in html + assert "irm
" in html + assert 'href="https://example.com/page"' in html + + +def test_inline_code_content_is_html_escaped(node_available): + # Inline code is now extracted before the global escape pass, so it must be + # escaped at extraction time (matching the fenced-code-block handling). + html = _run_markdown_case("Render `$1 & 'q'` literally.") + + assert "<b>$1 & 'q'</b>" in html + assert "" not in html + + def test_dotted_python_import_paths_are_not_autolinked(node_available): html = _run_markdown_case( "from imblearn.combine import SMOTETomek\n"