mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 15:45:22 -04:00
fix(markdown): preserve URLs inside inline code spans (#4681)
Inline backtick spans were converted to <code> only at the end of mdToHtml, after the bare-URL autolink and <a>/allowed-HTML passes. A URL inside inline code is usually preceded by a space (e.g. `irm http://127.0.0.1:3000/x`), so the autolink pass matched it, wrapped it in an <a> tag, and swapped that tag for an ___ALLOWED_HTML_ placeholder, corrupting the displayed command. This change extracts inline code into placeholders before the link passes, mirroring the existing fenced-code-block handling, and restores them last so that placeholders carried inside restored <a> blocks are resolved too. The code is HTML-escaped at extraction time because it now bypasses the global escape pass.
This commit is contained in:
+22
-6
@@ -483,6 +483,7 @@ export function processWithThinking(text) {
|
|||||||
export function mdToHtml(src, opts) {
|
export function mdToHtml(src, opts) {
|
||||||
const allowedHtmlBlocks = [];
|
const allowedHtmlBlocks = [];
|
||||||
const codeBlocks = [];
|
const codeBlocks = [];
|
||||||
|
const inlineCodeBlocks = [];
|
||||||
const mermaidBlocks = [];
|
const mermaidBlocks = [];
|
||||||
let s = (src ?? '');
|
let s = (src ?? '');
|
||||||
|
|
||||||
@@ -521,6 +522,19 @@ export function mdToHtml(src, opts) {
|
|||||||
return placeholder;
|
return placeholder;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Extract inline code spans before the link/autolink/HTML passes, mirroring
|
||||||
|
// the fenced-block handling above. A URL inside `inline code` (e.g.
|
||||||
|
// `irm http://127.0.0.1:3000/x`) is preceded by a space, so the bare-URL
|
||||||
|
// autolink matches it, wraps it in an <a> tag, and swaps that for an
|
||||||
|
// ___ALLOWED_HTML_ placeholder — corrupting the command. The old inline-code
|
||||||
|
// pass ran after those passes, too late to protect it.
|
||||||
|
s = s.replace(/`([^`]+?)`/g, (match, code) => {
|
||||||
|
if (code.startsWith('___CODE_BLOCK_') || code.startsWith('___MERMAID_BLOCK_')) return match;
|
||||||
|
const placeholder = `___INLINE_CODE_${inlineCodeBlocks.length}___`;
|
||||||
|
inlineCodeBlocks.push(`<code>${escapeHtml(code)}</code>`);
|
||||||
|
return placeholder;
|
||||||
|
});
|
||||||
|
|
||||||
// Repair common ways the agent mangles the entity-anchor convention
|
// Repair common ways the agent mangles the entity-anchor convention
|
||||||
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
|
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
|
||||||
// right but slip into other formats when listing many in a table.
|
// right but slip into other formats when listing many in a table.
|
||||||
@@ -678,12 +692,6 @@ export function mdToHtml(src, opts) {
|
|||||||
return html;
|
return html;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Inline code (but not placeholders)
|
|
||||||
s = s.replace(/`([^`]+?)`/g, (match, code) => {
|
|
||||||
if (code.startsWith('___CODE_BLOCK_') || code.startsWith('___ALLOWED_HTML_')) return match;
|
|
||||||
return `<code>${code}</code>`;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Horizontal rules (must come before bold/italic to avoid * conflicts)
|
// Horizontal rules (must come before bold/italic to avoid * conflicts)
|
||||||
s = s.replace(/^(?:---|\*\*\*|___)\s*$/gm, '<hr>');
|
s = s.replace(/^(?:---|\*\*\*|___)\s*$/gm, '<hr>');
|
||||||
|
|
||||||
@@ -756,6 +764,14 @@ export function mdToHtml(src, opts) {
|
|||||||
s = s.replace(`___CODE_BLOCK_${index}___`, block);
|
s = s.replace(`___CODE_BLOCK_${index}___`, block);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Restore inline code spans last, so placeholders carried inside restored
|
||||||
|
// <a>/allowed-HTML blocks are resolved too. The function replacer keeps the
|
||||||
|
// escaped code literal — e.g. a shell snippet like `echo $1` is not treated
|
||||||
|
// as a regex back-reference.
|
||||||
|
inlineCodeBlocks.forEach((block, index) => {
|
||||||
|
s = s.replace(`___INLINE_CODE_${index}___`, () => block);
|
||||||
|
});
|
||||||
|
|
||||||
return _useSvgEmoji() ? svgifyEmoji(s, opts) : s;
|
return _useSvgEmoji() ? svgifyEmoji(s, opts) : s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -170,6 +170,36 @@ def test_extract_thinking_blocks_handles_thought_tag(node_available):
|
|||||||
assert result["content"] == "Final answer."
|
assert result["content"] == "Final answer."
|
||||||
|
|
||||||
|
|
||||||
|
def test_url_inside_inline_code_is_not_autolinked(node_available):
|
||||||
|
# A URL inside a backtick span is preceded by a space, so the bare-URL
|
||||||
|
# autolink used to wrap it in an <a> tag (then swap it for an
|
||||||
|
# ___ALLOWED_HTML_ placeholder), corrupting the command shown to the user.
|
||||||
|
html = _run_markdown_case("Run `$j = irm http://127.0.0.1:3000/x` to fetch.")
|
||||||
|
|
||||||
|
assert "<code>$j = irm http://127.0.0.1:3000/x</code>" in html
|
||||||
|
assert "___ALLOWED_HTML_" not in html
|
||||||
|
assert "<a " not in html
|
||||||
|
assert 'href="http://127.0.0.1:3000/x"' not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_url_outside_inline_code_is_still_autolinked(node_available):
|
||||||
|
# Inline code must not disable autolinking for bare URLs elsewhere in the
|
||||||
|
# same line.
|
||||||
|
html = _run_markdown_case("Use `irm` then visit https://example.com/page now.")
|
||||||
|
|
||||||
|
assert "<code>irm</code>" in html
|
||||||
|
assert 'href="https://example.com/page"' in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_inline_code_content_is_html_escaped(node_available):
|
||||||
|
# Inline code is now extracted before the global escape pass, so it must be
|
||||||
|
# escaped at extraction time (matching the fenced-code-block handling).
|
||||||
|
html = _run_markdown_case("Render `<b>$1 & 'q'</b>` literally.")
|
||||||
|
|
||||||
|
assert "<code><b>$1 & 'q'</b></code>" in html
|
||||||
|
assert "<b>" not in html
|
||||||
|
|
||||||
|
|
||||||
def test_dotted_python_import_paths_are_not_autolinked(node_available):
|
def test_dotted_python_import_paths_are_not_autolinked(node_available):
|
||||||
html = _run_markdown_case(
|
html = _run_markdown_case(
|
||||||
"from imblearn.combine import SMOTETomek\n"
|
"from imblearn.combine import SMOTETomek\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user