fix(markdown): preserve URLs inside inline code spans (#4681)

Inline backtick spans were converted to <code> only at the end of
mdToHtml, after the bare-URL autolink and <a>/allowed-HTML passes. A URL
inside inline code is typically preceded by a space, so the autolink pass
wrapped it in an <a> tag, which was then swapped for an ___ALLOWED_HTML_
placeholder, corrupting commands like `irm http://127.0.0.1:3000/x`.

Extract inline code into placeholders before the link passes, mirroring
the existing fenced-code-block handling, and restore them last so
placeholders carried inside restored <a> blocks resolve. Escape the code
at extraction time since it now bypasses the global escape pass.
This commit is contained in:
Ashvin
2026-06-22 20:53:55 +05:30
committed by GitHub
parent ca4973c41f
commit e812a29233
2 changed files with 52 additions and 6 deletions
+22 -6
View File
@@ -483,6 +483,7 @@ export function processWithThinking(text) {
export function mdToHtml(src, opts) {
const allowedHtmlBlocks = [];
const codeBlocks = [];
const inlineCodeBlocks = [];
const mermaidBlocks = [];
let s = (src ?? '');
@@ -521,6 +522,19 @@ export function mdToHtml(src, opts) {
return placeholder;
});
// Extract inline code spans before the link/autolink/HTML passes, mirroring
// the fenced-block handling above. A URL inside `inline code` (e.g.
// `irm http://127.0.0.1:3000/x`) is preceded by a space, so the bare-URL
// autolink matches it, wraps it in an <a> tag, and swaps that for an
// ___ALLOWED_HTML_ placeholder — corrupting the command. The old inline-code
// pass ran after those passes, too late to protect it.
s = s.replace(/`([^`]+?)`/g, (match, code) => {
if (code.startsWith('___CODE_BLOCK_') || code.startsWith('___MERMAID_BLOCK_')) return match;
const placeholder = `___INLINE_CODE_${inlineCodeBlocks.length}___`;
inlineCodeBlocks.push(`<code>${escapeHtml(code)}</code>`);
return placeholder;
});
// Repair common ways the agent mangles the entity-anchor convention
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
// right but slip into other formats when listing many in a table.
@@ -678,12 +692,6 @@ export function mdToHtml(src, opts) {
return html;
});
// Inline code (but not placeholders)
s = s.replace(/`([^`]+?)`/g, (match, code) => {
if (code.startsWith('___CODE_BLOCK_') || code.startsWith('___ALLOWED_HTML_')) return match;
return `<code>${code}</code>`;
});
// Horizontal rules (must come before bold/italic to avoid * conflicts)
s = s.replace(/^(?:---|\*\*\*|___)\s*$/gm, '<hr>');
@@ -756,6 +764,14 @@ export function mdToHtml(src, opts) {
s = s.replace(`___CODE_BLOCK_${index}___`, block);
});
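// Restore inline code spans last, so placeholders carried inside restored
// <a>/allowed-HTML blocks are resolved too. The function replacer inserts the
// escaped code verbatim: with a plain string replacement, `$$`, `$&`, `` $` ``
// and `$'` are interpreted as substitution patterns, so a shell snippet such
// as `echo $$` (or any `$&`, which HTML-escaping turns into `$&amp;`) would be
// corrupted on restore.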
inlineCodeBlocks.forEach((block, index) => {
s = s.replace(`___INLINE_CODE_${index}___`, () => block);
});
return _useSvgEmoji() ? svgifyEmoji(s, opts) : s;
}