mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
fix(markdown): keep allowed-html placeholders out of fenced code (#1788)
This commit is contained in:
+37
-34
@@ -372,10 +372,46 @@ export function processWithThinking(text) {
|
|||||||
* Convert markdown to HTML
|
* Convert markdown to HTML
|
||||||
*/
|
*/
|
||||||
export function mdToHtml(src) {
|
export function mdToHtml(src) {
|
||||||
// CRITICAL: Extract allowed HTML blocks first (details/summary)
|
|
||||||
const allowedHtmlBlocks = [];
|
const allowedHtmlBlocks = [];
|
||||||
|
const codeBlocks = [];
|
||||||
|
const mermaidBlocks = [];
|
||||||
let s = (src ?? '');
|
let s = (src ?? '');
|
||||||
|
|
||||||
|
// Extract fenced code blocks before any markdown/HTML preservation passes.
|
||||||
|
// Otherwise placeholders from the allowed-HTML sanitizer (e.g.
|
||||||
|
// ___ALLOWED_HTML_0___) can leak into quoted HTML/JS samples, because the
|
||||||
|
// placeholder gets captured as literal code content and never restored inside
|
||||||
|
// the final <pre><code> block.
|
||||||
|
s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
|
||||||
|
const cleaned = code
|
||||||
|
.replace(/\r\n/g, '\n')
|
||||||
|
.replace(/[ \t]+$/gm, '')
|
||||||
|
.replace(/^\s*\n+/, '')
|
||||||
|
.replace(/\n+\s*$/g, '');
|
||||||
|
|
||||||
|
// Mermaid diagrams: render as diagram instead of code block
|
||||||
|
if (lang && lang.toLowerCase() === 'mermaid') {
|
||||||
|
const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
|
||||||
|
const raw = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
||||||
|
const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
|
||||||
|
mermaidBlocks.push(`<div class="mermaid-container"><pre class="mermaid" id="${mermaidId}">${escapeHtml(raw)}</pre></div>`);
|
||||||
|
return placeholder;
|
||||||
|
}
|
||||||
|
|
||||||
|
const escaped = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
||||||
|
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
|
||||||
|
|
||||||
|
const langClass = lang ? ` class="language-${lang}"` : '';
|
||||||
|
const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
|
||||||
|
const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
|
||||||
|
? `<button type="button" class="run-code" data-code="${escapeHtml(escaped)}" data-lang="${lang}" title="Run code"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg></button>`
|
||||||
|
: '';
|
||||||
|
const editBtn = `<button type="button" class="edit-code" title="Edit"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>`;
|
||||||
|
codeBlocks.push(`<pre><code${langClass} data-lang="${lang || ''}">${escapeHtml(escaped)}</code>${runBtn}${editBtn}<button type="button" class="copy-code" data-code="${escapeHtml(escaped)}"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button></pre>`);
|
||||||
|
|
||||||
|
return placeholder;
|
||||||
|
});
|
||||||
|
|
||||||
// Repair common ways the agent mangles the entity-anchor convention
|
// Repair common ways the agent mangles the entity-anchor convention
|
||||||
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
|
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
|
||||||
// right but slip into other formats when listing many in a table.
|
// right but slip into other formats when listing many in a table.
|
||||||
@@ -450,39 +486,6 @@ export function mdToHtml(src) {
|
|||||||
|
|
||||||
s = s.replace(/\n{3,}/g, '\n\n');
|
s = s.replace(/\n{3,}/g, '\n\n');
|
||||||
|
|
||||||
// CRITICAL: Extract code blocks and replace with placeholders
|
|
||||||
const codeBlocks = [];
|
|
||||||
const mermaidBlocks = [];
|
|
||||||
s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
|
|
||||||
const cleaned = code
|
|
||||||
.replace(/\r\n/g, '\n')
|
|
||||||
.replace(/[ \t]+$/gm, '')
|
|
||||||
.replace(/^\s*\n+/, '')
|
|
||||||
.replace(/\n+\s*$/g, '');
|
|
||||||
|
|
||||||
// Mermaid diagrams: render as diagram instead of code block
|
|
||||||
if (lang && lang.toLowerCase() === 'mermaid') {
|
|
||||||
const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
|
|
||||||
const raw = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
|
||||||
const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
|
|
||||||
mermaidBlocks.push(`<div class="mermaid-container"><pre class="mermaid" id="${mermaidId}">${escapeHtml(raw)}</pre></div>`);
|
|
||||||
return placeholder;
|
|
||||||
}
|
|
||||||
|
|
||||||
const escaped = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
|
||||||
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
|
|
||||||
|
|
||||||
const langClass = lang ? ` class="language-${lang}"` : '';
|
|
||||||
const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
|
|
||||||
const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
|
|
||||||
? `<button type="button" class="run-code" data-code="${escapeHtml(escaped)}" data-lang="${lang}" title="Run code"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg></button>`
|
|
||||||
: '';
|
|
||||||
const editBtn = `<button type="button" class="edit-code" title="Edit"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>`;
|
|
||||||
codeBlocks.push(`<pre><code${langClass} data-lang="${lang || ''}">${escapeHtml(escaped)}</code>${runBtn}${editBtn}<button type="button" class="copy-code" data-code="${escapeHtml(escaped)}"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button></pre>`);
|
|
||||||
|
|
||||||
return placeholder;
|
|
||||||
});
|
|
||||||
|
|
||||||
// KaTeX math rendering (after code blocks are extracted, so math in code is safe)
|
// KaTeX math rendering (after code blocks are extracted, so math in code is safe)
|
||||||
const mathBlocks = [];
|
const mathBlocks = [];
|
||||||
if (window.katex) {
|
if (window.katex) {
|
||||||
|
|||||||
@@ -0,0 +1,65 @@
|
|||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import fs from 'node:fs';
|
||||||
|
import path from 'node:path';
|
||||||
|
import vm from 'node:vm';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
|
||||||
|
// Regression test for "keep allowed-html placeholders out of fenced code" (#1788).
//
// Loads static/js/markdown.js as text, rewrites its ES-module syntax so it can
// run as a plain script inside a node:vm sandbox with minimal browser stubs,
// then renders a block-quoted fenced HTML sample and checks that no
// ___ALLOWED_HTML_N___ placeholder leaks into the output.

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const markdownPath = path.join(__dirname, '..', 'static', 'js', 'markdown.js');
let src = fs.readFileSync(markdownPath, 'utf8');

// Swap the ./ui.js import for an inline stub that only provides `esc`.
// The replacement values must be the HTML entities: with bare characters the
// last call would read `replace(/\"/g, """)`, which is a syntax error in the
// injected source, and the other three calls would be no-ops.
src = src.replace(
  /import uiModule from '\.\/ui\.js';/,
  'const uiModule = { esc: (s) => String(s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/\\"/g, "&quot;") };'
);

// Swap the table-row helper import for a simplified stand-in that splits on
// "|" and drops blank cells.
src = src.replace(
  /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
  'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");'
);

// vm.runInContext evaluates classic scripts, so strip the export syntax.
src = src.replace(/export function /g, 'function ');
src = src.replace(/export const /g, 'const ');
src = src.replace(/export default markdownModule;?/g, '');

// At script top level `this` is the sandbox global; publish mdToHtml there so
// the test can call it after evaluation.
src += '\nthis.__mdToHtml = mdToHtml;';

// Inert stand-in for the browser MutationObserver global.
class MutationObserver {
  observe() {}
  disconnect() {}
}

// Browser globals supplied to the sandbox. katex and mermaid are null so the
// `window.katex` guard in mdToHtml is falsy.
const sandbox = {
  console,
  URL,
  MutationObserver,
  localStorage: { getItem() { return '[]'; }, setItem() {} },
  document: {
    body: { classList: { contains() { return true; } } },
    addEventListener() {},
    querySelectorAll() { return []; },
    getElementById() { return null; },
    contains() { return true; },
  },
  window: {
    location: { origin: 'http://localhost' },
    katex: null,
    mermaid: null,
  },
};

vm.createContext(sandbox);
vm.runInContext(src, sandbox, { filename: markdownPath });

// A fenced ```html block inside a blockquote, containing a <script> sample.
const input = [
  '> ```html',
  '> <script>',
  '> newWindow.addEventListener(\'click\', () => {',
  '> desktop.appendChild(newWindow);',
  '> });',
  '> </script>',
  '> ```',
].join('\n');

const html = sandbox.__mdToHtml(input);

// No allowed-HTML placeholder may survive into the rendered output...
assert.equal(html.includes('___ALLOWED_HTML_'), false, html);
// ...and the sample's code content must still be present.
assert.equal(html.includes('appendChild'), true, html);

console.log('ok');
|
||||||
Reference in New Issue
Block a user