mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-22 20:55:29 -04:00
e812a29233
Inline backtick spans were converted to <code> only at the end of mdToHtml, after the bare-URL autolink and <a>/allowed-HTML passes. A URL inside inline code is preceded by a space, so the autolink wrapped it in an <a> tag and swapped it for an ___ALLOWED_HTML_ placeholder, corrupting commands like `irm http://127.0.0.1:3000/x`. Extract inline code into placeholders before the link passes, mirroring the existing fenced-code-block handling, and restore them last so placeholders carried inside restored <a> blocks resolve. Escape the code at extraction time since it now bypasses the global escape pass.
218 lines
8.0 KiB
Python
218 lines
8.0 KiB
Python
"""Regression coverage for the browser markdown renderer."""
|
|
|
|
import json
|
|
import shutil
|
|
import subprocess
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
_REPO = Path(__file__).resolve().parent.parent
|
|
_HAS_NODE = shutil.which("node") is not None
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def node_available():
|
|
if not _HAS_NODE:
|
|
pytest.skip("node binary not on PATH")
|
|
|
|
|
|
def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
|
|
script = textwrap.dedent(
|
|
r"""
|
|
import fs from 'node:fs';
|
|
|
|
globalThis.window = { location: { origin: 'http://localhost' }, katex: null };
|
|
globalThis.document = {
|
|
readyState: 'loading',
|
|
addEventListener() {},
|
|
createElement(tag) {
|
|
if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
|
|
return {
|
|
_html: '',
|
|
content: { querySelectorAll() { return []; } },
|
|
set innerHTML(value) { this._html = value; },
|
|
get innerHTML() { return this._html; },
|
|
};
|
|
},
|
|
};
|
|
globalThis.MutationObserver = class { observe() {} };
|
|
|
|
let source = fs.readFileSync('./static/js/markdown.js', 'utf8');
|
|
source = source.replace(
|
|
/import uiModule from ['"]\.\/ui\.js['"];/,
|
|
''
|
|
);
|
|
source = source.replace(
|
|
/import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
|
|
`function splitTableRow(row) {
|
|
return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim());
|
|
}`
|
|
);
|
|
// markdown.js imports the emoji-shortcode helpers relatively (issue #345),
|
|
// which a data: URL module can't resolve. Inline the REAL helpers (minus
|
|
// their export keywords) so the renderer's shortcode pass behaves exactly
|
|
// as it does in the browser.
|
|
const emojiSource = fs.readFileSync('./static/js/emojiShortcodes.js', 'utf8')
|
|
.replace(/^export default .*$/m, '')
|
|
.replace(/export const /g, 'const ')
|
|
.replace(/export function /g, 'function ');
|
|
source = source.replace(
|
|
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
|
|
() => emojiSource
|
|
);
|
|
source = source.replace(
|
|
/var escapeHtml = uiModule\.esc;/,
|
|
`var escapeHtml = (value) => String(value ?? '')
|
|
.replace(/&/g, '&')
|
|
.replace(/</g, '<')
|
|
.replace(/>/g, '>')
|
|
.replace(/"/g, '"')
|
|
.replace(/'/g, ''');`
|
|
);
|
|
|
|
const moduleUrl = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64');
|
|
const mod = await import(moduleUrl);
|
|
const input = JSON.parse(process.argv[1]);
|
|
console.log(JSON.stringify({ html: __RENDER_EXPR__ }));
|
|
"""
|
|
).replace("__RENDER_EXPR__", render_expr)
|
|
result = subprocess.run(
|
|
["node", "--input-type=module", "-e", script, json.dumps(markdown)],
|
|
cwd=_REPO,
|
|
capture_output=True,
|
|
timeout=15,
|
|
text=True,
|
|
)
|
|
if result.returncode != 0:
|
|
raise AssertionError(f"node failed:\nSTDERR:\n{result.stderr}\nSTDOUT:\n{result.stdout}")
|
|
return json.loads(result.stdout.splitlines()[-1])["html"]
|
|
|
|
|
|
def test_ordered_lists_render_as_one_unwrapped_ol(node_available):
|
|
html = _run_markdown_case(
|
|
"Before\n\n"
|
|
"1. **Check against the home page** — that's the visual reference for how things should feel.\n"
|
|
"2. **Open DevTools** and inspect the element — check fonts, colors, and spacing against this guide.\n"
|
|
"3. **Flag it** — note the page, the section, what's wrong, and what CSS rule you suspect.\n"
|
|
"4. **Small fixes** — if you know the fix (e.g. wrong CSS variable, wrong font), go ahead and change it in the CSS Module file.\n"
|
|
"5. **Big changes** — Talk it through before making wide changes across many pages.\n\n"
|
|
"After"
|
|
)
|
|
|
|
assert html.count("<ol>") == 1
|
|
assert html.count("</ol>") == 1
|
|
assert html.count("<li>") == 5
|
|
assert "<ul>" not in html
|
|
assert "<oli>" not in html
|
|
assert "<uli>" not in html
|
|
assert "<p><ol>" not in html
|
|
assert "<p><li>" not in html
|
|
assert "<p>Before</p>" in html
|
|
assert "<p>After</p>" in html
|
|
|
|
|
|
def test_table_separator_row_not_rendered_as_data(node_available):
|
|
html = _run_markdown_case("| A | B |\n|---|---|\n| 1 | 2 |")
|
|
|
|
assert html.count("<tr>") == 2
|
|
assert "<th" in html
|
|
assert "<td" in html
|
|
assert "---" not in html
|
|
|
|
|
|
def test_process_with_thinking_handles_gemma4_thought_channel(node_available):
|
|
html = _run_markdown_case(
|
|
"<|channel>thought\ninternal reasoning<channel|>Final answer.",
|
|
"mod.processWithThinking(input)",
|
|
)
|
|
|
|
assert "thinking-section" in html
|
|
assert "internal reasoning" in html
|
|
assert "Final answer." in html
|
|
assert "<|channel>" not in html
|
|
assert "<|channel>" not in html
|
|
|
|
|
|
def test_process_with_thinking_strips_empty_gemma4_thought_channel(node_available):
|
|
html = _run_markdown_case(
|
|
"<|channel>thought\n<channel|>Final answer.",
|
|
"mod.processWithThinking(input)",
|
|
)
|
|
|
|
assert "thinking-section" not in html
|
|
assert "Final answer." in html
|
|
assert "<|channel>" not in html
|
|
assert "<|channel>" not in html
|
|
|
|
|
|
def test_process_with_thinking_unwraps_gemma4_response_channel(node_available):
|
|
html = _run_markdown_case(
|
|
"<|channel>thought\ninternal reasoning<channel|><|channel>response\nFinal answer.<channel|>",
|
|
"mod.processWithThinking(input)",
|
|
)
|
|
|
|
assert "thinking-section" in html
|
|
assert "internal reasoning" in html
|
|
assert "Final answer." in html
|
|
assert "<|channel>" not in html
|
|
assert "<|channel>" not in html
|
|
|
|
|
|
def test_extract_thinking_blocks_handles_thought_tag(node_available):
|
|
result = _run_markdown_case(
|
|
"<thought>internal reasoning</thought>Final answer.",
|
|
"mod.extractThinkingBlocks(input)",
|
|
)
|
|
|
|
assert result["thinkingBlocks"] == ["internal reasoning"]
|
|
assert result["content"] == "Final answer."
|
|
|
|
|
|
def test_url_inside_inline_code_is_not_autolinked(node_available):
|
|
# A URL inside a backtick span is preceded by a space, so the bare-URL
|
|
# autolink used to wrap it in an <a> tag (then swap it for an
|
|
# ___ALLOWED_HTML_ placeholder), corrupting the command shown to the user.
|
|
html = _run_markdown_case("Run `$j = irm http://127.0.0.1:3000/x` to fetch.")
|
|
|
|
assert "<code>$j = irm http://127.0.0.1:3000/x</code>" in html
|
|
assert "___ALLOWED_HTML_" not in html
|
|
assert "<a " not in html
|
|
assert 'href="http://127.0.0.1:3000/x"' not in html
|
|
|
|
|
|
def test_url_outside_inline_code_is_still_autolinked(node_available):
|
|
# Inline code must not disable autolinking for bare URLs elsewhere in the
|
|
# same line.
|
|
html = _run_markdown_case("Use `irm` then visit https://example.com/page now.")
|
|
|
|
assert "<code>irm</code>" in html
|
|
assert 'href="https://example.com/page"' in html
|
|
|
|
|
|
def test_inline_code_content_is_html_escaped(node_available):
|
|
# Inline code is now extracted before the global escape pass, so it must be
|
|
# escaped at extraction time (matching the fenced-code-block handling).
|
|
html = _run_markdown_case("Render `<b>$1 & 'q'</b>` literally.")
|
|
|
|
assert "<code><b>$1 & 'q'</b></code>" in html
|
|
assert "<b>" not in html
|
|
|
|
|
|
def test_dotted_python_import_paths_are_not_autolinked(node_available):
|
|
html = _run_markdown_case(
|
|
"from imblearn.combine import SMOTETomek\n"
|
|
"from sklearn.metrics import f1_score\n"
|
|
"from sklearn.compose import ColumnTransformer\n\n"
|
|
"See example.com/docs for normal domain autolinking."
|
|
)
|
|
|
|
assert "___ALLOWED_HTML_" not in html
|
|
assert "imblearn.combine" in html
|
|
assert "sklearn.metrics" in html
|
|
assert "sklearn.compose" in html
|
|
assert 'href="https://imblearn.com' not in html
|
|
assert 'href="https://sklearn.me' not in html
|
|
assert 'href="https://example.com/docs"' in html
|