mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 00:22:10 -04:00
fix(visual_report): ignore fenced headings in TOC extraction
Strip fenced code blocks before extracting visual-report headings so heading-looking lines inside code fences do not desync TOC anchors. Add regression coverage for backtick and tilde fences while preserving normal heading extraction.
This commit is contained in:
@@ -107,6 +107,13 @@ def _extract_headings(md_text: str) -> List[Dict[str, str]]:
|
|||||||
headings = []
|
headings = []
|
||||||
seen_slugs: Dict[str, int] = {}
|
seen_slugs: Dict[str, int] = {}
|
||||||
|
|
||||||
|
# Strip fenced code blocks before scanning for "## ..." lines: a heading-
|
||||||
|
# looking comment inside ``` / ~~~ is NOT rendered as an <h2> by the
|
||||||
|
# markdown renderer, so counting it here desynced the TOC anchor ids
|
||||||
|
# (built by zipping these headings against the rendered <h2>/<h3>), making
|
||||||
|
# every later TOC link point at the wrong section.
|
||||||
|
md_text = re.sub(r'(?ms)^[ \t]*(`{3,}|~{3,})[^\n]*\n.*?^[ \t]*\1[ \t]*$', '', md_text)
|
||||||
|
|
||||||
def _plain_heading_text(text: str) -> str:
|
def _plain_heading_text(text: str) -> str:
|
||||||
text = text.strip().rstrip("#").strip()
|
text = text.strip().rstrip("#").strip()
|
||||||
text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'\1', text)
|
text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'\1', text)
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
"""TOC heading extraction must ignore headings inside code fences.
|
||||||
|
|
||||||
|
A "## ..." comment inside a ``` or ~~~ block is not rendered as an <h2>, but
|
||||||
|
_extract_headings counted it, so _apply_heading_ids (which zips TOC headings
|
||||||
|
against rendered <h2>/<h3> by position) gave later sections the wrong anchor
|
||||||
|
id and the trailing TOC link went dead.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
pytest.importorskip("bs4")
|
||||||
|
|
||||||
|
from src.visual_report import _extract_headings
|
||||||
|
|
||||||
|
|
||||||
|
def test_backtick_fenced_heading_is_ignored():
    """A '## ...' line inside a backtick fence must not become a TOC entry."""
    document = "## Intro\n\n```bash\n## not a heading\n```\n\n## Conclusion"
    # Collect only the heading titles, in document order.
    titles = []
    for heading in _extract_headings(document):
        titles.append(heading["text"])
    assert titles == ["Intro", "Conclusion"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_tilde_fenced_heading_is_ignored():
    """A '## ...' line inside a tilde fence must not become a TOC entry."""
    document = "## A\n\n~~~\n## fake\n~~~\n\n## B"
    extracted = _extract_headings(document)
    # Only the two real headings around the fence should be reported.
    titles = [entry["text"] for entry in extracted]
    expected = ["A", "B"]
    assert titles == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_normal_headings_unaffected():
    """Headings in a document with no code fences keep their level and text."""
    document = "## One\n\nsome text\n\n### Two"
    # Pair each heading's level with its text, preserving document order.
    level_and_text = []
    for heading in _extract_headings(document):
        level_and_text.append((heading["level"], heading["text"]))
    assert level_and_text == [(2, "One"), (3, "Two")]
|
||||||
Reference in New Issue
Block a user