mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-29 16:12:06 -04:00
fix(visual_report): make TOC heading slugs unique
Ensure generated visual-report TOC slugs cannot collide with naturally occurring slug names. Add regression coverage for duplicate headings, natural suffix collisions, and unchanged distinct headings.
This commit is contained in:
+17
-9
@@ -118,15 +118,23 @@ def _extract_headings(md_text: str) -> List[Dict[str, str]]:
|
|||||||
return re.sub(r'\s+', ' ', text).strip()
|
return re.sub(r'\s+', ' ', text).strip()
|
||||||
|
|
||||||
def _make_slug(text: str) -> str:
    """Return an anchor slug for *text* that has not been handed out before.

    The heading is lower-cased and every run of characters outside
    ``a-z0-9`` becomes a single hyphen, with leading and trailing hyphens
    removed; a heading that reduces to nothing falls back to ``"section"``.

    ``seen_slugs`` (from the enclosing scope) records every slug already
    returned. For a slug that has been used as a base, the stored value is
    the last numeric suffix issued for it.
    """
    stem = re.sub(r'[^a-z0-9]+', '-', text.lower()).strip('-') or "section"

    # First occurrence: hand the plain slug out untouched.
    if stem not in seen_slugs:
        seen_slugs[stem] = 0
        return stem

    # Repeat: resume from the last suffix issued for this stem and skip any
    # "<stem>-<n>" that is already taken, whether it was generated earlier
    # or came from a heading that slugified to that exact text.
    counter = seen_slugs[stem] + 1
    unique = f"{stem}-{counter}"
    while unique in seen_slugs:
        counter += 1
        unique = f"{stem}-{counter}"

    seen_slugs[stem] = counter
    # Register the suffixed slug too, so later headings cannot reuse it.
    seen_slugs[unique] = 0
    return unique
|
||||||
|
|
||||||
for m in re.finditer(r'^(#{2,3})\s+(.+)$', md_text, re.MULTILINE):
|
for m in re.finditer(r'^(#{2,3})\s+(.+)$', md_text, re.MULTILINE):
|
||||||
level = len(m.group(1))
|
level = len(m.group(1))
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Regression: _extract_headings must emit a unique slug per heading.
|
||||||
|
|
||||||
|
_make_slug disambiguates repeats by appending "-N", but it only tracked the
|
||||||
|
*base* slug, so a generated "intro-1" could collide with a naturally-occurring
|
||||||
|
"intro-1" (e.g. headings "Intro", "Intro", "Intro 1" all produced
|
||||||
|
["intro", "intro-1", "intro-1"]). Duplicate slugs become duplicate heading ids,
|
||||||
|
which makes the second table-of-contents link dead. Slugs are now guaranteed
|
||||||
|
unique. Plain repeats keep their existing "-1", "-2" sequence.
|
||||||
|
"""
|
||||||
|
from src.visual_report import _extract_headings
|
||||||
|
|
||||||
|
|
||||||
|
def _slugs(md):
    """Return the ``"slug"`` value of each heading extracted from *md*, in order."""
    headings = _extract_headings(md)
    return [entry["slug"] for entry in headings]
|
||||||
|
|
||||||
|
|
||||||
|
def test_disambiguated_slug_does_not_collide_with_natural_slug():
    """A repeated "Intro" followed by a literal "Intro 1" must yield distinct slugs."""
    produced = _slugs("## Intro\n\n## Intro\n\n## Intro 1\n")
    # Deduplicating must not shrink the list; show the slugs on failure.
    assert len(set(produced)) == len(produced), produced
|
||||||
|
|
||||||
|
|
||||||
|
def test_plain_repeats_keep_sequential_suffixes():
    """Identical headings keep the plain "-1", "-2" numbering."""
    expected = ["foo", "foo-1", "foo-2"]
    actual = _slugs("## Foo\n\n## Foo\n\n## Foo\n")
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_distinct_headings_are_unchanged():
    """Headings with different text get their plain slugs, with no suffix."""
    expected = ["alpha", "beta"]
    actual = _slugs("## Alpha\n\n## Beta\n")
    assert actual == expected
|
||||||
Reference in New Issue
Block a user