mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 23:52:09 -04:00
fix(visual_report): make TOC heading slugs unique
Ensure generated visual-report TOC slugs cannot collide with naturally occurring slug names. Add regression coverage for duplicate headings, natural suffix collisions, and unchanged distinct headings.
This commit is contained in:
+17
-9
@@ -118,15 +118,23 @@ def _extract_headings(md_text: str) -> List[Dict[str, str]]:
|
||||
return re.sub(r'\s+', ' ', text).strip()
|
||||
|
||||
def _make_slug(text: str) -> str:
    """Return an anchor slug for *text* that has not been issued before.

    The heading text is lower-cased, every run of characters outside
    ``[a-z0-9]`` is collapsed to a single ``-``, and leading/trailing
    dashes are stripped. Text that reduces to nothing becomes ``"section"``.

    Uniqueness is tracked through ``seen_slugs``, which is read from the
    enclosing scope (its definition is outside this block; it is used here
    as a mapping from every slug already returned to the last numeric
    suffix appended to that slug). The mapping is updated on every call.

    A repeated base gets ``-1``, ``-2``, ... appended, skipping any
    candidate that is itself already taken, so a generated ``intro-1``
    can never collide with a heading whose natural slug is ``intro-1``.
    """
    base = re.sub(r'[^a-z0-9]+', '-', text.lower()).strip('-')
    if not base:
        base = "section"

    # First occurrence: hand out the base slug unchanged.
    if base not in seen_slugs:
        seen_slugs[base] = 0
        return base

    # Resume from the last suffix used for this base and keep counting until
    # the disambiguated candidate is itself unused.
    n = seen_slugs[base]
    while True:
        n += 1
        cand = f"{base}-{n}"
        if cand not in seen_slugs:
            break

    seen_slugs[base] = n
    # Register the generated slug too, so a later heading whose natural slug
    # equals it is disambiguated instead of duplicated.
    seen_slugs[cand] = 0
    return cand
|
||||
|
||||
for m in re.finditer(r'^(#{2,3})\s+(.+)$', md_text, re.MULTILINE):
|
||||
level = len(m.group(1))
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Regression: _extract_headings must emit a unique slug per heading.
|
||||
|
||||
_make_slug disambiguates repeats by appending "-N", but it only tracked the
|
||||
*base* slug, so a generated "intro-1" could collide with a naturally-occurring
|
||||
"intro-1" (e.g. headings "Intro", "Intro", "Intro 1" all produced
|
||||
["intro", "intro-1", "intro-1"]). Duplicate slugs become duplicate heading ids,
|
||||
which makes the second table-of-contents link dead. Slugs are now guaranteed
|
||||
unique. Plain repeats keep their existing "-1", "-2" sequence.
|
||||
"""
|
||||
from src.visual_report import _extract_headings
|
||||
|
||||
|
||||
def _slugs(md):
    """Return the "slug" value of each heading extracted from *md*, in order."""
    headings = _extract_headings(md)
    return [heading["slug"] for heading in headings]
|
||||
|
||||
|
||||
def test_disambiguated_slug_does_not_collide_with_natural_slug():
    """Two "Intro" headings plus a literal "Intro 1" must yield distinct slugs."""
    markdown = "## Intro\n\n## Intro\n\n## Intro 1\n"
    slugs = _slugs(markdown)
    # A set drops duplicates, so equal sizes mean every slug is unique.
    assert len(slugs) == len(set(slugs)), slugs
|
||||
|
||||
|
||||
def test_plain_repeats_keep_sequential_suffixes():
    """Identical headings get the bare slug, then "-1", then "-2"."""
    markdown = "## Foo\n\n## Foo\n\n## Foo\n"
    actual = _slugs(markdown)
    assert actual == ["foo", "foo-1", "foo-2"]
|
||||
|
||||
|
||||
def test_distinct_headings_are_unchanged():
    """Headings that never clash keep their plain slugs with no suffix."""
    markdown = "## Alpha\n\n## Beta\n"
    actual = _slugs(markdown)
    assert actual == ["alpha", "beta"]
|
||||
Reference in New Issue
Block a user