fix(visual_report): make TOC heading slugs unique

Ensure generated visual-report TOC slugs cannot collide with naturally occurring slug names. Add regression coverage for duplicate headings, natural suffix collisions, and unchanged distinct headings.
This commit is contained in:
Miraç Duran
2026-06-27 19:36:17 +03:00
committed by GitHub
parent 67040a196f
commit d5286f926e
2 changed files with 44 additions and 9 deletions
+17 -9
View File
@@ -118,15 +118,23 @@ def _extract_headings(md_text: str) -> List[Dict[str, str]]:
return re.sub(r'\s+', ' ', text).strip()
def _make_slug(text: str) -> str:
    """Return a slug for *text* that has not been handed out before.

    The heading text is lower-cased, every run of characters outside
    ``[a-z0-9]`` is collapsed to a single ``-``, and leading/trailing
    dashes are stripped.  Text that reduces to nothing falls back to
    ``"section"``.

    Uniqueness is tracked in ``seen_slugs``, a mapping supplied by the
    enclosing scope (its definition is not part of this block).  Each key is
    a slug that has already been returned; the value is the highest numeric
    suffix tried so far for that slug when it is used as a base.

    Args:
        text: Raw heading text.

    Returns:
        The base slug on first use, otherwise ``"<base>-<n>"`` with the
        smallest ``n`` above the last one used whose result is still free.
    """
    base = re.sub(r'[^a-z0-9]+', '-', text.lower()).strip('-')
    if not base:
        base = "section"

    # First occurrence: hand out the base slug untouched.
    if base not in seen_slugs:
        seen_slugs[base] = 0
        return base

    # Repeat: keep incrementing until the disambiguated candidate is itself
    # unused, so a generated "intro-1" can't collide with a natural
    # "intro-1" slug that appeared earlier in the document.
    n = seen_slugs[base]
    while True:
        n += 1
        cand = f"{base}-{n}"
        if cand not in seen_slugs:
            break

    # Remember where to resume for this base, and register the candidate so
    # that a later heading which naturally slugs to it is disambiguated too.
    seen_slugs[base] = n
    seen_slugs[cand] = 0
    return cand
for m in re.finditer(r'^(#{2,3})\s+(.+)$', md_text, re.MULTILINE):
level = len(m.group(1))
+27
View File
@@ -0,0 +1,27 @@
"""Regression: _extract_headings must emit a unique slug per heading.
_make_slug disambiguates repeats by appending "-N", but it only tracked the
*base* slug, so a generated "intro-1" could collide with a naturally-occurring
"intro-1" (e.g. headings "Intro", "Intro", "Intro 1" all produced
["intro", "intro-1", "intro-1"]). Duplicate slugs become duplicate heading ids,
which makes the second table-of-contents link dead. Slugs are now guaranteed
unique. Plain repeats keep their existing "-1", "-2" sequence.
"""
from src.visual_report import _extract_headings
def _slugs(md):
    """Return the "slug" field of each heading _extract_headings finds in *md*."""
    headings = _extract_headings(md)
    return [heading["slug"] for heading in headings]
def test_disambiguated_slug_does_not_collide_with_natural_slug():
    """Slugs stay unique whichever of the generated/natural "intro-1" comes first."""
    # Generated "intro-1" (second "Intro") appears before the natural one.
    slugs = _slugs("## Intro\n\n## Intro\n\n## Intro 1\n")
    assert len(slugs) == len(set(slugs)), slugs

    # Natural "intro-1" appears first, so the repeated "Intro" has to skip
    # the already-taken "-1" suffix instead of reusing it.
    slugs = _slugs("## Intro 1\n\n## Intro\n\n## Intro\n")
    assert len(slugs) == len(set(slugs)), slugs
def test_plain_repeats_keep_sequential_suffixes():
    """Three identical headings yield the bare slug, then "-1" and "-2"."""
    markdown = "## Foo\n\n## Foo\n\n## Foo\n"
    expected = ["foo", "foo-1", "foo-2"]
    assert _slugs(markdown) == expected
def test_distinct_headings_are_unchanged():
    """Headings with different slugs receive no numeric suffix."""
    markdown = "## Alpha\n\n## Beta\n"
    expected = ["alpha", "beta"]
    assert _slugs(markdown) == expected