test(taxonomy): auto-mark tests by area and sub-area (#3491)

2026-06-15 17:25:26 -04:00 · 2026-06-09 00:13:28 +01:00
parent e7c1d75884
commit a240f28af9
5 changed files with 380 additions and 4 deletions
@@ -1,3 +1,18 @@
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 asyncio_mode = "auto"
+# Test-taxonomy markers added at collection time by tests/conftest.py. The
+# stable area_* markers are declared here; the dynamic sub_<filename-token>
+# markers are registered before collection by pytest_configure in
+# tests/conftest.py, so unknown-mark warnings still flag genuine typos outside
+# the taxonomy. See tests/_taxonomy.py and tests/README.md.
+markers = [
+    "area_security: tests covering auth, owner-scope, SSRF, XSS, confinement, redaction",
+    "area_routes: tests covering HTTP route / API behavior",
+    "area_services: tests covering service-layer behavior (llm, cookbook, email, calendar, ...)",
+    "area_cli: tests covering CLI / script behavior",
+    "area_js: JavaScript / Node-backed tests",
+    "area_helpers: self-tests for the shared test helpers in tests/helpers/",
+    "area_unit: pure parser / utility tests that do not clearly belong elsewhere",
+    "area_uncategorized: tests not yet matched by the taxonomy (fallback)",
+]
@@ -13,6 +13,26 @@ behavioral-vs-source-text policy, and helper/factory extraction rules - see
 [`TESTING_STANDARD.md`](./TESTING_STANDARD.md). This file is the concrete helper
 reference; that file is the standard the refactor works toward.

+## Running focused subsets (taxonomy markers)
+
+`tests/conftest.py` tags every test at collection time with two markers derived
+from its file path by `tests/_taxonomy.py`: an `area_*` marker (e.g.
+`area_security`) and a finer `sub_*` marker (e.g. `sub_owner_scope`). This adds
+markers only - it moves no files and changes no test behavior. Use them to run a
+focused slice:
+
+```bash
+python3 -m pytest -m area_security
+python3 -m pytest -m "area_services and sub_cookbook"
+```
+
+Areas are `security`, `routes`, `services`, `cli`, `js`, `helpers`, `unit`, and
+`uncategorized`. Classification is conservative and token-based: a file that
+matches no area keyword falls back to `area_uncategorized` with its filename as
+the sub-area. The `area_*` names are registered in `pyproject.toml`; the dynamic
+`sub_*` names are registered before collection by `pytest_configure` in
+`tests/conftest.py`, so unknown-mark warnings still flag genuine typos.
+
 ## Core principles

 - Keep PRs small and homogeneous: one kind of change per PR.
@@ -0,0 +1,162 @@
+"""Conservative test taxonomy: classify test files by area and sub-area.
+
+This module is the single source of truth for the collection-time markers added
+in ``tests/conftest.py``. It performs no inference beyond simple, exact matching:
+dedicated ``helpers`` and ``js`` rules run first, then filename tokens are
+compared against small, explicit keyword sets and the first area (in priority
+order) whose set intersects them wins. Files that match no area fall back to
+``uncategorized`` with the filename tokens as the sub-area.
+
+The categories mirror ``tests/TESTING_STANDARD.md``. This module imports nothing
+from the application - only the standard library - and changes no test behavior.
+"""
+from __future__ import annotations
+
+import re
+from collections.abc import Iterable
+from dataclasses import dataclass
+from pathlib import Path
+
+# Area keyword sets. Keep these small and explicit; prefer leaving a file
+# ``uncategorized`` over guessing. Matching is exact, token-by-token.
+SECURITY_KEYWORDS = frozenset({
+    "security", "auth", "owner", "scope",
+    "ssrf", "xss", "confinement", "permission", "redaction",
+})
+CLI_KEYWORDS = frozenset({"cli"})
+ROUTES_KEYWORDS = frozenset({"route", "routes", "api"})
+SERVICES_KEYWORDS = frozenset({
+    "llm", "provider", "cookbook", "session", "history", "email",
+    "calendar", "memory", "gallery", "document", "research", "mcp",
+    "scheduler", "webhook", "embedding",
+})
+UNIT_KEYWORDS = frozenset({
+    "parse", "parser", "parsing", "nonstring", "nondict",
+    "atomic", "regex", "tokenize",
+})
+
+# Keyword-matched areas, in priority order (first match wins). Security is a
+# cross-cutting concern and intentionally outranks the feature areas, so e.g.
+# ``test_email_owner_scope.py`` classifies as ``security``, not ``services``.
+# ``js`` and ``helpers`` are matched earlier by dedicated rules in ``_match_area``.
+KEYWORD_AREAS = (
+    ("security", SECURITY_KEYWORDS),
+    ("cli", CLI_KEYWORDS),
+    ("routes", ROUTES_KEYWORDS),
+    ("services", SERVICES_KEYWORDS),
+    ("unit", UNIT_KEYWORDS),
+)
+
+# File extensions that indicate a JavaScript/Node-backed test.
+JS_EXTENSIONS = frozenset({".js", ".mjs", ".ts"})
+
+UNCATEGORIZED = "uncategorized"
+
+
+@dataclass(frozen=True)
+class TestClassification:
+    """Area and sub-area for a single test file."""
+
+    area: str
+    sub_area: str
+
+
+def normalize_marker_name(value: str) -> str:
+    """Lowercase ``value`` and reduce it to a marker-safe ``[a-z0-9_]`` token."""
+    lowered = value.lower()
+    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
+    return collapsed.strip("_")
+
+
+def _stem(path: str | Path) -> str:
+    """Filename without its extension chain (``invariant.test.mjs`` -> ``invariant``)."""
+    return Path(path).name.split(".", 1)[0]
+
+
+def _extension(path: str | Path) -> str:
+    """Lowercased final file extension, e.g. ``.py`` or ``.mjs``."""
+    return Path(path).suffix.lower()
+
+
+def _filename_tokens(path: str | Path) -> tuple[str, ...]:
+    """Underscore tokens of the filename stem, with a leading ``test`` dropped."""
+    tokens = tuple(t for t in normalize_marker_name(_stem(path)).split("_") if t)
+    if tokens and tokens[0] == "test":
+        tokens = tokens[1:]
+    return tokens
+
+
+def _matched_keywords(tokens: tuple[str, ...], keywords: frozenset[str]) -> tuple[str, ...]:
+    """Filename tokens that appear in ``keywords``, in order, de-duplicated."""
+    matched: list[str] = []
+    for token in tokens:
+        if token in keywords and token not in matched:
+            matched.append(token)
+    return tuple(matched)
+
+
+def _match_area(tokens: tuple[str, ...], extension: str) -> tuple[str, tuple[str, ...]]:
+    """Return ``(area, matched_keywords)`` using the conservative priority order."""
+    if extension in JS_EXTENSIONS or "js" in tokens:
+        return "js", ("js",)
+    if tokens and tokens[0] == "helpers":
+        return "helpers", ("helpers",)
+    for area, keywords in KEYWORD_AREAS:
+        matched = _matched_keywords(tokens, keywords)
+        if matched:
+            return area, matched
+    return UNCATEGORIZED, ()
+
+
+def _sub_area(area: str, matched: tuple[str, ...], tokens: tuple[str, ...]) -> str:
+    """Derive the sub-area: matched keywords for a known area, else the filename."""
+    if area == UNCATEGORIZED:
+        return "_".join(tokens)
+    return "_".join(matched)
+
+
+def _in_helpers_dir(path: str | Path) -> bool:
+    """True if ``path`` is under the test helper dir ``tests/helpers/``.
+
+    Matches the exact adjacent ``tests``/``helpers`` component pair, so an
+    unrelated ancestor directory merely named ``helpers`` does not count.
+    """
+    parts = Path(path).parent.parts
+    adjacent_pairs = list(zip(parts, parts[1:]))
+    return ("tests", "helpers") in adjacent_pairs
+
+
+def classify_test_path(path: str | Path) -> TestClassification:
+    """Classify a test file path into an area and a sub-area.
+
+    A test file under ``tests/helpers/`` (that exact adjacent directory pair) is a
+    helper self-test regardless of its filename, which complements the filename
+    first-token rule in ``_match_area`` (e.g. ``tests/test_helpers_import_state.py``).
+    """
+    if _in_helpers_dir(path):
+        return TestClassification(area="helpers", sub_area="helpers")
+    tokens = _filename_tokens(path)
+    area, matched = _match_area(tokens, _extension(path))
+    sub_area = _sub_area(area, matched, tokens) or UNCATEGORIZED
+    return TestClassification(area=area, sub_area=sub_area)
+
+
+def markers_for_path(path: str | Path) -> tuple[str, ...]:
+    """Return the ``(area_*, sub_*)`` marker names for a test file path."""
+    classification = classify_test_path(path)
+    area_marker = normalize_marker_name(f"area_{classification.area}")
+    sub_marker = normalize_marker_name(f"sub_{classification.sub_area}")
+    return (area_marker, sub_marker)
+
+
+def discover_markers(paths: Iterable[str | Path]) -> tuple[str, ...]:
+    """Distinct ``area_*`` / ``sub_*`` marker names for ``paths``, sorted.
+
+    Pure: it derives names from the given paths only and performs no filesystem
+    access of its own. The caller decides which paths to scan. Used at
+    ``pytest_configure`` time to register the dynamic ``sub_*`` markers.
+    """
+    names: set[str] = set()
+    for path in paths:
+        names.update(markers_for_path(path))
+    return tuple(sorted(names))
@@ -1,4 +1,4 @@
-"""Shared test configuration — ensure project root is on sys.path and stub heavy deps."""
+"""Shared test configuration - ensure project root is on sys.path and stub heavy deps."""
 import sys
 import os
 import types
@@ -9,12 +9,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 # Importing core.database below runs init_db() at import time, and its default
 # (sqlite:///./data/app.db) can't be opened in a clean worktree because SQLite
-# won't create the missing ./data parent dir — pytest then dies during
+# won't create the missing ./data parent dir - pytest then dies during
 # collection, before any test module loads. Default to an in-memory DB for the
 # test session so collection is deterministic and writes no repo-local
 # artifacts. An explicit DATABASE_URL (a real test/CI database) is preserved.
 # This only unblocks collection/import-time init; it does not provide a shared
-# file-backed DB across processes — tests needing that must set DATABASE_URL.
+# file-backed DB across processes - tests needing that must set DATABASE_URL.
 os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")

 # Pre-import real heavy modules BEFORE any test file's module-level stubs can
@@ -27,7 +27,7 @@ try:
    import sqlalchemy.orm  # noqa: F401
    import core.database  # noqa: F401
 except ImportError:
-    pass  # not installed — the stubs below will handle it
+    pass  # not installed - the stubs below will handle it

 def _has_module(mod_name: str) -> bool:
    try:
@@ -54,3 +54,37 @@ if "src.database" not in sys.modules:
    _db.SessionLocal = MagicMock()
    _db.ModelEndpoint = MagicMock()
    sys.modules["src.database"] = _db
+
+
+def pytest_configure(config):
+    """Register the dynamic taxonomy ``sub_*`` markers before collection.
+
+    The stable ``area_*`` markers are declared in ``pyproject.toml``. The
+    per-file ``sub_*`` markers are derived from the test filenames here so that
+    unknown-mark warnings still surface genuine typos outside the taxonomy. This
+    only registers marker names; it imports no production module.
+    """
+    import pathlib
+    from tests._taxonomy import discover_markers
+
+    tests_dir = pathlib.Path(__file__).parent
+    paths = list(tests_dir.rglob("test_*.py")) + list(tests_dir.rglob("*_test.py"))
+    for marker_name in discover_markers(paths):
+        if marker_name.startswith("sub_"):
+            config.addinivalue_line("markers", f"{marker_name}: taxonomy sub-area marker")
+
+
+def pytest_collection_modifyitems(config, items):
+    """Tag each collected test with its taxonomy ``area_*`` and ``sub_*`` markers.
+
+    Collection-time only: this adds markers and nothing else. It does not skip,
+    reorder, or deselect tests, mutate fixtures or the environment, or import any
+    production module. See ``tests/_taxonomy.py`` for the classification rules.
+    """
+    import pytest
+    from tests._taxonomy import markers_for_path
+
+    for item in items:
+        path = getattr(item, "path", None) or item.fspath
+        for marker_name in markers_for_path(path):
+            item.add_marker(getattr(pytest.mark, marker_name))
@@ -0,0 +1,145 @@
+"""Unit tests for tests/_taxonomy.py - the test-taxonomy classification module.
+
+These tests pin the conservative classification behavior directly, without
+running pytest collection. They import only the module under test (a test-support
+module, not production code) and touch no filesystem.
+"""
+import re
+
+import pytest
+
+from tests._taxonomy import (
+    classify_test_path,
+    discover_markers,
+    markers_for_path,
+    normalize_marker_name,
+)
+
+
+# --- normalize_marker_name ---------------------------------------------------
+
+def test_normalize_lowercases():
+    assert normalize_marker_name("Area_Security") == "area_security"
+
+
+def test_normalize_converts_nonalphanumeric_runs_to_underscore():
+    assert normalize_marker_name("owner--scope..test") == "owner_scope_test"
+
+
+def test_normalize_strips_leading_and_trailing_underscores():
+    assert normalize_marker_name("__owner-scope__") == "owner_scope"
+
+
+# --- classify_test_path: one example per area --------------------------------
+
+@pytest.mark.parametrize("filename, expected_area, expected_sub", [
+    ("test_owner_scope.py", "security", "owner_scope"),
+    ("test_cookbook_helpers.py", "services", "cookbook"),
+    ("test_routes_sessions.py", "routes", "routes"),
+    ("test_backup_cli.py", "cli", "cli"),
+    ("test_compare_js.py", "js", "js"),
+    ("segmenter.test.mjs", "js", "js"),
+    ("segmenter.test.js", "js", "js"),
+    ("segmenter.test.ts", "js", "js"),
+    ("test_helpers_import_state.py", "helpers", "helpers"),
+    ("test_atomic_io.py", "unit", "atomic"),
+])
+def test_classify_examples(filename, expected_area, expected_sub):
+    result = classify_test_path(filename)
+    assert result.area == expected_area
+    assert result.sub_area == expected_sub
+
+
+# --- classify_test_path: fallback --------------------------------------------
+
+def test_unknown_filename_is_uncategorized():
+    result = classify_test_path("test_widget_gizmo_thing.py")
+    assert result.area == "uncategorized"
+
+
+def test_uncategorized_sub_area_is_derived_from_filename_tokens():
+    result = classify_test_path("test_archived_sessions_model_filter.py")
+    assert result.area == "uncategorized"
+    assert result.sub_area == "archived_sessions_model_filter"
+
+
+# --- markers_for_path --------------------------------------------------------
+
+def test_markers_for_path_returns_one_area_and_one_sub():
+    markers = markers_for_path("test_owner_scope.py")
+    assert markers == ("area_security", "sub_owner_scope")
+    assert len([m for m in markers if m.startswith("area_")]) == 1
+    assert len([m for m in markers if m.startswith("sub_")]) == 1
+
+
+def test_markers_for_path_are_normalized():
+    markers = markers_for_path("test_foo-bar.py")
+    assert markers == ("area_uncategorized", "sub_foo_bar")
+    for marker in markers:
+        assert re.fullmatch(r"[a-z0-9_]+", marker)
+
+
+# --- discover_markers --------------------------------------------------------
+
+def test_discover_markers_is_sorted_and_deduplicated():
+    paths = [
+        "test_owner_scope.py",
+        "test_owner_scope.py",
+        "test_cookbook_helpers.py",
+    ]
+    markers = discover_markers(paths)
+    assert markers == tuple(sorted(set(markers)))
+    assert markers == (
+        "area_security",
+        "area_services",
+        "sub_cookbook",
+        "sub_owner_scope",
+    )
+
+
+def test_discover_markers_includes_area_and_sub():
+    markers = discover_markers(["test_owner_scope.py"])
+    assert any(m.startswith("area_") for m in markers)
+    assert any(m.startswith("sub_") for m in markers)
+
+
+# --- edge cases --------------------------------------------------------------
+
+def test_normalize_all_symbols_becomes_empty():
+    assert normalize_marker_name("@@@") == ""
+
+
+def test_bare_test_filename_is_fully_uncategorized():
+    result = classify_test_path("tests/test.py")
+    assert result.area == "uncategorized"
+    assert result.sub_area == "uncategorized"
+
+
+def test_markers_for_bare_test_filename():
+    markers = markers_for_path("tests/test.py")
+    assert "area_uncategorized" in markers
+    assert "sub_uncategorized" in markers
+
+
+@pytest.mark.parametrize("path", [
+    "tests/helpers/test_module_isolation.py",
+    "/work/repo/tests/helpers/test_module_isolation.py",
+])
+def test_file_under_helpers_dir_is_helpers(path):
+    result = classify_test_path(path)
+    assert result.area == "helpers"
+    assert result.sub_area == "helpers"
+
+
+# --- priority contract -------------------------------------------------------
+
+def test_security_beats_services_when_both_tokens_present():
+    result = classify_test_path("test_email_owner_scope.py")
+    assert result.area == "security"
+    assert result.sub_area == "owner_scope"
+
+
+def test_unrelated_helpers_ancestor_is_not_helpers():
+    result = classify_test_path("/work/helpers/odysseus/tests/test_owner_scope.py")
+    assert result.area == "security"
+    assert result.sub_area == "owner_scope"