test(taxonomy): auto-mark tests by area and sub-area (#3491)

This commit is contained in:
Alexandre Teixeira
2026-06-09 00:13:28 +01:00
committed by GitHub
parent e7c1d75884
commit a240f28af9
5 changed files with 380 additions and 4 deletions
+15
View File
@@ -1,3 +1,18 @@
[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"
# Test-taxonomy markers added at collection time by tests/conftest.py. The
# stable area_* markers are declared here; the dynamic sub_<filename-token>
# markers are registered before collection by pytest_configure in
# tests/conftest.py, so unknown-mark warnings still flag genuine typos outside
# the taxonomy. See tests/_taxonomy.py and tests/README.md.
markers = [
"area_security: tests covering auth, owner-scope, SSRF, XSS, confinement, redaction",
"area_routes: tests covering HTTP route / API behavior",
"area_services: tests covering service-layer behavior (llm, cookbook, email, calendar, ...)",
"area_cli: tests covering CLI / script behavior",
"area_js: JavaScript / Node-backed tests",
"area_helpers: self-tests for the shared test helpers in tests/helpers/",
"area_unit: pure parser / utility tests that do not clearly belong elsewhere",
"area_uncategorized: tests not yet matched by the taxonomy (fallback)",
]
+20
View File
@@ -13,6 +13,26 @@ behavioral-vs-source-text policy, and helper/factory extraction rules - see
[`TESTING_STANDARD.md`](./TESTING_STANDARD.md). This file is the concrete helper
reference; that file is the standard the refactor works toward.
## Running focused subsets (taxonomy markers)
`tests/conftest.py` tags every test at collection time with two markers that
`tests/_taxonomy.py` derives from the test file's name (or, for files under
`tests/helpers/`, from that directory): an `area_*` marker (e.g.
`area_security`) and a finer `sub_*` marker (e.g. `sub_owner_scope`). This adds
markers only - it moves no files and changes no test behavior. Use them to run a
focused slice:
```bash
python3 -m pytest -m area_security
python3 -m pytest -m "area_services and sub_cookbook"
```
Areas are `security`, `routes`, `services`, `cli`, `js`, `helpers`, `unit`, and
`uncategorized`. Classification is conservative and token-based: a file that
matches no area keyword falls back to `area_uncategorized` with its filename as
the sub-area. The `area_*` names are registered in `pyproject.toml`; the dynamic
`sub_*` names are registered before collection by `pytest_configure` in
`tests/conftest.py`, so unknown-mark warnings still flag genuine typos.
## Core principles
- Keep PRs small and homogeneous: one kind of change per PR.
+162
View File
@@ -0,0 +1,162 @@
"""Conservative test taxonomy: classify test files by area and sub-area.
This module is the single source of truth for the collection-time markers added
in ``tests/conftest.py``. It performs no inference beyond simple, exact matching
of filename tokens against small, explicit keyword sets. A file is matched to
the first area (in priority order) whose keyword set intersects its filename
tokens; files that match no area fall back to ``uncategorized`` with the
filename itself as the sub-area.
The categories mirror ``tests/TESTING_STANDARD.md``. This module imports nothing
from the application - only the standard library - and changes no test behavior.
"""
from __future__ import annotations
import re
from collections.abc import Iterable
from dataclasses import dataclass
from pathlib import Path
# Per-area keyword sets. Each set is deliberately small and explicit: a file
# that matches nothing should stay ``uncategorized`` rather than be guessed at.
# Filename tokens are compared to these keywords for exact equality.
SECURITY_KEYWORDS = frozenset((
    "security",
    "auth",
    "owner",
    "scope",
    "ssrf",
    "xss",
    "confinement",
    "permission",
    "redaction",
))

CLI_KEYWORDS = frozenset(("cli",))

ROUTES_KEYWORDS = frozenset(("route", "routes", "api"))

SERVICES_KEYWORDS = frozenset((
    "llm",
    "provider",
    "cookbook",
    "session",
    "history",
    "email",
    "calendar",
    "memory",
    "gallery",
    "document",
    "research",
    "mcp",
    "scheduler",
    "webhook",
    "embedding",
))

UNIT_KEYWORDS = frozenset((
    "parse",
    "parser",
    "parsing",
    "nonstring",
    "nondict",
    "atomic",
    "regex",
    "tokenize",
))

# Areas that are matched by keyword, listed in priority order: the first area
# with a matching token wins. Security is cross-cutting, so it is placed ahead
# of the feature areas on purpose - ``test_email_owner_scope.py`` is therefore
# ``security`` rather than ``services``. The ``js`` and ``helpers`` areas are
# not listed here; ``_match_area`` handles them with dedicated rules.
KEYWORD_AREAS = (
    ("security", SECURITY_KEYWORDS),
    ("cli", CLI_KEYWORDS),
    ("routes", ROUTES_KEYWORDS),
    ("services", SERVICES_KEYWORDS),
    ("unit", UNIT_KEYWORDS),
)

# Final file extensions that mark a JavaScript/Node-backed test.
JS_EXTENSIONS = frozenset((".js", ".mjs", ".ts"))

# Name of the fallback area (and fallback sub-area) for unmatched files.
UNCATEGORIZED = "uncategorized"
@dataclass(frozen=True)
class TestClassification:
    """Area and sub-area assigned to a single test file.

    Instances are immutable (the dataclass is frozen) and compare equal when
    both fields are equal.
    """

    # The class name starts with ``Test``, so pytest would try to collect it as
    # a test class in any test module that imports it, and warn because a
    # dataclass defines ``__init__``. ``__test__ = False`` opts the class out
    # of collection. It carries no annotation, so it is not a dataclass field.
    __test__ = False

    # Taxonomy area name, e.g. "security" or "uncategorized".
    area: str
    # Finer-grained label within the area, e.g. "owner_scope".
    sub_area: str
def normalize_marker_name(value: str) -> str:
    """Turn ``value`` into a lowercase ``[a-z0-9_]`` token usable as a marker name.

    After lowercasing, every run of characters outside ``[a-z0-9]`` becomes a
    single underscore, and underscores at either end are removed. Input with
    no ASCII alphanumerics therefore yields an empty string.
    """
    return re.sub(r"[^a-z0-9]+", "_", value.lower()).strip("_")
def _stem(path: str | Path) -> str:
"""Filename without its extension chain (``invariant.test.mjs`` -> ``invariant``)."""
return Path(path).name.split(".", 1)[0]
def _extension(path: str | Path) -> str:
"""Lowercased final file extension, e.g. ``.py`` or ``.mjs``."""
return Path(path).suffix.lower()
def _filename_tokens(path: str | Path) -> tuple[str, ...]:
    """Split the normalized filename stem on underscores into non-empty tokens.

    If the first token is ``test`` it is removed, so ``test_owner_scope.py``
    yields ``("owner", "scope")``. Only a leading ``test`` is dropped; the same
    token elsewhere in the name is kept.
    """
    normalized = normalize_marker_name(_stem(path))
    pieces = [piece for piece in normalized.split("_") if piece]
    if pieces[:1] == ["test"]:
        del pieces[0]
    return tuple(pieces)
def _matched_keywords(tokens: tuple[str, ...], keywords: frozenset[str]) -> tuple[str, ...]:
"""Filename tokens that appear in ``keywords``, in order, de-duplicated."""
matched: list[str] = []
for token in tokens:
if token in keywords and token not in matched:
matched.append(token)
return tuple(matched)
def _match_area(tokens: tuple[str, ...], extension: str) -> tuple[str, tuple[str, ...]]:
    """Pick the area for a file and report which keywords selected it.

    Rules are tried in this order, first hit wins:

    1. ``js`` - the extension is in ``JS_EXTENSIONS`` or any token is ``js``.
    2. ``helpers`` - the first token is ``helpers``.
    3. The keyword areas of ``KEYWORD_AREAS``, in their listed priority order.

    Returns ``(area, matched_keywords)``; when nothing matches, the result is
    ``(UNCATEGORIZED, ())``.
    """
    if "js" in tokens or extension in JS_EXTENSIONS:
        return "js", ("js",)

    leading = tokens[0] if tokens else None
    if leading == "helpers":
        return "helpers", ("helpers",)

    # Lazily evaluate each keyword area and stop at the first non-empty match.
    candidates = (
        (name, _matched_keywords(tokens, area_keywords))
        for name, area_keywords in KEYWORD_AREAS
    )
    return next(
        (candidate for candidate in candidates if candidate[1]),
        (UNCATEGORIZED, ()),
    )
def _sub_area(area: str, matched: tuple[str, ...], tokens: tuple[str, ...]) -> str:
    """Build the sub-area label by joining name parts with underscores.

    A known area uses its matched keywords; the uncategorized fallback uses
    all of the filename tokens instead. The result may be an empty string when
    the chosen parts are empty.
    """
    parts = tokens if area == UNCATEGORIZED else matched
    return "_".join(parts)
def _in_helpers_dir(path: str | Path) -> bool:
"""True if ``path`` is under the test helper dir ``tests/helpers/``.
Matches the exact adjacent ``tests``/``helpers`` component pair, so an
unrelated ancestor directory merely named ``helpers`` does not count.
"""
parts = Path(path).parent.parts
adjacent_pairs = list(zip(parts, parts[1:]))
return ("tests", "helpers") in adjacent_pairs
def classify_test_path(path: str | Path) -> TestClassification:
    """Work out the taxonomy area and sub-area for one test file path.

    Files located under ``tests/helpers/`` are always classified as
    ``helpers``/``helpers``, whatever they are called. This directory rule sits
    alongside the filename rule in ``_match_area``, which covers files such as
    ``test_helpers_import_state.py`` that live directly in ``tests/``.

    Every other file is classified from its filename tokens and extension. If
    the derived sub-area would be empty, ``UNCATEGORIZED`` is used instead.
    """
    if _in_helpers_dir(path):
        return TestClassification(area="helpers", sub_area="helpers")

    name_tokens = _filename_tokens(path)
    area, hits = _match_area(name_tokens, _extension(path))
    label = _sub_area(area, hits, name_tokens)
    if not label:
        # e.g. a bare ``test.py`` leaves no tokens to build a label from.
        label = UNCATEGORIZED
    return TestClassification(area=area, sub_area=label)
def markers_for_path(path: str | Path) -> tuple[str, ...]:
    """Return the two marker names for ``path``: ``area_*`` first, then ``sub_*``.

    Both names are passed through ``normalize_marker_name`` so they contain
    only ``[a-z0-9_]``.
    """
    found = classify_test_path(path)
    raw_names = (f"area_{found.area}", f"sub_{found.sub_area}")
    return tuple(normalize_marker_name(raw) for raw in raw_names)
def discover_markers(paths: Iterable[str | Path]) -> tuple[str, ...]:
    """Collect every distinct ``area_*`` / ``sub_*`` marker name for ``paths``.

    The result is sorted and free of duplicates. Names are computed from the
    path strings alone; this function does not touch the filesystem, so the
    caller chooses which paths to pass in. ``pytest_configure`` uses it to
    register the dynamic ``sub_*`` markers.
    """
    distinct = {
        marker
        for path in paths
        for marker in markers_for_path(path)
    }
    return tuple(sorted(distinct))
+38 -4
View File
@@ -1,4 +1,4 @@
"""Shared test configuration ensure project root is on sys.path and stub heavy deps."""
"""Shared test configuration - ensure project root is on sys.path and stub heavy deps."""
import sys
import os
import types
@@ -9,12 +9,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Importing core.database below runs init_db() at import time, and its default
# (sqlite:///./data/app.db) can't be opened in a clean worktree because SQLite
# won't create the missing ./data parent dir pytest then dies during
# won't create the missing ./data parent dir - pytest then dies during
# collection, before any test module loads. Default to an in-memory DB for the
# test session so collection is deterministic and writes no repo-local
# artifacts. An explicit DATABASE_URL (a real test/CI database) is preserved.
# This only unblocks collection/import-time init; it does not provide a shared
# file-backed DB across processes tests needing that must set DATABASE_URL.
# file-backed DB across processes - tests needing that must set DATABASE_URL.
os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")
# Pre-import real heavy modules BEFORE any test file's module-level stubs can
@@ -27,7 +27,7 @@ try:
import sqlalchemy.orm # noqa: F401
import core.database # noqa: F401
except ImportError:
pass # not installed the stubs below will handle it
pass # not installed - the stubs below will handle it
def _has_module(mod_name: str) -> bool:
try:
@@ -54,3 +54,37 @@ if "src.database" not in sys.modules:
_db.SessionLocal = MagicMock()
_db.ModelEndpoint = MagicMock()
sys.modules["src.database"] = _db
def pytest_configure(config):
    """Register the taxonomy's dynamic ``sub_*`` markers ahead of collection.

    The fixed ``area_*`` markers are declared in ``pyproject.toml``. The
    ``sub_*`` names depend on which test files exist, so they are computed here
    from the ``test_*.py`` / ``*_test.py`` files found below this directory.
    Registering them keeps unknown-mark warnings meaningful for real typos
    outside the taxonomy. Only marker names are registered; no production
    module is imported.
    """
    import pathlib

    from tests._taxonomy import discover_markers

    root = pathlib.Path(__file__).parent
    test_files = [
        found
        for pattern in ("test_*.py", "*_test.py")
        for found in root.rglob(pattern)
    ]

    # ``discover_markers`` also returns ``area_*`` names; those are already
    # declared statically, so only the ``sub_*`` ones are added here.
    sub_markers = (
        name for name in discover_markers(test_files) if name.startswith("sub_")
    )
    for name in sub_markers:
        config.addinivalue_line("markers", f"{name}: taxonomy sub-area marker")
def pytest_collection_modifyitems(config, items):
    """Tag each collected test with its taxonomy ``area_*`` and ``sub_*`` markers.

    Collection-time only: this adds markers and nothing else. It does not skip,
    reorder, or deselect tests, mutate fixtures or the environment, or import any
    production module. See ``tests/_taxonomy.py`` for the classification rules.

    Args:
        config: The pytest config object (unused; part of the hook signature).
        items: The collected test items; each one receives two markers.
    """
    import pytest

    from tests._taxonomy import markers_for_path

    # Classification depends only on the file path, and every test in a file
    # shares that path, so classify each distinct path once instead of once
    # per collected item.
    names_by_path = {}
    for item in items:
        # Use ``item.path`` when present and truthy, else fall back to ``item.fspath``.
        path = getattr(item, "path", None) or item.fspath
        marker_names = names_by_path.get(path)
        if marker_names is None:
            marker_names = markers_for_path(path)
            names_by_path[path] = marker_names
        for marker_name in marker_names:
            item.add_marker(getattr(pytest.mark, marker_name))
+145
View File
@@ -0,0 +1,145 @@
"""Unit tests for tests/_taxonomy.py - the test-taxonomy classification module.
These tests pin the conservative classification behavior directly, without
running pytest collection. They import only the module under test (a test-support
module, not production code) and touch no filesystem.
"""
import re
import pytest
from tests._taxonomy import (
classify_test_path,
discover_markers,
markers_for_path,
normalize_marker_name,
)
# --- normalize_marker_name ---------------------------------------------------
def test_normalize_lowercases():
    """Mixed-case input comes back fully lowercased."""
    normalized = normalize_marker_name("Area_Security")
    assert normalized == "area_security"
def test_normalize_converts_nonalphanumeric_runs_to_underscore():
    """Each run of non-alphanumeric characters collapses to one underscore."""
    normalized = normalize_marker_name("owner--scope..test")
    assert normalized == "owner_scope_test"
def test_normalize_strips_leading_and_trailing_underscores():
    """Underscores at either end of the result are removed."""
    normalized = normalize_marker_name("__owner-scope__")
    assert normalized == "owner_scope"
# --- classify_test_path: one example per area --------------------------------
@pytest.mark.parametrize(
    ("filename", "expected_area", "expected_sub"),
    [
        ("test_owner_scope.py", "security", "owner_scope"),
        ("test_cookbook_helpers.py", "services", "cookbook"),
        ("test_routes_sessions.py", "routes", "routes"),
        ("test_backup_cli.py", "cli", "cli"),
        ("test_compare_js.py", "js", "js"),
        ("segmenter.test.mjs", "js", "js"),
        ("segmenter.test.js", "js", "js"),
        ("segmenter.test.ts", "js", "js"),
        ("test_helpers_import_state.py", "helpers", "helpers"),
        ("test_atomic_io.py", "unit", "atomic"),
    ],
)
def test_classify_examples(filename, expected_area, expected_sub):
    """Representative filenames land in the expected area and sub-area."""
    classification = classify_test_path(filename)
    actual = (classification.area, classification.sub_area)
    assert actual == (expected_area, expected_sub)
# --- classify_test_path: fallback --------------------------------------------
def test_unknown_filename_is_uncategorized():
    """A filename with no known keyword falls back to the uncategorized area."""
    area = classify_test_path("test_widget_gizmo_thing.py").area
    assert area == "uncategorized"
def test_uncategorized_sub_area_is_derived_from_filename_tokens():
    """An uncategorized file uses its filename tokens (minus ``test``) as sub-area."""
    classification = classify_test_path("test_archived_sessions_model_filter.py")
    actual = (classification.area, classification.sub_area)
    assert actual == ("uncategorized", "archived_sessions_model_filter")
# --- markers_for_path --------------------------------------------------------
def test_markers_for_path_returns_one_area_and_one_sub():
    """Exactly one ``area_*`` and one ``sub_*`` marker are returned, in that order."""
    markers = markers_for_path("test_owner_scope.py")
    assert markers == ("area_security", "sub_owner_scope")
    area_count = sum(1 for name in markers if name.startswith("area_"))
    sub_count = sum(1 for name in markers if name.startswith("sub_"))
    assert area_count == 1
    assert sub_count == 1
def test_markers_for_path_are_normalized():
    """Marker names contain only ``[a-z0-9_]``, even for awkward filenames."""
    markers = markers_for_path("test_foo-bar.py")
    assert markers == ("area_uncategorized", "sub_foo_bar")
    assert all(re.fullmatch(r"[a-z0-9_]+", name) for name in markers)
# --- discover_markers --------------------------------------------------------
def test_discover_markers_is_sorted_and_deduplicated():
    """Repeated paths contribute once, and the names come back sorted."""
    repeated = "test_owner_scope.py"
    markers = discover_markers([repeated, repeated, "test_cookbook_helpers.py"])
    expected = (
        "area_security",
        "area_services",
        "sub_cookbook",
        "sub_owner_scope",
    )
    assert markers == tuple(sorted(set(markers)))
    assert markers == expected
def test_discover_markers_includes_area_and_sub():
    """Both marker families are present in the discovered names."""
    markers = discover_markers(["test_owner_scope.py"])
    prefixes_seen = {name.split("_", 1)[0] + "_" for name in markers}
    assert "area_" in prefixes_seen
    assert "sub_" in prefixes_seen
# --- edge cases --------------------------------------------------------------
def test_normalize_all_symbols_becomes_empty():
    """Input with no alphanumerics normalizes to the empty string."""
    normalized = normalize_marker_name("@@@")
    assert normalized == ""
def test_bare_test_filename_is_fully_uncategorized():
    """A bare ``test.py`` has no tokens left, so area and sub-area both fall back."""
    classification = classify_test_path("tests/test.py")
    actual = (classification.area, classification.sub_area)
    assert actual == ("uncategorized", "uncategorized")
def test_markers_for_bare_test_filename():
    """A bare ``test.py`` yields the two fallback marker names."""
    markers = markers_for_path("tests/test.py")
    for expected in ("area_uncategorized", "sub_uncategorized"):
        assert expected in markers
@pytest.mark.parametrize(
    "path",
    [
        "tests/helpers/test_module_isolation.py",
        "/work/repo/tests/helpers/test_module_isolation.py",
    ],
)
def test_file_under_helpers_dir_is_helpers(path):
    """Files below ``tests/helpers/`` are helpers regardless of their filename."""
    classification = classify_test_path(path)
    actual = (classification.area, classification.sub_area)
    assert actual == ("helpers", "helpers")
# --- priority contract -------------------------------------------------------
def test_security_beats_services_when_both_tokens_present():
    """Security outranks services when a filename carries tokens from both."""
    classification = classify_test_path("test_email_owner_scope.py")
    actual = (classification.area, classification.sub_area)
    assert actual == ("security", "owner_scope")
def test_unrelated_helpers_ancestor_is_not_helpers():
    """An ancestor directory merely named ``helpers`` does not trigger the helpers rule."""
    classification = classify_test_path(
        "/work/helpers/odysseus/tests/test_owner_scope.py"
    )
    actual = (classification.area, classification.sub_area)
    assert actual == ("security", "owner_scope")