test: add report-only order-sensitivity runner (#3982)

* test: add report-only order-sensitivity runner
* test: report cwd in order-sensitivity runner
2026-06-15 17:25:26 -04:00 · 2026-06-15 07:49:47 +01:00
parent a172522d87
commit 2cf8bd14ae
3 changed files with 455 additions and 0 deletions
@@ -83,6 +83,60 @@ python3 -m pytest tests/test_auth_config_lock_concurrency.py
 python3 -m pytest -m slow
 ```

+## Order-sensitivity reporting (report-only)
+
+`tests/run_order_report.py` runs pytest with the collected test items shuffled
+by a seeded RNG, to surface order-sensitive tests (hidden coupling through
+shared import state, module caches, databases, etc.). It is report-only: it is
+not wired into CI, adds no gate, and changes no normal pytest collection or
+ordering - the shuffle exists only inside this runner. The seed is always
+printed, and pytest targets/options go after a literal `--`:
+
+```bash
+python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q
+python3 tests/run_order_report.py -- tests/cli/ -q   # generates and prints a seed
+```
+
+The same seed reproduces the same order when the reported working directory,
+pytest target arguments, and test environment are also the same. The runner
+prints all command arguments with shell-safe POSIX quoting and uses the
+invoking Python interpreter.
+
+A generated-seed run starts with output like:
+
+```text
+[order-report] working directory: /path/to/odysseus
+[order-report] shuffling test order with seed 284734921
+[order-report] reproduce from this working directory with the same test environment:
+[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
+```
+
+Run the printed command from the reported working directory to reproduce the
+same fixed-seed order; the rerun prints the same header:
+
+```text
+[order-report] working directory: /path/to/odysseus
+[order-report] shuffling test order with seed 284734921
+[order-report] reproduce from this working directory with the same test environment:
+[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
+```
+
+Pytest output remains visible between the report header and footer. A failing
+run ends with pytest's normal failure report followed by:
+
+```text
+FAILED tests/example_test.py::test_example - AssertionError
+[order-report] seed 284734921: pytest exit code 1 (report-only; fix order-sensitive failures in separate scoped PRs)
+```
+
+Failures discovered this way are real isolation bugs: fix them in separate
+scoped PRs - do not silence them with `skip`/`xfail`, and do not "fix" them by
+depending on a particular order.
+
+The runner propagates pytest's exit code, so it composes with normal local
+workflows; "report-only" means it is not a CI gate, not that failures are
+swallowed.
+
 ## Core principles

 - Keep PRs small and homogeneous: one kind of change per PR.
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""Report-only randomized test-order runner (issue #3973).
+
+Runs pytest with the collected test items shuffled by a seeded RNG so
+order-sensitive tests (hidden coupling through shared import state, module
+caches, databases, etc.) surface locally. The seed is always printed, so any
+failing order is reproducible with ``--seed``.
+
+This runner is report-only: it is not wired into CI, adds no gate, and does
+not change normal pytest collection or ordering. Failures it discovers should
+be fixed in separate scoped PRs, not silenced here.
+
+Examples:
+    python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q
+    python3 tests/run_order_report.py -- tests/cli/ -q   # generates and prints a seed
+
+The shuffle is applied through a local ``pytest_collection_modifyitems`` hook
+passed to ``pytest.main`` as an in-process plugin; no conftest or global
+plugin is involved. Reproduction requires the reported working directory,
+seed, pytest arguments, and test environment. The exit code is pytest's own.
+"""
+from __future__ import annotations
+
+import argparse
+import random
+import shlex
+import sys
+from collections.abc import Callable, Sequence
+from pathlib import Path
+
+# Seeds are kept in the non-negative 32-bit range so they stay short enough to
+# copy from a report line into a reproduction command.
+SEED_MAX = 2**32 - 1
+
+
+def shuffle_items(items: list, seed: int) -> None:
+    """Reorder ``items`` in place with a private RNG seeded by ``seed``."""
+    rng = random.Random(seed)
+    rng.shuffle(items)
+
+
+class OrderShuffle:
+    """In-process pytest plugin that reorders collected items by a fixed seed."""
+
+    def __init__(self, seed: int) -> None:
+        # Public attribute: the hook below reads it each time it is called.
+        self.seed = seed
+
+    def pytest_collection_modifyitems(self, items: list) -> None:
+        """Shuffle ``items`` in place using the seed given at construction."""
+        shuffle_items(items, seed=self.seed)
+
+
+def generate_seed() -> int:
+    """Draw a seed in ``[0, SEED_MAX]`` for runs that did not pass ``--seed``."""
+    system_rng = random.SystemRandom()
+    return system_rng.randint(0, SEED_MAX)
+
+
+def seed_type(value: str) -> int:
+    """argparse type: parse ``value`` as a seed in ``[0, SEED_MAX]``.
+
+    Raises ``argparse.ArgumentTypeError`` for both non-integer and
+    out-of-range input, so argparse reports a message that names the problem
+    instead of its generic ``invalid seed_type value`` fallback for a bare
+    ``ValueError``.
+    """
+    try:
+        number = int(value)
+    except ValueError:
+        # ``from None``: the int() traceback adds nothing to a CLI usage error.
+        raise argparse.ArgumentTypeError(
+            f"seed must be an integer, got {value!r}"
+        ) from None
+    if not 0 <= number <= SEED_MAX:
+        raise argparse.ArgumentTypeError(
+            f"seed must be between 0 and {SEED_MAX}, got {value!r}"
+        )
+    return number
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Create the CLI parser: an optional ``--seed`` plus forwarded pytest args."""
+    description = (
+        "Run pytest with randomized test order to surface order-sensitive "
+        "tests. Report-only: prints the seed used and propagates pytest's "
+        "exit code; it changes no normal pytest behavior."
+    )
+    epilog = (
+        "Pass pytest targets and options after a literal -- separator, "
+        "e.g.: run_order_report.py --seed 123 -- tests/cli/ -q"
+    )
+    parser = argparse.ArgumentParser(
+        prog="run_order_report.py",
+        description=description,
+        epilog=epilog,
+    )
+    # The seed is range-checked by ``seed_type`` while arguments are parsed.
+    parser.add_argument(
+        "--seed",
+        type=seed_type,
+        help="shuffle seed; omitted: a seed is generated and printed",
+    )
+    # Zero or more positionals; these are what ``run`` hands to pytest.
+    parser.add_argument(
+        "pytest_args",
+        nargs="*",
+        metavar="-- PYTEST_ARGS",
+        help="pytest targets/options forwarded after a literal --",
+    )
+    return parser
+
+
+def runner_path() -> str:
+    """Return this script's resolved absolute path for pasteable repro commands."""
+    script = Path(__file__)
+    return str(script.resolve())
+
+
+def print_report_header(seed: int, pytest_args: Sequence[str]) -> None:
+    """Print the cwd, the seed, and a shell-quoted command that replays this run."""
+    command = [sys.executable, runner_path(), "--seed", str(seed), "--"]
+    command.extend(pytest_args)
+    print(f"[order-report] working directory: {Path.cwd()}")
+    print(f"[order-report] shuffling test order with seed {seed}")
+    print(
+        "[order-report] reproduce from this working directory with the same "
+        "test environment:"
+    )
+    # shlex.join quotes each argument so the line can be pasted into a POSIX shell.
+    replay = shlex.join(command)
+    print(f"[order-report] reproduce with: {replay}")
+
+
+def print_report_footer(seed: int, exit_code: int) -> None:
+    """Print a closing line that repeats the seed and summarizes pytest's result."""
+    if exit_code == 0:
+        outcome = "no failures"
+    else:
+        outcome = f"pytest exit code {exit_code}"
+    note = "(report-only; fix order-sensitive failures in separate scoped PRs)"
+    print(f"[order-report] seed {seed}: {outcome} {note}")
+
+
+def run(
+    argv: Sequence[str] | None = None,
+    pytest_main: Callable[..., int] | None = None,
+) -> int:
+    """Run pytest once in shuffled order and return its exit code.
+
+    ``argv`` is handed to the parser from ``build_parser``. ``pytest_main`` is
+    a seam for tests: any callable that accepts ``(args, plugins=...)`` and
+    returns an exit code can stand in for ``pytest.main``, which is imported
+    only when no stand-in is supplied. The report header is printed before
+    pytest runs and the footer after it returns.
+    """
+    options = build_parser().parse_args(argv)
+    seed = generate_seed() if options.seed is None else options.seed
+    forwarded = list(options.pytest_args)
+    print_report_header(seed, forwarded)
+    if pytest_main is None:
+        # Deferred import: callers that inject ``pytest_main`` skip it entirely.
+        import pytest
+
+        pytest_main = pytest.main
+    shuffle_plugin = OrderShuffle(seed)
+    status = int(pytest_main(forwarded, plugins=[shuffle_plugin]))
+    print_report_footer(seed, status)
+    return status
+
+
+def main() -> int:
+    """Console entry point: run with the process's command-line arguments."""
+    cli_args = sys.argv[1:]
+    return run(cli_args)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,245 @@
+"""Direct tests for the order-sensitivity report runner (tests/run_order_report.py).
+
+The shuffle and argument plumbing are tested without spawning pytest: the
+shuffle helpers are asserted directly and ``run`` is exercised with an
+injected fake ``pytest.main``. A small subprocess test then proves the seed is
+applied end to end (reproducible, seed visible) against a throwaway test file,
+never the real suite.
+"""
+from __future__ import annotations
+
+import shlex
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+from tests.run_order_report import (
+    SEED_MAX,
+    OrderShuffle,
+    generate_seed,
+    run,
+    shuffle_items,
+)
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+RUNNER = REPO_ROOT / "tests" / "run_order_report.py"
+
+
+class _FakePytestMain:
+    """Stand-in for ``pytest.main`` that logs each call and returns a preset code."""
+
+    def __init__(self, returncode: int = 0):
+        self.returncode = returncode
+        self.calls: list[tuple[list[str], list]] = []
+
+    def __call__(self, args: list[str], plugins: list) -> int:
+        # Store copies so later mutation by the caller cannot alter the log.
+        record = (list(args), list(plugins))
+        self.calls.append(record)
+        return self.returncode
+
+
+# --- shuffle determinism -----------------------------------------------------
+
+
+def test_same_seed_shuffles_identically():
+    """Two equal lists shuffled with one seed end up in the same order."""
+    left, right = list(range(20)), list(range(20))
+    for sequence in (left, right):
+        shuffle_items(sequence, seed=123)
+    assert left == right
+
+
+def test_different_seeds_shuffle_differently():
+    """Seeds 123 and 321 give different orders for the same input."""
+    by_seed = {}
+    for seed in (123, 321):
+        by_seed[seed] = list(range(20))
+        shuffle_items(by_seed[seed], seed=seed)
+    assert by_seed[123] != by_seed[321]
+
+
+def test_shuffle_preserves_items():
+    """Shuffling permutes the list without adding or dropping elements."""
+    original = list(range(20))
+    shuffled = list(original)
+    shuffle_items(shuffled, seed=123)
+    assert sorted(shuffled) == original
+
+
+def test_plugin_hook_matches_shuffle_items():
+    """The plugin hook orders items exactly as ``shuffle_items`` does."""
+    via_helper = list(range(20))
+    via_hook = list(range(20))
+    shuffle_items(via_helper, seed=7)
+    OrderShuffle(seed=7).pytest_collection_modifyitems(via_hook)
+    assert via_hook == via_helper
+
+
+# --- argument parsing and pytest invocation ----------------------------------
+
+
+def test_pytest_args_after_separator_are_forwarded():
+    """Arguments after ``--`` reach pytest unchanged, with one shuffle plugin."""
+    recorder = _FakePytestMain()
+    run(["--seed", "123", "--", "tests/cli/", "-q"], pytest_main=recorder)
+    # Destructuring a one-element list also asserts there was exactly one call.
+    [(forwarded, plugins)] = recorder.calls
+    assert forwarded == ["tests/cli/", "-q"]
+    assert [type(plugin) for plugin in plugins] == [OrderShuffle]
+
+
+def test_explicit_seed_reaches_plugin():
+    """The ``--seed`` value is the seed stored on the plugin given to pytest."""
+    recorder = _FakePytestMain()
+    run(["--seed", "123", "--", "-q"], pytest_main=recorder)
+    [(_, plugins)] = recorder.calls
+    shuffle_plugin = plugins[0]
+    assert shuffle_plugin.seed == 123
+
+
+def test_pytest_exit_code_is_propagated():
+    """``run`` returns whatever exit code the pytest entry point returned."""
+    recorder = _FakePytestMain(returncode=3)
+    status = run(["--seed", "123", "--", "-q"], pytest_main=recorder)
+    assert status == 3
+
+
+@pytest.mark.parametrize("value", ["abc", "-1", str(SEED_MAX + 1)])
+def test_invalid_seed_is_rejected_before_pytest(value):
+    """A bad seed exits with status 2 and the pytest entry point is never called."""
+    recorder = _FakePytestMain()
+    argv = ["--seed", value, "--", "-q"]
+    with pytest.raises(SystemExit) as caught:
+        run(argv, pytest_main=recorder)
+    assert caught.value.code == 2
+    assert recorder.calls == []
+
+
+# --- seed reporting -----------------------------------------------------------
+
+
+def test_explicit_seed_is_printed_with_repro_command(capsys):
+    """The header shows the seed and a shell-quoted command that replays the run."""
+    pytest_args = ["tests/cli/", "-q"]
+    run(["--seed", "123", "--", *pytest_args], pytest_main=_FakePytestMain())
+    printed = capsys.readouterr().out
+    assert "[order-report] shuffling test order with seed 123" in printed
+    expected_command = [sys.executable, str(RUNNER), "--seed", "123", "--"]
+    expected_command += pytest_args
+    expected = shlex.join(expected_command)
+    assert f"reproduce with: {expected}" in printed
+
+
+def test_working_directory_is_reported(capsys, monkeypatch, tmp_path):
+    """The header names the directory the runner was started from."""
+    monkeypatch.chdir(tmp_path)
+    run(["--seed", "123", "--", "-q"], pytest_main=_FakePytestMain())
+    expected_line = f"[order-report] working directory: {tmp_path}"
+    assert expected_line in capsys.readouterr().out
+
+
+def test_footer_repeats_seed_and_outcome(capsys):
+    """After a failing run the footer repeats the seed with pytest's exit code."""
+    failing = _FakePytestMain(returncode=1)
+    run(["--seed", "123", "--", "-q"], pytest_main=failing)
+    printed = capsys.readouterr().out
+    assert "[order-report] seed 123: pytest exit code 1" in printed
+
+
+def test_generated_seed_is_printed_and_used(capsys):
+    """Without ``--seed`` the printed seed is in range and is the one the plugin gets."""
+    recorder = _FakePytestMain()
+    run(["--", "-q"], pytest_main=recorder)
+    lines = capsys.readouterr().out.splitlines()
+    announcement = next(line for line in lines if "with seed" in line)
+    # The seed is the text after the last "seed " on the announcement line.
+    printed_seed = int(announcement.rsplit("seed ", 1)[1])
+    assert 0 <= printed_seed <= SEED_MAX
+    [(_, plugins)] = recorder.calls
+    assert plugins[0].seed == printed_seed
+
+
+def test_generate_seed_is_within_range():
+    """Five generated seeds all fall inside ``[0, SEED_MAX]``."""
+    for _ in range(5):
+        assert 0 <= generate_seed() <= SEED_MAX
+
+
+# --- end-to-end: the seed really drives collection order (real subprocess) ---
+
+_SAMPLE_TESTS = "".join(
+    f"def test_{name}():\n    pass\n\n"
+    for name in ("alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel")
+)
+
+
+@pytest.fixture(scope="module")
+def sample_suite(tmp_path_factory) -> Path:
+    """Create a temporary directory holding one file of eight trivial tests."""
+    directory = tmp_path_factory.mktemp("order_report_suite")
+    test_file = directory / "test_sample.py"
+    test_file.write_text(_SAMPLE_TESTS, encoding="utf-8")
+    return directory
+
+
+def _collect_order(sample_suite: Path, seed: int) -> tuple[list[str], str]:
+    """Collect ``test_sample.py`` through the runner; return (test ids, stdout).
+
+    Runs the runner as a subprocess from ``sample_suite`` with
+    ``--collect-only -q`` and treats every stdout line containing ``::`` as a
+    collected test id. Asserts a zero exit code and exactly eight ids.
+    """
+    runner_cmd = [sys.executable, str(RUNNER), "--seed", str(seed), "--"]
+    pytest_cmd = ["--collect-only", "-q", "-p", "no:cacheprovider", "test_sample.py"]
+    completed = subprocess.run(
+        runner_cmd + pytest_cmd,
+        cwd=sample_suite,
+        capture_output=True,
+        text=True,
+    )
+    assert completed.returncode == 0, completed.stderr or completed.stdout
+    stdout = completed.stdout
+    test_ids = [line for line in stdout.splitlines() if "::" in line]
+    assert len(test_ids) == 8, stdout
+    return test_ids, stdout
+
+
+def test_subprocess_same_seed_is_reproducible(sample_suite):
+    """Two real runs with seed 123 collect in the same order and print the seed."""
+    first_ids, first_stdout = _collect_order(sample_suite, seed=123)
+    second_ids, _ = _collect_order(sample_suite, seed=123)
+    assert first_ids == second_ids
+    assert "[order-report] shuffling test order with seed 123" in first_stdout
+
+
+def test_subprocess_different_seeds_change_order(sample_suite):
+    """Real runs with seeds 123 and 321 collect the sample tests in different orders."""
+    ids_for_123, _ = _collect_order(sample_suite, seed=123)
+    ids_for_321, _ = _collect_order(sample_suite, seed=321)
+    assert ids_for_123 != ids_for_321
+
+
+def test_subprocess_failure_exit_code_and_footer(tmp_path):
+    """A real failing pytest run keeps pytest's exit code and reports the seed."""
+    failing_test = tmp_path / "test_failure.py"
+    failing_test.write_text(
+        "def test_failure():\n    assert False\n",
+        encoding="utf-8",
+    )
+    command = [sys.executable, str(RUNNER), "--seed", "123", "--"]
+    command += ["test_failure.py", "-q"]
+
+    completed = subprocess.run(
+        command,
+        cwd=tmp_path,
+        capture_output=True,
+        text=True,
+    )
+
+    assert completed.returncode == 1
+    # The header must echo exactly the command this test just ran.
+    assert f"reproduce with: {shlex.join(command)}" in completed.stdout
+    assert "[order-report] seed 123: pytest exit code 1" in completed.stdout