test: add report-only order-sensitivity runner (#3982)

* test: add report-only order-sensitivity runner

* test: report cwd in order-sensitivity runner
This commit is contained in:
Alexandre Teixeira
2026-06-15 07:49:47 +01:00
committed by GitHub
parent a172522d87
commit 2cf8bd14ae
3 changed files with 455 additions and 0 deletions
+54
View File
@@ -83,6 +83,60 @@ python3 -m pytest tests/test_auth_config_lock_concurrency.py
python3 -m pytest -m slow
```
## Order-sensitivity reporting (report-only)
`tests/run_order_report.py` runs pytest with the collected test items shuffled
by a seeded RNG, to surface order-sensitive tests (hidden coupling through
shared import state, module caches, databases, etc.). It is report-only: it is
not wired into CI, adds no gate, and changes no normal pytest collection or
ordering - the shuffle exists only inside this runner. The seed is always
printed, and pytest targets/options go after a literal `--`:
```bash
python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q
python3 tests/run_order_report.py -- tests/cli/ -q # generates and prints a seed
```
The same seed reproduces the same order when the reported working directory,
pytest target arguments, and test environment are also the same. The runner
prints all command arguments with shell-safe POSIX quoting and uses the
invoking Python interpreter.
A generated-seed run starts with output like:
```text
[order-report] working directory: /path/to/odysseus
[order-report] shuffling test order with seed 284734921
[order-report] reproduce from this working directory with the same test environment:
[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
```
Run the printed command from the reported working directory to reproduce the
same fixed-seed order; the rerun prints the same header:
```text
[order-report] working directory: /path/to/odysseus
[order-report] shuffling test order with seed 284734921
[order-report] reproduce from this working directory with the same test environment:
[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
```
Pytest output remains visible between the report header and footer. A failing
run ends with pytest's normal failure report followed by:
```text
FAILED tests/example_test.py::test_example - AssertionError
[order-report] seed 284734921: pytest exit code 1 (report-only; fix order-sensitive failures in separate scoped PRs)
```
Failures discovered this way are real isolation bugs: fix them in separate
scoped PRs - do not silence them with `skip`/`xfail`, and do not "fix" them by
depending on a particular order.
The runner propagates pytest's exit code, so it composes with normal local
workflows; "report-only" means it is not a CI gate, not that failures are
swallowed.
## Core principles
- Keep PRs small and homogeneous: one kind of change per PR.
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""Report-only randomized test-order runner (issue #3973).
Runs pytest with the collected test items shuffled by a seeded RNG so
order-sensitive tests (hidden coupling through shared import state, module
caches, databases, etc.) surface locally. The seed is always printed, so any
failing order is reproducible with ``--seed``.
This runner is report-only: it is not wired into CI, adds no gate, and does
not change normal pytest collection or ordering. Failures it discovers should
be fixed in separate scoped PRs, not silenced here.
Examples:
python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q
python3 tests/run_order_report.py -- tests/cli/ -q # generates and prints a seed
The shuffle is applied through a local ``pytest_collection_modifyitems`` hook
passed to ``pytest.main`` as an in-process plugin; no conftest or global
plugin is involved. Reproduction requires the reported working directory,
seed, pytest arguments, and test environment. The exit code is pytest's own.
"""
from __future__ import annotations
import argparse
import random
import shlex
import sys
from collections.abc import Callable, Sequence
from pathlib import Path
# Largest accepted seed. Seeds are limited to the unsigned 32-bit range so a
# value copied from a report line into a reproduction command stays short.
SEED_MAX = (1 << 32) - 1
def shuffle_items(items: list, seed: int) -> None:
    """Reorder ``items`` in place with a permutation determined only by ``seed``.

    A private ``random.Random`` instance is created per call, so the global
    random state is neither read nor modified.
    """
    rng = random.Random(seed)
    rng.shuffle(items)
class OrderShuffle:
    """In-process pytest plugin that reorders collected items by a fixed seed.

    An instance is handed to ``pytest.main`` through ``plugins=[...]``; the
    shuffle therefore exists only for runs started by this runner.
    """

    def __init__(self, seed: int):
        # Seed used for every collection this plugin instance modifies.
        self.seed = seed

    def pytest_collection_modifyitems(self, items: list) -> None:
        """Shuffle the collected ``items`` in place using ``self.seed``."""
        shuffle_items(items, seed=self.seed)
def generate_seed() -> int:
    """Return a fresh seed in ``[0, SEED_MAX]`` for runs without ``--seed``.

    The value comes from ``random.SystemRandom`` so it does not depend on the
    state of the module-level random generator.
    """
    system_rng = random.SystemRandom()
    return system_rng.randrange(0, SEED_MAX + 1)
def seed_type(value: str) -> int:
    """argparse ``type=`` converter: parse a seed in ``[0, SEED_MAX]``.

    Args:
        value: Raw command-line text given to ``--seed``.

    Returns:
        The parsed seed.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer or lies
            outside ``[0, SEED_MAX]``. argparse turns this into a usage error
            that shows the message below.
    """
    try:
        number = int(value)
    except ValueError:
        # A bare ValueError makes argparse print the generic
        # "invalid seed_type value" text; raise an explicit message instead.
        raise argparse.ArgumentTypeError(
            f"seed must be an integer, got {value!r}"
        ) from None
    if not 0 <= number <= SEED_MAX:
        raise argparse.ArgumentTypeError(
            f"seed must be between 0 and {SEED_MAX}, got {value!r}"
        )
    return number
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser used by the order-sensitivity runner.

    The parser knows one option, ``--seed``, and collects every remaining
    positional argument into ``pytest_args`` for forwarding to pytest.
    """
    description = (
        "Run pytest with randomized test order to surface order-sensitive "
        "tests. Report-only: prints the seed used and propagates pytest's "
        "exit code; it changes no normal pytest behavior."
    )
    epilog = (
        "Pass pytest targets and options after a literal -- separator, "
        "e.g.: run_order_report.py --seed 123 -- tests/cli/ -q"
    )
    parser = argparse.ArgumentParser(
        prog="run_order_report.py",
        description=description,
        epilog=epilog,
    )

    # Optional seed; when absent the attribute stays None and the caller
    # generates one.
    parser.add_argument(
        "--seed",
        type=seed_type,
        help="shuffle seed; omitted: a seed is generated and printed",
    )
    # Everything after the literal "--" lands here untouched.
    parser.add_argument(
        "pytest_args",
        nargs="*",
        metavar="-- PYTEST_ARGS",
        help="pytest targets/options forwarded after a literal --",
    )
    return parser
def runner_path() -> str:
    """Return this script's resolved absolute path as a string.

    Used in the printed reproduction command so it can be copy-pasted.
    """
    script = Path(__file__).resolve()
    return str(script)
def print_report_header(seed: int, pytest_args: Sequence[str]) -> None:
    """Print the working directory, the seed, and a reproduction command.

    The command is built from the invoking interpreter, this script's absolute
    path, the seed, and the forwarded pytest arguments, then quoted with
    ``shlex.join``.
    """
    command_parts = [sys.executable, runner_path(), "--seed", str(seed), "--"]
    command_parts.extend(pytest_args)

    report_lines = (
        f"[order-report] working directory: {Path.cwd()}",
        f"[order-report] shuffling test order with seed {seed}",
        "[order-report] reproduce from this working directory with the same "
        "test environment:",
        f"[order-report] reproduce with: {shlex.join(command_parts)}",
    )
    for line in report_lines:
        print(line)
def print_report_footer(seed: int, exit_code: int) -> None:
    """Print a closing line that repeats the seed next to the run's outcome.

    Exit code 0 is reported as "no failures"; any other value is shown as the
    raw pytest exit code.
    """
    if exit_code == 0:
        outcome = "no failures"
    else:
        outcome = f"pytest exit code {exit_code}"
    message = (
        f"[order-report] seed {seed}: {outcome} "
        "(report-only; fix order-sensitive failures in separate scoped PRs)"
    )
    print(message)
def run(
    argv: Sequence[str] | None = None,
    pytest_main: Callable[..., int] | None = None,
) -> int:
    """Run pytest once with a seeded shuffle and report the seed around it.

    Args:
        argv: Command-line arguments without the program name; ``None`` lets
            argparse read ``sys.argv``.
        pytest_main: Callable with the shape of ``pytest.main`` - it receives
            ``(args, plugins=...)`` and returns an exit code. Tests inject a
            fake here to avoid a nested pytest run; ``None`` selects the real
            ``pytest.main``.

    Returns:
        The exit code returned by ``pytest_main``, converted to ``int``.
    """
    options = build_parser().parse_args(argv)

    seed = options.seed
    if seed is None:
        seed = generate_seed()
    forwarded = list(options.pytest_args)

    print_report_header(seed, forwarded)

    if pytest_main is None:
        # Imported lazily so pytest is only loaded when no callable is injected.
        import pytest

        pytest_main = pytest.main

    shuffle_plugin = OrderShuffle(seed)
    status = int(pytest_main(forwarded, plugins=[shuffle_plugin]))

    print_report_footer(seed, status)
    return status
def main() -> int:
    """Console entry point: forward the process arguments to ``run``."""
    cli_args = sys.argv[1:]
    return run(cli_args)
# Script entry: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())
+245
View File
@@ -0,0 +1,245 @@
"""Direct tests for the order-sensitivity report runner (tests/run_order_report.py).
The shuffle and argument plumbing are tested without spawning pytest: the
shuffle helpers are asserted directly and ``run`` is exercised with an
injected fake ``pytest.main``. A small subprocess test then proves the seed is
applied end to end (reproducible, seed visible) against a throwaway test file,
never the real suite.
"""
from __future__ import annotations
import shlex
import subprocess
import sys
from pathlib import Path
import pytest
from tests.run_order_report import (
SEED_MAX,
OrderShuffle,
generate_seed,
run,
shuffle_items,
)
# Two levels above this test file.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Absolute path of the runner script under test.
RUNNER = REPO_ROOT.joinpath("tests", "run_order_report.py")
class _FakePytestMain:
    """Stand-in for ``pytest.main`` that records each call.

    Every call stores copies of the forwarded arguments and plugins in
    ``calls`` and returns the exit code configured at construction time.
    """

    def __init__(self, returncode: int = 0):
        # One (args, plugins) tuple per call, in call order.
        self.calls: list[tuple[list[str], list]] = []
        self.returncode = returncode

    def __call__(self, args: list[str], plugins: list) -> int:
        # Copy both lists so later mutation by the caller cannot alter the record.
        recorded = ([*args], [*plugins])
        self.calls.append(recorded)
        return self.returncode
# --- shuffle determinism -----------------------------------------------------
def test_same_seed_shuffles_identically():
    """Two equal lists shuffled with one seed end up in the same order."""
    left, right = list(range(20)), list(range(20))
    for sequence in (left, right):
        shuffle_items(sequence, seed=123)
    assert left == right
def test_different_seeds_shuffle_differently():
    """Seeds 123 and 321 produce different orders for the same input."""
    orders = {}
    for seed in (123, 321):
        numbers = list(range(20))
        shuffle_items(numbers, seed=seed)
        orders[seed] = numbers
    assert orders[123] != orders[321]
def test_shuffle_preserves_items():
    """Shuffling only reorders: no element is lost, added, or duplicated."""
    baseline = list(range(20))
    shuffled = baseline.copy()
    shuffle_items(shuffled, seed=123)
    assert sorted(shuffled) == baseline
def test_plugin_hook_matches_shuffle_items():
    """The plugin hook yields the same order as calling the helper directly."""
    via_helper = list(range(20))
    shuffle_items(via_helper, seed=7)

    via_plugin = list(range(20))
    plugin = OrderShuffle(seed=7)
    plugin.pytest_collection_modifyitems(via_plugin)

    assert via_plugin == via_helper
# --- argument parsing and pytest invocation ----------------------------------
def test_pytest_args_after_separator_are_forwarded():
    """Arguments after ``--`` reach pytest unchanged, with one shuffle plugin."""
    recorder = _FakePytestMain()
    run(["--seed", "123", "--", "tests/cli/", "-q"], pytest_main=recorder)
    # Unpacking a one-element pattern also checks there was exactly one call.
    [(forwarded, plugins)] = recorder.calls
    assert forwarded == ["tests/cli/", "-q"]
    assert [type(plugin) for plugin in plugins] == [OrderShuffle]
def test_explicit_seed_reaches_plugin():
    """The value given to ``--seed`` is the seed stored on the plugin."""
    recorder = _FakePytestMain()
    run(["--seed", "123", "--", "-q"], pytest_main=recorder)
    [(_, plugins)] = recorder.calls
    shuffle_plugin = plugins[0]
    assert shuffle_plugin.seed == 123
def test_pytest_exit_code_is_propagated():
    """``run`` returns whatever exit code the pytest callable returned."""
    failing = _FakePytestMain(returncode=3)
    exit_code = run(["--seed", "123", "--", "-q"], pytest_main=failing)
    assert exit_code == 3
@pytest.mark.parametrize("value", ["abc", "-1", str(SEED_MAX + 1)])
def test_invalid_seed_is_rejected_before_pytest(value):
    """Non-integer and out-of-range seeds exit with code 2 before pytest runs."""
    recorder = _FakePytestMain()
    argv = ["--seed", value, "--", "-q"]
    with pytest.raises(SystemExit) as raised:
        run(argv, pytest_main=recorder)
    assert raised.value.code == 2
    assert not recorder.calls
# --- seed reporting -----------------------------------------------------------
def test_explicit_seed_is_printed_with_repro_command(capsys):
    """The header shows the seed and a shell-quoted reproduction command."""
    argv = ["--seed", "123", "--", "tests/cli/", "-q"]
    run(argv, pytest_main=_FakePytestMain())
    captured = capsys.readouterr().out

    assert "[order-report] shuffling test order with seed 123" in captured
    # The expected command is interpreter + runner path + the same arguments.
    expected_command = shlex.join([sys.executable, str(RUNNER), *argv])
    assert f"reproduce with: {expected_command}" in captured
def test_working_directory_is_reported(capsys, monkeypatch, tmp_path):
    """The header names the directory the runner was started from."""
    monkeypatch.chdir(tmp_path)
    run(["--seed", "123", "--", "-q"], pytest_main=_FakePytestMain())
    header = capsys.readouterr().out
    expected_line = f"[order-report] working directory: {tmp_path}"
    assert expected_line in header
def test_footer_repeats_seed_and_outcome(capsys):
    """A failing run's footer repeats the seed together with the exit code."""
    failing = _FakePytestMain(returncode=1)
    run(["--seed", "123", "--", "-q"], pytest_main=failing)
    printed = capsys.readouterr().out
    assert "[order-report] seed 123: pytest exit code 1" in printed
def test_generated_seed_is_printed_and_used(capsys):
    """Without ``--seed`` the printed seed is in range and reaches the plugin."""
    recorder = _FakePytestMain()
    run(["--", "-q"], pytest_main=recorder)

    lines = capsys.readouterr().out.splitlines()
    seed_line = next(line for line in lines if "with seed" in line)
    # The seed is the text after the last "seed " on the header line.
    seed_text = seed_line.rpartition("seed ")[2]
    reported = int(seed_text)
    assert 0 <= reported <= SEED_MAX

    [(_, plugins)] = recorder.calls
    assert plugins[0].seed == reported
def test_generate_seed_is_within_range():
    """Five generated seeds all fall inside ``[0, SEED_MAX]``."""
    for _ in range(5):
        assert 0 <= generate_seed() <= SEED_MAX
# --- end-to-end: the seed really drives collection order (real subprocess) ---
_SAMPLE_TESTS = "".join(
f"def test_{name}():\n pass\n\n"
for name in ("alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel")
)
@pytest.fixture(scope="module")
def sample_suite(tmp_path_factory) -> Path:
    """Create a temporary directory holding ``test_sample.py`` and return it.

    The directory is created once per module and contains the eight trivial
    tests from ``_SAMPLE_TESTS``.
    """
    directory = tmp_path_factory.mktemp("order_report_suite")
    directory.joinpath("test_sample.py").write_text(_SAMPLE_TESTS, encoding="utf-8")
    return directory
def _collect_order(sample_suite: Path, seed: int) -> tuple[list[str], str]:
    """Run the runner in ``--collect-only`` mode and return (test ids, stdout).

    The runner is started as a subprocess inside ``sample_suite`` with the
    given seed. Test ids are the stdout lines that contain ``::``.
    """
    command = [
        sys.executable,
        str(RUNNER),
        "--seed",
        str(seed),
        "--",
        "--collect-only",
        "-q",
        "-p",
        "no:cacheprovider",
        "test_sample.py",
    ]
    completed = subprocess.run(
        command,
        cwd=sample_suite,
        capture_output=True,
        text=True,
    )
    assert completed.returncode == 0, completed.stderr or completed.stdout

    stdout = completed.stdout
    node_ids = [line for line in stdout.splitlines() if "::" in line]
    # The sample module defines exactly eight tests.
    assert len(node_ids) == 8, stdout
    return node_ids, stdout
def test_subprocess_same_seed_is_reproducible(sample_suite):
    """Two real runs with seed 123 collect in the same order and print the seed."""
    runs = [_collect_order(sample_suite, seed=123) for _ in range(2)]
    (first_ids, first_stdout), (repeat_ids, _) = runs
    assert first_ids == repeat_ids
    assert "[order-report] shuffling test order with seed 123" in first_stdout
def test_subprocess_different_seeds_change_order(sample_suite):
    """Real runs with seeds 123 and 321 collect the sample tests differently."""
    ids_for_123 = _collect_order(sample_suite, seed=123)[0]
    ids_for_321 = _collect_order(sample_suite, seed=321)[0]
    assert ids_for_123 != ids_for_321
def test_subprocess_failure_exit_code_and_footer(tmp_path):
    """A real failing pytest run keeps pytest's exit code and reports the seed."""
    failing_module = tmp_path / "test_failure.py"
    failing_module.write_text(
        "def test_failure():\n assert False\n",
        encoding="utf-8",
    )

    command = [
        sys.executable,
        str(RUNNER),
        "--seed",
        "123",
        "--",
        "test_failure.py",
        "-q",
    ]
    completed = subprocess.run(
        command,
        cwd=tmp_path,
        capture_output=True,
        text=True,
    )

    assert completed.returncode == 1
    # The printed reproduction command is the quoted form of the command just run.
    expected_command = shlex.join(command)
    assert f"reproduce with: {expected_command}" in completed.stdout
    assert "[order-report] seed 123: pytest exit code 1" in completed.stdout