diff --git a/tests/README.md b/tests/README.md index 4fb909294..e4a43e434 100644 --- a/tests/README.md +++ b/tests/README.md @@ -83,6 +83,30 @@ python3 -m pytest tests/test_auth_config_lock_concurrency.py python3 -m pytest -m slow ``` +## Order-sensitivity reporting (report-only) + +`tests/run_order_report.py` runs pytest with the collected test items shuffled +by a seeded RNG, to surface order-sensitive tests (hidden coupling through +shared import state, module caches, databases, etc.). It is report-only: it is +not wired into CI, adds no gate, and changes no normal pytest collection or +ordering - the shuffle exists only inside this runner. The seed is always +printed, and pytest targets/options go after a literal `--`: + +```bash +python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q +python3 tests/run_order_report.py -- tests/cli/ -q # generates and prints a seed +``` + +The same seed reproduces the same order, so to reproduce a failing run, re-run +with the seed it printed (the runner prints an exact reproduction command at +the top of each run). Failures discovered this way are real isolation bugs: +fix them in separate scoped PRs - do not silence them with `skip`/`xfail`, and +do not "fix" them by depending on a particular order. + +The runner propagates pytest's exit code, so it composes with normal local +workflows; "report-only" means it is not a CI gate, not that failures are +swallowed. + ## Core principles - Keep PRs small and homogeneous: one kind of change per PR. diff --git a/tests/run_order_report.py b/tests/run_order_report.py new file mode 100644 index 000000000..456fa0c65 --- /dev/null +++ b/tests/run_order_report.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +"""Report-only randomized test-order runner (issue #3973). + +Runs pytest with the collected test items shuffled by a seeded RNG so +order-sensitive tests (hidden coupling through shared import state, module +caches, databases, etc.) surface locally. The seed is always printed, so any +failing order is reproducible with ``--seed``. + +This runner is report-only: it is not wired into CI, adds no gate, and does +not change normal pytest collection or ordering. Failures it discovers should +be fixed in separate scoped PRs, not silenced here. + +Examples: + python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q + python3 tests/run_order_report.py -- tests/cli/ -q # generates and prints a seed + +The shuffle is applied through a local ``pytest_collection_modifyitems`` hook +passed to ``pytest.main`` as an in-process plugin; no conftest or global +plugin is involved. The exit code is pytest's own. +""" +from __future__ import annotations + +import argparse +import random +import shlex +import sys +from collections.abc import Callable, Sequence +from pathlib import Path + +# Seeds are kept in the non-negative 32-bit range so they stay short enough to +# copy from a report line into a reproduction command. +SEED_MAX = 2**32 - 1 + + +def shuffle_items(items: list, seed: int) -> None: + """Deterministically shuffle ``items`` in place using ``seed``.""" + random.Random(seed).shuffle(items) + + +class OrderShuffle: + """Local pytest plugin that shuffles collected items with a fixed seed.""" + + def __init__(self, seed: int): + self.seed = seed + + def pytest_collection_modifyitems(self, items: list) -> None: + shuffle_items(items, self.seed) + + +def generate_seed() -> int: + """Generate a fresh seed for a run that did not pass ``--seed``.""" + return random.SystemRandom().randint(0, SEED_MAX) + + +def seed_type(value: str) -> int: + """argparse type: a seed in ``[0, SEED_MAX]``.""" + number = int(value) + if not 0 <= number <= SEED_MAX: + raise argparse.ArgumentTypeError( + f"seed must be between 0 and {SEED_MAX}, got {value!r}" + ) + return number + + +def build_parser() -> argparse.ArgumentParser: + """Build the argument parser for the order-sensitivity runner.""" + parser = argparse.ArgumentParser( + prog="run_order_report.py", + description=( + "Run pytest with randomized test order to surface order-sensitive " + "tests. Report-only: prints the seed used and propagates pytest's " + "exit code; it changes no normal pytest behavior." + ), + epilog=( + "Pass pytest targets and options after a literal -- separator, " + "e.g.: run_order_report.py --seed 123 -- tests/cli/ -q" + ), + ) + parser.add_argument( + "--seed", + type=seed_type, + help="shuffle seed; omitted: a seed is generated and printed", + ) + parser.add_argument( + "pytest_args", + nargs="*", + metavar="-- PYTEST_ARGS", + help="pytest targets/options forwarded after a literal --", + ) + return parser + + +def runner_path() -> str: + """Return an absolute path for copy-pasteable reproduction commands.""" + return str(Path(__file__).resolve()) + + +def print_report_header(seed: int, pytest_args: Sequence[str]) -> None: + """Print the seed and an exact reproduction command before running.""" + repro = [ + sys.executable, + runner_path(), + "--seed", + str(seed), + "--", + *pytest_args, + ] + print(f"[order-report] shuffling test order with seed {seed}") + print(f"[order-report] reproduce with: {shlex.join(repro)}") + + +def print_report_footer(seed: int, exit_code: int) -> None: + """Print the outcome with the seed again, after possibly long pytest output.""" + outcome = "no failures" if exit_code == 0 else f"pytest exit code {exit_code}" + print( + f"[order-report] seed {seed}: {outcome} " + "(report-only; fix order-sensitive failures in separate scoped PRs)" + ) + + +def run( + argv: Sequence[str] | None = None, + pytest_main: Callable[..., int] | None = None, +) -> int: + """Parse ``argv``, run pytest with shuffled item order, and report the seed. + + ``pytest_main`` is injected so tests can assert on the forwarded arguments + and plugin without running a nested pytest. It must match ``pytest.main``: + accept ``(args, plugins=...)`` and return an exit code. + """ + namespace = build_parser().parse_args(argv) + seed = namespace.seed if namespace.seed is not None else generate_seed() + pytest_args = list(namespace.pytest_args) + print_report_header(seed, pytest_args) + if pytest_main is None: + import pytest + + pytest_main = pytest.main + exit_code = int(pytest_main(pytest_args, plugins=[OrderShuffle(seed)])) + print_report_footer(seed, exit_code) + return exit_code + + +def main() -> int: + """Console entry point.""" + return run(sys.argv[1:]) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_run_order_report.py b/tests/test_run_order_report.py new file mode 100644 index 000000000..c61e0e717 --- /dev/null +++ b/tests/test_run_order_report.py @@ -0,0 +1,238 @@ +"""Direct tests for the order-sensitivity report runner (tests/run_order_report.py). + +The shuffle and argument plumbing are tested without spawning pytest: the +shuffle helpers are asserted directly and ``run`` is exercised with an +injected fake ``pytest.main``. A small subprocess test then proves the seed is +applied end to end (reproducible, seed visible) against a throwaway test file, +never the real suite. +""" +from __future__ import annotations + +import shlex +import subprocess +import sys +from pathlib import Path + +import pytest + +from tests.run_order_report import ( + SEED_MAX, + OrderShuffle, + generate_seed, + run, + shuffle_items, +) + +REPO_ROOT = Path(__file__).resolve().parents[1] +RUNNER = REPO_ROOT / "tests" / "run_order_report.py" + + +class _FakePytestMain: + """Records forwarded args and plugins and returns a fixed exit code.""" + + def __init__(self, returncode: int = 0): + self.returncode = returncode + self.calls: list[tuple[list[str], list]] = [] + + def __call__(self, args: list[str], plugins: list) -> int: + self.calls.append((list(args), list(plugins))) + return self.returncode + + +# --- shuffle determinism ----------------------------------------------------- + + +def test_same_seed_shuffles_identically(): + first = list(range(20)) + second = list(range(20)) + shuffle_items(first, seed=123) + shuffle_items(second, seed=123) + assert first == second + + +def test_different_seeds_shuffle_differently(): + first = list(range(20)) + second = list(range(20)) + shuffle_items(first, seed=123) + shuffle_items(second, seed=321) + assert first != second + + +def test_shuffle_preserves_items(): + items = list(range(20)) + shuffle_items(items, seed=123) + assert sorted(items) == list(range(20)) + + +def test_plugin_hook_matches_shuffle_items(): + hooked = list(range(20)) + expected = list(range(20)) + OrderShuffle(seed=7).pytest_collection_modifyitems(hooked) + shuffle_items(expected, seed=7) + assert hooked == expected + + +# --- argument parsing and pytest invocation ---------------------------------- + + +def test_pytest_args_after_separator_are_forwarded(): + fake = _FakePytestMain() + run(["--seed", "123", "--", "tests/cli/", "-q"], pytest_main=fake) + (args, plugins), = fake.calls + assert args == ["tests/cli/", "-q"] + assert [type(p) for p in plugins] == [OrderShuffle] + + +def test_explicit_seed_reaches_plugin(): + fake = _FakePytestMain() + run(["--seed", "123", "--", "-q"], pytest_main=fake) + (_, plugins), = fake.calls + assert plugins[0].seed == 123 + + +def test_pytest_exit_code_is_propagated(): + fake = _FakePytestMain(returncode=3) + assert run(["--seed", "123", "--", "-q"], pytest_main=fake) == 3 + + +@pytest.mark.parametrize("value", ["abc", "-1", str(SEED_MAX + 1)]) +def test_invalid_seed_is_rejected_before_pytest(value): + fake = _FakePytestMain() + with pytest.raises(SystemExit) as excinfo: + run(["--seed", value, "--", "-q"], pytest_main=fake) + assert excinfo.value.code == 2 + assert fake.calls == [] + + +# --- seed reporting ----------------------------------------------------------- + + +def test_explicit_seed_is_printed_with_repro_command(capsys): + run(["--seed", "123", "--", "tests/cli/", "-q"], pytest_main=_FakePytestMain()) + out = capsys.readouterr().out + assert "[order-report] shuffling test order with seed 123" in out + repro = shlex.join( + [ + sys.executable, + str(RUNNER), + "--seed", + "123", + "--", + "tests/cli/", + "-q", + ] + ) + assert f"reproduce with: {repro}" in out + + +def test_footer_repeats_seed_and_outcome(capsys): + run(["--seed", "123", "--", "-q"], pytest_main=_FakePytestMain(returncode=1)) + out = capsys.readouterr().out + assert "[order-report] seed 123: pytest exit code 1" in out + + +def test_generated_seed_is_printed_and_used(capsys): + fake = _FakePytestMain() + run(["--", "-q"], pytest_main=fake) + out = capsys.readouterr().out + seed_line = next(line for line in out.splitlines() if "with seed" in line) + seed = int(seed_line.rsplit("seed ", 1)[1]) + assert 0 <= seed <= SEED_MAX + (_, plugins), = fake.calls + assert plugins[0].seed == seed + + +def test_generate_seed_is_within_range(): + assert all(0 <= generate_seed() <= SEED_MAX for _ in range(5)) + + +# --- end-to-end: the seed really drives collection order (real subprocess) --- + +_SAMPLE_TESTS = "".join( + f"def test_{name}():\n pass\n\n" + for name in ("alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel") +) + + +@pytest.fixture(scope="module") +def sample_suite(tmp_path_factory) -> Path: + """A throwaway directory with eight trivial tests, outside the repo rootdir.""" + suite = tmp_path_factory.mktemp("order_report_suite") + (suite / "test_sample.py").write_text(_SAMPLE_TESTS, encoding="utf-8") + return suite + + +def _collect_order(sample_suite: Path, seed: int) -> tuple[list[str], str]: + """Run the runner with ``--collect-only`` and return (test ids, stdout).""" + result = subprocess.run( + [ + sys.executable, + str(RUNNER), + "--seed", + str(seed), + "--", + "--collect-only", + "-q", + "-p", + "no:cacheprovider", + "test_sample.py", + ], + cwd=sample_suite, + capture_output=True, + text=True, + ) + assert result.returncode == 0, result.stderr or result.stdout + ids = [line for line in result.stdout.splitlines() if "::" in line] + assert len(ids) == 8, result.stdout + return ids, result.stdout + + +def test_subprocess_same_seed_is_reproducible(sample_suite): + first, out = _collect_order(sample_suite, seed=123) + second, _ = _collect_order(sample_suite, seed=123) + assert first == second + assert "[order-report] shuffling test order with seed 123" in out + + +def test_subprocess_different_seeds_change_order(sample_suite): + first, _ = _collect_order(sample_suite, seed=123) + second, _ = _collect_order(sample_suite, seed=321) + assert first != second + + +def test_subprocess_failure_exit_code_and_footer(tmp_path): + """A real failing pytest run keeps pytest's exit code and reports the seed.""" + (tmp_path / "test_failure.py").write_text( + "def test_failure():\n assert False\n", + encoding="utf-8", + ) + + result = subprocess.run( + [ + sys.executable, + str(RUNNER), + "--seed", + "123", + "--", + "test_failure.py", + "-q", + ], + cwd=tmp_path, + capture_output=True, + text=True, + ) + + assert result.returncode == 1 + repro = shlex.join( + [ + sys.executable, + str(RUNNER), + "--seed", + "123", + "--", + "test_failure.py", + "-q", + ] + ) + assert f"reproduce with: {repro}" in result.stdout + assert "[order-report] seed 123: pytest exit code 1" in result.stdout