From a7fc1343a35d5e09d5caa08a9334390f025f813f Mon Sep 17 00:00:00 2001 From: nopoz Date: Sun, 28 Jun 2026 03:42:20 -0700 Subject: [PATCH] fix(security): prevent ReDoS in verdict-prose and continuation matchers (#4943) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two py/polynomial-redos sinks ran regexes with two adjacent \s-matching quantifiers over untrusted model text, backtracking O(n^2) when the tail failed on a whitespace flood: - routes/skills_routes.py: the last-resort verdict-from-prose extractor used `["\'\s:]*\s*` — the class already matches \s, so the trailing \s* was a redundant second quantifier. Dropped it (extracted to a documented module constant _VERDICT_PROSE_RE); the matched text is identical, the scan linear. - src/agent_loop.py _EXPLICIT_CONTINUATION_RE: `\s*[.!?]*\s*$` put two \s* around `[.!?]*`. Rewrote as `\s*(?:[.!?]+\s*)?$` — same accepted tails (no two \s* adjacent), linear. Portable form (no possessive quantifiers). Both verified output-equivalent to the originals across a fuzz corpus. Adds tests/test_redos_verdict_continuation.py pinning the unchanged match sets and bounding the flood inputs (old patterns took seconds at 40k whitespace chars). --- routes/skills_routes.py | 12 +++- src/agent_loop.py | 7 ++- tests/test_redos_verdict_continuation.py | 80 ++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 tests/test_redos_verdict_continuation.py diff --git a/routes/skills_routes.py b/routes/skills_routes.py index 444dd69cf..711baa2e5 100644 --- a/routes/skills_routes.py +++ b/routes/skills_routes.py @@ -22,6 +22,16 @@ from core.middleware import require_admin logger = logging.getLogger(__name__) +# Last-resort verdict extraction from a teacher/verifier model's prose (run when +# JSON parsing fails). `["\'\s:]*` already consumes whitespace, so the original +# trailing `\s*` made two adjacent \s-matching quantifiers that backtrack O(n^2) +# on a `verdict` + whitespace flood in untrusted model output (CodeQL +# py/polynomial-redos). Without it a single unbounded quantifier remains — the +# matched text is identical, and the scan is linear. +_VERDICT_PROSE_RE = re.compile( + r'verdict["\'\s:]*["\']?(pass|needs_work|fail|inconclusive)', re.I +) + class SkillAddRequest(BaseModel): # New schema (preferred) @@ -196,7 +206,7 @@ async def _eval_skill_run(skill_md: str, task: str, transcript: str, # Last resort: pull the verdict keyword straight out of the prose so a # clearly-decided run isn't thrown away as "unparseable". if v not in _VERDICTS: - km = _re.search(r'verdict["\'\s:]*\s*["\']?(pass|needs_work|fail|inconclusive)', text, _re.I) + km = _VERDICT_PROSE_RE.search(text) if km: v = km.group(1).lower() if data is None: diff --git a/src/agent_loop.py b/src/agent_loop.py index be80c316a..9ff0cc8d1 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -845,7 +845,12 @@ _EXPLICIT_CONTINUATION_RE = re.compile( r"run it|launch it|start it|use that|that one|same|the same|" r"first|second|third|the first one|the second one|the third one|" r"[123]|[abc]" - r")\s*[.!?]*\s*$", + # `\s*[.!?]*\s*$` put two \s-matching quantifiers around `[.!?]*`, which + # backtracks O(n^2) on a terse reply + whitespace flood (py/polynomial-redos). + # `\s*(?:[.!?]+\s*)?$` accepts the same "trailing space/punctuation" tails + # (the inner \s* only engages after `[.!?]+`, so no two \s* are adjacent) and + # is linear. + r")\s*(?:[.!?]+\s*)?$", re.IGNORECASE, ) _RETRY_CONTINUATION_RE = re.compile( diff --git a/tests/test_redos_verdict_continuation.py b/tests/test_redos_verdict_continuation.py new file mode 100644 index 000000000..1304f0200 --- /dev/null +++ b/tests/test_redos_verdict_continuation.py @@ -0,0 +1,80 @@ +"""Regression tests for two py/polynomial-redos sinks over untrusted model text. + +Both had two adjacent `\\s`-matching quantifiers that backtrack O(n^2) when the +rest of the pattern fails on a whitespace flood: + + * `routes/skills_routes.py` `_VERDICT_PROSE_RE` — `["\\'\\s:]*\\s*` (the class + already matches `\\s`) over a teacher/verifier model's prose verdict. + * `src/agent_loop.py` `_EXPLICIT_CONTINUATION_RE` — `\\s*[.!?]*\\s*$` over a + user's terse reply. + +Each is rewritten to drop the adjacency while keeping the exact match set. The +tests pin correctness (matches unchanged) and bound the flood inputs; the old +patterns took seconds, the loose budget is seconds, so the margin is ~100x. +""" + +import time + +import pytest + +import src.agent_tools # noqa: F401 (break agent_tools<->agent_loop import cycle) +from routes.skills_routes import _VERDICT_PROSE_RE +from src.agent_loop import _EXPLICIT_CONTINUATION_RE, _is_explicit_continuation + +_BUDGET_S = 4.0 + + +def _timed(fn, *args): + start = time.perf_counter() + result = fn(*args) + return result, time.perf_counter() - start + + +# ── #229 verdict-from-prose: matches unchanged ────────────────────────────── + +@pytest.mark.parametrize("text,expected", [ + ('verdict": "FAIL"', "fail"), + ("verdict needs_work", "needs_work"), + ("Verdict: inconclusive", "inconclusive"), + ("verdict\t\t'pass'", "pass"), + ("verdictpass", "pass"), # all separators optional — keyword may abut, as before + ("the verdict is: pass overall", None), # intervening "is" breaks the run + ("no clear decision here", None), +]) +def test_verdict_prose_extraction(text, expected): + m = _VERDICT_PROSE_RE.search(text) + assert (m.group(1).lower() if m else None) == expected + + +def test_verdict_prose_flood_is_fast(): + evil = "verdict" + "\t" * 40000 + "x" # `verdict` then whitespace, no keyword + (m, dt) = _timed(_VERDICT_PROSE_RE.search, evil) + assert dt < _BUDGET_S, f"_VERDICT_PROSE_RE took {dt:.2f}s" + assert m is None + + +# ── #472 explicit-continuation: classification unchanged ──────────────────── + +@pytest.mark.parametrize("text", [ + "yes", "y", "ok!", "okay ...", "sure!!", "do it", "1", "a", "2.", + "the second one", " yes ", "continue", "run it!", "third???", +]) +def test_continuation_accepts_terse_confirmations(text): + assert _is_explicit_continuation(text) + + +@pytest.mark.parametrize("text", [ + "no", "maybe yes", "yesx", "let's not", "y . ! .", "", "run the script please", +]) +def test_continuation_rejects_non_confirmations(text): + assert not _is_explicit_continuation(text) + + +def test_continuation_flood_is_fast(): + evil = "y" + "\t" * 40000 + "x" # terse opener then whitespace flood, no `$` + (_, dt) = _timed(_is_explicit_continuation, evil) + assert dt < _BUDGET_S, f"_is_explicit_continuation took {dt:.2f}s" + # Direct on the compiled pattern too (the function strips first). + (m, dt2) = _timed(_EXPLICIT_CONTINUATION_RE.match, evil) + assert dt2 < _BUDGET_S, f"_EXPLICIT_CONTINUATION_RE took {dt2:.2f}s" + assert m is None