From 3940297655354f3d8117bd6f8301c261993b8614 Mon Sep 17 00:00:00 2001 From: Karandeep Bhardwaj <20508971+karandeepbhardwaj@users.noreply.github.com> Date: Sat, 6 Jun 2026 23:55:33 -0400 Subject: [PATCH] fix(webhooks): redact IPv6 addresses in sanitized error messages (#3038) * fix(webhooks): redact IPv6 addresses in sanitized error messages sanitize_error() only stripped IPv4 literals, so a failed webhook delivery to an internal IPv6 host (::1, fe80::/fc00:: ...) leaked the address into Webhook.last_error, which is surfaced in the UI. The module already treats internal IPv6 as sensitive (see _PRIVATE_NETWORKS and src/url_safety.py); the scrubber just didn't keep up. Add an IPv6 redaction pass covering bracketed, full 8-group, and ::-compressed forms. The pattern is scoped to leave clock times ("12:34:56"), MAC addresses, and C++ "::" tokens untouched, and the ::-branch uses a lookahead over a flat character class so there is no nested quantifier to backtrack on (no ReDoS on long colon/hex runs). Adds tests/test_webhook_sanitize_error_ipv6.py. * webhook: validate IPv6 candidates with ipaddress, not a regex grammar Per review on #3038: instead of hand-rolling the IPv6 grammar in a regex (brittle, and easy to over-match colon-heavy text), use a loose regex to find candidate tokens and let ipaddress.ip_address() decide. Only tokens it parses as IPv6 are redacted, so the false-positive guards (clock times, MACs, "std::vector") now come from the stdlib instead of a custom pattern. This also covers cases the old pattern missed -- zone ids (fe80::1%eth0) and IPv4-mapped addresses -- and no longer partially mangles invalid colon strings (a 9-group token is preserved whole rather than losing its first 8 groups). The bracketed branch is a single greedy class with no X*:X* backtracking; verified ~1ms on 40k-char adversarial input. Extends the test file with zone-id, IPv4-mapped, and invalid-token cases. * webhook: redact bracketed/scoped/IPv4-mapped IPv6 as one unit Review on #3038 found a few IP forms left partially redacted or malformed by sanitize_error(): [fe80::1%eth0]:8080 -> [[redacted]]:8080 [::ffff:192.168.0.1]:8080 -> [[redacted][redacted]]:8080 ::ffff:192.168.0.1 -> [redacted][redacted] Two causes: the bracketed branch's character class dropped zone ids, so scoped addresses fell through to the bare branch and left the brackets and port behind; and the IPv4 pass ran first, stripping the embedded v4 of an IPv4-mapped address so the v6 pass then redacted the "::ffff:" remnant separately. Fix: - run the IP-candidate pass before the IPv4 pass, so IPv4-mapped forms are matched and redacted whole - match the full bracketed authority ([...] + optional %zone + :port) as a single token, and redact a v4-or-v6 literal inside [ ] as one [redacted] - extend the bare branch with a bounded (exactly-3) dotted-quad tail for IPv4-mapped forms; exactly-3 so it can't swallow a partial suffix and accidentally preserve an otherwise-valid address Each form now collapses to a single [redacted]; the candidate finder stays linear (~1.3ms on 40k-char adversarial input). Adds regression tests for the three reported forms and keeps the timestamp/MAC/std::vector coverage. --- src/webhook_manager.py | 57 ++++++++++++- tests/test_webhook_sanitize_error_ipv6.py | 98 +++++++++++++++++++++++ 2 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 tests/test_webhook_sanitize_error_ipv6.py diff --git a/src/webhook_manager.py b/src/webhook_manager.py index e43f8e4ed..267ceaa38 100644 --- a/src/webhook_manager.py +++ b/src/webhook_manager.py @@ -136,11 +136,62 @@ def validate_events(events_str: str) -> str: return ",".join(events) +# Broad candidate matcher for the IP-redaction pass. Deliberately loose: a +# bracketed host authority ([fe80::1%eth0]:8080 and friends) with an optional +# :port, or a bare IPv6 run — hex groups joined by colons, an optional trailing +# dotted-quad for IPv4-mapped forms (::ffff:192.168.0.1), and an optional %zone. +# It does NOT encode the IPv6 grammar; ipaddress.ip_address() is the real +# validator (see _redact_ip_candidate), so any colon-bearing string it rejects +# (clock times, MACs, "std::vector") is left alone. Every branch is a single +# greedy class or a repetition over a mandatory ':'/'.' delimiter, so there is no +# nested-quantifier backtracking (ReDoS-safe). +_IP_CANDIDATE = re.compile( + r'\[[^\[\]\s]*\](?::\d+)?' + r'|(? str: + """Redact a candidate token that the stdlib confirms is an IP address. + + A bare token is redacted only when it parses as IPv6 — bare IPv4 is left to + the dedicated IPv4 pass. A bracketed token is a host authority, so a v4 or v6 + literal inside [ ] is redacted as a whole. This keeps output consistent (one + [redacted], never nested or partial) for scoped/mapped/ported forms. + """ + token = match.group(0) + bracketed = token.startswith('[') + candidate = token + if bracketed: + # Keep only what's inside [...]; the trailing :port is dropped. + candidate = candidate[1:candidate.index(']')] + # A zone id (fe80::1%eth0) is not part of the address ipaddress parses. + candidate = candidate.split('%', 1)[0] + # The loose bare pattern can trail one stray ':' (e.g. "::1:" in "host ::1: + # down"); drop it unless it's the "::" compression marker. + if candidate.endswith(':') and not candidate.endswith('::'): + candidate = candidate[:-1] + try: + addr = ipaddress.ip_address(candidate) + except ValueError: + return token + if bracketed or isinstance(addr, ipaddress.IPv6Address): + return '[redacted]' + return token + + def sanitize_error(error: str, max_len: int = 200) -> str: """Strip potentially sensitive details from error messages.""" - # Remove IP addresses and ports - cleaned = re.sub(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?', '[redacted]', error) - # Remove hostnames in URLs + # Redact IPv6 (and bracketed-authority) addresses first, so an IPv4-mapped + # form like ::ffff:192.168.0.1 is scrubbed as one unit instead of having its + # embedded IPv4 removed first and leaving a stray "::ffff:" behind. Broad + # candidates are validated by ipaddress.ip_address(), so the false-positive + # guards (clock times, MACs, C++ "::") come from the stdlib, not a regex. + cleaned = _IP_CANDIDATE.sub(_redact_ip_candidate, error) + # Remove remaining bare IPv4 addresses and ports. + cleaned = re.sub(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?', '[redacted]', cleaned) + # Remove hostnames in URLs. cleaned = re.sub(r'https?://[^\s/]+', '[redacted-url]', cleaned) return cleaned[:max_len] diff --git a/tests/test_webhook_sanitize_error_ipv6.py b/tests/test_webhook_sanitize_error_ipv6.py new file mode 100644 index 000000000..ca5109da3 --- /dev/null +++ b/tests/test_webhook_sanitize_error_ipv6.py @@ -0,0 +1,98 @@ +"""sanitize_error must scrub IPv6 addresses, not just IPv4. + +Webhook delivery errors are stored in Webhook.last_error and surfaced in the +UI. The scrubber removed IPv4 literals but let IPv6 addresses through, so a +failed delivery to an internal v6 host (::1, fe80::/fc00:: ...) leaked the +address. This pins the v6 redaction while keeping the false-positive guards +(clock times, MACs, C++ "::") that make the pattern safe on arbitrary text. +""" + +import os +import sys +from unittest.mock import patch + +from tests.helpers.import_state import clear_module, preserve_import_state + +# Same import dance as test_webhook_ssrf_resilience.py: webhook_manager pulls in +# core.database (init_db -> create_all), which needs a DB path at import time. +# Pin DATABASE_URL to in-memory SQLite and restore module state afterwards. +# sanitize_error itself is pure (stdlib re only). +with patch.dict(os.environ, {"DATABASE_URL": "sqlite:///:memory:"}), \ + preserve_import_state("src.database", "core.database"): + clear_module("src.database") + _core_database = sys.modules.get("core.database") + if _core_database is not None and not getattr(_core_database, "__file__", None): + del sys.modules["core.database"] + from src.webhook_manager import sanitize_error + + +def test_ipv6_addresses_are_redacted(): + leaky = [ + "connect to [fd00::1234:5678]:8080 failed", # bracketed + port + "ConnectError to fe80::1 refused", # link-local + "no route to ::1", # loopback + "host fc00::abcd unreachable", # unique-local + "connect to [::1]:443 refused", # bracketed + port + "POST https://[2001:db8::1]:443/hook failed", # inside a URL + "addr 2001:0db8:0000:0000:0000:ff00:0042:8329", # full 8-group + ] + for msg in leaky: + out = sanitize_error(msg) + # Scrubbed via the v6 rule ([redacted]) or, inside a URL, the URL rule + # ([redacted-url]) — either way the address must not survive. + assert "[redacted" in out, out + assert "::" not in out and "[fd00" not in out, out + + +def test_non_addresses_are_preserved(): + # Colon-bearing strings that are NOT IPv6 must pass through untouched, so + # error messages stay readable. + safe = [ + "failed at 12:34:56 today", # clock time + "2026-06-05T22:36:55 connection reset", # ISO timestamp + "std::vector overflow", # C++ scope resolution + "device ab:cd:ef:01:23:45 offline", # MAC address + "unsupported ratio 16:9", + "HTTP 500 from upstream", + "request [deadbeef] failed", # bracketed hex id, no colon + ] + for msg in safe: + assert sanitize_error(msg) == msg, msg + + +def test_ipv4_still_redacted_and_length_capped(): + assert sanitize_error("dial 192.168.1.5:9000 refused") == "dial [redacted] refused" + assert len(sanitize_error("x" * 500)) == 200 + + +def test_ipv6_zone_id_is_redacted(): + # Link-local addresses often carry a %zone (fe80::1%eth0). The whole token, + # zone included, must go — ipaddress validates the address part. + out = sanitize_error("bind fe80::1%eth0 unreachable") + assert "[redacted]" in out + assert "::" not in out and "%eth0" not in out and "fe80" not in out + + +def test_ipv4_mapped_ipv6_is_scrubbed(): + # ::ffff:192.168.0.1 must be redacted as a single unit (one [redacted]), not + # split into "[redacted][redacted]" by the v6 and v4 passes. + assert sanitize_error("to ::ffff:192.168.0.1 closed") == "to [redacted] closed" + + +def test_bracketed_scoped_ipv6_with_port_is_one_redaction(): + # [fe80::1%eth0]:8080 — the whole bracketed authority (zone + port) goes, + # with no leftover brackets/port and no nested [redacted]. + assert sanitize_error("dial [fe80::1%eth0]:8080 timeout") == "dial [redacted] timeout" + + +def test_bracketed_ipv4_mapped_with_port_is_one_redaction(): + # [::ffff:192.168.0.1]:8080 — same, for an IPv4-mapped literal in brackets. + assert sanitize_error("dial [::ffff:192.168.0.1]:8080 timeout") == "dial [redacted] timeout" + + +def test_invalid_ipv6_is_not_partially_mangled(): + # Nine groups is not a valid address. Backing the scrub with ipaddress means + # the whole token is preserved, instead of a hand-rolled 8-group regex + # chewing off "1:2:3:4:5:6:7:8" and leaving a dangling ":9". + msg = "weird id 1:2:3:4:5:6:7:8:9 here" + assert sanitize_error(msg) == msg