Files
odysseus/src/caldav_sync.py
T
Mazen Tamer Salah 1209f258d7 fix(caldav): skip the prune when any object fails to parse (#3454)
* fix(caldav): don't prune the whole window when no objects could be parsed

The post-sync prune deletes local origin=="caldav" rows in the window whose UID
the server didn't just return. With an empty seen_uids it falls back to
`uid.isnot(None)` — a match-all delete. That's right when the calendar is
genuinely empty, but when the server returns objects and every one fails to
parse (malformed iCal / an icalendar error), seen_uids is empty only because
nothing could be read, so the match-all branch silently deletes every local
event in the 90-day-back/365-day-forward window.

Track whether any object failed to parse and gate the prune with a small pure
helper `_should_prune_window(seen_uids, parse_failed)`: prune when something was
read, or when the calendar is genuinely empty (no objects, no parse errors), but
never when objects came back unreadable.

Adds tests/test_caldav_prune_parse_failure.py for the three cases.

* fix(caldav): skip the prune on any parse failure, not just total

Review follow-up (#3454): _should_prune_window returned True whenever seen_uids
was non-empty, so a partial parse failure (say 48 of 50 objects parse) still
pruned the 2 unreadable-but-still-upstream events, because their UIDs were absent
from seen_uids. Any parse failure makes seen_uids an incomplete view of the
server, so pruning against it is unsafe whether the failure is total or partial.

Skip the prune on any parse failure (return not parse_failed); only prune on a
clean read (a genuinely empty window is still safe to prune). Tradeoff: one
permanently-unparseable event pauses deletion mirroring until it is fixed, which
is the safe direction (false-keep beats false-delete).

Replace the now-incorrect "partial failure still prunes" assertion with a
partial-failure regression: one object parses, one fails, so the prune is
skipped and the unparsed event's local copy is not deleted.

---------

Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>
2026-06-08 18:59:14 +02:00

536 lines
23 KiB
Python

"""CalDAV → local SQLite sync.
The Settings UI lets users save CalDAV credentials, but the original
sync path was removed when calendar storage was migrated to SQLite.
This module re-wires that gap as a one-way pull (remote → local),
called on calendar open and from a periodic scheduler loop.
Design notes:
- We use the `caldav` lib so PROPFIND discovery + REPORT XML work
across Radicale / Nextcloud / Apple / Fastmail without us
reinventing the protocol. It's pure Python.
- The lib is synchronous; we run it in a threadpool via
`asyncio.to_thread` so the FastAPI event loop stays free.
- Each remote calendar maps to one local `CalendarCal` row with
`source="caldav"` and `id` = a stable hash of the remote URL so
re-syncs idempotently target the same row.
- Events upsert by VEVENT UID (kept as the local `uid`). Local
CalDAV-sourced events not seen in the latest pull are deleted so
remote deletions propagate.
- Datetimes are converted to UTC and the row is flagged `is_utc=True`
so the serializer adds the Z suffix and the frontend renders in the
user's local TZ correctly.
"""
import asyncio
import hashlib
import ipaddress
import logging
import os
import socket
import uuid
from datetime import date, datetime, timedelta, timezone
from urllib.parse import urlparse, urlunparse
logger = logging.getLogger(__name__)
# Pull window: 90 days back, 1 year forward. Keeps the REPORT cheap and
# matches what the calendar UI typically renders. Far-future recurring
# events still come through via RRULE expansion on the frontend.
_LOOKBACK_DAYS = 90  # days before "now" covered by each sync window
_LOOKAHEAD_DAYS = 365  # days after "now" covered by each sync window
# Hostnames that validate_caldav_url rejects outright, before any IP-literal
# or DNS-based address check runs. Compared against the lower-cased host.
_BLOCKED_HOSTS = {
    "localhost",
    "localhost.",
    "ip6-localhost",
    "metadata.google.internal",
}
def _private_caldav_allowed() -> bool:
    """Report whether CalDAV hosts in private IP ranges are permitted.

    Driven by the ``ODYSSEUS_ALLOW_PRIVATE_CALDAV`` environment variable:
    ``1``, ``true`` or ``yes`` (case-insensitive) enable it; any other value,
    or an unset variable, leaves it disabled.
    """
    flag = os.environ.get("ODYSSEUS_ALLOW_PRIVATE_CALDAV", "0")
    return flag.lower() in ("1", "true", "yes")
def _validate_caldav_address(addr: ipaddress._BaseAddress) -> None:
    """Reject an IP address that a CalDAV sync must not connect to.

    IPv4-mapped IPv6 addresses are judged by the IPv4 address they embed.

    Raises:
        ValueError: for loopback, link-local, multicast, unspecified or
            reserved addresses (always), and for private addresses unless
            private CalDAV hosts have been explicitly allowed.
    """
    embedded = addr.ipv4_mapped if isinstance(addr, ipaddress.IPv6Address) else None
    target = addr if embedded is None else embedded

    never_allowed = any((
        target.is_loopback,
        target.is_link_local,
        target.is_multicast,
        target.is_unspecified,
        target.is_reserved,
    ))
    if never_allowed:
        raise ValueError("CalDAV URL host is not allowed")

    if not target.is_private:
        return
    if not _private_caldav_allowed():
        raise ValueError("Private CalDAV IPs require ODYSSEUS_ALLOW_PRIVATE_CALDAV=1")
def _validate_caldav_ip(host: str) -> None:
    """Vet ``host`` when it is an IP literal; do nothing when it is not.

    Surrounding square brackets (IPv6 URL form) are stripped before parsing.

    Raises:
        ValueError: when the literal is an address the sync may not reach.
    """
    literal = host.strip("[]")
    try:
        parsed = ipaddress.ip_address(literal)
    except ValueError:
        # Not an IP literal, so there is nothing for this check to do.
        return
    _validate_caldav_address(parsed)
def _resolve_caldav_host_ips(host: str) -> list[ipaddress._BaseAddress]:
    """Resolve ``host`` and return every IPv4/IPv6 address it maps to.

    Entries from other address families, and entries whose address text
    cannot be parsed, are skipped. Duplicates are not removed.

    Raises:
        OSError: propagated from ``socket.getaddrinfo`` when resolution fails.
    """
    inet_families = (socket.AF_INET, socket.AF_INET6)
    resolved: list[ipaddress._BaseAddress] = []
    for info in socket.getaddrinfo(host, None):
        family, sockaddr = info[0], info[4]
        if family not in inet_families:
            continue
        # Drop an IPv6 zone suffix ("fe80::1%eth0") before parsing.
        bare = sockaddr[0].partition("%")[0]
        try:
            resolved.append(ipaddress.ip_address(bare))
        except ValueError:
            continue
    return resolved
def _validate_caldav_hostname(host: str) -> None:
    """Resolve a DNS hostname and vet every address it points at.

    IP literals are left alone here and return immediately.

    Raises:
        ValueError: when the name does not resolve, resolves to nothing
            usable, or resolves to any address that is not allowed.
    """
    try:
        ipaddress.ip_address(host.strip("[]"))
    except ValueError:
        pass  # a real hostname — fall through to DNS resolution
    else:
        return

    try:
        resolved = _resolve_caldav_host_ips(host)
    except OSError:
        raise ValueError("CalDAV URL host does not resolve")
    if not resolved:
        raise ValueError("CalDAV URL host does not resolve")

    # Every resolved address must pass; one bad record rejects the host.
    for candidate in resolved:
        _validate_caldav_address(candidate)
def validate_caldav_url(raw_url: str) -> str:
    """Validate and normalize a user-provided CalDAV URL before server-side use.

    Checks, in order: non-empty, http/https scheme, host present, no embedded
    credentials, no fragment, parseable port, host not on the block list, and
    finally that the host (IP literal or every DNS-resolved address) is one
    the server is allowed to contact.

    Returns:
        The URL with surrounding whitespace and trailing slashes removed.

    Raises:
        ValueError: with a user-facing message for the first check that fails.
    """
    candidate = raw_url.strip() if isinstance(raw_url, str) else ""
    if not candidate:
        raise ValueError("CalDAV URL is required")

    parts = urlparse(candidate)
    if parts.scheme not in ("http", "https"):
        raise ValueError("CalDAV URL must start with http:// or https://")
    if not parts.hostname:
        raise ValueError("CalDAV URL must include a host")
    if parts.username or parts.password:
        raise ValueError("Put CalDAV credentials in the username/password fields, not the URL")
    if parts.fragment:
        raise ValueError("CalDAV URL fragments are not allowed")

    # Reading .port is what triggers urllib's port validation.
    try:
        _ = parts.port
    except ValueError:
        raise ValueError("CalDAV URL has an invalid port")

    hostname = (parts.hostname or "").lower()
    blocked = hostname in _BLOCKED_HOSTS or hostname.endswith(".localhost")
    if blocked:
        raise ValueError("CalDAV URL host is not allowed")

    _validate_caldav_ip(hostname)
    _validate_caldav_hostname(hostname)

    normalized = urlunparse(parts._replace(fragment=""))
    return normalized.rstrip("/")
def _stable_cal_id(remote_url: str, owner: str = "", account_id: str = "") -> str:
    """Derive the deterministic local id for a remote CalDAV calendar.

    The id hashes owner, account id and remote URL together, so two users —
    or one user with two accounts — pointing at the same server URL get
    distinct local rows (avoids PK collision, #2765). ``owner`` and
    ``account_id`` default to "" so legacy URL-only callers keep working.

    Returns:
        ``"caldav-"`` followed by the first 24 hex chars of a SHA-256 digest.
    """
    scoped_key = "{}\n{}\n{}".format(owner, account_id, remote_url)
    digest = hashlib.sha256(scoped_key.encode("utf-8")).hexdigest()
    return "caldav-" + digest[:24]
def _to_utc_naive(dt):
    """Normalize a CalDAV date/datetime into a naive datetime for the DB.

    CalDAV datetimes can be tz-aware (with a TZID) or naive; the DB column is
    naive, and the caller sets ``is_utc`` separately so the serializer can add
    the Z suffix.

    Returns:
        A ``(value, all_day)`` tuple:
        - tz-aware datetime → converted to UTC with tzinfo dropped, ``False``
        - naive datetime    → returned unchanged (treated as local), ``False``
        - plain date        → widened to midnight of that day, ``True``
    """
    if not isinstance(dt, datetime):
        # date-only value, i.e. an all-day event
        return datetime(dt.year, dt.month, dt.day), True
    if dt.tzinfo is None:
        return dt, False
    in_utc = dt.astimezone(timezone.utc)
    return in_utc.replace(tzinfo=None), False
def _find_existing_event(db, pending, uid_val, calendar_id):
    """Return the event row to update for THIS calendar, or a falsy value.

    Events staged in ``pending`` (added this batch, not yet committed) win;
    otherwise the DB is queried for a row with this uid in this calendar.

    The query is deliberately scoped by ``calendar_id``. CalendarEvent.uid is
    the global primary key, so an unscoped lookup would return whichever row
    holds that VEVENT uid — possibly another owner's — and the caller would
    then move (steal) it into the syncing calendar. With the scoped lookup a
    genuine cross-user uid collision fails the PK insert inside the
    per-calendar try/except instead of hijacking the row. (import_ics was
    already fixed this way.)
    """
    from core.database import CalendarEvent

    staged = pending.get(uid_val)
    if staged:
        return staged
    scoped = db.query(CalendarEvent).filter(
        CalendarEvent.uid == uid_val,
        CalendarEvent.calendar_id == calendar_id,
    )
    return scoped.first()
def _google_caldav_events_url(url: str) -> str | None:
"""Map a Google CalDAV *principal* URL to its event-collection URL.
Google serves the principal at ``…/user`` but events live under ``…/events``
— the ``/user`` resource holds no VEVENTs. The `caldav` library's
principal→home-set discovery does not reliably enumerate calendars from
Google's ``/user`` endpoint, so the sync falls into the "treat the URL as a
single calendar" fallback below. Pointed at ``/user`` that fallback issues
every calendar-query REPORT against the principal, which returns a clean but
empty 200 for all date ranges — the calendar shows no events even though
auth succeeded (issue #2507).
Both Google CalDAV endpoint forms are handled, since some accounts only
authenticate against one of them:
- newer: ``https://apidata.googleusercontent.com/caldav/v2/<id>/user``
- legacy: ``https://www.google.com/calendar/dav/<id>/user``
Returns the events URL for a recognised Google principal URL, else None so
the caller keeps the original URL unchanged.
"""
parts = urlparse(url)
host = (parts.hostname or "").lower()
path = parts.path.rstrip("/")
if not path.endswith("/user"):
return None
is_google = (
host.endswith("googleusercontent.com") # newer /caldav/v2 form
or (host in ("www.google.com", "google.com") and "/calendar/dav/" in path) # legacy form
)
if not is_google:
return None
new_path = path[: -len("/user")] + "/events"
return urlunparse(parts._replace(path=new_path))
def _open_url_as_calendar(client, url: str):
    """Open ``url`` as a single calendar collection via ``client.calendar``.

    This is the fallback for when principal discovery yields no calendars.
    A Google principal URL is not an event collection, so it is first mapped
    to its events URL (see ``_google_caldav_events_url``); any other URL is
    passed through untouched.
    """
    events_url = _google_caldav_events_url(url)
    return client.calendar(url=events_url if events_url else url)
def _build_dav_client(url: str, username: str, password: str):
    """Build a ``caldav.DAVClient`` whose HTTP session refuses redirects.

    ``validate_caldav_url`` resolves and vets only the *initial* host, while
    caldav's underlying HTTP session follows 3xx redirects by default. A URL
    that passed validation could therefore be bounced — at request time — to
    loopback / link-local / private space, re-opening the SSRF the host check
    closes. With the session pinned to zero redirects, any 3xx raises instead
    of silently following an attacker-chosen ``Location``. This mirrors the
    test-connection path in ``routes/calendar_routes.py``, which already sets
    ``follow_redirects=False``.

    DAVClient exposes no per-request redirect flag, so the limit is set on
    the session after construction (the session is created in ``__init__``).
    """
    from caldav import DAVClient

    dav = DAVClient(url=url, username=username, password=password)
    # Applied unconditionally: a redirect guard that only sometimes applies
    # is not a control. The session exists right after __init__ on every real
    # client; test_build_dav_client_disables_redirects asserts this against
    # the installed caldav in CI.
    dav.session.max_redirects = 0
    return dav
def _should_prune_window(seen_uids: set, parse_failed: bool) -> bool:
    """Decide whether the post-sync prune of vanished CalDAV events may run.

    The prune deletes local ``origin=="caldav"`` rows in the window whose UID
    the server did not just return. Once any object fails to parse — whether
    all of them or only some — ``seen_uids`` is an incomplete picture of the
    server, and pruning against it would delete events that still exist
    upstream but could not be read (a total failure wipes the whole window,
    a partial one deletes just the unreadable events).

    So the answer depends only on ``parse_failed``: prune after a clean read,
    never otherwise. An empty ``seen_uids`` after a clean read means the
    window really is empty, which is safe to prune. ``seen_uids`` is accepted
    but not consulted.
    """
    if parse_failed:
        return False
    return True
def _sync_blocking(owner: str, url: str, username: str, password: str, account_id: str = "") -> dict:
    """Pull every discoverable remote calendar into the local DB (blocking).

    Synchronous by design — intended to run in a threadpool.

    Args:
        owner: Owner recorded on the local calendar rows.
        url: CalDAV server, principal, or calendar URL.
        username: CalDAV login name.
        password: CalDAV password.
        account_id: Optional account identifier; folded into the local
            calendar id and stamped on the calendar row.

    Returns:
        Counts plus messages:
        ``{"calendars": int, "events": int, "deleted": int, "errors": list}``.
        Discovery, fetch, parse and per-calendar failures are reported through
        ``errors``; a failure in one calendar does not stop the others.
    """
    # Lazy imports so a missing `caldav` dep doesn't break app startup —
    # the integrations form still works, sync just no-ops with an error.
    from caldav.lib.error import AuthorizationError, NotFoundError
    from core.database import CalendarCal, CalendarEvent, SessionLocal

    result = {"calendars": 0, "events": 0, "deleted": 0, "errors": []}
    client = _build_dav_client(url, username, password)

    # Discovery: try principal → calendars first; if the server doesn't
    # support discovery (or the URL points directly at a calendar), fall
    # back to treating the URL as a single calendar.
    calendars = []
    try:
        principal = client.principal()
        calendars = principal.calendars()
    except (AuthorizationError, NotFoundError) as e:
        result["errors"].append(f"Discovery failed: {e}")
        return result
    except Exception as e:
        logger.info("CalDAV principal discovery failed, trying URL as calendar: %s", e)
        try:
            calendars = [_open_url_as_calendar(client, url)]
        except Exception as e2:
            result["errors"].append(f"Could not open URL as calendar: {e2}")
            return result
    if not calendars:
        try:
            calendars = [_open_url_as_calendar(client, url)]
        except Exception as e:
            result["errors"].append(f"No calendars and URL fallback failed: {e}")
            return result

    # Naive-UTC "now", sampled once so both window bounds share one instant.
    # (datetime.utcnow() is deprecated; this yields the same naive value.)
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    start = now - timedelta(days=_LOOKBACK_DAYS)
    end = now + timedelta(days=_LOOKAHEAD_DAYS)

    db = SessionLocal()
    try:
        for remote_cal in calendars:
            try:
                remote_url = str(remote_cal.url)
                cal_id = _stable_cal_id(remote_url, owner=owner, account_id=account_id)
                display_name = (remote_cal.name or "").strip() or "CalDAV"
                local_cal = db.query(CalendarCal).filter(
                    CalendarCal.id == cal_id,
                    CalendarCal.owner == owner,
                ).first()
                if not local_cal:
                    local_cal = CalendarCal(
                        id=cal_id,
                        owner=owner,
                        name=display_name,
                        color="#5b8abf",
                        source="caldav",
                        account_id=account_id or None,
                    )
                    db.add(local_cal)
                    db.commit()
                else:
                    # Refresh display name and stamp account_id if missing.
                    changed = False
                    if local_cal.name != display_name:
                        local_cal.name = display_name
                        changed = True
                    if account_id and not local_cal.account_id:
                        local_cal.account_id = account_id
                        changed = True
                    if changed:
                        db.commit()
                result["calendars"] += 1

                # Fetch events in window. `date_search` returns CalendarObject
                # resources; each may contain one VEVENT (most servers) or
                # several (rare).
                from icalendar import Calendar as iCal

                seen_uids = set()
                # Track events added to the session but not yet committed so
                # duplicate UIDs within the same batch are updated, not re-inserted
                # (which would violate the UNIQUE constraint on commit).
                pending: dict = {}
                parse_failed = False
                try:
                    objs = remote_cal.date_search(start=start, end=end, expand=False)
                except Exception as e:
                    result["errors"].append(f"{display_name}: date_search failed ({e})")
                    continue

                for obj in objs:
                    try:
                        ical = iCal.from_ical(obj.data)
                    except Exception as e:
                        result["errors"].append(f"{display_name}: parse failed ({e})")
                        parse_failed = True
                        continue
                    for comp in ical.walk():
                        if comp.name != "VEVENT":
                            continue
                        # A VEVENT without a UID gets a freshly generated one.
                        uid_val = str(comp.get("uid", "")) or str(uuid.uuid4())
                        # Recorded before the DTSTART check, so an event we
                        # skip below still counts as "seen" for the prune.
                        seen_uids.add(uid_val)
                        dtstart_p = comp.get("dtstart")
                        if not dtstart_p:
                            continue
                        start_dt, all_day = _to_utc_naive(dtstart_p.dt)
                        dtend_p = comp.get("dtend")
                        if dtend_p:
                            end_dt, _ = _to_utc_naive(dtend_p.dt)
                        elif all_day:
                            end_dt = start_dt + timedelta(days=1)
                        else:
                            end_dt = start_dt + timedelta(hours=1)
                        # is_utc reflects whether the source carried a TZ
                        # we converted from. All-day = no TZ semantics.
                        row_is_utc = (
                            not all_day
                            and isinstance(dtstart_p.dt, datetime)
                            and dtstart_p.dt.tzinfo is not None
                        )
                        summary = str(comp.get("summary", ""))
                        description = str(comp.get("description", ""))
                        location = str(comp.get("location", ""))
                        rrule = (
                            comp.get("rrule").to_ical().decode()
                            if comp.get("rrule")
                            else ""
                        )
                        existing = _find_existing_event(db, pending, uid_val, local_cal.id)
                        if existing:
                            existing.calendar_id = local_cal.id
                            existing.summary = summary
                            existing.description = description
                            existing.location = location
                            existing.dtstart = start_dt
                            existing.dtend = end_dt
                            existing.all_day = all_day
                            existing.is_utc = row_is_utc
                            existing.rrule = rrule
                            existing.origin = "caldav"
                        else:
                            new_ev = CalendarEvent(
                                uid=uid_val,
                                calendar_id=local_cal.id,
                                summary=summary,
                                description=description,
                                location=location,
                                dtstart=start_dt,
                                dtend=end_dt,
                                all_day=all_day,
                                is_utc=row_is_utc,
                                rrule=rrule,
                                origin="caldav",
                            )
                            db.add(new_ev)
                            pending[uid_val] = new_ev
                        result["events"] += 1
                db.commit()

                # Prune locally-cached CalDAV events that vanished
                # upstream (only within our sync window — events outside
                # the window aren't in `objs`, so we'd false-delete them).
                # Only rows we previously pulled from the server (origin=="caldav")
                # are prunable; locally-created events (agent / email triage / a
                # UI event whose write-back failed) carry origin NULL and must
                # never be deleted just because the server didn't return them.
                # Skip the prune on any parse failure: seen_uids is then an
                # incomplete view of the server, so pruning against it would
                # delete events that still exist upstream but could not be read
                # (the empty-seen_uids case wipes the whole window; a partial
                # failure deletes just the unreadable rows).
                if _should_prune_window(seen_uids, parse_failed):
                    stale = db.query(CalendarEvent).filter(
                        CalendarEvent.calendar_id == local_cal.id,
                        CalendarEvent.origin == "caldav",
                        CalendarEvent.dtstart >= start,
                        CalendarEvent.dtstart <= end,
                        ~CalendarEvent.uid.in_(seen_uids) if seen_uids else CalendarEvent.uid.isnot(None),
                    ).all()
                    for ev in stale:
                        db.delete(ev)
                    result["deleted"] += len(stale)
                    db.commit()
            except Exception as e:
                # One calendar failing must not abort the rest; discard its
                # uncommitted changes and carry on with the next calendar.
                logger.exception("CalDAV sync failed for one calendar")
                result["errors"].append(str(e)[:200])
                db.rollback()
    finally:
        db.close()
    return result
def _load_caldav_accounts(owner: str) -> list:
    """Return *owner*'s CalDAV accounts, migrating legacy prefs on first use.

    When the prefs already hold a ``caldav_accounts`` key its value is
    returned as a list. Otherwise a legacy single-account ``caldav`` entry
    with a URL is converted into a one-item account list (with a newly
    generated id), written back into the prefs, and returned. With neither
    present the result is an empty list.

    Persisting the migration is best-effort: if ``_save_for_user`` is
    unavailable (e.g. in a test with a minimal prefs mock) the migrated
    accounts are still returned for this call.
    """
    import uuid as _uuid
    from routes.prefs_routes import _load_for_user

    prefs = _load_for_user(owner) or {}
    if "caldav_accounts" in prefs:
        return list(prefs["caldav_accounts"] or [])

    # Legacy single-account config: convert it to the list format.
    legacy = prefs.get("caldav", {}) or {}
    if not legacy.get("url"):
        return []

    migrated = [
        {
            "id": str(_uuid.uuid4()),
            "label": "CalDAV",
            "url": legacy["url"],
            "username": legacy.get("username", ""),
            "password": legacy.get("password", ""),
        }
    ]
    prefs["caldav_accounts"] = migrated
    prefs.pop("caldav", None)
    try:
        from routes.prefs_routes import _save_for_user

        _save_for_user(owner, prefs)
    except (ImportError, AttributeError):
        # Best-effort persist; the migrated list is still returned below.
        pass
    return migrated
async def sync_caldav(owner: str) -> dict:
    """Pull CalDAV state into the local DB for `owner` across all accounts.

    Each configured account is validated and synced in turn; one account
    failing does not stop the others.

    Args:
        owner: User whose configured CalDAV accounts are synced.

    Returns:
        Aggregated ``{"calendars", "events", "deleted", "errors"}``. Every
        error string is prefixed with the label of the account it came from.
        When no account is configured the counts are zero and ``errors``
        holds a single "CalDAV is not configured" entry.
    """
    from src.secret_storage import decrypt

    accounts = _load_caldav_accounts(owner)
    if not accounts:
        return {
            "calendars": 0, "events": 0, "deleted": 0,
            "errors": ["CalDAV is not configured"],
        }

    totals: dict = {"calendars": 0, "events": 0, "deleted": 0, "errors": []}
    for acc in accounts:
        url = (acc.get("url") or "").strip()
        user = (acc.get("username") or "").strip()
        pw = acc.get("password") or ""
        account_id = acc.get("id") or ""
        label = acc.get("label") or url or account_id
        try:
            pw = decrypt(pw)
        except Exception:
            # Deliberate best-effort: if the stored value cannot be decrypted
            # it is used as-is. NOTE(review): presumably covers passwords
            # saved before encryption existed — confirm.
            pass
        if not (url and user and pw):
            totals["errors"].append(f"{label}: missing URL, username, or password")
            continue
        try:
            # URL validation resolves the host via blocking DNS lookups, so
            # it runs in a worker thread like the sync itself, keeping the
            # event loop free.
            url = await asyncio.to_thread(validate_caldav_url, url)
            result = await asyncio.to_thread(_sync_blocking, owner, url, user, pw, account_id)
        except ValueError as e:
            result = {"calendars": 0, "events": 0, "deleted": 0, "errors": [str(e)]}
        except Exception as e:
            logger.exception("CalDAV sync raised for account %s", label)
            result = {"calendars": 0, "events": 0, "deleted": 0, "errors": [str(e)[:200]]}
        totals["calendars"] += result.get("calendars", 0)
        totals["events"] += result.get("events", 0)
        totals["deleted"] += result.get("deleted", 0)
        for err in result.get("errors", []):
            totals["errors"].append(f"{label}: {err}")
    return totals