"""CalDAV → local SQLite sync. The Settings UI lets users save CalDAV credentials, but the original sync path was removed when calendar storage was migrated to SQLite. This module re-wires that gap as a one-way pull (remote → local), called on calendar open and from a periodic scheduler loop. Design notes: - We use the `caldav` lib so PROPFIND discovery + REPORT XML work across Radicale / Nextcloud / Apple / Fastmail without us reinventing the protocol. It's pure Python. - The lib is synchronous; we run it in a threadpool via `asyncio.to_thread` so the FastAPI event loop stays free. - Each remote calendar maps to one local `CalendarCal` row with `source="caldav"` and `id` = a stable hash of the remote URL so re-syncs idempotently target the same row. - Events upsert by VEVENT UID (kept as the local `uid`). Local CalDAV-sourced events not seen in the latest pull are deleted so remote deletions propagate. - Datetimes are converted to UTC and the row is flagged `is_utc=True` so the serializer adds the Z suffix and the frontend renders in the user's local TZ correctly. """ import asyncio import hashlib import ipaddress import logging import os import socket import uuid from datetime import date, datetime, timedelta, timezone from urllib.parse import urlparse, urlunparse logger = logging.getLogger(__name__) # Pull window: 90 days back, 1 year forward. Keeps the REPORT cheap and # matches what the calendar UI typically renders. Far-future recurring # events still come through via RRULE expansion on the frontend. _LOOKBACK_DAYS = 90 _LOOKAHEAD_DAYS = 365 _BLOCKED_HOSTS = { "localhost", "localhost.", "ip6-localhost", "metadata.google.internal", } def _private_caldav_allowed() -> bool: return os.environ.get("ODYSSEUS_ALLOW_PRIVATE_CALDAV", "0").lower() in {"1", "true", "yes"} def _validate_caldav_address(addr: ipaddress._BaseAddress) -> None: if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped is not None: addr = addr.ipv4_mapped if ( addr.is_loopback or addr.is_link_local or addr.is_multicast or addr.is_unspecified or addr.is_reserved ): raise ValueError("CalDAV URL host is not allowed") if addr.is_private and not _private_caldav_allowed(): raise ValueError("Private CalDAV IPs require ODYSSEUS_ALLOW_PRIVATE_CALDAV=1") def _validate_caldav_ip(host: str) -> None: try: ip = ipaddress.ip_address(host.strip("[]")) except ValueError: return _validate_caldav_address(ip) def _resolve_caldav_host_ips(host: str) -> list[ipaddress._BaseAddress]: addrs: list[ipaddress._BaseAddress] = [] for family, _, _, _, sockaddr in socket.getaddrinfo(host, None): if family not in (socket.AF_INET, socket.AF_INET6): continue try: addrs.append(ipaddress.ip_address(sockaddr[0].split("%", 1)[0])) except ValueError: continue return addrs def _validate_caldav_hostname(host: str) -> None: try: ipaddress.ip_address(host.strip("[]")) return except ValueError: pass try: addrs = _resolve_caldav_host_ips(host) except OSError: raise ValueError("CalDAV URL host does not resolve") if not addrs: raise ValueError("CalDAV URL host does not resolve") for addr in addrs: _validate_caldav_address(addr) def validate_caldav_url(raw_url: str) -> str: """Validate and normalize a user-provided CalDAV URL before server-side use.""" url = (raw_url if isinstance(raw_url, str) else "").strip() if not url: raise ValueError("CalDAV URL is required") parsed = urlparse(url) if parsed.scheme not in {"http", "https"}: raise ValueError("CalDAV URL must start with http:// or https://") if not parsed.hostname: raise ValueError("CalDAV URL must include a host") if parsed.username or parsed.password: raise ValueError("Put CalDAV credentials in the username/password fields, not the URL") if parsed.fragment: raise ValueError("CalDAV URL fragments are not allowed") try: parsed.port except ValueError: raise ValueError("CalDAV URL has an invalid port") host = (parsed.hostname or "").lower() if host in _BLOCKED_HOSTS or host.endswith(".localhost"): raise ValueError("CalDAV URL host is not allowed") _validate_caldav_ip(host) _validate_caldav_hostname(host) return urlunparse(parsed._replace(fragment="")).rstrip("/") def _stable_cal_id(remote_url: str, owner: str = "", account_id: str = "") -> str: """Deterministic local id for a remote CalDAV calendar, scoped to owner and account so two users — or one user with two accounts — pointing at the same server URL get distinct local rows (avoids PK collision, #2765). The owner and account_id default to "" for the legacy/URL-only path so existing callers without those arguments keep working.""" key = f"{owner}\n{account_id}\n{remote_url}" h = hashlib.sha256(key.encode("utf-8")).hexdigest()[:24] return f"caldav-{h}" def _to_utc_naive(dt): """CalDAV datetimes can be tz-aware (with a TZID) or naive. The DB column is naive but we set is_utc=True so the serializer adds Z. All-day events stay as date and get widened to datetime here.""" if isinstance(dt, datetime): if dt.tzinfo is not None: return dt.astimezone(timezone.utc).replace(tzinfo=None), False return dt, False # naive → treat as local # date-only (all-day) return datetime(dt.year, dt.month, dt.day), True def _find_existing_event(db, pending, uid_val, calendar_id): """Find the event to update for THIS calendar. CalendarEvent.uid is the global primary key, so an unscoped lookup by uid returns whatever row holds that VEVENT uid — including another owner's. The old code then reassigned that row's calendar_id, moving (stealing) another user's event into the syncing calendar whenever the two share a uid (shared/subscribed/public calendars, or two accounts on one server). Scope the lookup to the calendar being synced; a genuine cross-user uid collision then fails the PK insert inside the per-calendar try/except instead of hijacking the row. (import_ics was already fixed this way.) """ from core.database import CalendarEvent return pending.get(uid_val) or db.query(CalendarEvent).filter( CalendarEvent.uid == uid_val, CalendarEvent.calendar_id == calendar_id, ).first() def _google_caldav_events_url(url: str) -> str | None: """Map a Google CalDAV *principal* URL to its event-collection URL. Google serves the principal at ``…/user`` but events live under ``…/events`` — the ``/user`` resource holds no VEVENTs. The `caldav` library's principal→home-set discovery does not reliably enumerate calendars from Google's ``/user`` endpoint, so the sync falls into the "treat the URL as a single calendar" fallback below. Pointed at ``/user`` that fallback issues every calendar-query REPORT against the principal, which returns a clean but empty 200 for all date ranges — the calendar shows no events even though auth succeeded (issue #2507). Both Google CalDAV endpoint forms are handled, since some accounts only authenticate against one of them: - newer: ``https://apidata.googleusercontent.com/caldav/v2//user`` - legacy: ``https://www.google.com/calendar/dav//user`` Returns the events URL for a recognised Google principal URL, else None so the caller keeps the original URL unchanged. """ parts = urlparse(url) host = (parts.hostname or "").lower() path = parts.path.rstrip("/") if not path.endswith("/user"): return None is_google = ( host.endswith("googleusercontent.com") # newer /caldav/v2 form or (host in ("www.google.com", "google.com") and "/calendar/dav/" in path) # legacy form ) if not is_google: return None new_path = path[: -len("/user")] + "/events" return urlunparse(parts._replace(path=new_path)) def _open_url_as_calendar(client, url: str): """Open ``url`` as a single calendar collection. Used when principal discovery yields no calendars. Google's principal URL is not an event collection, so map it to the events URL first (see ``_google_caldav_events_url``); other servers' URLs are used as-is. """ target = _google_caldav_events_url(url) or url return client.calendar(url=target) def _build_dav_client(url: str, username: str, password: str): """Construct a CalDAV client with automatic redirects disabled. ``validate_caldav_url`` resolves and vets the *initial* host, but caldav's underlying HTTP session follows 3xx redirects by default. So a URL that passes validation can still be redirected — at request time — to loopback / link-local / private space, re-opening the SSRF the host check closes. Pin the session to zero redirects: any 3xx then raises instead of silently following an attacker-chosen ``Location``. This mirrors the test-connection path in ``routes/calendar_routes.py``, which already sets ``follow_redirects=False``. DAVClient exposes no per-request redirect flag, so we set it on the session after construction (the session is created in ``__init__``). """ import caldav client = caldav.DAVClient(url=url, username=username, password=password) # Unconditional: a redirect-disable that only sometimes applies is not a # control. The session exists right after __init__ on every real client; # test_build_dav_client_disables_redirects asserts it against installed # caldav in CI. client.session.max_redirects = 0 return client def _should_prune_window(seen_uids: set, parse_failed: bool) -> bool: """Whether the post-sync prune of vanished CalDAV events is safe to run. The prune deletes local ``origin=="caldav"`` rows in the window whose UID the server did not just return. Any parse failure (total or partial) makes ``seen_uids`` an incomplete view of the server, so pruning against it can delete events that still exist upstream but could not be read: a total failure wipes the whole window, a partial failure deletes just the unreadable ones. Only prune on a clean read. An empty ``seen_uids`` after a clean read is a genuinely empty window, which is safe to prune. """ return not parse_failed def _sync_blocking(owner: str, url: str, username: str, password: str, account_id: str = "") -> dict: """The actual sync — synchronous, intended to run in a threadpool. Returns counts: {calendars, events, deleted, errors}.""" # Lazy imports so a missing `caldav` dep doesn't break app startup — # the integrations form still works, sync just no-ops with an error. from caldav.lib.error import AuthorizationError, NotFoundError from core.database import CalendarCal, CalendarEvent, SessionLocal result = {"calendars": 0, "events": 0, "deleted": 0, "errors": []} client = _build_dav_client(url, username, password) # Discovery: try principal → calendars first; if the server doesn't # support discovery (or the URL points directly at a calendar), fall # back to treating the URL as a single calendar. calendars = [] try: principal = client.principal() calendars = principal.calendars() except (AuthorizationError, NotFoundError) as e: result["errors"].append(f"Discovery failed: {e}") return result except Exception as e: logger.info(f"CalDAV principal discovery failed, trying URL as calendar: {e}") try: calendars = [_open_url_as_calendar(client, url)] except Exception as e2: result["errors"].append(f"Could not open URL as calendar: {e2}") return result if not calendars: try: calendars = [_open_url_as_calendar(client, url)] except Exception as e: result["errors"].append(f"No calendars and URL fallback failed: {e}") return result start = datetime.utcnow() - timedelta(days=_LOOKBACK_DAYS) end = datetime.utcnow() + timedelta(days=_LOOKAHEAD_DAYS) db = SessionLocal() try: for remote_cal in calendars: try: remote_url = str(remote_cal.url) cal_id = _stable_cal_id(remote_url, owner=owner, account_id=account_id) display_name = (remote_cal.name or "").strip() or "CalDAV" local_cal = db.query(CalendarCal).filter( CalendarCal.id == cal_id, CalendarCal.owner == owner, ).first() if not local_cal: local_cal = CalendarCal( id=cal_id, owner=owner, name=display_name, color="#5b8abf", source="caldav", account_id=account_id or None, ) db.add(local_cal) db.commit() else: # Refresh display name and stamp account_id if missing. changed = False if local_cal.name != display_name: local_cal.name = display_name changed = True if account_id and not local_cal.account_id: local_cal.account_id = account_id changed = True if changed: db.commit() result["calendars"] += 1 # Fetch events in window. `date_search` returns CalendarObject # resources; each may contain one VEVENT (most servers) or # several (rare). from icalendar import Calendar as iCal seen_uids = set() # Track events added to the session but not yet committed so # duplicate UIDs within the same batch are updated, not re-inserted # (which would violate the UNIQUE constraint on commit). pending: dict = {} parse_failed = False try: objs = remote_cal.date_search(start=start, end=end, expand=False) except Exception as e: result["errors"].append(f"{display_name}: date_search failed ({e})") continue for obj in objs: try: ical = iCal.from_ical(obj.data) except Exception as e: result["errors"].append(f"{display_name}: parse failed ({e})") parse_failed = True continue for comp in ical.walk(): if comp.name != "VEVENT": continue uid_val = str(comp.get("uid", "")) or str(uuid.uuid4()) seen_uids.add(uid_val) dtstart_p = comp.get("dtstart") if not dtstart_p: continue start_dt, all_day = _to_utc_naive(dtstart_p.dt) dtend_p = comp.get("dtend") if dtend_p: end_dt, _ = _to_utc_naive(dtend_p.dt) elif all_day: end_dt = start_dt + timedelta(days=1) else: end_dt = start_dt + timedelta(hours=1) # is_utc reflects whether the source carried a TZ # we converted from. All-day = no TZ semantics. row_is_utc = ( not all_day and isinstance(dtstart_p.dt, datetime) and dtstart_p.dt.tzinfo is not None ) summary = str(comp.get("summary", "")) description = str(comp.get("description", "")) location = str(comp.get("location", "")) rrule = ( comp.get("rrule").to_ical().decode() if comp.get("rrule") else "" ) existing = _find_existing_event(db, pending, uid_val, local_cal.id) if existing: existing.calendar_id = local_cal.id existing.summary = summary existing.description = description existing.location = location existing.dtstart = start_dt existing.dtend = end_dt existing.all_day = all_day existing.is_utc = row_is_utc existing.rrule = rrule existing.origin = "caldav" else: new_ev = CalendarEvent( uid=uid_val, calendar_id=local_cal.id, summary=summary, description=description, location=location, dtstart=start_dt, dtend=end_dt, all_day=all_day, is_utc=row_is_utc, rrule=rrule, origin="caldav", ) db.add(new_ev) pending[uid_val] = new_ev result["events"] += 1 db.commit() # Prune locally-cached CalDAV events that vanished # upstream (only within our sync window — events outside # the window aren't in `objs`, so we'd false-delete them). # Only rows we previously pulled from the server (origin=="caldav") # are prunable; locally-created events (agent / email triage / a # UI event whose write-back failed) carry origin NULL and must # never be deleted just because the server didn't return them. # Skip the prune on any parse failure: seen_uids is then an # incomplete view of the server, so pruning against it would # delete events that still exist upstream but could not be read # (the empty-seen_uids case wipes the whole window; a partial # failure deletes just the unreadable rows). if _should_prune_window(seen_uids, parse_failed): stale = db.query(CalendarEvent).filter( CalendarEvent.calendar_id == local_cal.id, CalendarEvent.origin == "caldav", CalendarEvent.dtstart >= start, CalendarEvent.dtstart <= end, ~CalendarEvent.uid.in_(seen_uids) if seen_uids else CalendarEvent.uid.isnot(None), ).all() for ev in stale: db.delete(ev) result["deleted"] += len(stale) db.commit() except Exception as e: logger.exception("CalDAV sync failed for one calendar") result["errors"].append(str(e)[:200]) db.rollback() finally: db.close() return result def _load_caldav_accounts(owner: str) -> list: """Return the list of CalDAV accounts for *owner*, auto-migrating the legacy single-account ``caldav`` key to the new ``caldav_accounts`` list on first call. The save step is best-effort: if ``_save_for_user`` is unavailable (e.g. in a test with a minimal prefs mock) the migrated accounts are still returned; the next real call will just re-run the cheap migration again. """ import uuid as _uuid from routes.prefs_routes import _load_for_user prefs = _load_for_user(owner) or {} if "caldav_accounts" in prefs: return list(prefs["caldav_accounts"] or []) # Migrate legacy single-account config to the list format. legacy = prefs.get("caldav", {}) or {} if legacy.get("url"): accounts = [{ "id": str(_uuid.uuid4()), "label": "CalDAV", "url": legacy["url"], "username": legacy.get("username", ""), "password": legacy.get("password", ""), }] prefs["caldav_accounts"] = accounts prefs.pop("caldav", None) try: from routes.prefs_routes import _save_for_user _save_for_user(owner, prefs) except (ImportError, AttributeError): pass # best-effort; next call re-migrates from the still-present legacy key return accounts return [] async def sync_caldav(owner: str) -> dict: """Pull CalDAV state into local DB for `owner` across all configured accounts. Returns aggregated counts + per-account errors.""" from src.secret_storage import decrypt accounts = _load_caldav_accounts(owner) if not accounts: return { "calendars": 0, "events": 0, "deleted": 0, "errors": ["CalDAV is not configured"], } totals: dict = {"calendars": 0, "events": 0, "deleted": 0, "errors": []} for acc in accounts: url = (acc.get("url") or "").strip() user = (acc.get("username") or "").strip() pw = acc.get("password") or "" account_id = acc.get("id") or "" label = acc.get("label") or url or account_id try: pw = decrypt(pw) except Exception: pass if not (url and user and pw): totals["errors"].append(f"{label}: missing URL, username, or password") continue try: url = validate_caldav_url(url) result = await asyncio.to_thread(_sync_blocking, owner, url, user, pw, account_id) except ValueError as e: result = {"calendars": 0, "events": 0, "deleted": 0, "errors": [str(e)]} except Exception as e: logger.exception("CalDAV sync raised for account %s", label) result = {"calendars": 0, "events": 0, "deleted": 0, "errors": [str(e)[:200]]} totals["calendars"] += result.get("calendars", 0) totals["events"] += result.get("events", 0) totals["deleted"] += result.get("deleted", 0) for err in result.get("errors", []): totals["errors"].append(f"{label}: {err}") return totals