diff --git a/routes/email_routes.py b/routes/email_routes.py index d0c40659a..f8ad50e2e 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str: return m.group(1).decode() if m else "" +_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(") + + +def _group_uid_fetch_records(msg_data) -> list: + """Group an imaplib UID FETCH response into per-message (meta, payload). + + imaplib yields an interleaved list: ``(meta, literal)`` tuples for + attributes that carry a literal (``RFC822.HEADER {n}`` etc.) plus bare + ``bytes`` elements for everything the server sends outside a literal. + Where each attribute lands is server-specific: Dovecot sends FLAGS + *before* the header literal (so it ends up inside the tuple meta), while + Gmail sends FLAGS *after* it, arriving as a bare ``b' FLAGS (\\Seen))'`` + element. Dropping bare elements therefore silently loses FLAGS on Gmail + and every message renders as unread/unflagged. + + A tuple whose meta starts with a sequence number opens a new record; + every other part — continuation tuple or bare bytes — is folded into the + current record's meta so attribute regexes see the full meta text. + Plain ``b')'`` terminators get folded in too, which is harmless. 
+ """ + grouped: list = [] # list of (meta_bytes, payload_bytes_or_None) + for part in (msg_data or []): + if isinstance(part, tuple): + meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode() + if _FETCH_SEQ_RE.match(meta_b): + grouped.append((meta_b, part[1])) + elif grouped: + cur_meta, cur_payload = grouped[-1] + grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1]) + elif isinstance(part, (bytes, bytearray)) and grouped: + cur_meta, cur_payload = grouped[-1] + grouped[-1] = (cur_meta + b" " + bytes(part), cur_payload) + return grouped + + def _smtp_ready(cfg: dict) -> bool: return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password")) @@ -799,20 +834,11 @@ def setup_email_routes(): except Exception as e: logger.warning(f"Batch fetch failed, falling back to per-UID: {e}") status, msg_data = "NO", [] - # imaplib batch responses interleave (meta, payload) tuples and - # `b')'` terminators. Group by message: each tuple where the - # meta begins with a seq number starts a new message record. - seq_re = re.compile(rb'^(\d+)\s+\(') - grouped = [] # list of (meta_str, payload_bytes) - for part in (msg_data or []): - if isinstance(part, tuple): - meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode() - if seq_re.match(meta_b): - grouped.append((meta_b, part[1])) - elif grouped: - # continuation of previous message — concatenate meta info if any - cur_meta, cur_payload = grouped[-1] - grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1]) + # Group the batched response into per-message (meta, payload) + # records. Bare bytes parts must be kept: Gmail returns FLAGS + # after the header literal as a bare element, and dropping it + # rendered every Gmail message as unread/unflagged. 
+ grouped = _group_uid_fetch_records(msg_data) if status != "OK" and not grouped: conn.logout() @@ -1098,14 +1124,15 @@ def setup_email_routes(): continue raw_header = None flags = "" - for part in msg_data: - if isinstance(part, tuple): - meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0]) - if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta: - raw_header = part[1] - flag_match = re.search(r'FLAGS \(([^)]*)\)', meta) - if flag_match: - flags = flag_match.group(1) + # Same Gmail caveat as the list route: FLAGS may + # arrive after the header literal, so group bare + # parts back into the message meta before scanning. + for meta_b, payload in _group_uid_fetch_records(msg_data): + if payload and b"RFC822.HEADER" in meta_b: + raw_header = payload + flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b) + if flag_match: + flags = flag_match.group(1).decode(errors="replace") if not raw_header: continue msg = email_mod.message_from_bytes(raw_header) diff --git a/tests/test_email_gmail_fetch_flags.py b/tests/test_email_gmail_fetch_flags.py new file mode 100644 index 000000000..53e300544 --- /dev/null +++ b/tests/test_email_gmail_fetch_flags.py @@ -0,0 +1,71 @@ +"""Regression tests for _group_uid_fetch_records (Gmail FLAGS placement). + +imaplib hands back UID FETCH responses as an interleaved list of +``(meta, literal)`` tuples and bare ``bytes`` elements. Dovecot sends FLAGS +before the RFC822.HEADER literal, so they sit inside the tuple meta; Gmail +sends FLAGS *after* the literal, as a bare ``b' FLAGS (\\Seen))'`` element. +The old grouping loop only looked at tuples, so on Gmail every message lost +its FLAGS and rendered as unread/unflagged in the email library. 
# Fixtures and regression tests for _group_uid_fetch_records: FLAGS must stay
# attached to the right message whether the server sends them before the
# header literal (Dovecot) or after it as a bare element (Gmail).

import re

from routes.email_routes import _group_uid_fetch_records, _uid_from_fetch_meta


def _flags(meta_b: bytes) -> str:
    """Return the contents of the first ``FLAGS (...)`` list in *meta_b*.

    Returns an empty string when the meta carries no FLAGS attribute.
    """
    m = re.search(rb"FLAGS \(([^)]*)\)", meta_b)
    return m.group(1).decode() if m else ""


# Captured shape of a real Gmail response to
#   UID FETCH a,b (UID FLAGS RFC822.HEADER RFC822.SIZE):
GMAIL_RESPONSE = [
    (b"10779 (UID 18723 RFC822.SIZE 54308 RFC822.HEADER {24}", b"Subject: read one\r\n\r\n"),
    rb" FLAGS (\Seen))",
    (b"10780 (UID 18724 RFC822.SIZE 124310 RFC822.HEADER {26}", b"Subject: unread one\r\n\r\n"),
    rb" FLAGS ())",
]

# Dovecot puts FLAGS before the literal and terminates with a bare b')'.
DOVECOT_RESPONSE = [
    (rb"1 (UID 5 FLAGS (\Seen) RFC822.SIZE 100 RFC822.HEADER {18}", b"Subject: hi\r\n\r\n"),
    b")",
    (b"2 (UID 6 FLAGS () RFC822.SIZE 90 RFC822.HEADER {19}", b"Subject: new\r\n\r\n"),
    b")",
]


def test_gmail_post_literal_flags_attach_to_their_own_message():
    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)

    assert len(grouped) == 2
    assert _uid_from_fetch_meta(grouped[0][0]) == "18723"
    assert _flags(grouped[0][0]) == r"\Seen"
    assert grouped[0][1] == b"Subject: read one\r\n\r\n"

    assert _uid_from_fetch_meta(grouped[1][0]) == "18724"
    assert _flags(grouped[1][0]) == ""
    assert grouped[1][1] == b"Subject: unread one\r\n\r\n"


def test_dovecot_pre_literal_flags_unchanged():
    grouped = _group_uid_fetch_records(DOVECOT_RESPONSE)

    assert len(grouped) == 2
    assert _flags(grouped[0][0]) == r"\Seen"
    assert _flags(grouped[1][0]) == ""
    assert grouped[1][1] == b"Subject: new\r\n\r\n"


def test_size_and_uid_survive_grouping():
    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)
    sizes = [re.search(rb"RFC822\.SIZE (\d+)", m).group(1) for m, _ in grouped]
    assert sizes == [b"54308", b"124310"]
    # The test name promises UID coverage too; folding the trailing bare
    # FLAGS element into the meta must leave each record's UID readable.
    uids = [_uid_from_fetch_meta(m) for m, _ in grouped]
    assert uids == ["18723", "18724"]


def test_continuation_tuple_folds_into_current_record():
    # A second literal for the same message arrives as a tuple whose meta
    # does not start with a sequence number: it must extend the current
    # record's meta and must not replace the payload already captured.
    grouped = _group_uid_fetch_records([
        (b"1 (UID 5 RFC822.HEADER {3}", b"abc"),
        (b" BODY[TEXT] {2}", b"hi"),
        b")",
    ])

    assert len(grouped) == 1
    assert b"BODY[TEXT] {2}" in grouped[0][0]
    assert grouped[0][1] == b"abc"


def test_empty_and_none_inputs():
    assert _group_uid_fetch_records(None) == []
    assert _group_uid_fetch_records([]) == []
    # A stray bare element before any tuple opens no record and must not crash.
    assert _group_uid_fetch_records([rb" FLAGS (\Seen))"]) == []