fix(email): keep FETCH attributes Gmail sends after the header literal (all Gmail mail showed as unread) (#3785)

* fix(email): keep FETCH attributes Gmail sends after the header literal

imaplib returns a UID FETCH response as an interleaved list of
(meta, literal) tuples plus bare bytes elements. Which attributes land
where is server-specific: Dovecot sends FLAGS before the RFC822.HEADER
literal (inside the tuple meta), Gmail sends them after it, as a bare
` FLAGS (\Seen))` element. The email list grouping loop and the search
loop only inspected tuples, so on Gmail every message lost its FLAGS and
the whole mailbox rendered as unread/unflagged, with mark-read appearing
to have no effect.

Extract the grouping into _group_uid_fetch_records(), fold bare bytes
parts into the current message meta there, and reuse it in both the
batched list fetch and the per-UID search fetch. Covered by unit tests
with captured Gmail-shaped and Dovecot-shaped responses.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* test(email): use raw byte literals for IMAP backslash escapes

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
AkioKoneko
2026-06-11 16:12:39 +02:00
committed by GitHub
parent c500bcb47d
commit 4fa4d0100a
2 changed files with 120 additions and 22 deletions
+49 -22
View File
@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
return m.group(1).decode() if m else ""
_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")
def _group_uid_fetch_records(msg_data) -> list:
"""Group an imaplib UID FETCH response into per-message (meta, payload).
imaplib yields an interleaved list: ``(meta, literal)`` tuples for
attributes that carry a literal (``RFC822.HEADER {n}`` etc.) plus bare
``bytes`` elements for everything the server sends outside a literal.
Where each attribute lands is server-specific: Dovecot sends FLAGS
*before* the header literal (so it ends up inside the tuple meta), while
Gmail sends FLAGS *after* it, arriving as a bare ``b' FLAGS (\\Seen))'``
element. Dropping bare elements therefore silently loses FLAGS on Gmail
and every message renders as unread/unflagged.
A tuple whose meta starts with a sequence number opens a new record;
every other part — continuation tuple or bare bytes — is folded into the
current record's meta so attribute regexes see the full meta text.
Plain ``b')'`` terminators get folded in too, which is harmless.
"""
grouped: list = [] # list of (meta_bytes, payload_bytes_or_None)
for part in (msg_data or []):
if isinstance(part, tuple):
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
if _FETCH_SEQ_RE.match(meta_b):
grouped.append((meta_b, part[1]))
elif grouped:
cur_meta, cur_payload = grouped[-1]
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
elif isinstance(part, (bytes, bytearray)) and grouped:
cur_meta, cur_payload = grouped[-1]
grouped[-1] = (cur_meta + b" " + bytes(part), cur_payload)
return grouped
def _smtp_ready(cfg: dict) -> bool:
return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
@@ -799,20 +834,11 @@ def setup_email_routes():
except Exception as e:
logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
status, msg_data = "NO", []
# imaplib batch responses interleave (meta, payload) tuples and
# `b')'` terminators. Group by message: each tuple where the
# meta begins with a seq number starts a new message record.
seq_re = re.compile(rb'^(\d+)\s+\(')
grouped = [] # list of (meta_str, payload_bytes)
for part in (msg_data or []):
if isinstance(part, tuple):
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
if seq_re.match(meta_b):
grouped.append((meta_b, part[1]))
elif grouped:
# continuation of previous message — concatenate meta info if any
cur_meta, cur_payload = grouped[-1]
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
# Group the batched response into per-message (meta, payload)
# records. Bare bytes parts must be kept: Gmail returns FLAGS
# after the header literal as a bare element, and dropping it
# rendered every Gmail message as unread/unflagged.
grouped = _group_uid_fetch_records(msg_data)
if status != "OK" and not grouped:
conn.logout()
@@ -1098,14 +1124,15 @@ def setup_email_routes():
continue
raw_header = None
flags = ""
for part in msg_data:
if isinstance(part, tuple):
meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
raw_header = part[1]
flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
if flag_match:
flags = flag_match.group(1)
# Same Gmail caveat as the list route: FLAGS may
# arrive after the header literal, so group bare
# parts back into the message meta before scanning.
for meta_b, payload in _group_uid_fetch_records(msg_data):
if payload and b"RFC822.HEADER" in meta_b:
raw_header = payload
flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
if flag_match:
flags = flag_match.group(1).decode(errors="replace")
if not raw_header:
continue
msg = email_mod.message_from_bytes(raw_header)
+71
View File
@@ -0,0 +1,71 @@
"""Regression tests for _group_uid_fetch_records (Gmail FLAGS placement).
imaplib hands back UID FETCH responses as an interleaved list of
``(meta, literal)`` tuples and bare ``bytes`` elements. Dovecot sends FLAGS
before the RFC822.HEADER literal, so they sit inside the tuple meta; Gmail
sends FLAGS *after* the literal, as a bare ``b' FLAGS (\\Seen))'`` element.
The old grouping loop only looked at tuples, so on Gmail every message lost
its FLAGS and rendered as unread/unflagged in the email library.
"""
import re
from routes.email_routes import _group_uid_fetch_records, _uid_from_fetch_meta
def _flags(meta_b: bytes) -> str:
m = re.search(rb"FLAGS \(([^)]*)\)", meta_b)
return m.group(1).decode() if m else ""
# Captured shape of a real Gmail response to
# UID FETCH a,b (UID FLAGS RFC822.HEADER RFC822.SIZE):
# each message is a (meta, literal) tuple followed by a bare bytes element
# that carries FLAGS and the closing parenthesis.
# NOTE: the {n} literal sizes in these fixtures do not equal the byte length
# of the payloads next to them; only the element layout is relevant here.
GMAIL_RESPONSE = [
(b"10779 (UID 18723 RFC822.SIZE 54308 RFC822.HEADER {24}", b"Subject: read one\r\n\r\n"),
rb" FLAGS (\Seen))",
(b"10780 (UID 18724 RFC822.SIZE 124310 RFC822.HEADER {26}", b"Subject: unread one\r\n\r\n"),
rb" FLAGS ())",
]
# Dovecot puts FLAGS before the literal and terminates with a bare b')'.
# The first message is \Seen, the second has an empty FLAGS list.
DOVECOT_RESPONSE = [
(rb"1 (UID 5 FLAGS (\Seen) RFC822.SIZE 100 RFC822.HEADER {18}", b"Subject: hi\r\n\r\n"),
b")",
(b"2 (UID 6 FLAGS () RFC822.SIZE 90 RFC822.HEADER {19}", b"Subject: new\r\n\r\n"),
b")",
]
def test_gmail_post_literal_flags_attach_to_their_own_message():
    """Gmail's trailing FLAGS element must land on the record it follows."""
    records = _group_uid_fetch_records(GMAIL_RESPONSE)
    assert len(records) == 2

    (seen_meta, seen_payload), (unseen_meta, unseen_payload) = records

    # First message: FLAGS (\Seen) arrived after its header literal.
    assert _uid_from_fetch_meta(seen_meta) == "18723"
    assert _flags(seen_meta) == r"\Seen"
    assert seen_payload == b"Subject: read one\r\n\r\n"

    # Second message: empty FLAGS list, so no flags are reported.
    assert _uid_from_fetch_meta(unseen_meta) == "18724"
    assert _flags(unseen_meta) == ""
    assert unseen_payload == b"Subject: unread one\r\n\r\n"
def test_dovecot_pre_literal_flags_unchanged():
    """FLAGS sent before the literal (Dovecot layout) are still readable."""
    records = _group_uid_fetch_records(DOVECOT_RESPONSE)
    assert len(records) == 2

    first, second = records
    assert _flags(first[0]) == r"\Seen"
    assert _flags(second[0]) == ""
    assert second[1] == b"Subject: new\r\n\r\n"
def test_size_and_uid_survive_grouping():
    """RFC822.SIZE stays extractable from each grouped Gmail meta.

    Despite the name, only the size values are asserted here.
    """
    sizes = []
    for meta, _payload in _group_uid_fetch_records(GMAIL_RESPONSE):
        size_match = re.search(rb"RFC822\.SIZE (\d+)", meta)
        sizes.append(size_match.group(1))
    assert sizes == [b"54308", b"124310"]
def test_empty_and_none_inputs():
    """Falsy input and orphaned bare elements both produce no records."""
    for nothing in (None, []):
        assert _group_uid_fetch_records(nothing) == []

    # A stray bare element before any tuple opens no record and must not crash.
    orphan_only = [rb" FLAGS (\Seen))"]
    assert _group_uid_fetch_records(orphan_only) == []