mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(email): keep FETCH attributes Gmail sends after the header literal (all Gmail mail showed as unread) (#3785)
* fix(email): keep FETCH attributes Gmail sends after the header literal

  imaplib returns a UID FETCH response as an interleaved list of (meta, literal) tuples plus bare bytes elements. Which attributes land where is server-specific: Dovecot sends FLAGS before the RFC822.HEADER literal (inside the tuple meta), Gmail sends them after it, as a bare ` FLAGS (\Seen))` element. The email list grouping loop and the search loop only inspected tuples, so on Gmail every message lost its FLAGS and the whole mailbox rendered as unread/unflagged, with mark-read appearing to have no effect.

  Extract the grouping into _group_uid_fetch_records(), fold bare bytes parts into the current message meta there, and reuse it in both the batched list fetch and the per-UID search fetch. Covered by unit tests with captured Gmail-shaped and Dovecot-shaped responses.

  Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* test(email): use raw byte literals for IMAP backslash escapes

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+49
-22
@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
|
||||
return m.group(1).decode() if m else ""
|
||||
|
||||
|
||||
# Matches the start of a FETCH response line: "<sequence number> (".
_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")


def _group_uid_fetch_records(msg_data) -> list:
    """Group an imaplib UID FETCH response into per-message (meta, payload).

    imaplib yields an interleaved list: ``(meta, literal)`` tuples for
    attributes that carry a literal (``RFC822.HEADER {n}`` etc.) plus bare
    ``bytes`` elements for everything the server sends outside a literal.
    Where each attribute lands is server-specific: Dovecot sends FLAGS
    *before* the header literal (so it ends up inside the tuple meta), while
    Gmail sends FLAGS *after* it, arriving as a bare ``b' FLAGS (\\Seen))'``
    element. Dropping bare elements therefore silently loses FLAGS on Gmail
    and every message renders as unread/unflagged.

    A tuple whose meta starts with a sequence number opens a new record;
    every other part — continuation tuple or bare bytes — is folded into the
    current record's meta so attribute regexes see the full meta text.
    Plain ``b')'`` terminators get folded in too, which is harmless.

    Args:
        msg_data: The data list returned by imaplib for a FETCH, or ``None``.

    Returns:
        A list of ``(meta_bytes, payload_bytes_or_None)`` tuples, one per
        message, in response order. ``meta_bytes`` is always ``bytes``.
        Parts that arrive before any record has been opened are dropped.
    """
    grouped: list = []  # list of (meta_bytes, payload_bytes_or_None)
    for part in msg_data or []:
        if isinstance(part, tuple):
            if not part:
                # Malformed empty tuple: nothing to attach, skip it.
                continue
            head = part[0]
            payload = part[1] if len(part) > 1 else None
            # Normalise to immutable bytes so every record's meta has one type.
            if isinstance(head, (bytes, bytearray)):
                meta_b = bytes(head)
            else:
                meta_b = str(head).encode()
            if _FETCH_SEQ_RE.match(meta_b):
                grouped.append((meta_b, payload))
            elif grouped:
                # Continuation tuple: extend the meta, keep the first payload.
                cur_meta, cur_payload = grouped[-1]
                grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or payload)
        elif isinstance(part, (bytes, bytearray)) and grouped:
            # Bare element (e.g. Gmail's trailing FLAGS): fold into the meta.
            cur_meta, cur_payload = grouped[-1]
            grouped[-1] = (cur_meta + b" " + bytes(part), cur_payload)
    return grouped
|
||||
|
||||
|
||||
def _smtp_ready(cfg: dict) -> bool:
    """Return True when *cfg* has truthy SMTP host, user and password entries.

    Reads the ``smtp_host``, ``smtp_user`` and ``smtp_password`` keys; a
    missing key or an empty value for any of them yields ``False``.
    """
    return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
|
||||
|
||||
@@ -799,20 +834,11 @@ def setup_email_routes():
|
||||
except Exception as e:
|
||||
logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
|
||||
status, msg_data = "NO", []
|
||||
# imaplib batch responses interleave (meta, payload) tuples and
|
||||
# `b')'` terminators. Group by message: each tuple where the
|
||||
# meta begins with a seq number starts a new message record.
|
||||
seq_re = re.compile(rb'^(\d+)\s+\(')
|
||||
grouped = [] # list of (meta_str, payload_bytes)
|
||||
for part in (msg_data or []):
|
||||
if isinstance(part, tuple):
|
||||
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
|
||||
if seq_re.match(meta_b):
|
||||
grouped.append((meta_b, part[1]))
|
||||
elif grouped:
|
||||
# continuation of previous message — concatenate meta info if any
|
||||
cur_meta, cur_payload = grouped[-1]
|
||||
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
|
||||
# Group the batched response into per-message (meta, payload)
|
||||
# records. Bare bytes parts must be kept: Gmail returns FLAGS
|
||||
# after the header literal as a bare element, and dropping it
|
||||
# rendered every Gmail message as unread/unflagged.
|
||||
grouped = _group_uid_fetch_records(msg_data)
|
||||
|
||||
if status != "OK" and not grouped:
|
||||
conn.logout()
|
||||
@@ -1098,14 +1124,15 @@ def setup_email_routes():
|
||||
continue
|
||||
raw_header = None
|
||||
flags = ""
|
||||
for part in msg_data:
|
||||
if isinstance(part, tuple):
|
||||
meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
|
||||
if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
|
||||
raw_header = part[1]
|
||||
flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
|
||||
if flag_match:
|
||||
flags = flag_match.group(1)
|
||||
# Same Gmail caveat as the list route: FLAGS may
|
||||
# arrive after the header literal, so group bare
|
||||
# parts back into the message meta before scanning.
|
||||
for meta_b, payload in _group_uid_fetch_records(msg_data):
|
||||
if payload and b"RFC822.HEADER" in meta_b:
|
||||
raw_header = payload
|
||||
flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
|
||||
if flag_match:
|
||||
flags = flag_match.group(1).decode(errors="replace")
|
||||
if not raw_header:
|
||||
continue
|
||||
msg = email_mod.message_from_bytes(raw_header)
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
"""Regression tests for _group_uid_fetch_records (Gmail FLAGS placement).
|
||||
|
||||
imaplib hands back UID FETCH responses as an interleaved list of
|
||||
``(meta, literal)`` tuples and bare ``bytes`` elements. Dovecot sends FLAGS
|
||||
before the RFC822.HEADER literal, so they sit inside the tuple meta; Gmail
|
||||
sends FLAGS *after* the literal, as a bare ``b' FLAGS (\\Seen))'`` element.
|
||||
The old grouping loop only looked at tuples, so on Gmail every message lost
|
||||
its FLAGS and rendered as unread/unflagged in the email library.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from routes.email_routes import _group_uid_fetch_records, _uid_from_fetch_meta
|
||||
|
||||
|
||||
def _flags(meta_b: bytes) -> str:
    """Return the text inside the first ``FLAGS (...)`` list in *meta_b*.

    Returns an empty string when the list is empty or no FLAGS attribute is
    present.
    """
    m = re.search(rb"FLAGS \(([^)]*)\)", meta_b)
    return m.group(1).decode() if m else ""
|
||||
|
||||
|
||||
# Captured shape of a real Gmail response to
#   UID FETCH a,b (UID FLAGS RFC822.HEADER RFC822.SIZE):
# each message is a (meta, header literal) tuple followed by a bare bytes
# element that carries FLAGS and the closing parenthesis.
GMAIL_RESPONSE = [
    (b"10779 (UID 18723 RFC822.SIZE 54308 RFC822.HEADER {24}", b"Subject: read one\r\n\r\n"),
    rb" FLAGS (\Seen))",
    (b"10780 (UID 18724 RFC822.SIZE 124310 RFC822.HEADER {26}", b"Subject: unread one\r\n\r\n"),
    rb" FLAGS ())",
]
|
||||
|
||||
# Dovecot puts FLAGS before the literal and terminates with a bare b')'.
DOVECOT_RESPONSE = [
    (rb"1 (UID 5 FLAGS (\Seen) RFC822.SIZE 100 RFC822.HEADER {18}", b"Subject: hi\r\n\r\n"),
    b")",
    (b"2 (UID 6 FLAGS () RFC822.SIZE 90 RFC822.HEADER {19}", b"Subject: new\r\n\r\n"),
    b")",
]
|
||||
|
||||
|
||||
def test_gmail_post_literal_flags_attach_to_their_own_message():
    """Gmail's bare post-literal FLAGS element is folded into its own record."""
    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)

    assert len(grouped) == 2
    assert _uid_from_fetch_meta(grouped[0][0]) == "18723"
    assert _flags(grouped[0][0]) == r"\Seen"
    assert grouped[0][1] == b"Subject: read one\r\n\r\n"

    assert _uid_from_fetch_meta(grouped[1][0]) == "18724"
    assert _flags(grouped[1][0]) == ""
    assert grouped[1][1] == b"Subject: unread one\r\n\r\n"
|
||||
|
||||
|
||||
def test_dovecot_pre_literal_flags_unchanged():
    """Dovecot-shaped responses (FLAGS inside the tuple meta) still group correctly."""
    grouped = _group_uid_fetch_records(DOVECOT_RESPONSE)

    assert len(grouped) == 2
    assert _flags(grouped[0][0]) == r"\Seen"
    assert _flags(grouped[1][0]) == ""
    assert grouped[1][1] == b"Subject: new\r\n\r\n"
|
||||
|
||||
|
||||
def test_size_and_uid_survive_grouping():
    """RFC822.SIZE stays readable from each record's meta after grouping."""
    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)
    sizes = [re.search(rb"RFC822\.SIZE (\d+)", m).group(1) for m, _ in grouped]
    assert sizes == [b"54308", b"124310"]
|
||||
|
||||
|
||||
def test_empty_and_none_inputs():
    """Empty, None and orphan-bare-element inputs all yield no records."""
    assert _group_uid_fetch_records(None) == []
    assert _group_uid_fetch_records([]) == []
    # A stray bare element before any tuple opens no record and must not crash.
    assert _group_uid_fetch_records([rb" FLAGS (\Seen))"]) == []
|
||||
Reference in New Issue
Block a user