mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(email): keep FETCH attributes Gmail sends after the header literal (all Gmail mail showed as unread) (#3785)
* fix(email): keep FETCH attributes Gmail sends after the header literal imaplib returns a UID FETCH response as an interleaved list of (meta, literal) tuples plus bare bytes elements. Which attributes land where is server-specific: Dovecot sends FLAGS before the RFC822.HEADER literal (inside the tuple meta), Gmail sends them after it, as a bare ` FLAGS (\Seen))` element. The email list grouping loop and the search loop only inspected tuples, so on Gmail every message lost its FLAGS and the whole mailbox rendered as unread/unflagged, with mark-read appearing to have no effect. Extract the grouping into _group_uid_fetch_records(), fold bare bytes parts into the current message meta there, and reuse it in both the batched list fetch and the per-UID search fetch. Covered by unit tests with captured Gmail-shaped and Dovecot-shaped responses. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> * test(email): use raw byte literals for IMAP backslash escapes --------- Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+49
-22
@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
|
|||||||
return m.group(1).decode() if m else ""
|
return m.group(1).decode() if m else ""
|
||||||
|
|
||||||
|
|
||||||
|
_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")
|
||||||
|
|
||||||
|
|
||||||
|
def _group_uid_fetch_records(msg_data) -> list:
|
||||||
|
"""Group an imaplib UID FETCH response into per-message (meta, payload).
|
||||||
|
|
||||||
|
imaplib yields an interleaved list: ``(meta, literal)`` tuples for
|
||||||
|
attributes that carry a literal (``RFC822.HEADER {n}`` etc.) plus bare
|
||||||
|
``bytes`` elements for everything the server sends outside a literal.
|
||||||
|
Where each attribute lands is server-specific: Dovecot sends FLAGS
|
||||||
|
*before* the header literal (so it ends up inside the tuple meta), while
|
||||||
|
Gmail sends FLAGS *after* it, arriving as a bare ``b' FLAGS (\\Seen))'``
|
||||||
|
element. Dropping bare elements therefore silently loses FLAGS on Gmail
|
||||||
|
and every message renders as unread/unflagged.
|
||||||
|
|
||||||
|
A tuple whose meta starts with a sequence number opens a new record;
|
||||||
|
every other part — continuation tuple or bare bytes — is folded into the
|
||||||
|
current record's meta so attribute regexes see the full meta text.
|
||||||
|
Plain ``b')'`` terminators get folded in too, which is harmless.
|
||||||
|
"""
|
||||||
|
grouped: list = [] # list of (meta_bytes, payload_bytes_or_None)
|
||||||
|
for part in (msg_data or []):
|
||||||
|
if isinstance(part, tuple):
|
||||||
|
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
|
||||||
|
if _FETCH_SEQ_RE.match(meta_b):
|
||||||
|
grouped.append((meta_b, part[1]))
|
||||||
|
elif grouped:
|
||||||
|
cur_meta, cur_payload = grouped[-1]
|
||||||
|
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
|
||||||
|
elif isinstance(part, (bytes, bytearray)) and grouped:
|
||||||
|
cur_meta, cur_payload = grouped[-1]
|
||||||
|
grouped[-1] = (cur_meta + b" " + bytes(part), cur_payload)
|
||||||
|
return grouped
|
||||||
|
|
||||||
|
|
||||||
def _smtp_ready(cfg: dict) -> bool:
|
def _smtp_ready(cfg: dict) -> bool:
|
||||||
return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
|
return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
|
||||||
|
|
||||||
@@ -799,20 +834,11 @@ def setup_email_routes():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
|
logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
|
||||||
status, msg_data = "NO", []
|
status, msg_data = "NO", []
|
||||||
# imaplib batch responses interleave (meta, payload) tuples and
|
# Group the batched response into per-message (meta, payload)
|
||||||
# `b')'` terminators. Group by message: each tuple where the
|
# records. Bare bytes parts must be kept: Gmail returns FLAGS
|
||||||
# meta begins with a seq number starts a new message record.
|
# after the header literal as a bare element, and dropping it
|
||||||
seq_re = re.compile(rb'^(\d+)\s+\(')
|
# rendered every Gmail message as unread/unflagged.
|
||||||
grouped = [] # list of (meta_str, payload_bytes)
|
grouped = _group_uid_fetch_records(msg_data)
|
||||||
for part in (msg_data or []):
|
|
||||||
if isinstance(part, tuple):
|
|
||||||
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
|
|
||||||
if seq_re.match(meta_b):
|
|
||||||
grouped.append((meta_b, part[1]))
|
|
||||||
elif grouped:
|
|
||||||
# continuation of previous message — concatenate meta info if any
|
|
||||||
cur_meta, cur_payload = grouped[-1]
|
|
||||||
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
|
|
||||||
|
|
||||||
if status != "OK" and not grouped:
|
if status != "OK" and not grouped:
|
||||||
conn.logout()
|
conn.logout()
|
||||||
@@ -1098,14 +1124,15 @@ def setup_email_routes():
|
|||||||
continue
|
continue
|
||||||
raw_header = None
|
raw_header = None
|
||||||
flags = ""
|
flags = ""
|
||||||
for part in msg_data:
|
# Same Gmail caveat as the list route: FLAGS may
|
||||||
if isinstance(part, tuple):
|
# arrive after the header literal, so group bare
|
||||||
meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
|
# parts back into the message meta before scanning.
|
||||||
if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
|
for meta_b, payload in _group_uid_fetch_records(msg_data):
|
||||||
raw_header = part[1]
|
if payload and b"RFC822.HEADER" in meta_b:
|
||||||
flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
|
raw_header = payload
|
||||||
if flag_match:
|
flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
|
||||||
flags = flag_match.group(1)
|
if flag_match:
|
||||||
|
flags = flag_match.group(1).decode(errors="replace")
|
||||||
if not raw_header:
|
if not raw_header:
|
||||||
continue
|
continue
|
||||||
msg = email_mod.message_from_bytes(raw_header)
|
msg = email_mod.message_from_bytes(raw_header)
|
||||||
|
|||||||
@@ -0,0 +1,71 @@
|
|||||||
|
"""Regression tests for _group_uid_fetch_records (Gmail FLAGS placement).
|
||||||
|
|
||||||
|
imaplib hands back UID FETCH responses as an interleaved list of
|
||||||
|
``(meta, literal)`` tuples and bare ``bytes`` elements. Dovecot sends FLAGS
|
||||||
|
before the RFC822.HEADER literal, so they sit inside the tuple meta; Gmail
|
||||||
|
sends FLAGS *after* the literal, as a bare ``b' FLAGS (\\Seen))'`` element.
|
||||||
|
The old grouping loop only looked at tuples, so on Gmail every message lost
|
||||||
|
its FLAGS and rendered as unread/unflagged in the email library.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from routes.email_routes import _group_uid_fetch_records, _uid_from_fetch_meta
|
||||||
|
|
||||||
|
|
||||||
|
def _flags(meta_b: bytes) -> str:
|
||||||
|
m = re.search(rb"FLAGS \(([^)]*)\)", meta_b)
|
||||||
|
return m.group(1).decode() if m else ""
|
||||||
|
|
||||||
|
|
||||||
|
# Captured shape of a real Gmail response to
#   UID FETCH a,b (UID FLAGS RFC822.HEADER RFC822.SIZE):
# each message is a (meta, header-literal) tuple followed by a bare bytes
# element that carries FLAGS and the closing parenthesis.
GMAIL_RESPONSE = [
    (b"10779 (UID 18723 RFC822.SIZE 54308 RFC822.HEADER {24}", b"Subject: read one\r\n\r\n"),
    rb" FLAGS (\Seen))",
    (b"10780 (UID 18724 RFC822.SIZE 124310 RFC822.HEADER {26}", b"Subject: unread one\r\n\r\n"),
    rb" FLAGS ())",
]
|
||||||
|
|
||||||
|
# Dovecot puts FLAGS before the literal (inside the tuple meta) and
# terminates each message with a bare b')'.
DOVECOT_RESPONSE = [
    (rb"1 (UID 5 FLAGS (\Seen) RFC822.SIZE 100 RFC822.HEADER {18}", b"Subject: hi\r\n\r\n"),
    b")",
    (b"2 (UID 6 FLAGS () RFC822.SIZE 90 RFC822.HEADER {19}", b"Subject: new\r\n\r\n"),
    b")",
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_gmail_post_literal_flags_attach_to_their_own_message():
    """Gmail shape: each trailing bare FLAGS element joins its own record."""
    records = _group_uid_fetch_records(GMAIL_RESPONSE)
    assert len(records) == 2

    (read_meta, read_payload), (unread_meta, unread_payload) = records

    # First message: already read.
    assert _uid_from_fetch_meta(read_meta) == "18723"
    assert _flags(read_meta) == r"\Seen"
    assert read_payload == b"Subject: read one\r\n\r\n"

    # Second message: empty FLAGS list, i.e. unread.
    assert _uid_from_fetch_meta(unread_meta) == "18724"
    assert _flags(unread_meta) == ""
    assert unread_payload == b"Subject: unread one\r\n\r\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_dovecot_pre_literal_flags_unchanged():
    """Dovecot shape: FLAGS inside the tuple meta are still found after grouping."""
    records = _group_uid_fetch_records(DOVECOT_RESPONSE)
    assert len(records) == 2

    seen_record, new_record = records
    assert _flags(seen_record[0]) == r"\Seen"
    assert _flags(new_record[0]) == ""
    assert new_record[1] == b"Subject: new\r\n\r\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_size_and_uid_survive_grouping():
    """RFC822.SIZE and UID stay readable in each record's meta after grouping."""
    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)

    sizes = [re.search(rb"RFC822\.SIZE (\d+)", m).group(1) for m, _ in grouped]
    assert sizes == [b"54308", b"124310"]

    # The test name promises UID coverage as well, so assert it explicitly
    # instead of checking only the sizes.
    uids = [_uid_from_fetch_meta(m) for m, _ in grouped]
    assert uids == ["18723", "18724"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_and_none_inputs():
    """Degenerate inputs produce an empty grouping instead of raising."""
    for nothing in (None, []):
        assert _group_uid_fetch_records(nothing) == []

    # A stray bare element before any tuple opens no record and must not crash.
    stray_only = [rb" FLAGS (\Seen))"]
    assert _group_uid_fetch_records(stray_only) == []
|
||||||
Reference in New Issue
Block a user