mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 17:55:26 -04:00
Decode email headers without injected spaces
Use email.header.make_header for MIME header decoding so adjacent encoded/plain header parts preserve RFC spacing, with regression coverage.
This commit is contained in:
@@ -337,14 +337,25 @@ def _decode_header(raw):
|
||||
"""Decode MIME encoded header."""
|
||||
if not raw:
|
||||
return ""
|
||||
parts = email.header.decode_header(raw)
|
||||
decoded = []
|
||||
for data, charset in parts:
|
||||
if isinstance(data, bytes):
|
||||
decoded.append(data.decode(charset or "utf-8", errors="replace"))
|
||||
else:
|
||||
decoded.append(data)
|
||||
return " ".join(decoded)
|
||||
try:
|
||||
# make_header concatenates per RFC 2047: no spurious space between an
|
||||
# encoded-word and adjacent plain text (plain runs keep their own
|
||||
# whitespace), and whitespace between two adjacent encoded-words is
|
||||
# dropped. The old " ".join produced "Re:  Jose" style double spaces
|
||||
# on every non-ASCII subject or sender.
|
||||
return str(email.header.make_header(email.header.decode_header(raw)))
|
||||
except Exception:
|
||||
# Malformed header or unknown charset: lossy per-part decode
|
||||
decoded = []
|
||||
for data, charset in email.header.decode_header(raw):
|
||||
if isinstance(data, bytes):
|
||||
try:
|
||||
decoded.append(data.decode(charset or "utf-8", errors="replace"))
|
||||
except LookupError:
|
||||
decoded.append(data.decode("utf-8", errors="replace"))
|
||||
else:
|
||||
decoded.append(data)
|
||||
return "".join(decoded)
|
||||
|
||||
|
||||
def _extract_text(msg):
|
||||
|
||||
Reference in New Issue
Block a user