Decode email headers without injected spaces

Use email.header.make_header for MIME header decoding so adjacent encoded/plain header parts preserve RFC spacing, with regression coverage.
This commit is contained in:
Afonso Coutinho
2026-06-03 05:45:33 +01:00
committed by GitHub
parent f29c827e6e
commit 46999debdb
2 changed files with 53 additions and 8 deletions
+19 -8
View File
@@ -337,14 +337,25 @@ def _decode_header(raw):
"""Decode MIME encoded header."""
if not raw:
return ""
parts = email.header.decode_header(raw)
decoded = []
for data, charset in parts:
if isinstance(data, bytes):
decoded.append(data.decode(charset or "utf-8", errors="replace"))
else:
decoded.append(data)
return " ".join(decoded)
try:
# make_header concatenates per RFC 2047: no spurious space between an
# encoded-word and adjacent plain text (plain runs keep their own
# whitespace), and whitespace between two adjacent encoded-words is
# dropped. The old " ".join produced "Re: Jose" style double spaces
# on every non-ASCII subject or sender.
return str(email.header.make_header(email.header.decode_header(raw)))
except Exception:
# Malformed header or unknown charset: lossy per-part decode
decoded = []
for data, charset in email.header.decode_header(raw):
if isinstance(data, bytes):
try:
decoded.append(data.decode(charset or "utf-8", errors="replace"))
except LookupError:
decoded.append(data.decode("utf-8", errors="replace"))
else:
decoded.append(data)
return "".join(decoded)
def _extract_text(msg):