Skip to content

Commit bd170fa

Browse files
HaseU-gitclaude
andcommitted
fix: RFC 2047 encode non-ASCII display names in address headers (#404)
Apply encode_address_header_value() to To, From, Cc, and Bcc headers in MessageBuilder::build() so that non-ASCII display names (e.g. Japanese characters) are properly RFC 2047 encoded. Previously only the Subject header was encoded, causing mojibake in address headers. Closes #404 Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 7e22a3d commit bd170fa

File tree

2 files changed

+147
-5
lines changed

2 files changed

+147
-5
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@googleworkspace/cli": patch
3+
---
4+
5+
Apply RFC 2047 encoding to non-ASCII display names in To, From, Cc, and Bcc headers to prevent mojibake

src/helpers/gmail/mod.rs

Lines changed: 142 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,35 @@ pub(super) fn encode_header_value(value: &str) -> String {
272272
encoded_words.join("\r\n ")
273273
}
274274

275+
/// RFC 2047 encode the display-name portions of an address header value.
276+
///
277+
/// Handles formats like `"Name" <addr>`, `Name <addr>`, bare `addr`, and
278+
/// comma-separated lists thereof. Only the display-name text is encoded;
279+
/// angle-bracket addresses are left untouched.
280+
pub(super) fn encode_address_header_value(value: &str) -> String {
281+
value
282+
.split(',')
283+
.map(|addr| {
284+
let trimmed = addr.trim();
285+
if let Some(open) = trimmed.rfind('<') {
286+
let display = trimmed[..open].trim();
287+
let angle_addr = &trimmed[open..]; // "<email>"
288+
if display.is_empty() {
289+
trimmed.to_string()
290+
} else {
291+
// Strip surrounding quotes if present.
292+
let unquoted = display.trim_matches('"').trim();
293+
format!("{} {}", encode_header_value(unquoted), angle_addr)
294+
}
295+
} else {
296+
// Bare address (no display name) — nothing to encode.
297+
trimmed.to_string()
298+
}
299+
})
300+
.collect::<Vec<_>>()
301+
.join(", ")
302+
}
303+
275304
/// In-Reply-To and References values for threading a reply or forward.
276305
pub(super) struct ThreadingHeaders<'a> {
277306
pub in_reply_to: &'a str,
@@ -281,7 +310,7 @@ pub(super) struct ThreadingHeaders<'a> {
281310
/// Shared builder for RFC 2822 email messages.
282311
///
283312
/// Handles header construction with CRLF sanitization and RFC 2047
284-
/// encoding of non-ASCII subjects. Each helper owns its body assembly
313+
/// encoding of non-ASCII subjects and address display names. Each helper owns its body assembly
285314
/// (quoted reply, forwarded block, plain body) and passes it to `build()`.
286315
pub(super) struct MessageBuilder<'a> {
287316
pub to: &'a str,
@@ -302,7 +331,7 @@ impl MessageBuilder<'_> {
302331

303332
let mut headers = format!(
304333
"To: {}\r\nSubject: {}",
305-
sanitize_header_value(self.to),
334+
encode_address_header_value(&sanitize_header_value(self.to)),
306335
// Sanitize first: stripping CRLF before encoding prevents injection
307336
// in encoded-words.
308337
encode_header_value(&sanitize_header_value(self.subject)),
@@ -319,17 +348,26 @@ impl MessageBuilder<'_> {
319348
headers.push_str("\r\nMIME-Version: 1.0\r\nContent-Type: text/plain; charset=utf-8");
320349

321350
if let Some(from) = self.from {
322-
headers.push_str(&format!("\r\nFrom: {}", sanitize_header_value(from)));
351+
headers.push_str(&format!(
352+
"\r\nFrom: {}",
353+
encode_address_header_value(&sanitize_header_value(from))
354+
));
323355
}
324356

325357
if let Some(cc) = self.cc {
326-
headers.push_str(&format!("\r\nCc: {}", sanitize_header_value(cc)));
358+
headers.push_str(&format!(
359+
"\r\nCc: {}",
360+
encode_address_header_value(&sanitize_header_value(cc))
361+
));
327362
}
328363

329364
// The Gmail API reads the Bcc header to route to those recipients,
330365
// then strips it before delivery.
331366
if let Some(bcc) = self.bcc {
332-
headers.push_str(&format!("\r\nBcc: {}", sanitize_header_value(bcc)));
367+
headers.push_str(&format!(
368+
"\r\nBcc: {}",
369+
encode_address_header_value(&sanitize_header_value(bcc))
370+
));
333371
}
334372

335373
format!("{}\r\n\r\n{}", headers, body)
@@ -961,6 +999,66 @@ mod tests {
961999
assert_eq!(sanitize_header_value("bare\rreturn"), "barereturn");
9621000
}
9631001

1002+
#[test]
1003+
fn test_encode_address_header_value_bare_email() {
1004+
assert_eq!(
1005+
encode_address_header_value("[email protected]"),
1006+
1007+
);
1008+
}
1009+
1010+
#[test]
1011+
fn test_encode_address_header_value_ascii_display_name() {
1012+
let result = encode_address_header_value("\"Alice\" <[email protected]>");
1013+
assert_eq!(result, "Alice <[email protected]>");
1014+
}
1015+
1016+
#[test]
1017+
fn test_encode_address_header_value_non_ascii_display_name() {
1018+
use base64::engine::general_purpose::STANDARD;
1019+
let input = "\"\u{4e0b}\u{91ce}\u{7950}\u{592a}\" <[email protected]>";
1020+
let result = encode_address_header_value(input);
1021+
assert!(
1022+
result.contains("=?UTF-8?B?"),
1023+
"Display name should be RFC 2047 encoded: {}",
1024+
result
1025+
);
1026+
assert!(
1027+
result.ends_with("<[email protected]>"),
1028+
"Email address must be preserved: {}",
1029+
result
1030+
);
1031+
// Decode and verify the display name round-trips.
1032+
let b64_part = result
1033+
.trim_start_matches("=?UTF-8?B?")
1034+
.split("?=")
1035+
.next()
1036+
.unwrap();
1037+
let decoded = String::from_utf8(STANDARD.decode(b64_part).unwrap()).unwrap();
1038+
assert_eq!(decoded, "\u{4e0b}\u{91ce}\u{7950}\u{592a}");
1039+
}
1040+
1041+
#[test]
1042+
fn test_encode_address_header_value_multiple_addresses() {
1043+
let input = "\"\u{4e0b}\u{91ce}\" <[email protected]>, [email protected], \"\u{91ce}\u{53e3}\" <[email protected]>";
1044+
let result = encode_address_header_value(input);
1045+
let parts: Vec<&str> = result.split(", ").collect();
1046+
assert_eq!(parts.len(), 3);
1047+
assert!(parts[0].contains("=?UTF-8?B?"));
1048+
assert!(parts[0].ends_with("<[email protected]>"));
1049+
assert_eq!(parts[1], "[email protected]");
1050+
assert!(parts[2].contains("=?UTF-8?B?"));
1051+
assert!(parts[2].ends_with("<[email protected]>"));
1052+
}
1053+
1054+
#[test]
1055+
fn test_encode_address_header_value_unquoted_non_ascii() {
1056+
let input = "\u{4e0b}\u{91ce}\u{7950}\u{592a} <[email protected]>";
1057+
let result = encode_address_header_value(input);
1058+
assert!(result.contains("=?UTF-8?B?"));
1059+
assert!(result.ends_with("<[email protected]>"));
1060+
}
1061+
9641062
#[test]
9651063
fn test_encode_header_value_ascii() {
9661064
assert_eq!(encode_header_value("Hello World"), "Hello World");
@@ -1064,6 +1162,45 @@ mod tests {
10641162
assert!(!raw.contains("Solar — Quote Request"));
10651163
}
10661164

1165+
#[test]
1166+
fn test_message_builder_non_ascii_address_headers() {
1167+
let raw = MessageBuilder {
1168+
to: "\"\u{91ce}\u{53e3}\" <[email protected]>",
1169+
subject: "Report",
1170+
from: Some("\"\u{9577}\u{8c37}\u{5ddd}\" <[email protected]>"),
1171+
cc: Some("\"\u{4e0b}\u{91ce}\u{7950}\u{592a}\" <[email protected]>"),
1172+
bcc: Some("\"\u{9ed2}\u{5ddd}\" <[email protected]>"),
1173+
threading: None,
1174+
}
1175+
.build("Body");
1176+
1177+
// Display names must be RFC 2047 encoded, not raw UTF-8.
1178+
assert!(
1179+
!raw.contains("\u{4e0b}\u{91ce}\u{7950}\u{592a}"),
1180+
"Raw non-ASCII must not appear in Cc header"
1181+
);
1182+
assert!(
1183+
!raw.contains("\u{91ce}\u{53e3}"),
1184+
"Raw non-ASCII must not appear in To header"
1185+
);
1186+
assert!(
1187+
!raw.contains("\u{9577}\u{8c37}\u{5ddd}"),
1188+
"Raw non-ASCII must not appear in From header"
1189+
);
1190+
assert!(
1191+
!raw.contains("\u{9ed2}\u{5ddd}"),
1192+
"Raw non-ASCII must not appear in Bcc header"
1193+
);
1194+
// Email addresses must be preserved.
1195+
assert!(raw.contains("<[email protected]>"));
1196+
assert!(raw.contains("<[email protected]>"));
1197+
assert!(raw.contains("<[email protected]>"));
1198+
assert!(raw.contains("<[email protected]>"));
1199+
// Encoded words must be present.
1200+
assert!(raw.contains("Cc: =?UTF-8?B?"));
1201+
assert!(raw.contains("From: =?UTF-8?B?"));
1202+
}
1203+
10671204
#[test]
10681205
fn test_message_builder_sanitizes_crlf_injection() {
10691206
let raw = MessageBuilder {

0 commit comments

Comments
 (0)