Skip to content

Commit 5ae7bab

Browse files
vstinner authored and hroncok committed
00415: [CVE-2023-27043] pythongh-102988: Reject malformed addresses in email.parseaddr() (python#111116)
Detect email address parsing errors and return empty tuple to indicate the parsing error (old API). Add an optional 'strict' parameter to getaddresses() and parseaddr() functions. Patch by Thomas Dwyer. Co-Authored-By: Thomas Dwyer <[email protected]> Changes for Python 2: - Define encoding for test_email - Adjust import so we don't need to change the tests - Do not use f-strings - Do not use SubTest - Keyword-only function arguments are not supported
1 parent 004e7da commit 5ae7bab

4 files changed

Lines changed: 342 additions & 23 deletions

File tree

Doc/library/email.utils.rst

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,18 @@ There are several useful utilities provided in the :mod:`email.utils` module:
2121
begins with angle brackets, they are stripped off.
2222

2323

24-
.. function:: parseaddr(address)
24+
.. function:: parseaddr(address, *, strict=True)
2525

2626
Parse address -- which should be the value of some address-containing field such
2727
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
2828
*email address* parts. Returns a tuple of that information, unless the parse
2929
fails, in which case a 2-tuple of ``('', '')`` is returned.
3030

31+
If *strict* is true, use a strict parser which rejects malformed inputs.
32+
33+
.. versionchanged:: 3.13
34+
Add *strict* optional parameter and reject malformed inputs by default.
35+
3136

3237
.. function:: formataddr(pair)
3338

@@ -37,12 +42,15 @@ There are several useful utilities provided in the :mod:`email.utils` module:
3742
second element is returned unmodified.
3843

3944

40-
.. function:: getaddresses(fieldvalues)
45+
.. function:: getaddresses(fieldvalues, *, strict=True)
4146

4247
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
4348
*fieldvalues* is a sequence of header field values as might be returned by
44-
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
45-
example that gets all the recipients of a message::
49+
:meth:`Message.get_all <email.message.Message.get_all>`.
50+
51+
If *strict* is true, use a strict parser which rejects malformed inputs.
52+
53+
Here's a simple example that gets all the recipients of a message::
4654

4755
from email.utils import getaddresses
4856

@@ -52,6 +60,9 @@ There are several useful utilities provided in the :mod:`email.utils` module:
5260
resent_ccs = msg.get_all('resent-cc', [])
5361
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
5462

63+
.. versionchanged:: 3.13
64+
Add *strict* optional parameter and reject malformed inputs by default.
65+
5566

5667
.. function:: parsedate(date)
5768

Lib/email/test/test_email.py

Lines changed: 177 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# Copyright (C) 2001-2010 Python Software Foundation
23
34
# email package unit tests
@@ -30,7 +31,7 @@
3031
from email.MIMEBase import MIMEBase
3132
from email.MIMEMessage import MIMEMessage
3233
from email.MIMEMultipart import MIMEMultipart
33-
from email import Utils
34+
from email import Utils, utils
3435
from email import Errors
3536
from email import Encoders
3637
from email import Iterators
@@ -2414,15 +2415,135 @@ def test_getaddresses(self):
24142415
[('Al Person', '[email protected]'),
24152416
('Bud Person', '[email protected]')])
24162417

2418+
def test_parsing_errors(self):
2419+
"""Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
2420+
2421+
2422+
empty = ('', '')
2423+
2424+
# Test utils.getaddresses() and utils.parseaddr() on malformed email
2425+
# addresses: default behavior (strict=True) rejects malformed address,
2426+
# and strict=False which tolerates malformed address.
2427+
for invalid_separator, expected_non_strict in (
2428+
('(', [('<' + bob + '>', alice)]),
2429+
(')', [('', alice), empty, ('', bob)]),
2430+
('<', [('', alice), empty, ('', bob), empty]),
2431+
('>', [('', alice), empty, ('', bob)]),
2432+
('[', [('', alice + '[<' + bob + '>]')]),
2433+
(']', [('', alice), empty, ('', bob)]),
2434+
('@', [empty, empty, ('', bob)]),
2435+
(';', [('', alice), empty, ('', bob)]),
2436+
(':', [('', alice), ('', bob)]),
2437+
('.', [('', alice + '.'), ('', bob)]),
2438+
('"', [('', alice), ('', '<' + bob + '>')]),
2439+
):
2440+
address = alice + invalid_separator + '<' + bob + '>'
2441+
self.assertEqual(utils.getaddresses([address]),
2442+
[empty])
2443+
self.assertEqual(utils.getaddresses([address], strict=False),
2444+
expected_non_strict)
2445+
2446+
self.assertEqual(utils.parseaddr([address]),
2447+
empty)
2448+
self.assertEqual(utils.parseaddr([address], strict=False),
2449+
('', address))
2450+
2451+
# Comma (',') is treated differently depending on strict parameter.
2452+
# Comma without quotes.
2453+
address = alice + ',<' + bob + '>'
2454+
self.assertEqual(utils.getaddresses([address]),
2455+
[('', alice), ('', bob)])
2456+
self.assertEqual(utils.getaddresses([address], strict=False),
2457+
[('', alice), ('', bob)])
2458+
self.assertEqual(utils.parseaddr([address]),
2459+
empty)
2460+
self.assertEqual(utils.parseaddr([address], strict=False),
2461+
('', address))
2462+
2463+
# Real name between quotes containing comma.
2464+
address = '"Alice, [email protected]" <[email protected]>'
2465+
expected_strict = ('Alice, [email protected]', '[email protected]')
2466+
self.assertEqual(utils.getaddresses([address]), [expected_strict])
2467+
self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
2468+
self.assertEqual(utils.parseaddr([address]), expected_strict)
2469+
self.assertEqual(utils.parseaddr([address], strict=False),
2470+
('', address))
2471+
2472+
# Valid parenthesis in comments.
2473+
address = '[email protected] (Alice)'
2474+
expected_strict = ('Alice', '[email protected]')
2475+
self.assertEqual(utils.getaddresses([address]), [expected_strict])
2476+
self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
2477+
self.assertEqual(utils.parseaddr([address]), expected_strict)
2478+
self.assertEqual(utils.parseaddr([address], strict=False),
2479+
('', address))
2480+
2481+
# Invalid parenthesis in comments.
2482+
address = '[email protected] )Alice('
2483+
self.assertEqual(utils.getaddresses([address]), [empty])
2484+
self.assertEqual(utils.getaddresses([address], strict=False),
2485+
[('', '[email protected]'), ('', ''), ('', 'Alice')])
2486+
self.assertEqual(utils.parseaddr([address]), empty)
2487+
self.assertEqual(utils.parseaddr([address], strict=False),
2488+
('', address))
2489+
2490+
# Two addresses with quotes separated by comma.
2491+
address = '"Jane Doe" <[email protected]>, "John Doe" <[email protected]>'
2492+
self.assertEqual(utils.getaddresses([address]),
2493+
[('Jane Doe', '[email protected]'),
2494+
('John Doe', '[email protected]')])
2495+
self.assertEqual(utils.getaddresses([address], strict=False),
2496+
[('Jane Doe', '[email protected]'),
2497+
('John Doe', '[email protected]')])
2498+
self.assertEqual(utils.parseaddr([address]), empty)
2499+
self.assertEqual(utils.parseaddr([address], strict=False),
2500+
('', address))
2501+
2502+
# Test email.utils.supports_strict_parsing attribute
2503+
self.assertEqual(email.utils.supports_strict_parsing, True)
2504+
24172505
def test_getaddresses_nasty(self):
2418-
eq = self.assertEqual
2419-
eq(Utils.getaddresses(['foo: ;']), [('', '')])
2420-
eq(Utils.getaddresses(
2421-
['[]*-- =~$']),
2422-
[('', ''), ('', ''), ('', '*--')])
2423-
eq(Utils.getaddresses(
2424-
['foo: ;', '"Jason R. Mastaler" <[email protected]>']),
2425-
[('', ''), ('Jason R. Mastaler', '[email protected]')])
2506+
for addresses, expected in (
2507+
(['"Sürname, Firstname" <[email protected]>'],
2508+
[('Sürname, Firstname', '[email protected]')]),
2509+
2510+
(['foo: ;'],
2511+
[('', '')]),
2512+
2513+
(['foo: ;', '"Jason R. Mastaler" <[email protected]>'],
2514+
[('', ''), ('Jason R. Mastaler', '[email protected]')]),
2515+
2516+
([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
2517+
[('Pete (A nice ) chap his account his host)', '[email protected]')]),
2518+
2519+
(['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
2520+
[('', '')]),
2521+
2522+
(['Mary <@machine.tld:[email protected]>, , jdoe@test . example'],
2523+
[('Mary', '[email protected]'), ('', ''), ('', '[email protected]')]),
2524+
2525+
(['John Doe <jdoe@machine(comment). example>'],
2526+
[('John Doe (comment)', '[email protected]')]),
2527+
2528+
(['"Mary Smith: Personal Account" <[email protected]>'],
2529+
[('Mary Smith: Personal Account', '[email protected]')]),
2530+
2531+
(['Undisclosed recipients:;'],
2532+
[('', '')]),
2533+
2534+
([r'<[email protected]>, "Giant; \"Big\" Box" <[email protected]>'],
2535+
[('', '[email protected]'), ('Giant; "Big" Box', '[email protected]')]),
2536+
):
2537+
self.assertEqual(utils.getaddresses(addresses),
2538+
expected)
2539+
self.assertEqual(utils.getaddresses(addresses, strict=False),
2540+
expected)
2541+
2542+
addresses = ['[]*-- =~$']
2543+
self.assertEqual(utils.getaddresses(addresses),
2544+
[('', '')])
2545+
self.assertEqual(utils.getaddresses(addresses, strict=False),
2546+
[('', ''), ('', ''), ('', '*--')])
24262547

24272548
def test_getaddresses_embedded_comment(self):
24282549
"""Test proper handling of a nested comment"""
@@ -2533,6 +2654,53 @@ def test_partial_falls_inside_message_delivery_status(self):
25332654
text/rfc822-headers
25342655
""")
25352656

2657+
def test_iter_escaped_chars(self):
2658+
self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
2659+
[(0, 'a'),
2660+
(2, '\\\\'),
2661+
(3, 'b'),
2662+
(5, '\\"'),
2663+
(6, 'c'),
2664+
(8, '\\\\'),
2665+
(9, '"'),
2666+
(10, 'd')])
2667+
self.assertEqual(list(utils._iter_escaped_chars('a\\')),
2668+
[(0, 'a'), (1, '\\')])
2669+
2670+
def test_strip_quoted_realnames(self):
2671+
def check(addr, expected):
2672+
self.assertEqual(utils._strip_quoted_realnames(addr), expected)
2673+
2674+
check('"Jane Doe" <[email protected]>, "John Doe" <[email protected]>',
2675+
2676+
check(r'"Jane \"Doe\"." <[email protected]>',
2677+
2678+
2679+
# special cases
2680+
check(r'before"name"after', 'beforeafter')
2681+
check(r'before"name"', 'before')
2682+
check(r'b"name"', 'b') # single char
2683+
check(r'"name"after', 'after')
2684+
check(r'"name"a', 'a') # single char
2685+
check(r'"name"', '')
2686+
2687+
# no change
2688+
for addr in (
2689+
'Jane Doe <[email protected]>, John Doe <[email protected]>',
2690+
'lone " quote',
2691+
):
2692+
self.assertEqual(utils._strip_quoted_realnames(addr), addr)
2693+
2694+
2695+
def test_check_parenthesis(self):
2696+
2697+
self.assertTrue(utils._check_parenthesis(addr + ' (Alice)'))
2698+
self.assertFalse(utils._check_parenthesis(addr + ' )Alice('))
2699+
self.assertFalse(utils._check_parenthesis(addr + ' (Alice))'))
2700+
self.assertFalse(utils._check_parenthesis(addr + ' ((Alice)'))
2701+
2702+
# Ignore real name between quotes
2703+
self.assertTrue(utils._check_parenthesis('")Alice((" ' + addr))
25362704

25372705

25382706
# Test the iterator/generators

0 commit comments

Comments
 (0)