Skip to content

Commit d261f8a

Browse files
authored
Ensure that application/octet-stream is the default content_type (#11580)
1 parent b1bd65d commit d261f8a

File tree

6 files changed

+67
-11
lines changed

6 files changed

+67
-11
lines changed

CHANGES/10889.bugfix.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Updated ``Content-Type`` header parsing to return ``application/octet-stream`` when the header contains invalid syntax.
2+
See :rfc:`9110#section-8.3-5`.
3+
4+
-- by :user:`sgaist`.

CONTRIBUTORS.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ Roman Postnov
320320
Rong Zhang
321321
Samir Akarioh
322322
Samuel Colvin
323+
Samuel Gaist
323324
Sean Hunt
324325
Sebastian Acuna
325326
Sebastian Hanula

aiohttp/helpers.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
from collections import namedtuple
2121
from collections.abc import Callable, Iterable, Iterator, Mapping
2222
from contextlib import suppress
23+
from email.message import EmailMessage
2324
from email.parser import HeaderParser
25+
from email.policy import HTTP
2426
from email.utils import parsedate
2527
from http.cookies import SimpleCookie
2628
from math import ceil
@@ -356,14 +358,40 @@ def parse_mimetype(mimetype: str) -> MimeType:
356358
)
357359

358360

361+
class EnsureOctetStream(EmailMessage):
    """Email message whose content type defaults to application/octet-stream.

    Used as the message factory when parsing a ``Content-Type`` header so
    that a missing or syntactically invalid value is reported as
    ``application/octet-stream`` rather than ``text/plain``.
    """

    def __init__(self) -> None:
        super().__init__()
        # https://www.rfc-editor.org/rfc/rfc9110#section-8.3-5
        self.set_default_type("application/octet-stream")

    def get_content_type(self) -> str:
        """Return the lower-cased ``type/subtype`` of the Content-Type header.

        Re-implementation of ``Message.get_content_type`` that returns
        ``application/octet-stream`` in place of ``text/plain`` when the
        value is invalid.

        The way this class is used guarantees that the Content-Type header
        is present, so the checks are simplified with respect to the base
        implementation.

        A value is accepted only when it has exactly one ``/`` separating a
        non-empty type from a non-empty subtype; anything else (``jpeg``,
        ``a/b/c``, ``text/``, ``/plain``) yields the default type.
        """
        value = str(self.get("content-type", "")).lower()

        # Based on the implementation of _splitparam in the standard library:
        # everything after the first ";" is parameters, not the media type.
        ctype = value.partition(";")[0].strip()

        maintype, sep, subtype = ctype.partition("/")
        if not (sep and maintype and subtype) or "/" in subtype:
            return self.get_default_type()
        return ctype
384+
385+
359386
@functools.lru_cache(maxsize=56)
360387
def parse_content_type(raw: str) -> tuple[str, MappingProxyType[str, str]]:
361388
"""Parse Content-Type header.
362389
363390
Returns a tuple of the parsed content type and a
364-
MappingProxyType of parameters.
391+
MappingProxyType of parameters. The default returned value
392+
is `application/octet-stream`
365393
"""
366-
msg = HeaderParser().parsestr(f"Content-Type: {raw}")
394+
msg = HeaderParser(EnsureOctetStream, policy=HTTP).parsestr(f"Content-Type: {raw}")
367395
content_type = msg.get_content_type()
368396
params = msg.get_params(())
369397
content_dict = dict(params[1:]) # First element is content type again

docs/client_reference.rst

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1550,16 +1550,14 @@ Response object
15501550

15511551
.. note::
15521552

1553-
Returns value is ``'application/octet-stream'`` if no
1554-
Content-Type header present in HTTP headers according to
1555-
:rfc:`9110`. If the *Content-Type* header is invalid (e.g., ``jpg``
1556-
instead of ``image/jpeg``), the value is ``text/plain`` by default
1557-
according to :rfc:`2045`. To see the original header check
1558-
``resp.headers['CONTENT-TYPE']``.
1553+
Returns ``'application/octet-stream'`` if no Content-Type header
1554+
is present or the value contains invalid syntax according to
1555+
:rfc:`9110`. To see the original header check
1556+
``resp.headers["Content-Type"]``.
15591557

15601558
To make sure Content-Type header is not present in
15611559
the server reply, use :attr:`headers` or :attr:`raw_headers`, e.g.
1562-
``'CONTENT-TYPE' not in resp.headers``.
1560+
``'Content-Type' not in resp.headers``.
15631561

15641562
.. attribute:: charset
15651563

tests/test_helpers.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from collections.abc import Iterator
88
from math import ceil, modf
99
from pathlib import Path
10+
from types import MappingProxyType
1011
from unittest import mock
1112
from urllib.request import getproxies_environment
1213

@@ -81,6 +82,30 @@ def test_parse_mimetype(mimetype: str, expected: helpers.MimeType) -> None:
8182
assert result == expected
8283

8384

85+
# ------------------- parse_content_type ------------------------------
86+
87+
88+
@pytest.mark.parametrize(
    "content_type, expected",
    [
        (
            "text/plain",
            ("text/plain", MappingProxyType({})),
        ),
        (
            "text/plain; charset=utf-8",
            ("text/plain", MappingProxyType({"charset": "utf-8"})),
        ),
        (
            "wrong",
            ("application/octet-stream", MappingProxyType({})),
        ),
    ],
)
def test_parse_content_type(
    content_type: str, expected: tuple[str, MappingProxyType[str, str]]
) -> None:
    """Check the parsed content type and parameters for a raw header value.

    The expected parameters are built as ``MappingProxyType`` to match the
    annotation above, so the comparison does not rely on cross-type
    equality with another mapping implementation.
    """
    result = helpers.parse_content_type(content_type)

    assert result == expected
107+
108+
84109
# ------------------- guess_filename ----------------------------------
85110

86111

tests/test_web_response.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,10 +1023,10 @@ def test_ctor_content_type_with_extra() -> None:
10231023
assert resp.headers["content-type"] == "text/plain; version=0.0.4; charset=utf-8"
10241024

10251025

1026-
def test_invalid_content_type_parses_to_text_plain() -> None:
1026+
def test_invalid_content_type_parses_to_application_octet_stream() -> None:
    """An invalid Content-Type value is reported as application/octet-stream."""
    resp = web.Response(text="test test", content_type="jpeg")

    # The parsed property falls back to the default type, while the raw
    # header still carries the value that was passed in plus the charset.
    assert resp.content_type == "application/octet-stream"
    assert resp.headers["content-type"] == "jpeg; charset=utf-8"
10311031

10321032

0 commit comments

Comments
 (0)