|
54 | 54 | ConnectionTimeoutError, |
55 | 55 | ContentTypeError, |
56 | 56 | InvalidURL, |
| 57 | + InvalidUrlClientError, |
| 58 | + InvalidUrlRedirectClientError, |
| 59 | + NonHttpUrlClientError, |
| 60 | + NonHttpUrlRedirectClientError, |
| 61 | + RedirectClientError, |
57 | 62 | ServerConnectionError, |
58 | 63 | ServerDisconnectedError, |
59 | 64 | ServerFingerprintMismatch, |
|
108 | 113 | "ConnectionTimeoutError", |
109 | 114 | "ContentTypeError", |
110 | 115 | "InvalidURL", |
| 116 | + "InvalidUrlClientError", |
| 117 | + "RedirectClientError", |
| 118 | + "NonHttpUrlClientError", |
| 119 | + "InvalidUrlRedirectClientError", |
| 120 | + "NonHttpUrlRedirectClientError", |
111 | 121 | "ServerConnectionError", |
112 | 122 | "ServerDisconnectedError", |
113 | 123 | "ServerFingerprintMismatch", |
@@ -167,6 +177,7 @@ class ClientTimeout: |
167 | 177 |
|
168 | 178 | # https://www.rfc-editor.org/rfc/rfc9110#section-9.2.2 |
169 | 179 | IDEMPOTENT_METHODS = frozenset({"GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE"}) |
| 180 | +HTTP_SCHEMA_SET = frozenset({"http", "https", ""}) |
170 | 181 |
|
171 | 182 | _RetType = TypeVar("_RetType") |
172 | 183 | _CharsetResolver = Callable[[ClientResponse, bytes], str] |
@@ -404,7 +415,10 @@ async def _request( |
404 | 415 | try: |
405 | 416 | url = self._build_url(str_or_url) |
406 | 417 | except ValueError as e: |
407 | | - raise InvalidURL(str_or_url) from e |
| 418 | + raise InvalidUrlClientError(str_or_url) from e |
| 419 | + |
| 420 | + if url.scheme not in HTTP_SCHEMA_SET: |
| 421 | + raise NonHttpUrlClientError(url) |
408 | 422 |
|
409 | 423 | skip_headers = set(self._skip_auto_headers) |
410 | 424 | if skip_auto_headers is not None: |
@@ -459,6 +473,15 @@ async def _request( |
459 | 473 | retry_persistent_connection = method in IDEMPOTENT_METHODS |
460 | 474 | while True: |
461 | 475 | url, auth_from_url = strip_auth_from_url(url) |
| 476 | + if not url.raw_host: |
| 477 | + # NOTE: Bail out early when the URL has no host; otherwise |
| 478 | + # NOTE: `self._request_class()` below would raise `InvalidURL`. |
| 479 | + err_exc_cls = ( |
| 480 | + InvalidUrlRedirectClientError |
| 481 | + if redirects |
| 482 | + else InvalidUrlClientError |
| 483 | + ) |
| 484 | + raise err_exc_cls(url) |
462 | 485 | if auth and auth_from_url: |
463 | 486 | raise ValueError( |
464 | 487 | "Cannot combine AUTH argument with " |
@@ -611,34 +634,44 @@ async def _request( |
611 | 634 | resp.release() |
612 | 635 |
|
613 | 636 | try: |
614 | | - parsed_url = URL( |
| 637 | + parsed_redirect_url = URL( |
615 | 638 | r_url, encoded=not self._requote_redirect_url |
616 | 639 | ) |
617 | | - |
618 | 640 | except ValueError as e: |
619 | | - raise InvalidURL(r_url) from e |
| 641 | + raise InvalidUrlRedirectClientError( |
| 642 | + r_url, |
| 643 | + "Server attempted redirecting to a location that does not look like a URL", |
| 644 | + ) from e |
620 | 645 |
|
621 | | - scheme = parsed_url.scheme |
622 | | - if scheme not in ("http", "https", ""): |
| 646 | + scheme = parsed_redirect_url.scheme |
| 647 | + if scheme not in HTTP_SCHEMA_SET: |
623 | 648 | resp.close() |
624 | | - raise ValueError("Can redirect only to http or https") |
| 649 | + raise NonHttpUrlRedirectClientError(r_url) |
625 | 650 | elif not scheme: |
626 | | - parsed_url = url.join(parsed_url) |
| 651 | + parsed_redirect_url = url.join(parsed_redirect_url) |
627 | 652 |
|
628 | 653 | is_same_host_https_redirect = ( |
629 | | - url.host == parsed_url.host |
630 | | - and parsed_url.scheme == "https" |
| 654 | + url.host == parsed_redirect_url.host |
| 655 | + and parsed_redirect_url.scheme == "https" |
631 | 656 | and url.scheme == "http" |
632 | 657 | ) |
633 | 658 |
|
| 659 | + try: |
| 660 | + redirect_origin = parsed_redirect_url.origin() |
| 661 | + except ValueError as origin_val_err: |
| 662 | + raise InvalidUrlRedirectClientError( |
| 663 | + parsed_redirect_url, |
| 664 | + "Invalid redirect URL origin", |
| 665 | + ) from origin_val_err |
| 666 | + |
634 | 667 | if ( |
635 | | - url.origin() != parsed_url.origin() |
| 668 | + url.origin() != redirect_origin |
636 | 669 | and not is_same_host_https_redirect |
637 | 670 | ): |
638 | 671 | auth = None |
639 | 672 | headers.pop(hdrs.AUTHORIZATION, None) |
640 | 673 |
|
641 | | - url = parsed_url |
| 674 | + url = parsed_redirect_url |
642 | 675 | params = {} |
643 | 676 | resp.release() |
644 | 677 | continue |
|
0 commit comments