Skip to content

Fix MaxRetryError when loading MNIST & Fashion-MNIST #1116

@PSSF23

Description

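When loading the MNIST dataset (OpenML dataset ID 554) with dataset.get_data(), a MaxRetryError is raised: the download of the dataset's Parquet file fails because the server repeatedly responds with HTTP 503.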

Steps/Code to Reproduce

dataset = openml.datasets.get_dataset(554, download_data=False)
X, y, is_categorical, _ = dataset.get_data(
    dataset_format="array", target=dataset.default_target_attribute
)

Expected Results

No error is raised, and the MNIST data is loaded.

Actual Results

---------------------------------------------------------------------------
MaxRetryError                             Traceback (most recent call last)
<ipython-input-7-1f1613061a94> in <module>
      1 dataset = openml.datasets.get_dataset(554, download_data=False)
----> 2 X, y, is_categorical, _ = dataset.get_data(
      3     dataset_format="array", target=dataset.default_target_attribute
      4 )

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in get_data(self, target, include_row_id, include_ignore_attribute, dataset_format)
    696             List of attribute names.
    697         """
--> 698         data, categorical, attribute_names = self._load_data()
    699 
    700         to_exclude = []

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in _load_data(self)
    526         if need_to_create_pickle or need_to_create_feather:
    527             if self.data_file is None:
--> 528                 self._download_data()
    529 
    530             file_to_load = self.data_file if self.parquet_file is None else self.parquet_file

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in _download_data(self)
    304         self.data_file = _get_dataset_arff(self)
    305         if self._minio_url is not None:
--> 306             self.parquet_file = _get_dataset_parquet(self)
    307 
    308     def _get_arff(self, format: str) -> Dict:

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/functions.py in _get_dataset_parquet(description, cache_directory)
   1001     if not os.path.isfile(output_file_path):
   1002         try:
-> 1003             openml._api_calls._download_minio_file(
   1004                 source=cast(str, url), destination=output_file_path
   1005             )

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/_api_calls.py in _download_minio_file(source, destination, exists_ok)
    103 
    104     try:
--> 105         client.fget_object(
    106             bucket_name=bucket, object_name=object_name, file_path=str(destination),
    107         )

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in fget_object(self, bucket_name, object_name, file_path, request_headers, ssec, version_id, extra_query_params, tmp_file_path)
   1067         makedirs(os.path.dirname(file_path))
   1068 
-> 1069         stat = self.stat_object(
   1070             bucket_name,
   1071             object_name,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in stat_object(self, bucket_name, object_name, ssec, version_id, extra_query_params)
   1904         query_params = extra_query_params or {}
   1905         query_params.update({"versionId": version_id} if version_id else {})
-> 1906         response = self._execute(
   1907             "HEAD",
   1908             bucket_name,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in _execute(self, method, bucket_name, object_name, body, headers, query_params, preload_content, no_body_trace)
    411 
    412         try:
--> 413             return self._url_open(
    414                 method,
    415                 region,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in _url_open(self, method, region, bucket_name, object_name, body, headers, query_params, preload_content, no_body_trace)
    278                 http_headers.add(key, value)
    279 
--> 280         response = self._http.urlopen(
    281             method,
    282             urlunsplit(url),

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/poolmanager.py in urlopen(self, method, url, redirect, **kw)
    373             response = conn.urlopen(method, url, **kw)
    374         else:
--> 375             response = conn.urlopen(method, u.request_uri, **kw)
    376 
    377         redirect_location = redirect and response.get_redirect_location()

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    834         if retries.is_retry(method, response.status, has_retry_after):
    835             try:
--> 836                 retries = retries.increment(method, url, response=response, _pool=self)
    837             except MaxRetryError:
    838                 if retries.raise_on_status:

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    571 
    572         if new_retry.is_exhausted():
--> 573             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    574 
    575         log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPConnectionPool(host='openml1.win.tue.nl', port=80): Max retries exceeded with url: /dataset554/dataset_554.pq (Caused by ResponseError('too many 503 error responses'))

Versions

macOS-10.16-x86_64-i386-64bit
Python 3.8.5 (default, Sep  4 2020, 02:22:02) 
[Clang 10.0.0 ]
NumPy 1.19.5
SciPy 1.7.1
Scikit-Learn 1.0
OpenML 0.12.2

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions