-
-
Notifications
You must be signed in to change notification settings - Fork 212
Closed
Labels
Description
When loading MNIST data from the dataset, a MaxRetryError is thrown.
Steps/Code to Reproduce
dataset = openml.datasets.get_dataset(554, download_data=False)
X, y, is_categorical, _ = dataset.get_data(
dataset_format="array", target=dataset.default_target_attribute
)
Expected Results
No error is thrown. MNIST data is loaded.
Actual Results
---------------------------------------------------------------------------
MaxRetryError Traceback (most recent call last)
<ipython-input-7-1f1613061a94> in <module>
1 dataset = openml.datasets.get_dataset(554, download_data=False)
----> 2 X, y, is_categorical, _ = dataset.get_data(
3 dataset_format="array", target=dataset.default_target_attribute
4 )
~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in get_data(self, target, include_row_id, include_ignore_attribute, dataset_format)
696 List of attribute names.
697 """
--> 698 data, categorical, attribute_names = self._load_data()
699
700 to_exclude = []
~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in _load_data(self)
526 if need_to_create_pickle or need_to_create_feather:
527 if self.data_file is None:
--> 528 self._download_data()
529
530 file_to_load = self.data_file if self.parquet_file is None else self.parquet_file
~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in _download_data(self)
304 self.data_file = _get_dataset_arff(self)
305 if self._minio_url is not None:
--> 306 self.parquet_file = _get_dataset_parquet(self)
307
308 def _get_arff(self, format: str) -> Dict:
~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/functions.py in _get_dataset_parquet(description, cache_directory)
1001 if not os.path.isfile(output_file_path):
1002 try:
-> 1003 openml._api_calls._download_minio_file(
1004 source=cast(str, url), destination=output_file_path
1005 )
~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/_api_calls.py in _download_minio_file(source, destination, exists_ok)
103
104 try:
--> 105 client.fget_object(
106 bucket_name=bucket, object_name=object_name, file_path=str(destination),
107 )
~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in fget_object(self, bucket_name, object_name, file_path, request_headers, ssec, version_id, extra_query_params, tmp_file_path)
1067 makedirs(os.path.dirname(file_path))
1068
-> 1069 stat = self.stat_object(
1070 bucket_name,
1071 object_name,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in stat_object(self, bucket_name, object_name, ssec, version_id, extra_query_params)
1904 query_params = extra_query_params or {}
1905 query_params.update({"versionId": version_id} if version_id else {})
-> 1906 response = self._execute(
1907 "HEAD",
1908 bucket_name,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in _execute(self, method, bucket_name, object_name, body, headers, query_params, preload_content, no_body_trace)
411
412 try:
--> 413 return self._url_open(
414 method,
415 region,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in _url_open(self, method, region, bucket_name, object_name, body, headers, query_params, preload_content, no_body_trace)
278 http_headers.add(key, value)
279
--> 280 response = self._http.urlopen(
281 method,
282 urlunsplit(url),
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/poolmanager.py in urlopen(self, method, url, redirect, **kw)
373 response = conn.urlopen(method, url, **kw)
374 else:
--> 375 response = conn.urlopen(method, u.request_uri, **kw)
376
377 redirect_location = redirect and response.get_redirect_location()
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
844 retries.sleep(response)
845 log.debug("Retry: %s", url)
--> 846 return self.urlopen(
847 method,
848 url,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
844 retries.sleep(response)
845 log.debug("Retry: %s", url)
--> 846 return self.urlopen(
847 method,
848 url,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
844 retries.sleep(response)
845 log.debug("Retry: %s", url)
--> 846 return self.urlopen(
847 method,
848 url,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
844 retries.sleep(response)
845 log.debug("Retry: %s", url)
--> 846 return self.urlopen(
847 method,
848 url,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
844 retries.sleep(response)
845 log.debug("Retry: %s", url)
--> 846 return self.urlopen(
847 method,
848 url,
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
834 if retries.is_retry(method, response.status, has_retry_after):
835 try:
--> 836 retries = retries.increment(method, url, response=response, _pool=self)
837 except MaxRetryError:
838 if retries.raise_on_status:
~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
571
572 if new_retry.is_exhausted():
--> 573 raise MaxRetryError(_pool, url, error or ResponseError(cause))
574
575 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
MaxRetryError: HTTPConnectionPool(host='openml1.win.tue.nl', port=80): Max retries exceeded with url: /dataset554/dataset_554.pq (Caused by ResponseError('too many 503 error responses'))
Versions
macOS-10.16-x86_64-i386-64bit
Python 3.8.5 (default, Sep 4 2020, 02:22:02)
[Clang 10.0.0 ]
NumPy 1.19.5
SciPy 1.7.1
Scikit-Learn 1.0
OpenML 0.12.2