-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Closed
Labels
Description
See upstream: zarr-developers/zarr-python#551
It seems that using a ZipStore creates 1-byte objects for Unicode string attributes.
For example, saving the same Dataset with a DirectoryStore and with a ZipStore creates an attribute for a Unicode array that is 20 bytes in size in the former, but only 1 byte in size in the latter.
In fact, Ubuntu's File Roller won't even let me extract the files.
I have a feeling this is due to the following note in the zarr documentation:
"Note that Zip files do not provide any way to remove or replace existing entries."
MCVE Code Sample
ZipStore
import xarray as xr
import zarr
x = xr.Dataset()
x['hello'] = 'world'
x
with zarr.ZipStore('test_store.zip', mode='w') as store:
    x.to_zarr(store)
with zarr.ZipStore('test_store.zip', mode='r') as store:
    x_read = xr.open_zarr(store).compute()
Issued error
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-1-2a92a6db56ab> in <module>
7 x.to_zarr(store)
8 with zarr.ZipStore('test_store.zip', mode='r') as store:
----> 9 x_read = xr.open_zarr(store).compute()
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/dataset.py in compute(self, **kwargs)
803 """
804 new = self.copy(deep=False)
--> 805 return new.load(**kwargs)
806
807 def _persist_inplace(self, **kwargs) -> "Dataset":
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
655 for k, v in self.variables.items():
656 if k not in lazy_data:
--> 657 v.load()
658
659 return self
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/variable.py in load(self, **kwargs)
370 self._data = as_compatible_data(self._data.compute(**kwargs))
371 elif not hasattr(self._data, "__array_function__"):
--> 372 self._data = np.asarray(self._data)
373 return self
374
~/miniconda3/envs/dev/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
545 def __array__(self, dtype=None):
546 array = as_indexable(self.array)
--> 547 return np.asarray(array[self.key], dtype=None)
548
549 def transpose(self, order):
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/backends/zarr.py in __getitem__(self, key)
46 array = self.get_array()
47 if isinstance(key, indexing.BasicIndexer):
---> 48 return array[key.tuple]
49 elif isinstance(key, indexing.VectorizedIndexer):
50 return array.vindex[
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/core.py in __getitem__(self, selection)
570
571 fields, selection = pop_fields(selection)
--> 572 return self.get_basic_selection(selection, fields=fields)
573
574 def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/core.py in get_basic_selection(self, selection, out, fields)
693 if self._shape == ():
694 return self._get_basic_selection_zd(selection=selection, out=out,
--> 695 fields=fields)
696 else:
697 return self._get_basic_selection_nd(selection=selection, out=out,
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/core.py in _get_basic_selection_zd(self, selection, out, fields)
709 # obtain encoded data for chunk
710 ckey = self._chunk_key((0,))
--> 711 cdata = self.chunk_store[ckey]
712
713 except KeyError:
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/storage.py in __getitem__(self, key)
1249 with self.mutex:
1250 with self.zf.open(key) as f: # will raise KeyError
-> 1251 return f.read()
1252
1253 def __setitem__(self, key, value):
~/miniconda3/envs/dev/lib/python3.7/zipfile.py in read(self, n)
914 self._offset = 0
915 while not self._eof:
--> 916 buf += self._read1(self.MAX_N)
917 return buf
918
~/miniconda3/envs/dev/lib/python3.7/zipfile.py in _read1(self, n)
1018 if self._left <= 0:
1019 self._eof = True
-> 1020 self._update_crc(data)
1021 return data
1022
~/miniconda3/envs/dev/lib/python3.7/zipfile.py in _update_crc(self, newdata)
946 # Check the CRC if we're at the end of the file
947 if self._eof and self._running_crc != self._expected_crc:
--> 948 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
949
950 def read1(self, n):
BadZipFile: Bad CRC-32 for file 'hello/0'
0
2
Untitled10.ipynb
Working DirectoryStore example
import xarray as xr
import zarr
x = xr.Dataset()
x['hello'] = 'world'
x
store = zarr.DirectoryStore('test_store2.zarr')
x.to_zarr(store)
x_read = xr.open_zarr(store)
x_read.compute()
assert x_read.hello == x.hello
Expected Output
Saving and reloading the string data through a ZipStore should work, just as it does with a DirectoryStore.
Output of xr.show_versions()
Details
INSTALLED VERSIONS
------------------
commit: None
python: 3.7.6 | packaged by conda-forge | (default, Jan 7 2020, 22:33:48)
[GCC 7.3.0]
python-bits: 64
OS: Linux
OS-release: 5.3.0-40-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_CA.UTF-8
LOCALE: en_CA.UTF-8
libhdf5: None
libnetcdf: None
xarray: 0.14.1
pandas: 1.0.0
numpy: 1.17.5
scipy: 1.4.1
netCDF4: None
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: 2.4.0
cftime: None
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: None
iris: None
bottleneck: None
dask: 2.10.1
distributed: 2.10.0
matplotlib: 3.1.3
cartopy: None
seaborn: None
numbagg: None
setuptools: 45.1.0.post20200119
pip: 20.0.2
conda: None
pytest: 5.3.1
IPython: 7.12.0
sphinx: 2.3.1