-
Notifications
You must be signed in to change notification settings - Fork 28
Closed
Labels
IO (Work for reading/writing different formats), bug (Something isn't working)
Description
Description
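There is an issue with h5py reading Febus data: scanning the files fails with a KeyError because the 'BlockRate' attribute cannot be located (see the traceback below). @d-chambers, you should have access to a directory on Google Drive containing 2 sample patches that reproduce the error.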
Example
import dascore as dc
data_path = "test_data/"
sp = dc.spool(data_path).update()
pa = sp[0]
Error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[4], line 1
----> 1 sp = dc.spool(data_path).update()
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/clients/dirspool.py:108, in DirectorySpool.update(self, progress)
104 @compose_docstring(doc=BaseSpool.update.__doc__)
105 def update(self, progress: PROGRESS_LEVELS = "standard") -> Self:
106 """{doc}."""
107 out = self.__class__(
--> 108 base_path=self.indexer.update(progress=progress),
109 preferred_format=self._preferred_format,
110 select_kwargs=self._select_kwargs,
111 )
112 return out
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/indexer.py:328, in DirectoryIndexer.update(self, paths, progress)
326 timestamp = self._get_mtime(only_new=True)
327 paths = self._get_paths(paths)
--> 328 df = dc.scan_to_df(
329 path=paths,
330 timestamp=timestamp,
331 progress=progress,
332 ext=self.ext,
333 )
334 # Put contents found into database.
335 if not df.empty:
336 # Some users were surprised the spool wasn't sorted. We still cant
337 # guarantee all spools will be sorted but we can make sure most are
338 # by sorting the contents before dumping to index.
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/core.py:693, in scan_to_df(path, file_format, file_version, ext, timestamp, progress, exclude)
691 if isinstance(path, DataFrameSpool):
692 return path.get_contents()
--> 693 info = scan(
694 path=path,
695 file_format=file_format,
696 file_version=file_version,
697 ext=ext,
698 timestamp=timestamp,
699 progress=progress,
700 )
701 df = _model_list_to_df(info, exclude=exclude)
702 return df
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/core.py:884, in scan(path, file_format, file_version, ext, timestamp, progress)
882 else:
883 try:
--> 884 source = fiber_io.scan(resource, _pre_cast=True)
885 # This happens if the file is corrupt see #346.
886 except (OSError, InvalidFiberFileError, ValueError, TypeError):
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/core.py:404, in _type_caster.<locals>._wrapper(_pre_cast, *args, **kwargs)
400 # TODO look at replacing this with pydantic's type_guard thing.
401
402 # this allows us to fast-track calls from generic functions
403 if required_type is None or _pre_cast:
--> 404 return func(*args, **kwargs)
405 bound = sig.bind(*args, **kwargs)
406 new_kw = bound.arguments
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/febus/core.py:78, in Febus2.scan(self, resource, **kwargs)
72 file_version = _get_febus_version_str(resource)
73 extras = {
74 "path": resource.filename,
75 "file_format": self.name,
76 "file_version": str(file_version),
77 }
---> 78 for attr, cm, _ in _yield_attrs_coords(resource):
79 attr["coords"] = cm.to_summary_dict()
80 attr.update(dict(extras))
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/febus/utils.py:192, in _yield_attrs_coords(fi)
190 for febus in febuses:
191 attr = _get_febus_attrs(febus)
--> 192 cm = _get_febus_coord_manager(febus)
193 yield attr, cm, febus
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/febus/utils.py:180, in _get_febus_coord_manager(feb)
177 def _get_febus_coord_manager(feb: _FebusSlice) -> CoordManager:
178 """Get a coordinate manager for febus slice."""
179 coords = dict(
--> 180 time=_get_time_coord(feb),
181 distance=_get_distance_coord(feb),
182 )
183 cm = get_coord_manager(coords=coords, dims=("time", "distance"))
184 return cm
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/febus/utils.py:127, in _get_time_coord(feb)
125 # Get spacing between time samples (in s) and the total time of each block.
126 time_step = feb.zone.attrs["Spacing"][1] / 1_000 # value in ms, convert to s.
--> 127 excess_rows = _get_time_overlap_samples(feb, data_shape)
128 total_time_rows = (data_shape[1] - excess_rows) * n_blocks
129 # Get origin info, these are offsets from time to get to the first simple
130 # of the block. These should always be non-positive.
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/dascore/io/febus/utils.py:103, in _get_time_overlap_samples(feb, data_shape)
101 """Determine the number of redundant samples in the time dimension."""
102 time_step = feb.zone.attrs["Spacing"][1] / 1_000 # value in ms, convert to s.
--> 103 block_time = _maybe_unpack(1 / (feb.zone.attrs["BlockRate"] / 1_000))
104 # Since the data have overlaps in each block's time dimension, we need to
105 # trim the overlap off the time dimension to avoid having to merge blocks later.
106 # However, sometimes the "BlockOverlap" is wrong, so we calculate it
107 # manually here.
108 expected_samples = int(np.round(block_time / time_step))
File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()
File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()
File ~/miniconda3/envs/dc_user/lib/python3.12/site-packages/h5py/_hl/attrs.py:55, in AttributeManager.__getitem__(self, name)
51 @with_phil
52 def __getitem__(self, name):
53 """ Read the value of an attribute.
54 """
---> 55 attr = h5a.open(self._id, self._e(name))
56 shape = attr.shape
58 # shape is None for empty dataspaces
File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()
File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()
File h5py/h5a.pyx:79, in h5py.h5a.open()
KeyError: "Unable to synchronously open attribute (can't locate attribute: 'BlockRate')"
Expected behavior
Scan and read febus data with no issue.
Versions
- OS [e.g. Ubuntu 20.04]: macOS 15.6
- DASCore Version [e.g. 0.0.5]: 0.1.12
- Python Version [e.g. 3.10]: 3.12
Metadata
Metadata
Assignees
Labels
IO (Work for reading/writing different formats), bug (Something isn't working)