Skip to content

Commit 72c7878

Browse files
authored
fix_514 (#515)
1 parent 2d2a7dd commit 72c7878

File tree

3 files changed

+46
-7
lines changed

3 files changed

+46
-7
lines changed

dascore/io/prodml/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
"""
22
Support for prodML format.
33
4-
This is used by Silixa's iDAS as an HDF5 format and perhaps other interrogators.
5-
64
More info about ProdML can be found here:
75
https://www.energistics.org/prodml-developers-users
86
"""

dascore/io/prodml/utils.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from __future__ import annotations
44

5+
import numpy as np
6+
57
import dascore as dc
68
from dascore.constants import VALID_DATA_TYPES
79
from dascore.core.coordmanager import get_coord_manager
@@ -60,13 +62,29 @@ def get_distance_units(attrs):
6062

6163
def _get_time_coord(node):
6264
"""Get the time information from a Raw node."""
63-
time_attrs = node["RawDataTime"].attrs
65+
time_array = node["RawDataTime"]
66+
array_len = len(time_array)
67+
assert array_len > 0, "Missing time array in ProdML file."
68+
time_attrs = time_array.attrs
6469
start_str = unbyte(time_attrs["PartStartTime"]).split("+")[0]
6570
start = dc.to_datetime64(start_str.rstrip("Z"))
6671
end_str = unbyte(time_attrs["PartEndTime"]).split("+")[0]
6772
end = dc.to_datetime64(end_str.rstrip("Z"))
68-
step = (end - start) / (len(node["RawDataTime"]) - 1)
69-
return get_coord(start=start, stop=end + step, step=step, units="s")
73+
step = (end - start) / (array_len - 1)
74+
time_coord = get_coord(start=start, stop=end + step, step=step, units="s")
75+
# Sometimes the "PartEndTime" can be wrong. Check for this and try to
76+
# compensate. See #514.
77+
last = np.asarray(time_array[-1:]).astype("datetime64[us]")[0]
78+
tc_max = np.asarray(time_coord.max()).astype("datetime64[us]")
79+
diff = float(np.abs((tc_max - last) / step))
80+
# Note: just in case the time array is not in microseconds as it should
81+
# be, we prefer to use the iso 8601 strings in the 'PartStartTime' attrs
82+
# because they are less likely to get messed up. Therefore, we only
83+
# correct time coordinate from time array if the values are "close" but off.
84+
if 0 < diff < 10:
85+
time_array = time_array[:].astype("datetime64[us]")
86+
time_coord = get_coord(data=time_array)
87+
return time_coord
7088

7189

7290
def _get_data_unit_and_type(node):
@@ -108,9 +126,9 @@ def _get_prodml_attrs(fi, extras=None) -> list[dict]:
108126
for node in raw_nodes.values():
109127
info = dict(base_info)
110128
t_coord = _get_time_coord(node)
111-
info.update(t_coord.get_attrs_dict("time"))
112129
info.update(_get_data_unit_and_type(node))
113-
info["dims"] = _get_dims(node)
130+
dims = _get_dims(node)
131+
info["dims"] = dims
114132
if extras is not None:
115133
info.update(extras)
116134
info["coords"] = {"time": t_coord, "distance": d_coord}

tests/test_io/test_prodml/test_prod_ml.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44

55
import shutil
66

7+
import h5py
78
import pandas as pd
89
import pytest
910
import tables
1011

1112
import dascore as dc
13+
from dascore.core.coords import get_coord
1214
from dascore.io.core import read
1315
from dascore.utils.downloader import fetch
1416

@@ -53,6 +55,22 @@ def issue_221_patch_path(self, tmp_path_factory):
5355
node._v_attrs.Dimensions = new_dims
5456
return new_path
5557

58+
@pytest.fixture(scope="class")
59+
def issue_514_patch_path(self, tmp_path_factory):
60+
"""Make a patch with bad endtime metadata. See #412."""
61+
tmp_path = tmp_path_factory.mktemp("issue_514")
62+
path = dc.utils.downloader.fetch("prodml_2.0.h5")
63+
new_path = shutil.copy2(path, tmp_path / "prod_2_issue_514.h5")
64+
with h5py.File(new_path, "a") as fi:
65+
# monkey patch the "PartEndTime" attribute to simulate the issue.
66+
parent_node = fi["Acquisition"]["Raw[0]"]
67+
time_node = parent_node["RawDataTime"]
68+
time = time_node[:].astype("datetime64[us]")
69+
time_coord = get_coord(data=time)
70+
new_time = str(time_coord.max() + time_coord.step * 2)
71+
time_node.attrs["PartEndTime"] = new_time
72+
return new_path
73+
5674
@pytest.fixture(scope="class")
5775
def silixa_h5_patch(self, idas_h5_example_path):
5876
"""Get the silixa file, return Patch."""
@@ -76,6 +94,11 @@ def test_issue_221(self, issue_221_patch_path):
7694
patch = dc.read(issue_221_patch_path)[0]
7795
assert isinstance(patch, dc.Patch)
7896

97+
def test_issue_514(self, issue_514_patch_path):
98+
"""Ensure the patch can be read despite bad attribute info."""
99+
patch = dc.read(issue_514_patch_path)[0]
100+
assert isinstance(patch, dc.Patch)
101+
79102

80103
class TestReadQuantXV2:
81104
"""Tests for reading the QuantXV2 format."""

0 commit comments

Comments
 (0)