|
2 | 2 |
|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
| 5 | +import numpy as np |
| 6 | + |
5 | 7 | import dascore as dc |
6 | 8 | from dascore.constants import VALID_DATA_TYPES |
7 | 9 | from dascore.core.coordmanager import get_coord_manager |
@@ -60,13 +62,29 @@ def get_distance_units(attrs): |
60 | 62 |
|
def _get_time_coord(node):
    """
    Get the time coordinate from a Raw node.

    The coordinate is primarily derived from the ISO 8601 strings stored in
    the "PartStartTime" and "PartEndTime" attributes of the "RawDataTime"
    array. If the resulting end time is slightly inconsistent with the last
    value stored in the time array, the array itself is used instead.

    Parameters
    ----------
    node
        A raw-data node which contains a "RawDataTime" array whose attrs
        include "PartStartTime" and "PartEndTime".

    Returns
    -------
    The time coordinate created by ``get_coord``.

    Raises
    ------
    ValueError
        If the "RawDataTime" array is empty.
    """
    time_array = node["RawDataTime"]
    array_len = len(time_array)
    # Raise explicitly rather than assert; asserts are stripped under -O.
    if array_len == 0:
        raise ValueError("Missing time array in ProdML file.")
    time_attrs = time_array.attrs
    # Drop any utc offset ("+hh:mm") and trailing "Z" before parsing.
    start_str = unbyte(time_attrs["PartStartTime"]).split("+")[0]
    start = dc.to_datetime64(start_str.rstrip("Z"))
    if array_len == 1:
        # A single sample has no spacing from which to derive a step
        # (the division below would be by zero), so build the coordinate
        # directly from the start time.
        return get_coord(data=np.asarray([start]), units="s")
    end_str = unbyte(time_attrs["PartEndTime"]).split("+")[0]
    end = dc.to_datetime64(end_str.rstrip("Z"))
    step = (end - start) / (array_len - 1)
    time_coord = get_coord(start=start, stop=end + step, step=step, units="s")
    # Sometimes the "PartEndTime" can be wrong. Check for this and try to
    # compensate. See #414.
    last = np.asarray(time_array[-1:]).astype("datetime64[us]")[0]
    tc_max = np.asarray(time_coord.max()).astype("datetime64[us]")
    # Mismatch between attribute-derived and stored end times, in samples.
    diff = float(np.abs((tc_max - last) / step))
    # Note: just in case the time array is not in microseconds as it should
    # be, we prefer to use the iso 8601 strings in the 'PartStartTime' attrs
    # because they are less likely to get messed up. Therefore, we only
    # correct time coordinate from time array if the values are "close" but off.
    if 0 < diff < 10:
        time_values = time_array[:].astype("datetime64[us]")
        # Keep the same units as the attribute-derived coordinate.
        time_coord = get_coord(data=time_values, units="s")
    return time_coord
70 | 88 |
|
71 | 89 |
|
72 | 90 | def _get_data_unit_and_type(node): |
@@ -108,9 +126,9 @@ def _get_prodml_attrs(fi, extras=None) -> list[dict]: |
108 | 126 | for node in raw_nodes.values(): |
109 | 127 | info = dict(base_info) |
110 | 128 | t_coord = _get_time_coord(node) |
111 | | - info.update(t_coord.get_attrs_dict("time")) |
112 | 129 | info.update(_get_data_unit_and_type(node)) |
113 | | - info["dims"] = _get_dims(node) |
| 130 | + dims = _get_dims(node) |
| 131 | + info["dims"] = dims |
114 | 132 | if extras is not None: |
115 | 133 | info.update(extras) |
116 | 134 | info["coords"] = {"time": t_coord, "distance": d_coord} |
|
0 commit comments