22
33from __future__ import annotations
44
5+ import warnings
56from collections import namedtuple
7+ from functools import cache
68
79import numpy as np
810
1315 _maybe_unpack ,
1416 broadcast_for_index ,
1517 maybe_get_items ,
18+ tukey_fence ,
1619 unbyte ,
1720)
1821
2427)
2528
2629
30+ @cache
31+ def _get_block_time (feb ):
32+ """Get the block time (time in seconds between each block)."""
33+ # Some files have this set. We haven't yet seen any files where this
34+ # values exists and is wrong, so we trust it (for now). This is probably
35+ # much faster than reading the whole time vector.
36+ br = feb .zone .attrs .get ("BlockRate" , 0 ) / 1_000
37+ if br > 0 :
38+ return 1 / br
39+ # Otherwise we have to try to use the time vector. Here be dragons.
40+ time_shape = feb .source ["time" ].shape
41+ # Not sure why but time has the shape of [1, n] for some files and just
42+ # n for others. The first might imply different times for different
43+ # zones? We aren't set up to handle that, but we don't know if it can happen
44+ # so just assert here.
45+ assert np .max (time_shape ) == np .prod (
46+ time_shape
47+ ), "Non flat 2d time vector is not supported by DASCore Febus reader."
48+ # Get the average time spacing in each block. These can vary a bit so
49+ # account for outliers.
50+ time = np .squeeze (feb .source ["time" ][:])
51+ d_time = time [1 :] - time [:- 1 ]
52+ tmin , tmax = tukey_fence (d_time )
53+ d_time = d_time [(d_time >= tmin ) & (d_time <= tmax )]
54+ # After removing outliers, the mean seems to work better than the median
55+ # for the test files we have. There is still a concerning amount of
56+ # variability.
57+ return np .mean (d_time )
58+
59+
@cache
def _get_sample_spacing(feb: _FebusSlice, n_time_samps: int):
    """
    Determine the temporal sample spacing (in seconds).

    Real Febus files are inconsistent: the zone "Spacing" attr can indicate
    one sample rate while the zone "SamplingRate" attr indicates another,
    and either one may be the correct value. To decide between them, each
    candidate is scaled to a full block duration and the one closest to the
    measured block time wins.
    """
    # Candidate 1: second element of the Spacing attr, stored in ms.
    spacing_candidate = feb.zone.attrs["Spacing"][1] / 1_000
    # Candidate 2: reciprocal of SamplingRate. In most cases this attr is
    # either bogus or in Hz; it isn't even mentioned in some Febus docs.
    rate_candidate = _maybe_unpack(1.0 / feb.zone.attrs["SamplingRate"])
    candidates = np.array([spacing_candidate, rate_candidate])
    # The block time doesn't account for overlap, so it won't be exact,
    # but it is close enough to discriminate between the candidates.
    block_time = _get_block_time(feb)
    misfit = np.abs(candidates * n_time_samps - block_time)
    return candidates[np.argmin(misfit)]
80+
81+
2782def _flatten_febus_info (fi ) -> tuple [_FebusSlice , ...]:
2883 """
2984 Given a febus file, return a tuple of named tuples with key info.
@@ -97,16 +152,16 @@ def _get_febus_attrs(feb: _FebusSlice) -> dict:
97152 return out
98153
99154
100- def _get_time_overlap_samples (feb , data_shape ):
155+ def _get_time_overlap_samples (feb , n_time_samps , tstep = None ):
101156 """Determine the number of redundant samples in the time dimension."""
102- time_step = feb . zone . attrs [ "Spacing" ][ 1 ] / 1_000 # value in ms, convert to s.
103- block_time = _maybe_unpack ( 1 / ( feb . zone . attrs [ "BlockRate" ] / 1_000 ) )
157+ tstep = tstep if tstep is not None else _get_sample_spacing ( feb , n_time_samps )
158+ block_time = _get_block_time ( feb )
104159 # Since the data have overlaps in each block's time dimension, we need to
105160 # trim the overlap off the time dimension to avoid having to merge blocks later.
106161 # However, sometimes the "BlockOverlap" is wrong, so we calculate it
107- # manually here.
108- expected_samples = int (np .round (block_time / time_step ) )
109- excess_rows = data_shape [ 1 ] - expected_samples
162+ # manually here, rounding to nearest even number .
163+ expected_samples = int (np .round (( block_time / tstep ) / 2 ) * 2 )
164+ excess_rows = n_time_samps - expected_samples
110165 assert (
111166 excess_rows % 2 == 0
112167 ), "excess rows must be symmetric to distribute on both ends"
@@ -119,23 +174,32 @@ def _get_time_coord(feb):
119174 # In older version time shape is different, always grab first element.
120175 first_slice = tuple (0 for _ in time .shape )
121176 t_0 = time [first_slice ]
122- # Data dimensions are block_index, time, distance
123- data_shape = feb .zone [feb .data_name ].shape
124- n_blocks = data_shape [0 ]
177+ # Number of time blocks in the data cube.
178+ shape = feb .zone [feb .data_name ].shape
179+ n_time_samps = shape [1 ]
180+ n_blocks = shape [0 ]
125181 # Get spacing between time samples (in s) and the total time of each block.
126- time_step = feb . zone . attrs [ "Spacing" ][ 1 ] / 1_000 # value in ms, convert to s.
127- excess_rows = _get_time_overlap_samples (feb , data_shape )
128- total_time_rows = (data_shape [ 1 ] - excess_rows ) * n_blocks
182+ time_step = _get_sample_spacing ( feb , n_time_samps )
183+ excess_rows = _get_time_overlap_samples (feb , n_time_samps , tstep = time_step )
184+ total_time_rows = (n_time_samps - excess_rows ) * n_blocks
129185 # Get origin info, these are offsets from time to get to the first sample
130186 # of the block. These should always be non-positive.
131187 time_offset = feb .zone .attrs ["Origin" ][1 ] / 1_000 # also convert to s
132188 assert time_offset <= 0 , "time offset must be non positive"
133189 # Get the start/stop indices for the zone. We assume zones never sub-slice
134- # time (only distance) but assert that here.
190+ # time (only distance). However, some files (eg Valencia) have an incorrect
191+ # value set here, so we only warn.
135192 extent = feb .zone .attrs ["Extent" ]
136- assert (extent [3 ] - extent [2 ] + 1 ) == data_shape [1 ], "Cant handle sub time zones"
137- # Create time coord
138- # Need to account for removing overlap times.
193+ if (extent [3 ] - extent [2 ] + 1 ) != n_time_samps :
194+ msg = (
195+ "It appears the Febus file extents specify a different range than "
196+ "found in the data array. Double check this is correct."
197+ )
198+ warnings .warn (msg , UserWarning , stacklevel = 2 )
199+ # Create time coord.
200+ # Need to account for removing overlap times. Also, time vector refers
201+ # to the center of the block, so this finds the first non-overlapping
202+ # sample.
139203 total_start = t_0 + time_offset + (excess_rows // 2 ) * time_step
140204 total_end = total_start + total_time_rows * time_step
141205 time_coord = get_coord (
@@ -224,7 +288,7 @@ def _get_time_filtered_data(data, t_start_end, time, total_slice, time_coord):
224288 times = t_start_end [in_time ]
225289 # get start/stop indexes for complete blocks
226290 start = np .argmax (in_time )
227- stop = np .argmax (np .cumsum (in_time ))
291+ stop = np .argmax (np .cumsum (in_time )) + ( 1 if len ( times ) else 0 )
228292 total_slice [0 ] = slice (start , stop )
229293 # load data from disk.
230294 data_2d = data [tuple (total_slice )].reshape (- 1 , data .shape [- 1 ])
@@ -244,8 +308,8 @@ def _get_time_filtered_data(data, t_start_end, time, total_slice, time_coord):
244308 dist_coord , time_coord = cm .coord_map ["distance" ], cm .coord_map ["time" ]
245309 data = febus .zone [febus .data_name ]
246310 data_shape = data .shape
247- skip_rows = _get_time_overlap_samples (febus , data_shape ) // 2
248- # Need to handle case where excess_rows == 0
311+ skip_rows = _get_time_overlap_samples (febus , data_shape [ 1 ] ) // 2
312+ # This handles the case where excess_rows == 0
249313 data_slice = slice (skip_rows , - skip_rows if skip_rows else None )
250314 total_slice = list (broadcast_for_index (3 , 1 , data_slice ))
251315 total_time_rows = data_shape [1 ] - 2 * skip_rows
0 commit comments