Skip to content

Commit d7d7c01

Browse files
committed
Speed up decode_cf_datetime
Instead of casting the input numeric dates to float, they are cast to integer nanoseconds, which makes `pd.to_timedelta()` work much faster (100x speedup on my machine).
1 parent d5c7e06 commit d7d7c01

File tree

1 file changed

+20
-3
lines changed

1 file changed

+20
-3
lines changed

xarray/conventions.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,14 @@ def decode_cf_datetime(num_dates, units, calendar=None):
126126
operations, which makes it much faster than netCDF4.num2date. In such a
127127
case, the returned array will be of type np.datetime64.
128128
129+
Note that the time unit in `units` must not be smaller than microseconds and
130+
not larger than days.
131+
129132
See also
130133
--------
131134
netCDF4.num2date
132135
"""
133-
num_dates = np.asarray(num_dates, dtype=float)
136+
num_dates = np.asarray(num_dates)
134137
flat_num_dates = num_dates.ravel()
135138
if calendar is None:
136139
calendar = 'standard'
@@ -155,10 +158,24 @@ def decode_cf_datetime(num_dates, units, calendar=None):
155158
pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
156159
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
157160

158-
dates = (pd.to_timedelta(flat_num_dates, delta) + ref_date).values
161+
# Cast input dates to integers of nanoseconds because `pd.to_timedelta`
162+
# works much faster when dealing with integers
163+
ns_per_time_delta = {'us': 1e3,
164+
'ms': 1e6,
165+
's': 1e9,
166+
'm': 1e9 * 60,
167+
'h': 1e9 * 60 * 60,
168+
'D': 1e9 * 60 * 60 * 24}
169+
flat_num_dates_ns_int = (flat_num_dates *
170+
ns_per_time_delta[delta]).astype(np.int64)
171+
172+
dates = (pd.to_timedelta(flat_num_dates_ns_int, 'ns') +
173+
ref_date).values
159174

160175
except (OutOfBoundsDatetime, OverflowError):
161-
dates = _decode_datetime_with_netcdf4(flat_num_dates, units, calendar)
176+
dates = _decode_datetime_with_netcdf4(flat_num_dates.astype(np.float),
177+
units,
178+
calendar)
162179

163180
return dates.reshape(num_dates.shape)
164181

0 commit comments

Comments
 (0)