@@ -173,6 +173,36 @@ struct ColumnVector<T>::equals
173173 bool operator ()(size_t lhs, size_t rhs) const { return CompareHelper<T>::equals (parent.data [lhs], parent.data [rhs], nan_direction_hint); }
174174};
175175
176+ // / Radix sort treats all positive NaNs to be greater than all numbers.
177+ // / And all negative NaNs to be lower that all numbers.
178+ // / Standard is not specifying which sign NaN should have, so after sort NaNs can
179+ // / be grouped on any side of the array or on both.
180+ // / Move all NaNs to the requested part of result.
181+ template <typename T>
182+ void moveNanToRequestedSide (
183+ typename ColumnVector<T>::Permutation::iterator begin,
184+ typename ColumnVector<T>::Permutation::iterator end,
185+ const typename ColumnVector<T>::Container & data,
186+ size_t data_size,
187+ bool reverse,
188+ int nan_direction_hint
189+ )
190+ {
191+ const auto is_nulls_last = ((nan_direction_hint > 0 ) != reverse);
192+ size_t nans_to_move = 0 ;
193+
194+ for (size_t i = 0 ; i < data_size; ++i)
195+ {
196+ if (isNaN (data[begin[is_nulls_last ? i : data_size - 1 - i]]))
197+ ++nans_to_move;
198+ else
199+ break ;
200+ }
201+
202+ if (nans_to_move)
203+ std::rotate (begin, begin + (is_nulls_last ? nans_to_move : data_size - nans_to_move), end);
204+ }
205+
176206#if USE_EMBEDDED_COMPILER
177207
178208template <typename T>
@@ -282,26 +312,8 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
282312 pairs[i] = {data[i], i};
283313
284314 RadixSort<RadixSortTraits<T>>::executeLSD (pairs.data (), data_size, reverse, res.data ());
285-
286- // / Radix sort treats all positive NaNs to be greater than all numbers.
287- // / If the user needs the opposite, we must move them accordingly.
288- if (is_floating_point<T> && nan_direction_hint < 0 )
289- {
290- size_t nans_to_move = 0 ;
291-
292- for (size_t i = 0 ; i < data_size; ++i)
293- {
294- if (isNaN (data[res[reverse ? i : data_size - 1 - i]]))
295- ++nans_to_move;
296- else
297- break ;
298- }
299-
300- if (nans_to_move)
301- {
302- std::rotate (std::begin (res), std::begin (res) + (reverse ? nans_to_move : data_size - nans_to_move), std::end (res));
303- }
304- }
315+ if constexpr (is_floating_point<T>)
316+ moveNanToRequestedSide<T>(res.begin (), res.end (), data, data_size, reverse, nan_direction_hint);
305317
306318 return ;
307319 }
@@ -333,16 +345,16 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
333345 {
334346 // / TODO: LSD RadixSort is currently not stable if direction is descending, or value is floating point
335347 bool use_radix_sort = (sort_is_stable && ascending && !is_floating_point<T>) || !sort_is_stable;
336- size_t size = end - begin;
348+ size_t range_size = end - begin;
337349
338350 // / Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
339- if (size >= 256 && size <= std::numeric_limits<UInt32>::max () && use_radix_sort)
351+ if (range_size >= 256 && range_size <= std::numeric_limits<UInt32>::max () && use_radix_sort)
340352 {
341353 bool try_sort = trySort (begin, end, pred);
342354 if (try_sort)
343355 return ;
344356
345- PaddedPODArray<ValueWithIndex<T>> pairs (size );
357+ PaddedPODArray<ValueWithIndex<T>> pairs (range_size );
346358 size_t index = 0 ;
347359
348360 for (auto * it = begin; it != end; ++it)
@@ -351,27 +363,9 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
351363 ++index;
352364 }
353365
354- RadixSort<RadixSortTraits<T>>::executeLSD (pairs.data (), size, reverse, begin);
355-
356- // / Radix sort treats all NaNs to be greater than all numbers.
357- // / If the user needs the opposite, we must move them accordingly.
358- if (is_floating_point<T> && nan_direction_hint < 0 )
359- {
360- size_t nans_to_move = 0 ;
361-
362- for (size_t i = 0 ; i < size; ++i)
363- {
364- if (isNaN (data[begin[reverse ? i : size - 1 - i]]))
365- ++nans_to_move;
366- else
367- break ;
368- }
369-
370- if (nans_to_move)
371- {
372- std::rotate (begin, begin + (reverse ? nans_to_move : size - nans_to_move), end);
373- }
374- }
366+ RadixSort<RadixSortTraits<T>>::executeLSD (pairs.data (), range_size, reverse, begin);
367+ if constexpr (is_floating_point<T>)
368+ moveNanToRequestedSide<T>(begin, end, data, range_size, reverse, nan_direction_hint);
375369
376370 return ;
377371 }
0 commit comments