Skip to content

Commit 86deac0

Browse files
Backport #79847 to 25.4: fix float NaN sorting order on some platforms
1 parent 81b3cb6 commit 86deac0

File tree

3 files changed

+2165
-44
lines changed

3 files changed

+2165
-44
lines changed

src/Columns/ColumnVector.cpp

Lines changed: 38 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,36 @@ struct ColumnVector<T>::equals
173173
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
174174
};
175175

176+
/// Radix sort treats all positive NaNs to be greater than all numbers.
177+
/// And all negative NaNs to be lower that all numbers.
178+
/// Standard is not specifying which sign NaN should have, so after sort NaNs can
179+
/// be grouped on any side of the array or on both.
180+
/// Move all NaNs to the requested part of result.
181+
template <typename T>
182+
void moveNanToRequestedSide(
183+
typename ColumnVector<T>::Permutation::iterator begin,
184+
typename ColumnVector<T>::Permutation::iterator end,
185+
const typename ColumnVector<T>::Container & data,
186+
size_t data_size,
187+
bool reverse,
188+
int nan_direction_hint
189+
)
190+
{
191+
const auto is_nulls_last = ((nan_direction_hint > 0) != reverse);
192+
size_t nans_to_move = 0;
193+
194+
for (size_t i = 0; i < data_size; ++i)
195+
{
196+
if (isNaN(data[begin[is_nulls_last ? i : data_size - 1 - i]]))
197+
++nans_to_move;
198+
else
199+
break;
200+
}
201+
202+
if (nans_to_move)
203+
std::rotate(begin, begin + (is_nulls_last ? nans_to_move : data_size - nans_to_move), end);
204+
}
205+
176206
#if USE_EMBEDDED_COMPILER
177207

178208
template <typename T>
@@ -282,26 +312,8 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
282312
pairs[i] = {data[i], i};
283313

284314
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), data_size, reverse, res.data());
285-
286-
/// Radix sort treats all positive NaNs to be greater than all numbers.
287-
/// If the user needs the opposite, we must move them accordingly.
288-
if (is_floating_point<T> && nan_direction_hint < 0)
289-
{
290-
size_t nans_to_move = 0;
291-
292-
for (size_t i = 0; i < data_size; ++i)
293-
{
294-
if (isNaN(data[res[reverse ? i : data_size - 1 - i]]))
295-
++nans_to_move;
296-
else
297-
break;
298-
}
299-
300-
if (nans_to_move)
301-
{
302-
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : data_size - nans_to_move), std::end(res));
303-
}
304-
}
315+
if constexpr (is_floating_point<T>)
316+
moveNanToRequestedSide<T>(res.begin(), res.end(), data, data_size, reverse, nan_direction_hint);
305317

306318
return;
307319
}
@@ -333,16 +345,16 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
333345
{
334346
/// TODO: LSD RadixSort is currently not stable if direction is descending, or value is floating point
335347
bool use_radix_sort = (sort_is_stable && ascending && !is_floating_point<T>) || !sort_is_stable;
336-
size_t size = end - begin;
348+
size_t range_size = end - begin;
337349

338350
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
339-
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
351+
if (range_size >= 256 && range_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
340352
{
341353
bool try_sort = trySort(begin, end, pred);
342354
if (try_sort)
343355
return;
344356

345-
PaddedPODArray<ValueWithIndex<T>> pairs(size);
357+
PaddedPODArray<ValueWithIndex<T>> pairs(range_size);
346358
size_t index = 0;
347359

348360
for (auto * it = begin; it != end; ++it)
@@ -351,27 +363,9 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
351363
++index;
352364
}
353365

354-
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), size, reverse, begin);
355-
356-
/// Radix sort treats all NaNs to be greater than all numbers.
357-
/// If the user needs the opposite, we must move them accordingly.
358-
if (is_floating_point<T> && nan_direction_hint < 0)
359-
{
360-
size_t nans_to_move = 0;
361-
362-
for (size_t i = 0; i < size; ++i)
363-
{
364-
if (isNaN(data[begin[reverse ? i : size - 1 - i]]))
365-
++nans_to_move;
366-
else
367-
break;
368-
}
369-
370-
if (nans_to_move)
371-
{
372-
std::rotate(begin, begin + (reverse ? nans_to_move : size - nans_to_move), end);
373-
}
374-
}
366+
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), range_size, reverse, begin);
367+
if constexpr (is_floating_point<T>)
368+
moveNanToRequestedSide<T>(begin, end, data, range_size, reverse, nan_direction_hint);
375369

376370
return;
377371
}

0 commit comments

Comments
 (0)