Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 35 additions & 37 deletions cpp/src/arrow/compute/row/compare_internal_avx2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,40 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2(
}
}

namespace {

// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
// it as negative offset and gather the data from undesired address. To avoid this issue,
// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
// intrinsics are safe.

constexpr uint64_t kTwoGB = 0x80000000ull;

template <uint32_t kScale>
inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
int const* normalized_base = base + kTwoGB / sizeof(int);
__m256i normalized_offset =
_mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
return _mm256_i32gather_epi32(normalized_base, normalized_offset,
static_cast<int>(kScale));
}

template <uint32_t kScale>
inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
__m128i offset) {
arrow::util::int64_for_gather_t const* normalized_base =
base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
__m128i normalized_offset =
_mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
return _mm256_i32gather_epi64(normalized_base, normalized_offset,
static_cast<int>(kScale));
}

} // namespace

template <bool use_selection, class COMPARE8_FN>
uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
uint32_t offset_within_row, uint32_t num_rows_to_compare,
Expand Down Expand Up @@ -236,10 +270,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
irow_right =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(left_to_right_map) + i);
}
// TODO: Need to test if this gather is OK when irow_right is larger than
// 0x80000000u.
__m256i offset_right =
_mm256_i32gather_epi32((const int*)offsets_right, irow_right, 4);
UnsignedOffsetSafeGather32<4>((int const*)offsets_right, irow_right);
offset_right = _mm256_add_epi32(offset_right, _mm256_set1_epi32(offset_within_row));

reinterpret_cast<uint64_t*>(match_bytevector)[i] =
Expand All @@ -253,40 +285,6 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
}
}

namespace {

// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
// it as negative offset and gather the data from undesired address. To avoid this issue,
// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
// intrinsics are safe.

constexpr uint64_t kTwoGB = 0x80000000ull;

template <uint32_t kScale>
inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
int const* normalized_base = base + kTwoGB / sizeof(int);
__m256i normalized_offset =
_mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
return _mm256_i32gather_epi32(normalized_base, normalized_offset,
static_cast<int>(kScale));
}

template <uint32_t kScale>
inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
__m128i offset) {
arrow::util::int64_for_gather_t const* normalized_base =
base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
__m128i normalized_offset =
_mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
return _mm256_i32gather_epi64(normalized_base, normalized_offset,
static_cast<int>(kScale));
}

} // namespace

template <int column_width>
inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* right_base,
__m256i irow_left, __m256i offset_right,
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/row/row_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from,
// Varying-length rows
auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
// TODO(GH-43202): The following two variables are possibly overflowing.
uint32_t total_length = to_offsets[num_rows_];
uint32_t total_length_to_append = 0;
for (uint32_t i = 0; i < num_rows_to_append; ++i) {
Expand Down