Skip to content

Commit 7fbbf93

Browse files
Anton Bikineev authored and V8 LUCI CQ committed
utils: Try SIMD reduction based on vpmaxq
The reduction should be less expensive than the one based on vminvq.

Change-Id: I8f1f8f181c2fccc754007a7715de32afb03790af
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/5058808
Reviewed-by: Michael Lippautz <[email protected]>
Auto-Submit: Anton Bikineev <[email protected]>
Commit-Queue: Michael Lippautz <[email protected]>
Cr-Commit-Position: refs/heads/main@{#91167}
1 parent a77f959 commit 7fbbf93

1 file changed

Lines changed: 12 additions & 6 deletions

File tree

src/utils/utils.h

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ V8_INLINE bool OverlappingCompare(const Char* lhs, const Char* rhs,
335335
}
336336

337337
template <typename Char>
338-
V8_INLINE bool SimdMemcmp(const Char* lhs, const Char* rhs, size_t count) {
338+
V8_INLINE bool SimdMemEqual(const Char* lhs, const Char* rhs, size_t count) {
339339
static_assert(sizeof(Char) == 1);
340340
if (count == 0) {
341341
return true;
@@ -362,19 +362,25 @@ V8_INLINE bool SimdMemcmp(const Char* lhs, const Char* rhs, size_t count) {
362362
const auto lhs1 = vld1q_u8(lhs + count - sizeof(uint8x16_t));
363363
const auto rhs0 = vld1q_u8(rhs);
364364
const auto rhs1 = vld1q_u8(rhs + count - sizeof(uint8x16_t));
365-
return static_cast<bool>(
366-
vminvq_u8(vandq_u8(vceqq_u8(lhs0, rhs0), vceqq_u8(lhs1, rhs1))));
365+
const auto xored0 = veorq_u8(lhs0, rhs0);
366+
const auto xored1 = veorq_u8(lhs1, rhs1);
367+
const auto ored = vorrq_u8(xored0, xored1);
368+
return !static_cast<bool>(vgetq_lane_u64(vpmaxq_u8(ored, ored), 0));
367369
}
368370
default: // count: [33, ...]
369371
{
370372
const auto lhs0 = vld1q_u8(lhs);
371373
const auto rhs0 = vld1q_u8(rhs);
372-
if (!static_cast<bool>(vminvq_u8(vceqq_u8(lhs0, rhs0)))) return false;
374+
const auto xored = veorq_u8(lhs0, rhs0);
375+
if (static_cast<bool>(vgetq_lane_u64(vpmaxq_u8(xored, xored), 0)))
376+
return false;
373377
for (size_t i = count % sizeof(uint8x16_t); i < count;
374378
i += sizeof(uint8x16_t)) {
375379
const auto lhs0 = vld1q_u8(lhs + i);
376380
const auto rhs0 = vld1q_u8(rhs + i);
377-
if (!static_cast<bool>(vminvq_u8(vceqq_u8(lhs0, rhs0)))) return false;
381+
const auto xored = veorq_u8(lhs0, rhs0);
382+
if (static_cast<bool>(vgetq_lane_u64(vpmaxq_u8(xored, xored), 0)))
383+
return false;
378384
}
379385
return true;
380386
}
@@ -391,7 +397,7 @@ inline bool CompareCharsEqualUnsigned(const lchar* lhs, const rchar* rhs,
391397
if constexpr (sizeof(*lhs) == sizeof(*rhs)) {
392398
#if defined(V8_OPTIMIZE_WITH_NEON)
393399
if constexpr (sizeof(*lhs) == 1) {
394-
return SimdMemcmp(lhs, rhs, chars);
400+
return SimdMemEqual(lhs, rhs, chars);
395401
}
396402
#endif
397403
// memcmp compares byte-by-byte, but for equality it doesn't matter whether

0 commit comments

Comments
 (0)