Skip to content

Commit 0bb4006

Browse files
Milad Fa authored and V8 LUCI CQ committed
PPC: Optimize count of trailing zeros on P8 and below
Change-Id: Iff669f6272e2a95d5150108d5a3a77e903afbeb9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3275568
Reviewed-by: Junliang Yan <[email protected]>
Commit-Queue: Milad Fa <[email protected]>
Cr-Commit-Position: refs/heads/main@{#77874}
1 parent 42036e7 commit 0bb4006

File tree

2 files changed

+22
-14
lines changed

2 files changed

+22
-14
lines changed

src/codegen/ppc/macro-assembler-ppc.cc

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3768,33 +3768,39 @@ void TurboAssembler::CountLeadingZerosU64(Register dst, Register src, RCBit r) {
37683768
cntlzd(dst, src, r);
37693769
}
37703770

3771+
// Software fallback that counts trailing zero bits with a bit-by-bit loop.
//
// Parameters:
//   max_count - upper bound on loop iterations; it is loaded into CTR, so it
//               is also the result when no set bit is found in that many bits.
//   scratch1  - receives a working copy of `src` and is shifted right by one
//               each iteration (it is also used briefly to load max_count).
//   scratch2  - receives the result of masking the working copy with 1.
//
// Besides its arguments, the expansion uses the names `dst` and `src`
// directly, so both must be in scope at the expansion site. It also declares
// the labels `loop` and `done` in the enclosing scope.
//
// Registers written: dst, scratch1, scratch2 and CTR (via mtctr/bdnz).
// NOTE(review): the branch after andi tests cr0, so andi is presumed to be the
// record form that updates cr0 - confirm against the assembler.
// NOTE(review): `src` is copied before `dst` is zeroed, so dst == src looks
// safe, but dst must not alias scratch1 (zeroing dst would wipe the working
// copy) - confirm that callers respect this.
#define COUNT_TRAILING_ZEROES_SLOW(max_count, scratch1, scratch2) \
3772+
Label loop, done; \
3773+
li(scratch1, Operand(max_count)); \
3774+
mtctr(scratch1); \
3775+
mr(scratch1, src); \
3776+
li(dst, Operand::Zero()); \
3777+
bind(&loop); /* while ((src & 1) == 0) */ \
3778+
andi(scratch2, scratch1, Operand(1)); \
3779+
bne(&done, cr0); \
3780+
srdi(scratch1, scratch1, Operand(1)); /* src >>= 1;*/ \
3781+
addi(dst, dst, Operand(1)); /* dst++ */ \
3782+
bdnz(&loop); \
3783+
bind(&done);
37713784
// Emits code that writes the number of trailing zero bits of the 32-bit value
// in `src` to `dst`.
//
// Reading this diff: marker lines ending in '-' precede lines removed by the
// commit, marker lines ending in '+' precede lines it adds, and two-number
// markers precede unchanged context.
//
// When PPC_9_PLUS is supported a single cnttzw is emitted. Otherwise:
//   - before the commit: two temporaries were picked, pushed, used to
//     bit-reverse `src` into `dst`, popped, and cntlzw was applied to `dst`;
//   - after the commit: the caller-supplied scratch1/scratch2 are handed to
//     the COUNT_TRAILING_ZEROES_SLOW loop with a bound of 32.
void TurboAssembler::CountTrailingZerosU32(Register dst, Register src,
3785+
Register scratch1, Register scratch2,
37723786
RCBit r) {
37733787
if (CpuFeatures::IsSupported(PPC_9_PLUS)) {
37743788
cnttzw(dst, src, r);
37753789
} else {
3776-
Register scratch1 = GetRegisterThatIsNotOneOf(dst, src, sp);
3777-
Register scratch2 = GetRegisterThatIsNotOneOf(dst, src, sp, scratch1);
3778-
Push(scratch1, scratch2);
3779-
ReverseBitsU32(dst, src, scratch1, scratch2);
3780-
Pop(scratch1, scratch2);
3781-
cntlzw(dst, dst, r);
3790+
// NOTE(review): unlike the removed cntlzw(dst, dst, r), the macro expansion
// below never references `r`, so the RC request is not forwarded on this
// path - confirm that no caller depends on it.
COUNT_TRAILING_ZEROES_SLOW(32, scratch1, scratch2);
37823791
}
37833792
}
37843793

37853794
// Emits code that writes the number of trailing zero bits of the 64-bit value
// in `src` to `dst`.
//
// Reading this diff: marker lines ending in '-' precede lines removed by the
// commit, marker lines ending in '+' precede lines it adds, and two-number
// markers precede unchanged context.
//
// When PPC_9_PLUS is supported a single cnttzd is emitted. Otherwise:
//   - before the commit: two temporaries were picked, pushed, used to
//     bit-reverse `src` into `dst`, popped, and cntlzd was applied to `dst`;
//   - after the commit: the caller-supplied scratch1/scratch2 are handed to
//     the COUNT_TRAILING_ZEROES_SLOW loop with a bound of 64.
void TurboAssembler::CountTrailingZerosU64(Register dst, Register src,
3795+
Register scratch1, Register scratch2,
37863796
RCBit r) {
37873797
if (CpuFeatures::IsSupported(PPC_9_PLUS)) {
37883798
cnttzd(dst, src, r);
37893799
} else {
3790-
Register scratch1 = GetRegisterThatIsNotOneOf(dst, src, sp);
3791-
Register scratch2 = GetRegisterThatIsNotOneOf(dst, src, sp, scratch1);
3792-
Push(scratch1, scratch2);
3793-
ReverseBitsU64(dst, src, scratch1, scratch2);
3794-
Pop(scratch1, scratch2);
3795-
cntlzd(dst, dst, r);
3800+
// NOTE(review): unlike the removed cntlzd(dst, dst, r), the macro expansion
// below never references `r`, so the RC request is not forwarded on this
// path - confirm that no caller depends on it.
COUNT_TRAILING_ZEROES_SLOW(64, scratch1, scratch2);
37963801
}
37973802
}
3803+
#undef COUNT_TRAILING_ZEROES_SLOW
37983804

37993805
void TurboAssembler::ClearByteU64(Register dst, int byte_idx) {
38003806
CHECK(0 <= byte_idx && byte_idx <= 7);

src/codegen/ppc/macro-assembler-ppc.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -263,8 +263,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
263263

264264
void CountLeadingZerosU32(Register dst, Register src, RCBit r = LeaveRC);
265265
void CountLeadingZerosU64(Register dst, Register src, RCBit r = LeaveRC);
266-
void CountTrailingZerosU32(Register dst, Register src, RCBit r = LeaveRC);
267-
void CountTrailingZerosU64(Register dst, Register src, RCBit r = LeaveRC);
266+
void CountTrailingZerosU32(Register dst, Register src, Register scratch1 = ip,
267+
Register scratch2 = r0, RCBit r = LeaveRC);
268+
void CountTrailingZerosU64(Register dst, Register src, Register scratch1 = ip,
269+
Register scratch2 = r0, RCBit r = LeaveRC);
268270

269271
void ClearByteU64(Register dst, int byte_idx);
270272
void ReverseBitsU64(Register dst, Register src, Register scratch1,

0 commit comments

Comments
 (0)