Skip to content

Commit 6d86f0c

Browse files
committed
Use ctlz_nonzero to improve ASM from next_power_of_two
1 parent 13e2400 commit 6d86f0c

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

src/libcore/num/mod.rs

+16-1
Original file line number | Diff line number | Diff line change
@@ -1262,6 +1262,7 @@ macro_rules! uint_impl {
12621262
($SelfT:ty, $ActualT:ty, $BITS:expr,
12631263
$ctpop:path,
12641264
$ctlz:path,
1265+
$ctlz_nonzero:path,
12651266
$cttz:path,
12661267
$bswap:path,
12671268
$add_with_overflow:path,
@@ -2184,6 +2185,7 @@ macro_rules! uint_impl {
21842185
// This method cannot overflow, as in the `next_power_of_two`
21852186
// overflow cases it instead ends up returning the maximum value
21862187
// of the type, and can return 0 for 0.
2188+
#[inline]
21872189
fn one_less_than_next_power_of_two(self) -> Self {
21882190
if self <= 1 { return 0; }
21892191

@@ -2192,7 +2194,7 @@ macro_rules! uint_impl {
21922194
// (such as intel pre-haswell) have more efficient ctlz
21932195
// intrinsics when the argument is non-zero.
21942196
let p = self - 1;
2195-
let z = p.leading_zeros();
2197+
let z = unsafe { $ctlz_nonzero(p) };
21962198
<$SelfT>::max_value() >> z
21972199
}
21982200

@@ -2236,11 +2238,17 @@ macro_rules! uint_impl {
22362238
}
22372239
}
22382240

2241+
#[cfg(stage0)]
2242+
// Variant compiled under `cfg(stage0)`: forwards to the plain `intrinsics::ctlz`.
// NOTE(review): presumably the stage0 (bootstrap) compiler does not yet provide
// `intrinsics::ctlz_nonzero`, hence this fallback — confirm and remove after the
// next bootstrap snapshot.
unsafe fn ctlz_nonzero<T>(x: T) -> T { intrinsics::ctlz(x) }
2243+
#[cfg(not(stage0))]
2244+
// Variant compiled when `stage0` is not set: forwards to `intrinsics::ctlz_nonzero`.
// NOTE(review): callers are presumably required to pass a non-zero `x` (the only
// call site visible here guards with `self <= 1` before computing `self - 1`) —
// confirm the exact contract against the intrinsic's documentation.
unsafe fn ctlz_nonzero<T>(x: T) -> T { intrinsics::ctlz_nonzero(x) }
2245+
22392246
#[lang = "u8"]
22402247
impl u8 {
22412248
uint_impl! { u8, u8, 8,
22422249
intrinsics::ctpop,
22432250
intrinsics::ctlz,
2251+
ctlz_nonzero,
22442252
intrinsics::cttz,
22452253
intrinsics::bswap,
22462254
intrinsics::add_with_overflow,
@@ -2253,6 +2261,7 @@ impl u16 {
22532261
uint_impl! { u16, u16, 16,
22542262
intrinsics::ctpop,
22552263
intrinsics::ctlz,
2264+
ctlz_nonzero,
22562265
intrinsics::cttz,
22572266
intrinsics::bswap,
22582267
intrinsics::add_with_overflow,
@@ -2265,6 +2274,7 @@ impl u32 {
22652274
uint_impl! { u32, u32, 32,
22662275
intrinsics::ctpop,
22672276
intrinsics::ctlz,
2277+
ctlz_nonzero,
22682278
intrinsics::cttz,
22692279
intrinsics::bswap,
22702280
intrinsics::add_with_overflow,
@@ -2277,6 +2287,7 @@ impl u64 {
22772287
uint_impl! { u64, u64, 64,
22782288
intrinsics::ctpop,
22792289
intrinsics::ctlz,
2290+
ctlz_nonzero,
22802291
intrinsics::cttz,
22812292
intrinsics::bswap,
22822293
intrinsics::add_with_overflow,
@@ -2289,6 +2300,7 @@ impl u128 {
22892300
uint_impl! { u128, u128, 128,
22902301
intrinsics::ctpop,
22912302
intrinsics::ctlz,
2303+
ctlz_nonzero,
22922304
intrinsics::cttz,
22932305
intrinsics::bswap,
22942306
intrinsics::add_with_overflow,
@@ -2302,6 +2314,7 @@ impl usize {
23022314
uint_impl! { usize, u16, 16,
23032315
intrinsics::ctpop,
23042316
intrinsics::ctlz,
2317+
ctlz_nonzero,
23052318
intrinsics::cttz,
23062319
intrinsics::bswap,
23072320
intrinsics::add_with_overflow,
@@ -2314,6 +2327,7 @@ impl usize {
23142327
uint_impl! { usize, u32, 32,
23152328
intrinsics::ctpop,
23162329
intrinsics::ctlz,
2330+
ctlz_nonzero,
23172331
intrinsics::cttz,
23182332
intrinsics::bswap,
23192333
intrinsics::add_with_overflow,
@@ -2327,6 +2341,7 @@ impl usize {
23272341
uint_impl! { usize, u64, 64,
23282342
intrinsics::ctpop,
23292343
intrinsics::ctlz,
2344+
ctlz_nonzero,
23302345
intrinsics::cttz,
23312346
intrinsics::bswap,
23322347
intrinsics::add_with_overflow,

0 commit comments

Comments
 (0)