Skip to content

Commit a3133ab

Browse files
committed
Get rid of poorly predicted branches
1 parent 099b427 commit a3133ab

File tree

1 file changed

+25
-12
lines changed

1 file changed

+25
-12
lines changed

zmij.cc

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ static_assert(!ZMIJ_USE_SSE4_1 || ZMIJ_USE_SSE);
6868
# define ZMIJ_AARCH64 0
6969
#endif
7070

71+
// ZMIJ_X86_64 is 1 when the compiler predefines __x86_64__ and 0 otherwise.
// It is always defined, so it can be tested in ordinary expressions such as
// `if (!ZMIJ_X86_64)` as well as in preprocessor conditionals.
#ifdef __x86_64__
72+
# define ZMIJ_X86_64 1
73+
#else
74+
# define ZMIJ_X86_64 0
75+
#endif
76+
7177
#ifdef _MSC_VER
7278
# define ZMIJ_MSC_VER _MSC_VER
7379
# include <intrin.h> // __lzcnt64/_umul128/__umulh
@@ -171,6 +177,18 @@ inline auto clz(uint64_t x) noexcept -> int {
171177
#endif
172178
}
173179

180+
// Returns true_value if condition != 0, else false_value, without branching.
//
// Parameters:
//   condition   - selector; any nonzero value picks true_value, zero picks
//                 false_value.
//   true_value  - result when condition is nonzero.
//   false_value - result when condition is zero.
//
// When ZMIJ_X86_64 is 0 the choice is an ordinary conditional expression and
// the instruction selection is left to the compiler. Otherwise the choice is
// made by a test + cmovne pair emitted through ZMIJ_ASM, so no conditional
// jump is generated for it.
181+
ZMIJ_INLINE auto select(uint64_t condition, int64_t true_value,
182+
int64_t false_value) -> int64_t {
183+
// Portable fallback for targets other than x86-64.
if (!ZMIJ_X86_64) return condition ? true_value : false_value;
184+
// Operands: %0 = false_value (read-write), %1 = true_value, %2 = condition.
// The asm uses source-then-destination operand order: %0 is the only output,
// so cmovne writes into it.
ZMIJ_ASM(
185+
// test sets the zero flag from condition & condition, i.e. ZF is clear
// exactly when condition != 0.
// NOTE(review): volatile presumably keeps the compiler from dropping or
// merging this asm statement - confirm intent.
volatile("test %2, %2\n\t"
186+
// cmovne copies true_value into the false_value register only when ZF is
// clear; otherwise the register keeps false_value.
"cmovne %1, %0\n\t" : //
187+
// "+r": false_value is both the incoming default and the outgoing result.
"+r"(false_value) : "r"(true_value),
188+
// "cc": test modifies the flags register.
"r"(condition) : "cc"));
189+
// Holds true_value if the cmovne fired, otherwise the original false_value.
return false_value;
190+
}
191+
174192
struct uint128 {
175193
uint64_t hi;
176194
uint64_t lo;
@@ -834,13 +852,9 @@ ZMIJ_INLINE auto to_decimal_fast(UInt bin_sig, int64_t raw_exp,
834852
bool round_up = upper >= ten;
835853
int64_t shorter = int64_t(integral - digit);
836854
int64_t longer = int64_t(integral + (cmp >= 0));
837-
if (ZMIJ_AARCH64) { // Faster version without ccmp.
838-
int64_t dec_sig = scaled_sig_mod10 < scaled_half_ulp ? shorter : longer;
839-
return {round_up ? shorter + 10 : dec_sig, dec_exp};
840-
}
841-
shorter += round_up * 10;
842-
bool use_shorter = (scaled_sig_mod10 <= scaled_half_ulp) + round_up != 0;
843-
return {use_shorter ? shorter : longer, dec_exp};
855+
int64_t dec_sig =
856+
select(scaled_sig_mod10 < scaled_half_ulp, shorter, longer);
857+
return {select(round_up, shorter + 10, dec_sig), dec_exp};
844858
}
845859
return to_decimal_schubfach(bin_sig, bin_exp, regular);
846860
}
@@ -874,10 +888,10 @@ auto write_fixed(char* buffer, uint64_t dec_sig, int dec_exp,
874888
write8(part1 + 1, read8(part1));
875889
}
876890

877-
char* dot = start + dec_exp + 1;
878-
*dot = '.';
891+
char* point = start + dec_exp + 1;
892+
*point = '.';
879893

880-
buffer = buffer > dot ? buffer + 1 : dot;
894+
buffer = buffer > point ? buffer + 1 : point;
881895
*buffer = '\0';
882896
return buffer;
883897
}
@@ -946,9 +960,8 @@ auto write(Float value, char* buffer) noexcept -> char* {
946960
}
947961

948962
// Write significand.
949-
if (dec_exp >= -4 && dec_exp < compute_dec_exp(traits::digits + 1)) {
963+
if (dec_exp >= -4 && dec_exp < compute_dec_exp(traits::digits + 1))
950964
return write_fixed<traits::num_bits>(buffer, dec.sig, dec_exp, extra_digit);
951-
}
952965
char* start = buffer;
953966
buffer =
954967
write_significand<traits::num_bits>(buffer + 1, dec.sig, extra_digit);

0 commit comments

Comments
 (0)