@@ -68,6 +68,12 @@ static_assert(!ZMIJ_USE_SSE4_1 || ZMIJ_USE_SSE);
6868# define ZMIJ_AARCH64 0
6969#endif
7070
// ZMIJ_X86_64 is always defined: 1 when the compiler predefines __x86_64__,
// 0 otherwise. Because it always has a value it can be tested in ordinary
// expressions (e.g. `if (!ZMIJ_X86_64)`) as well as in #if directives.
71+ #ifdef __x86_64__
72+ # define ZMIJ_X86_64 1
73+ #else
74+ # define ZMIJ_X86_64 0
75+ #endif
76+
7177#ifdef _MSC_VER
7278# define ZMIJ_MSC_VER _MSC_VER
7379# include < intrin.h> // __lzcnt64/_umul128/__umulh
@@ -171,6 +177,18 @@ inline auto clz(uint64_t x) noexcept -> int {
171177#endif
172178}
173179
180+ // Selects between two values: returns true_value if condition != 0,
// otherwise false_value.
//
// When ZMIJ_X86_64 is nonzero the choice is made by inline assembly
// (test + cmovne) instead of a conditional expression, so that path contains
// no branch instruction. When ZMIJ_X86_64 is 0 the function returns early with
// a plain ?: and the assembly below is never reached; whether that form
// compiles to a branch is left to the compiler.
181+ ZMIJ_INLINE auto select (uint64_t condition, int64_t true_value,
182+ int64_t false_value) -> int64_t {
183+ if (!ZMIJ_X86_64) return condition ? true_value : false_value;
// Operands: %0 = false_value (read-write; it doubles as the result),
// %1 = true_value, %2 = condition.
// `test %2, %2` sets ZF exactly when condition is 0; `cmovne %1, %0` then
// overwrites false_value with true_value when ZF is clear (condition != 0).
// "cc" is listed as clobbered because `test` rewrites the flags.
// NOTE(review): ZMIJ_ASM is defined outside this view - presumably it expands
// to an `asm` statement on compilers that accept this syntax; confirm.
184+ ZMIJ_ASM (
185+ volatile (" test %2, %2\n\t "
186+ " cmovne %1, %0\n\t " : //
187+ " +r" (false_value) : " r" (true_value),
188+ " r" (condition) : " cc" ));
// false_value now holds whichever of the two inputs was selected.
189+ return false_value;
190+ }
191+
174192struct uint128 {
175193 uint64_t hi;
176194 uint64_t lo;
@@ -834,13 +852,9 @@ ZMIJ_INLINE auto to_decimal_fast(UInt bin_sig, int64_t raw_exp,
834852 bool round_up = upper >= ten;
835853 int64_t shorter = int64_t (integral - digit);
836854 int64_t longer = int64_t (integral + (cmp >= 0 ));
837- if (ZMIJ_AARCH64) { // Faster version without ccmp.
838- int64_t dec_sig = scaled_sig_mod10 < scaled_half_ulp ? shorter : longer;
839- return {round_up ? shorter + 10 : dec_sig, dec_exp};
840- }
841- shorter += round_up * 10 ;
842- bool use_shorter = (scaled_sig_mod10 <= scaled_half_ulp) + round_up != 0 ;
843- return {use_shorter ? shorter : longer, dec_exp};
855+ int64_t dec_sig =
856+ select (scaled_sig_mod10 < scaled_half_ulp, shorter, longer);
857+ return {select (round_up, shorter + 10 , dec_sig), dec_exp};
844858 }
845859 return to_decimal_schubfach (bin_sig, bin_exp, regular);
846860}
@@ -874,10 +888,10 @@ auto write_fixed(char* buffer, uint64_t dec_sig, int dec_exp,
874888 write8 (part1 + 1 , read8 (part1));
875889 }
876890
877- char * dot = start + dec_exp + 1 ;
878- *dot = ' .' ;
891+ char * point = start + dec_exp + 1 ;
892+ *point = ' .' ;
879893
880- buffer = buffer > dot ? buffer + 1 : dot ;
894+ buffer = buffer > point ? buffer + 1 : point ;
881895 *buffer = ' \0 ' ;
882896 return buffer;
883897}
@@ -946,9 +960,8 @@ auto write(Float value, char* buffer) noexcept -> char* {
946960 }
947961
948962 // Write significand.
949- if (dec_exp >= -4 && dec_exp < compute_dec_exp (traits::digits + 1 )) {
963+ if (dec_exp >= -4 && dec_exp < compute_dec_exp (traits::digits + 1 ))
950964 return write_fixed<traits::num_bits>(buffer, dec.sig , dec_exp, extra_digit);
951- }
952965 char * start = buffer;
953966 buffer =
954967 write_significand<traits::num_bits>(buffer + 1 , dec.sig , extra_digit);
0 commit comments