@@ -459,6 +459,9 @@ constexpr uint32_t neg10k = uint32_t((1ull << 32) - 10000);
// Multiply-and-shift constants used by to_bcd8 to split packed decimal lanes
// without a division instruction. For a lane value x, to_bcd8 forms a
// quotient as (x * divN_sig) >> divN_exp, masks it, and adds negN * quotient
// back onto x. Because negN == 2^k - N, that addition subtracts N * quotient
// from the current lane and deposits the quotient k bits higher.

// Division by 100: div100_sig is 2^19 / 100 rounded up (5243).
constexpr int div100_exp = 19;
constexpr uint32_t div100_sig = (1 << div100_exp) / 100 + 1;
// 2^16 - 100: the quotient lands 16 bits above the remainder.
constexpr uint32_t neg100 = (1 << 16) - 100;

// Division by 10: div10_sig is 2^10 / 10 rounded up (103 == 0x67).
constexpr int div10_exp = 10;
constexpr uint32_t div10_sig = (1 << div10_exp) / 10 + 1;
// 2^8 - 10 (== 0x100 - 10): the quotient lands 8 bits above the remainder.
constexpr uint32_t neg10 = (1 << 8) - 10;

// The character '0' replicated into each of the eight bytes of a 64-bit word.
// This must be the single-character literal '0'; a multi-character literal
// such as ' 0' has an implementation-defined value and breaks the pattern.
constexpr uint64_t zeros = 0x0101010101010101u * '0';
464467
@@ -477,7 +480,7 @@ auto to_bcd8(uint64_t abcdefgh) noexcept -> uint64_t {
477480 neg100 * (((abcd_efgh * div100_sig) >> div100_exp) & 0x7f0000007f );
478481 uint64_t a_b_c_d_e_f_g_h =
479482 ab_cd_ef_gh +
480- ( 0x100 - 10 ) * (((ab_cd_ef_gh * 0x67 ) >> 10 ) & 0xf000f000f000f );
483+ neg10 * (((ab_cd_ef_gh * div10_sig ) >> div10_exp ) & 0xf000f000f000f );
481484 return is_big_endian () ? a_b_c_d_e_f_g_h : bswap64 (a_b_c_d_e_f_g_h);
482485}
483486
@@ -522,23 +525,22 @@ auto write_significand17(char* buffer, uint64_t value) noexcept -> char* {
522525 // We could probably make this bit faster, but we're preferring to
523526 // reuse the constants for now.
524527 uint64_t a = uint64_t (umul128 (abbccddee, c->mul_const ) >> 90 );
525- abbccddee -= a * hundred_million;
528+ uint64_t bbccddee = abbccddee - a * hundred_million;
526529
527530 char * start = buffer;
528531 buffer = write_if_nonzero (buffer, a);
529532
530- uint64x1_t hundredmillions64 = {abbccddee | (uint64_t (ffgghhii) << 32 )};
531- int32x2_t hundredmillions32 = vreinterpret_s32_u64 (hundredmillions64 );
533+ uint64x1_t ffgghhii_bbccddee64 = {(uint64_t (ffgghhii) << 32 ) | bbccddee };
534+ int32x2_t ffgghhii_bbccddee = vreinterpret_s32_u64 (ffgghhii_bbccddee64 );
532535
533- int32x2_t high_10000 = vreinterpret_s32_u32 (
536+ int32x2_t quo10k = vreinterpret_s32_u32 (
534537 vshr_n_u32 (vreinterpret_u32_s32 (
535- vqdmulh_n_s32 (hundredmillions32 , c->multipliers32 [0 ])),
538+ vqdmulh_n_s32 (ffgghhii_bbccddee , c->multipliers32 [0 ])),
536539 9 ));
537- int32x2_t tenthousands =
538- vmla_n_s32 (hundredmillions32, high_10000, c->multipliers32 [1 ]);
540+ int32x2_t rem10k = vmla_n_s32 (ffgghhii_bbccddee, quo10k, c->multipliers32 [1 ]);
539541
540542 int32x4_t extended =
541- vreinterpretq_s32_u32 (vshll_n_u16 (vreinterpret_u16_s32 (tenthousands ), 0 ));
543+ vreinterpretq_s32_u32 (vshll_n_u16 (vreinterpret_u16_s32 (rem10k ), 0 ));
542544
543545 // Compiler barrier, or clang breaks the subsequent MLA into UADDW + MUL.
544546 ZMIJ_ASM ((" " : " +w" (extended)));
0 commit comments