Skip to content

Commit 00f634e

Browse files
committed
Another nanooptimization for Apple Silicon 𓀨 from Dougall Johnson
1 parent bb03a6f commit 00f634e

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

‎zmij.cc‎

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -886,9 +886,15 @@ auto write(Float value, char* buffer) noexcept -> char* {
886886
buffer[2] = '\0';
887887
return buffer + 2;
888888
}
889-
// 19 is faster or equal to 12 even for 3 digits.
889+
890+
#if defined(__APPLE__) && defined(__aarch64__) && ZMIJ_USE_INT128
891+
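// Use mulhi to divide by 100: 0x290000000000000 == 41 << 52, so the high
// 64 bits of the product are (dec_exp * 41) >> 12, which equals
// dec_exp / 100 for 0 <= dec_exp <= 1098 (assumes dec_exp is non-negative
// at this point -- confirm against the code above).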
892+
uint32_t digit = (uint128_t(dec_exp) * 0x290000000000000) >> 64;
893+
#else
894+
// div100_exp = 19 is at least as fast as 12, even for 3 digits.
890895
uint32_t digit =
891896
(uint32_t(dec_exp) * div100_sig) >> div100_exp; // value / 100
897+
#endif
892898
uint32_t digit_with_nuls = '0' + digit;
893899
if (is_big_endian()) digit_with_nuls <<= 24;
894900
memcpy(buffer, &digit_with_nuls, 4);

0 commit comments

Comments
 (0)