@@ -494,6 +494,12 @@ inline void write8(char* buffer, uint64_t value) noexcept {
494494 memcpy (buffer, &value, 8 );
495495}
496496
// Reads 8 bytes starting at `buffer` into a uint64_t, in native byte order.
// The copy goes through memcpy, so `buffer` does not need to be aligned.
// The caller must guarantee that at least 8 bytes are readable at `buffer`.
// The pointer is const-qualified because the buffer is only read; mutable
// `char*` arguments still convert implicitly.
inline auto read8(const char* buffer) noexcept -> uint64_t {
  uint64_t result;
  memcpy(&result, buffer, sizeof(result));
  return result;
}
502+
497503// Writes a significand consisting of up to 9 decimal digits (7-9 for normals)
498504// and removes trailing zeros.
499505auto write_significand9 (char * buffer, uint32_t value, bool has9digits) noexcept
@@ -513,16 +519,15 @@ auto write_significand9(char* buffer, uint32_t value, bool has9digits) noexcept
513519auto write_significand17 (char * buffer, uint64_t value, bool has17digits,
514520 long long value_div10) noexcept -> char* {
515521 if (!ZMIJ_USE_NEON && !ZMIJ_USE_SSE) {
516- char * start = buffer + 1 ;
517522 // Digits/pairs of digits are denoted by letters: value = abbccddeeffgghhii.
518523 uint32_t abbccddee = uint32_t (value / 100'000'000 );
519524 uint32_t ffgghhii = uint32_t (value % 100'000'000 );
520- buffer = write_if (start , abbccddee / 100'000'000 , has17digits);
525+ buffer = write_if (buffer , abbccddee / 100'000'000 , has17digits);
521526 uint64_t bcd = to_bcd8 (abbccddee % 100'000'000 );
522527 write8 (buffer, bcd | zeros);
523528 if (ffgghhii == 0 ) {
524- buffer += count_trailing_nonzeros (bcd );
525- return buffer - int (buffer - start == 1 );
529+ write8 ( buffer + 8 , zeros );
530+ return buffer + count_trailing_nonzeros (bcd );
526531 }
527532 bcd = to_bcd8 (ffgghhii);
528533 write8 (buffer + 8 , bcd | zeros);
@@ -560,8 +565,7 @@ auto write_significand17(char* buffer, uint64_t value, bool has17digits,
560565 uint64_t a = uint64_t (umul128 (abbccddee, c->mul_const ) >> 90 );
561566 uint64_t bbccddee = abbccddee - a * hundred_million;
562567
563- char * start = buffer + 1 ;
564- buffer = write_if (start, a, has17digits);
568+ buffer = write_if (buffer, a, has17digits);
565569
566570 uint64x1_t ffgghhii_bbccddee_64 = {(uint64_t (ffgghhii) << 32 ) | bbccddee};
567571 int32x2_t bbccddee_ffgghhii = vreinterpret_s32_u64 (ffgghhii_bbccddee_64);
@@ -596,7 +600,7 @@ auto write_significand17(char* buffer, uint64_t value, bool has17digits,
596600 vget_lane_u64 (vreinterpret_u64_u8 (vshrn_n_u16 (is_not_zero, 4 )), 0 );
597601
598602 buffer += 16 - ((zeroes != 0 ? clz (zeroes) : 64 ) >> 2 );
599- return buffer - int (buffer - start == 1 ) ;
603+ return buffer;
600604#elif ZMIJ_USE_SSE
601605 uint32_t last_digit = value - value_div10 * 10 ;
602606
@@ -692,7 +696,7 @@ auto write_significand17(char* buffer, uint64_t value, bool has17digits,
692696 auto len = size_t (64 ) - clz (mask); // size_t for native arithmetic
693697
694698 _mm_storeu_si128 (reinterpret_cast <__m128i*>(buffer), digits);
695- return buffer + (( last_digit != 0 ) ? 17 : len - (len == 1 ) );
699+ return buffer + (last_digit != 0 ? 17 : len);
696700#endif // ZMIJ_USE_SSE
697701}
698702
@@ -928,7 +932,41 @@ auto write(Float value, char* buffer) noexcept -> char* {
928932 if (traits::num_bits == 64 ) {
929933 bool has17digits = dec.sig >= uint64_t (1e16 );
930934 dec_exp += traits::max_digits10 - 2 + has17digits;
931- buffer = write_significand17 (buffer, dec.sig , has17digits, dec.sig_div10 );
935+
936+ if (dec_exp >= -4 && dec_exp < 0 ) {
937+ memcpy (buffer, " 0.0000000" , 8 );
938+ buffer = write_significand17 (buffer + 1 - dec_exp, dec.sig , has17digits,
939+ dec.sig_div10 );
940+ *buffer = ' \0 ' ;
941+ return buffer;
942+ }
943+
944+ // Could merge this path with the scientific path, or increase the upper
945+ // bound if this branch is bad on real world data.
946+ if (dec_exp >= 0 && dec_exp < 16 ) {
947+ // Avoid reading uninitialized memory (would be unnecessary in asm).
948+ write8 (buffer + 16 , 0 );
949+
950+ buffer = write_significand17 (buffer, dec.sig , has17digits, dec.sig_div10 );
951+
952+ // Branchless move to make space for the '.' without OOB accesses.
953+ char * part1 = start + dec_exp + (dec_exp < 2 );
954+ char * part2 = part1 + (dec_exp < 2 ) + (dec_exp < 9 ? 7 : 0 );
955+ uint64_t value1 = read8 (part1);
956+ uint64_t value2 = read8 (part2);
957+ write8 (part1 + 1 , value1);
958+ write8 (part2 + 1 , value2);
959+
960+ char * dot = start + dec_exp + 1 ;
961+ *dot = ' .' ;
962+
963+ buffer = buffer > dot ? buffer + 1 : dot;
964+ *buffer = ' \0 ' ;
965+ return buffer;
966+ }
967+
968+ buffer =
969+ write_significand17 (buffer + 1 , dec.sig , has17digits, dec.sig_div10 );
932970 } else {
933971 if (dec.sig < uint32_t (1e7 )) [[ZMIJ_UNLIKELY]] {
934972 dec.sig *= 10 ;
@@ -940,6 +978,7 @@ auto write(Float value, char* buffer) noexcept -> char* {
940978 }
941979 start[0 ] = start[1 ];
942980 start[1 ] = ' .' ;
981+ buffer -= (buffer - 1 == start + 1 ); // Remove trailing point.
943982
944983 // Write exponent.
945984 uint16_t e_sign = dec_exp >= 0 ? (' +' << 8 | ' e' ) : (' -' << 8 | ' e' );
0 commit comments