Skip to content

Commit 6589ede

Browse files
authored
Add memory barrier (#78)
Ensures constants are loaded from memory.
1 parent 43cbb46 commit 6589ede

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

zmij.cc

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -611,7 +611,7 @@ auto write_significand17(char* buffer, uint64_t value, bool has17digits,
611611
uint32_t abcdefgh = value_div10 / uint64_t(1e8);
612612
uint32_t ijklmnop = value_div10 % uint64_t(1e8);
613613

614-
alignas(64) static constexpr struct {
614+
alignas(64) static constexpr struct constants {
615615
static constexpr auto splat64(uint64_t x) -> uint128 { return {x, x}; }
616616
static constexpr auto splat32(uint32_t x) -> uint128 {
617617
return splat64(uint64_t(x) << 32 | x);
@@ -643,20 +643,24 @@ auto write_significand17(char* buffer, uint64_t value, bool has17digits,
643643
uint128 zeros = splat64(::zeros);
644644
} consts;
645645

646+
const constants* c = &consts;
647+
// Make the compiler forget where the constants came from to ensure they are loaded from memory.
648+
ZMIJ_ASM(("" : "+r"(c)));
649+
646650
using ptr = const __m128i*;
647-
const __m128i div10k = _mm_load_si128(ptr(&consts.div10k));
648-
const __m128i neg10k = _mm_load_si128(ptr(&consts.neg10k));
649-
const __m128i div100 = _mm_load_si128(ptr(&consts.div100));
650-
const __m128i div10 = _mm_load_si128(ptr(&consts.div10));
651+
const __m128i div10k = _mm_load_si128(ptr(&c->div10k));
652+
const __m128i neg10k = _mm_load_si128(ptr(&c->neg10k));
653+
const __m128i div100 = _mm_load_si128(ptr(&c->div100));
654+
const __m128i div10 = _mm_load_si128(ptr(&c->div10));
651655
# if ZMIJ_USE_SSE4_1
652-
const __m128i neg100 = _mm_load_si128(ptr(&consts.neg100));
653-
const __m128i neg10 = _mm_load_si128(ptr(&consts.neg10));
654-
const __m128i bswap = _mm_load_si128(ptr(&consts.bswap));
656+
const __m128i neg100 = _mm_load_si128(ptr(&c->neg100));
657+
const __m128i neg10 = _mm_load_si128(ptr(&c->neg10));
658+
const __m128i bswap = _mm_load_si128(ptr(&c->bswap));
655659
# else
656-
const __m128i hundred = _mm_load_si128(ptr(&consts.hundred));
657-
const __m128i moddiv10 = _mm_load_si128(ptr(&consts.moddiv10));
660+
const __m128i hundred = _mm_load_si128(ptr(&c->hundred));
661+
const __m128i moddiv10 = _mm_load_si128(ptr(&c->moddiv10));
658662
# endif
659-
const __m128i zeros = _mm_load_si128(ptr(&consts.zeros));
663+
const __m128i zeros = _mm_load_si128(ptr(&c->zeros));
660664

661665
// The BCD sequences are based on the ones provided by Xiang JunBo.
662666
__m128i x = _mm_set_epi64x(abcdefgh, ijklmnop);

0 commit comments

Comments
 (0)