Skip to content

Commit a943172

Browse files
azatdavidtgoldblatt
authored andcommitted
Add runtime detection for MADV_DONTNEED zeroes pages (mostly for qemu)
qemu does not support this, yet [1], and you can get very tricky assert if you will run program with jemalloc in use under qemu: <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" [1]: https://patchwork.kernel.org/patch/10576637/ Here is a simple example that shows the problem [2]: // Gist to check possible issues with MADV_DONTNEED // For example it does not supported by qemu user // There is a patch for this [1], but it hasn't been applied. // [1]: https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg05422.html #include <sys/mman.h> #include <stdio.h> #include <stddef.h> #include <assert.h> #include <string.h> int main(int argc, char **argv) { void *addr = mmap(NULL, 1<<16, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); return 1; } memset(addr, 'A', 1<<16); if (!madvise(addr, 1<<16, MADV_DONTNEED)) { puts("MADV_DONTNEED does not return error. Check memory."); for (int i = 0; i < 1<<16; ++i) { assert(((unsigned char *)addr)[i] == 0); } } else { perror("madvise"); } if (munmap(addr, 1<<16)) { perror("munmap"); return 1; } return 0; } ### unpatched qemu $ qemu-x86_64-static /tmp/test-MADV_DONTNEED MADV_DONTNEED does not return error. Check memory. test-MADV_DONTNEED: /tmp/test-MADV_DONTNEED.c:19: main: Assertion `((unsigned char *)addr)[i] == 0' failed. qemu: uncaught target signal 6 (Aborted) - core dumped Aborted (core dumped) ### patched qemu (by returning ENOSYS error) $ qemu-x86_64 /tmp/test-MADV_DONTNEED madvise: Success ### patch for qemu to return ENOSYS diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 897d20c076..5540792e0e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -11775,7 +11775,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, turns private file-backed mappings into anonymous mappings. This will break MADV_DONTNEED. This is a hint, so ignoring and returning success is ok. 
*/ - return 0; + return ENOSYS; #endif #ifdef TARGET_NR_fcntl64 case TARGET_NR_fcntl64: [2]: https://gist.github.com/azat/12ba2c825b710653ece34dba7f926ece v2: - review fixes - add opt_dont_trust_madvise v3: - review fixes - rename opt_dont_trust_madvise to opt_trust_madvise
1 parent 2e3104b commit a943172

File tree

6 files changed

+94
-2
lines changed

6 files changed

+94
-2
lines changed

doc/jemalloc.xml.in

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
950950
is <quote>disabled</quote>.</para></listitem>
951951
</varlistentry>
952952

953+
<varlistentry id="opt.trust_madvise">
954+
<term>
955+
<mallctl>opt.trust_madvise</mallctl>
956+
(<type>bool</type>)
957+
<literal>r-</literal>
958+
</term>
959+
<listitem><para>If enabled, skip the runtime check that verifies
960+
MADV_DONTNEED actually zeroes pages. The default is
961+
<quote>disabled</quote> on Linux and <quote>enabled</quote> elsewhere.
962+
</para></listitem>
963+
</varlistentry>
964+
953965
<varlistentry id="opt.retain">
954966
<term>
955967
<mallctl>opt.retain</mallctl>

include/jemalloc/internal/jemalloc_internal_externs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ extern bool malloc_slow;
1111
/* Run-time options. */
1212
extern bool opt_abort;
1313
extern bool opt_abort_conf;
14+
extern bool opt_trust_madvise;
1415
extern bool opt_confirm_conf;
1516
extern bool opt_hpa;
1617
extern size_t opt_hpa_slab_max_alloc;

src/ctl.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ CTL_PROTO(config_utrace)
9090
CTL_PROTO(config_xmalloc)
9191
CTL_PROTO(opt_abort)
9292
CTL_PROTO(opt_abort_conf)
93+
CTL_PROTO(opt_trust_madvise)
9394
CTL_PROTO(opt_confirm_conf)
9495
CTL_PROTO(opt_hpa)
9596
CTL_PROTO(opt_hpa_slab_max_alloc)
@@ -372,6 +373,7 @@ static const ctl_named_node_t config_node[] = {
372373
static const ctl_named_node_t opt_node[] = {
373374
{NAME("abort"), CTL(opt_abort)},
374375
{NAME("abort_conf"), CTL(opt_abort_conf)},
376+
{NAME("trust_madvise"), CTL(opt_trust_madvise)},
375377
{NAME("confirm_conf"), CTL(opt_confirm_conf)},
376378
{NAME("hpa"), CTL(opt_hpa)},
377379
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
@@ -2045,6 +2047,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
20452047

20462048
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
20472049
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
2050+
CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool)
20482051
CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
20492052
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
20502053
CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t)

src/jemalloc.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ bool opt_junk_free =
9494
false
9595
#endif
9696
;
97+
bool opt_trust_madvise =
98+
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
99+
false
100+
#else
101+
true
102+
#endif
103+
;
97104

98105
zero_realloc_action_t opt_zero_realloc_action =
99106
zero_realloc_action_strict;
@@ -1256,6 +1263,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
12561263

12571264
CONF_HANDLE_BOOL(opt_abort, "abort")
12581265
CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
1266+
CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise")
12591267
if (strncmp("metadata_thp", k, klen) == 0) {
12601268
int i;
12611269
bool match = false;

src/pages.c

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,57 @@ thp_mode_t init_system_thp_mode;
4242
/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
4343
static bool pages_can_purge_lazy_runtime = true;
4444

45+
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
46+
static int madvise_dont_need_zeros_is_faulty = -1;
47+
/**
48+
* Check that MADV_DONTNEED will actually zero pages on subsequent access.
49+
*
50+
* Since qemu does not support this, yet [1], and you can get very tricky
51+
* assert if you will run program with jemalloc in use under qemu:
52+
*
53+
* <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0"
54+
*
55+
* [1]: https://patchwork.kernel.org/patch/10576637/
56+
*/
57+
static int madvise_MADV_DONTNEED_zeroes_pages()
58+
{
59+
int works = -1;
60+
size_t size = PAGE;
61+
62+
void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
63+
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
64+
65+
if (addr == MAP_FAILED) {
66+
malloc_write("<jemalloc>: Cannot allocate memory for "
67+
"MADV_DONTNEED check\n");
68+
if (opt_abort) {
69+
abort();
70+
}
71+
}
72+
73+
memset(addr, 'A', size);
74+
if (madvise(addr, size, MADV_DONTNEED) == 0) {
75+
works = memchr(addr, 'A', size) == NULL;
76+
} else {
77+
/*
78+
* If madvise() does not support MADV_DONTNEED, then we can
79+
* call it anyway, and use it's return code.
80+
*/
81+
works = 1;
82+
}
83+
84+
if (munmap(addr, size) != 0) {
85+
malloc_write("<jemalloc>: Cannot deallocate memory for "
86+
"MADV_DONTNEED check\n");
87+
if (opt_abort) {
88+
abort();
89+
}
90+
}
91+
92+
return works;
93+
}
94+
#endif
95+
4596
/******************************************************************************/
4697
/*
4798
* Function prototypes for static functions that are referenced prior to
@@ -351,10 +402,12 @@ pages_purge_forced(void *addr, size_t size) {
351402

352403
#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
353404
defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
354-
return (madvise(addr, size, MADV_DONTNEED) != 0);
405+
return (unlikely(madvise_dont_need_zeros_is_faulty) ||
406+
madvise(addr, size, MADV_DONTNEED) != 0);
355407
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
356408
defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
357-
return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
409+
return (unlikely(madvise_dont_need_zeros_is_faulty) ||
410+
posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
358411
#elif defined(JEMALLOC_MAPS_COALESCE)
359412
/* Try to overlay a new demand-zeroed mapping. */
360413
return pages_commit(addr, size);
@@ -642,6 +695,20 @@ pages_boot(void) {
642695
return true;
643696
}
644697

698+
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
699+
if (!opt_trust_madvise) {
700+
madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
701+
if (madvise_dont_need_zeros_is_faulty) {
702+
malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
703+
malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
704+
}
705+
} else {
706+
/* In case opt_trust_madvise is enabled,
707+
* do not do the runtime check. */
708+
madvise_dont_need_zeros_is_faulty = 0;
709+
}
710+
#endif
711+
645712
#ifndef _WIN32
646713
mmap_flags = MAP_PRIVATE | MAP_ANON;
647714
#endif

test/unit/mallctl.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) {
279279

280280
TEST_MALLCTL_OPT(bool, abort, always);
281281
TEST_MALLCTL_OPT(bool, abort_conf, always);
282+
TEST_MALLCTL_OPT(bool, trust_madvise, always);
282283
TEST_MALLCTL_OPT(bool, confirm_conf, always);
283284
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
284285
TEST_MALLCTL_OPT(bool, retain, always);

0 commit comments

Comments
 (0)