Skip to content

Commit 2d1c498

Browse files
hnaz authored
and torvalds committed
mm: memcontrol: make swap tracking an integral part of memory control
Without swap page tracking, users that are otherwise memory controlled can easily escape their containment and allocate significant amounts of memory that they're not being charged for. That's because swap does readahead, but without the cgroup records of who owned the page at swapout, readahead pages don't get charged until somebody actually faults them into their page table and we can identify an owner task. This can be maliciously exploited with MADV_WILLNEED, which triggers arbitrary readahead allocations without charging the pages. Make swap page tracking an integral part of memcg and remove the Kconfig options. In the first place, it was only made configurable to allow users to save some memory. But the overhead of tracking cgroup ownership per swap page is minimal - 2 bytes per page, or 512k per 1G of swap, or 0.04%. Saving that at the expense of broken containment semantics is not something we should present as a coequal option. The swapaccount=0 boot option will continue to exist, and it will eliminate the page_counter overhead and hide the swap control files, but it won't disable swap slot ownership tracking. This patch makes sure we always have the cgroup records at swapin time; the next patch will fix the actual bug by charging readahead swap pages at swapin time rather than at fault time. v2: fix double swap charge bug in cgroup1/cgroup2 code gating [[email protected]: fix crash with cgroup_disable=memory] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Johannes Weiner <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Reviewed-by: Joonsoo Kim <[email protected]> Cc: Alex Shi <[email protected]> Cc: "Kirill A. 
Shutemov" <[email protected]> Cc: Roman Gushchin <[email protected]> Cc: Shakeel Butt <[email protected]> Cc: Balbir Singh <[email protected]> Cc: Naresh Kamboju <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Debugged-by: Hugh Dickins <[email protected]> Debugged-by: Michal Hocko <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent eccb52e commit 2d1c498

3 files changed

Lines changed: 24 additions & 52 deletions

File tree

init/Kconfig

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -819,24 +819,9 @@ config MEMCG
819819
Provides control over the memory footprint of tasks in a cgroup.
820820

821821
config MEMCG_SWAP
822-
bool "Swap controller"
822+
bool
823823
depends on MEMCG && SWAP
824-
help
825-
Provides control over the swap space consumed by tasks in a cgroup.
826-
827-
config MEMCG_SWAP_ENABLED
828-
bool "Swap controller enabled by default"
829-
depends on MEMCG_SWAP
830824
default y
831-
help
832-
Memory Resource Controller Swap Extension comes with its price in
833-
a bigger memory consumption. General purpose distribution kernels
834-
which want to enable the feature but keep it disabled by default
835-
and let the user enable it by swapaccount=1 boot command line
836-
parameter should have this option unselected.
837-
For those who want to have the feature enabled by default should
838-
select this option (if, for some reason, they need to disable it
839-
then swapaccount=0 does the trick).
840825

841826
config MEMCG_KMEM
842827
bool

mm/memcontrol.c

Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,10 @@ static bool cgroup_memory_nokmem;
8383

8484
/* Whether the swap controller is active */
8585
#ifdef CONFIG_MEMCG_SWAP
86-
#ifdef CONFIG_MEMCG_SWAP_ENABLED
8786
bool cgroup_memory_noswap __read_mostly;
8887
#else
89-
bool cgroup_memory_noswap __read_mostly = 1;
90-
#endif /* CONFIG_MEMCG_SWAP_ENABLED */
91-
#else
9288
#define cgroup_memory_noswap 1
93-
#endif /* CONFIG_MEMCG_SWAP */
89+
#endif
9490

9591
#ifdef CONFIG_CGROUP_WRITEBACK
9692
static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
@@ -5360,8 +5356,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
53605356
* we call find_get_page() with swapper_space directly.
53615357
*/
53625358
page = find_get_page(swap_address_space(ent), swp_offset(ent));
5363-
if (do_memsw_account())
5364-
entry->val = ent.val;
5359+
entry->val = ent.val;
53655360

53665361
return page;
53675362
}
@@ -5395,8 +5390,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
53955390
page = find_get_entry(mapping, pgoff);
53965391
if (xa_is_value(page)) {
53975392
swp_entry_t swp = radix_to_swp_entry(page);
5398-
if (do_memsw_account())
5399-
*entry = swp;
5393+
*entry = swp;
54005394
page = find_get_page(swap_address_space(swp),
54015395
swp_offset(swp));
54025396
}
@@ -6529,6 +6523,9 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask,
65296523
goto out;
65306524

65316525
if (PageSwapCache(page)) {
6526+
swp_entry_t ent = { .val = page_private(page), };
6527+
unsigned short id;
6528+
65326529
/*
65336530
* Every swap fault against a single page tries to charge the
65346531
* page, bail as early as possible. shmem_unuse() encounters
@@ -6540,17 +6537,12 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask,
65406537
if (compound_head(page)->mem_cgroup)
65416538
goto out;
65426539

6543-
if (!cgroup_memory_noswap) {
6544-
swp_entry_t ent = { .val = page_private(page), };
6545-
unsigned short id;
6546-
6547-
id = lookup_swap_cgroup_id(ent);
6548-
rcu_read_lock();
6549-
memcg = mem_cgroup_from_id(id);
6550-
if (memcg && !css_tryget_online(&memcg->css))
6551-
memcg = NULL;
6552-
rcu_read_unlock();
6553-
}
6540+
id = lookup_swap_cgroup_id(ent);
6541+
rcu_read_lock();
6542+
memcg = mem_cgroup_from_id(id);
6543+
if (memcg && !css_tryget_online(&memcg->css))
6544+
memcg = NULL;
6545+
rcu_read_unlock();
65546546
}
65556547

65566548
if (!memcg)
@@ -6567,7 +6559,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask,
65676559
memcg_check_events(memcg, page);
65686560
local_irq_enable();
65696561

6570-
if (do_memsw_account() && PageSwapCache(page)) {
6562+
if (PageSwapCache(page)) {
65716563
swp_entry_t entry = { .val = page_private(page) };
65726564
/*
65736565
* The swap entry might not get freed for a long time,
@@ -6952,7 +6944,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
69526944
VM_BUG_ON_PAGE(PageLRU(page), page);
69536945
VM_BUG_ON_PAGE(page_count(page), page);
69546946

6955-
if (!do_memsw_account())
6947+
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
69566948
return;
69576949

69586950
memcg = page->mem_cgroup;
@@ -6981,7 +6973,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
69816973
if (!mem_cgroup_is_root(memcg))
69826974
page_counter_uncharge(&memcg->memory, nr_entries);
69836975

6984-
if (memcg != swap_memcg) {
6976+
if (!cgroup_memory_noswap && memcg != swap_memcg) {
69856977
if (!mem_cgroup_is_root(swap_memcg))
69866978
page_counter_charge(&swap_memcg->memsw, nr_entries);
69876979
page_counter_uncharge(&memcg->memsw, nr_entries);
@@ -7017,7 +7009,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
70177009
struct mem_cgroup *memcg;
70187010
unsigned short oldid;
70197011

7020-
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || cgroup_memory_noswap)
7012+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
70217013
return 0;
70227014

70237015
memcg = page->mem_cgroup;
@@ -7033,7 +7025,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
70337025

70347026
memcg = mem_cgroup_id_get_online(memcg);
70357027

7036-
if (!mem_cgroup_is_root(memcg) &&
7028+
if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg) &&
70377029
!page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
70387030
memcg_memory_event(memcg, MEMCG_SWAP_MAX);
70397031
memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
@@ -7061,14 +7053,11 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
70617053
struct mem_cgroup *memcg;
70627054
unsigned short id;
70637055

7064-
if (cgroup_memory_noswap)
7065-
return;
7066-
70677056
id = swap_cgroup_record(entry, 0, nr_pages);
70687057
rcu_read_lock();
70697058
memcg = mem_cgroup_from_id(id);
70707059
if (memcg) {
7071-
if (!mem_cgroup_is_root(memcg)) {
7060+
if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg)) {
70727061
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
70737062
page_counter_uncharge(&memcg->swap, nr_pages);
70747063
else
@@ -7253,7 +7242,11 @@ static struct cftype memsw_files[] = {
72537242

72547243
static int __init mem_cgroup_swap_init(void)
72557244
{
7256-
if (mem_cgroup_disabled() || cgroup_memory_noswap)
7245+
/* No memory control -> no swap control */
7246+
if (mem_cgroup_disabled())
7247+
cgroup_memory_noswap = true;
7248+
7249+
if (cgroup_memory_noswap)
72577250
return 0;
72587251

72597252
WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files));

mm/swap_cgroup.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,6 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
171171
unsigned long length;
172172
struct swap_cgroup_ctrl *ctrl;
173173

174-
if (cgroup_memory_noswap)
175-
return 0;
176-
177174
length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
178175
array_size = length * sizeof(void *);
179176

@@ -209,9 +206,6 @@ void swap_cgroup_swapoff(int type)
209206
unsigned long i, length;
210207
struct swap_cgroup_ctrl *ctrl;
211208

212-
if (cgroup_memory_noswap)
213-
return;
214-
215209
mutex_lock(&swap_cgroup_mutex);
216210
ctrl = &swap_cgroup_ctrl[type];
217211
map = ctrl->map;

0 commit comments

Comments
 (0)