Skip to content

Commit 52390d6

Browse files
committed
Ignore kernel memory settings
This is a somewhat radical approach to deal with kernel memory. Per-cgroup kernel memory limiting was always problematic. A few examples: - older kernels had bugs and were even oopsing sometimes (the best example is the RHEL7 kernel); - the kernel is unable to reclaim kernel memory, so once the limit is hit a cgroup is toasted; - some kernel memory allocations don't allow failing. In addition to that, - users don't have a clue about how to set kernel memory limits (as the concept is much more complicated than e.g. [user] memory); - different kernels might have different kernel memory usage, which is sort of unexpected; - cgroup v2 does not have a [dedicated] kmem limit knob, and thus runc silently ignores kernel memory limits for v2; - kernel v5.4 made the cgroup v1 kmem.limit obsolete (see torvalds/linux@0158115f702b). In view of all this, and as the runtime-spec lists memory.kernel and memory.kernelTCP as OPTIONAL, let's ignore kernel memory limits (for cgroup v1, same as we're already doing for v2). This should result in fewer bugs and a better user experience. The only bad side effect of it might be that stat can show kernel memory usage as 0 (since the accounting is not enabled). [v2: add a warning in specconv that limits are ignored] Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 59ad417 commit 52390d6

File tree

15 files changed

+16
-318
lines changed

15 files changed

+16
-318
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,18 @@ sudo make install
6161
with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
6262

6363
To change build tags from the default, set the `BUILDTAGS` variable for make,
64-
e.g.
64+
e.g. to disable seccomp:
6565

6666
```bash
67-
make BUILDTAGS='seccomp'
67+
make BUILDTAGS=""
6868
```
6969

7070
| Build Tag | Feature | Enabled by default | Dependency |
7171
|-----------|------------------------------------|--------------------|------------|
7272
| seccomp | Syscall filtering | yes | libseccomp |
73-
| nokmem | disable kernel memory accounting | no | <none> |
7473

7574
The following build tags were used earlier, but are now obsoleted:
75+
- **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored)
7676
- **apparmor** (since runc v1.0.0-rc93 the feature is always enabled)
7777
- **selinux** (since runc v1.0.0-rc93 the feature is always enabled)
7878

contrib/completions/bash/runc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -737,8 +737,6 @@ _runc_update() {
737737
--cpu-share
738738
--cpuset-cpus
739739
--cpuset-mems
740-
--kernel-memory
741-
--kernel-memory-tcp
742740
--memory
743741
--memory-reservation
744742
--memory-swap

libcontainer/cgroups/fs/kmem.go

Lines changed: 0 additions & 56 deletions
This file was deleted.

libcontainer/cgroups/fs/kmem_disabled.go

Lines changed: 0 additions & 15 deletions
This file was deleted.

libcontainer/cgroups/fs/memory.go

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -33,31 +33,6 @@ func (s *MemoryGroup) Name() string {
3333
}
3434

3535
func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) {
36-
if path == "" {
37-
return nil
38-
}
39-
if memoryAssigned(d.config) {
40-
if _, err := os.Stat(path); os.IsNotExist(err) {
41-
if err := os.MkdirAll(path, 0755); err != nil {
42-
return err
43-
}
44-
// Only enable kernel memory accouting when this cgroup
45-
// is created by libcontainer, otherwise we might get
46-
// error when people use `cgroupsPath` to join an existed
47-
// cgroup whose kernel memory is not initialized.
48-
if err := EnableKernelMemoryAccounting(path); err != nil {
49-
return err
50-
}
51-
}
52-
}
53-
defer func() {
54-
if err != nil {
55-
os.RemoveAll(path)
56-
}
57-
}()
58-
59-
// We need to join memory cgroup after set memory limits, because
60-
// kmem.limit_in_bytes can only be set when the cgroup is empty.
6136
return join(path, d.pid)
6237
}
6338

@@ -140,23 +115,14 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
140115
return err
141116
}
142117

143-
if cgroup.Resources.KernelMemory != 0 {
144-
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
145-
return err
146-
}
147-
}
118+
// ignore KernelMemory and KernelMemoryTCP
148119

149120
if cgroup.Resources.MemoryReservation != 0 {
150121
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
151122
return err
152123
}
153124
}
154125

155-
if cgroup.Resources.KernelMemoryTCP != 0 {
156-
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
157-
return err
158-
}
159-
}
160126
if cgroup.Resources.OomKillDisable {
161127
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
162128
return err
@@ -238,8 +204,6 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
238204
return cgroup.Resources.Memory != 0 ||
239205
cgroup.Resources.MemoryReservation != 0 ||
240206
cgroup.Resources.MemorySwap > 0 ||
241-
cgroup.Resources.KernelMemory > 0 ||
242-
cgroup.Resources.KernelMemoryTCP > 0 ||
243207
cgroup.Resources.OomKillDisable ||
244208
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
245209
}

libcontainer/cgroups/fs/memory_test.go

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -190,62 +190,6 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
190190
}
191191
}
192192

193-
func TestMemorySetKernelMemory(t *testing.T) {
194-
helper := NewCgroupTestUtil("memory", t)
195-
defer helper.cleanup()
196-
197-
const (
198-
kernelMemoryBefore = 314572800 // 300M
199-
kernelMemoryAfter = 524288000 // 500M
200-
)
201-
202-
helper.writeFileContents(map[string]string{
203-
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
204-
})
205-
206-
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
207-
memory := &MemoryGroup{}
208-
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
209-
t.Fatal(err)
210-
}
211-
212-
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
213-
if err != nil {
214-
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
215-
}
216-
if value != kernelMemoryAfter {
217-
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
218-
}
219-
}
220-
221-
func TestMemorySetKernelMemoryTCP(t *testing.T) {
222-
helper := NewCgroupTestUtil("memory", t)
223-
defer helper.cleanup()
224-
225-
const (
226-
kernelMemoryTCPBefore = 314572800 // 300M
227-
kernelMemoryTCPAfter = 524288000 // 500M
228-
)
229-
230-
helper.writeFileContents(map[string]string{
231-
"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
232-
})
233-
234-
helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
235-
memory := &MemoryGroup{}
236-
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
237-
t.Fatal(err)
238-
}
239-
240-
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
241-
if err != nil {
242-
t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
243-
}
244-
if value != kernelMemoryTCPAfter {
245-
t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
246-
}
247-
}
248-
249193
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
250194
helper := NewCgroupTestUtil("memory", t)
251195
defer helper.cleanup()

libcontainer/cgroups/systemd/v1.go

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
1313
"github.com/opencontainers/runc/libcontainer/cgroups"
1414
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
15-
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
1615
"github.com/opencontainers/runc/libcontainer/configs"
1716
"github.com/sirupsen/logrus"
1817
)
@@ -170,14 +169,6 @@ func (m *legacyManager) Apply(pid int) error {
170169
}
171170
properties = append(properties, c.SystemdProps...)
172171

173-
// We have to set kernel memory here, as we can't change it once
174-
// processes have been attached to the cgroup.
175-
if c.Resources.KernelMemory != 0 {
176-
if err := enableKmem(c); err != nil {
177-
return err
178-
}
179-
}
180-
181172
if err := startUnit(dbusConnection, unitName, properties); err != nil {
182173
return err
183174
}
@@ -404,30 +395,6 @@ func (m *legacyManager) Set(container *configs.Config) error {
404395
return nil
405396
}
406397

407-
func enableKmem(c *configs.Cgroup) error {
408-
path, err := getSubsystemPath(c, "memory")
409-
if err != nil {
410-
if cgroups.IsNotFound(err) {
411-
return nil
412-
}
413-
return err
414-
}
415-
416-
if err := os.MkdirAll(path, 0755); err != nil {
417-
return err
418-
}
419-
// do not try to enable the kernel memory if we already have
420-
// tasks in the cgroup.
421-
content, err := fscommon.ReadFile(path, "tasks")
422-
if err != nil {
423-
return err
424-
}
425-
if len(content) > 0 {
426-
return nil
427-
}
428-
return fs.EnableKernelMemoryAccounting(path)
429-
}
430-
431398
func (m *legacyManager) GetPaths() map[string]string {
432399
m.mu.Lock()
433400
defer m.mu.Unlock()

libcontainer/configs/cgroup_linux.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,6 @@ type Resources struct {
5454
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
5555
MemorySwap int64 `json:"memory_swap"`
5656

57-
// Kernel memory limit (in bytes)
58-
KernelMemory int64 `json:"kernel_memory"`
59-
60-
// Kernel memory limit for TCP use (in bytes)
61-
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
62-
6357
// CPU shares (relative weight vs. other containers)
6458
CpuShares uint64 `json:"cpu_shares"`
6559

libcontainer/integration/exec_test.go

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -667,41 +667,6 @@ func testPids(t *testing.T, systemd bool) {
667667
// As such, we don't test that case. YMMV.
668668
}
669669

670-
func TestRunWithKernelMemory(t *testing.T) {
671-
testRunWithKernelMemory(t, false)
672-
}
673-
674-
func TestRunWithKernelMemorySystemd(t *testing.T) {
675-
if !systemd.IsRunningSystemd() {
676-
t.Skip("Systemd is unsupported")
677-
}
678-
testRunWithKernelMemory(t, true)
679-
}
680-
681-
func testRunWithKernelMemory(t *testing.T, systemd bool) {
682-
if testing.Short() {
683-
return
684-
}
685-
if cgroups.IsCgroup2UnifiedMode() {
686-
t.Skip("cgroup v2 does not support kernel memory limit")
687-
}
688-
689-
rootfs, err := newRootfs()
690-
ok(t, err)
691-
defer remove(rootfs)
692-
693-
config := newTemplateConfig(&tParam{
694-
rootfs: rootfs,
695-
systemd: systemd,
696-
})
697-
config.Cgroups.Resources.KernelMemory = 52428800
698-
699-
_, _, err = runContainer(config, "", "ps")
700-
if err != nil {
701-
t.Fatalf("runContainer failed with kernel memory limit: %v", err)
702-
}
703-
}
704-
705670
func TestCgroupResourcesUnifiedErrorOnV1(t *testing.T) {
706671
testCgroupResourcesUnifiedErrorOnV1(t, false)
707672
}

libcontainer/specconv/spec_linux.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/opencontainers/runc/libcontainer/seccomp"
2222
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
2323
"github.com/opencontainers/runtime-spec/specs-go"
24+
"github.com/sirupsen/logrus"
2425

2526
"golang.org/x/sys/unix"
2627
)
@@ -510,11 +511,8 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
510511
if r.Memory.Swap != nil {
511512
c.Resources.MemorySwap = *r.Memory.Swap
512513
}
513-
if r.Memory.Kernel != nil {
514-
c.Resources.KernelMemory = *r.Memory.Kernel
515-
}
516-
if r.Memory.KernelTCP != nil {
517-
c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP
514+
if r.Memory.Kernel != nil || r.Memory.KernelTCP != nil {
515+
logrus.Warn("Kernel memory settings are ignored and will be removed")
518516
}
519517
if r.Memory.Swappiness != nil {
520518
c.Resources.MemorySwappiness = r.Memory.Swappiness

0 commit comments

Comments
 (0)