Skip to content

Commit 4a7d3ae

Browse files
kolyshkin and odinuge
committed
libct/cg: support hugetlb rsvd
This adds support for hugetlb.<pagesize>.rsvd limiting and accounting. The previous non-rsvd max/limit_in_bytes does not account for reserved huge page memory, making it possible for a process to reserve all the huge page memory without being able to allocate it (due to cgroup restrictions). In practice this makes it possible to successfully mmap more huge page memory than allowed via the cgroup settings, but when the process starts using that memory it gets a SIGBUS and crashes. This is bad for applications that mmap at startup: the mmap succeeds, but the program crashes once it starts to use the memory. E.g. postgres does this by default. This also keeps writing to the old max/limit_in_bytes, for backward compatibility. More info can be found here: https://lkml.org/lkml/2020/2/3/1153 (commit message mostly written by Odin Ugedal) Co-authored-by: Odin Ugedal <[email protected]> Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent ac78d13 commit 4a7d3ae

4 files changed

Lines changed: 155 additions & 18 deletions

File tree

libcontainer/cgroups/fs/hugetlb.go

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package fs
22

33
import (
4+
"errors"
5+
"os"
46
"strconv"
57

68
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -19,8 +21,23 @@ func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
1921
}
2022

2123
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
24+
const suffix = ".limit_in_bytes"
25+
skipRsvd := false
26+
2227
for _, hugetlb := range r.HugetlbLimit {
23-
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
28+
prefix := "hugetlb." + hugetlb.Pagesize
29+
val := strconv.FormatUint(hugetlb.Limit, 10)
30+
if err := cgroups.WriteFile(path, prefix+suffix, val); err != nil {
31+
return err
32+
}
33+
if skipRsvd {
34+
continue
35+
}
36+
if err := cgroups.WriteFile(path, prefix+".rsvd"+suffix, val); err != nil {
37+
if errors.Is(err, os.ErrNotExist) {
38+
skipRsvd = true
39+
continue
40+
}
2441
return err
2542
}
2643
}
@@ -32,24 +49,29 @@ func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
3249
if !cgroups.PathExists(path) {
3350
return nil
3451
}
52+
rsvd := ".rsvd"
3553
hugetlbStats := cgroups.HugetlbStats{}
3654
for _, pageSize := range cgroups.HugePageSizes() {
37-
usage := "hugetlb." + pageSize + ".usage_in_bytes"
38-
value, err := fscommon.GetCgroupParamUint(path, usage)
55+
again:
56+
prefix := "hugetlb." + pageSize + rsvd
57+
58+
value, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes")
3959
if err != nil {
60+
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
61+
rsvd = ""
62+
goto again
63+
}
4064
return err
4165
}
4266
hugetlbStats.Usage = value
4367

44-
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
45-
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
68+
value, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes")
4669
if err != nil {
4770
return err
4871
}
4972
hugetlbStats.MaxUsage = value
5073

51-
failcnt := "hugetlb." + pageSize + ".failcnt"
52-
value, err = fscommon.GetCgroupParamUint(path, failcnt)
74+
value, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt")
5375
if err != nil {
5476
return err
5577
}

libcontainer/cgroups/fs/hugetlb_test.go

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ const (
2121
limit = "hugetlb.%s.limit_in_bytes"
2222
maxUsage = "hugetlb.%s.max_usage_in_bytes"
2323
failcnt = "hugetlb.%s.failcnt"
24+
25+
rsvdUsage = "hugetlb.%s.rsvd.usage_in_bytes"
26+
rsvdLimit = "hugetlb.%s.rsvd.limit_in_bytes"
27+
rsvdMaxUsage = "hugetlb.%s.rsvd.max_usage_in_bytes"
28+
rsvdFailcnt = "hugetlb.%s.rsvd.failcnt"
2429
)
2530

2631
func TestHugetlbSetHugetlb(t *testing.T) {
@@ -52,13 +57,15 @@ func TestHugetlbSetHugetlb(t *testing.T) {
5257
}
5358

5459
for _, pageSize := range cgroups.HugePageSizes() {
55-
limit := fmt.Sprintf(limit, pageSize)
56-
value, err := fscommon.GetCgroupParamUint(path, limit)
57-
if err != nil {
58-
t.Fatal(err)
59-
}
60-
if value != hugetlbAfter {
61-
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
60+
for _, f := range []string{limit, rsvdLimit} {
61+
limit := fmt.Sprintf(f, pageSize)
62+
value, err := fscommon.GetCgroupParamUint(path, limit)
63+
if err != nil {
64+
t.Fatal(err)
65+
}
66+
if value != hugetlbAfter {
67+
t.Fatalf("Set %s failed. Expected: %v, Got: %v", limit, hugetlbAfter, value)
68+
}
6269
}
6370
}
6471
}
@@ -85,6 +92,28 @@ func TestHugetlbStats(t *testing.T) {
8592
}
8693
}
8794

95+
// TestHugetlbRStatsRsvd checks that GetStats reports the values stored in the
// hugetlb.<pagesize>.rsvd.* files. The test writes only the rsvd variants
// (usage, max usage, failcnt) for every huge page size.
func TestHugetlbRStatsRsvd(t *testing.T) {
96+
path := tempDir(t, "hugetlb")
97+
// Populate the rsvd stat files for each supported huge page size.
for _, pageSize := range cgroups.HugePageSizes() {
98+
writeFileContents(t, path, map[string]string{
99+
fmt.Sprintf(rsvdUsage, pageSize): hugetlbUsageContents,
100+
fmt.Sprintf(rsvdMaxUsage, pageSize): hugetlbMaxUsageContents,
101+
fmt.Sprintf(rsvdFailcnt, pageSize): hugetlbFailcnt,
102+
})
103+
}
104+
105+
hugetlb := &HugetlbGroup{}
106+
actualStats := *cgroups.NewStats()
107+
err := hugetlb.GetStats(path, &actualStats)
108+
if err != nil {
109+
t.Fatal(err)
110+
}
111+
// The same stats are expected for every page size.
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
112+
for _, pageSize := range cgroups.HugePageSizes() {
113+
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
114+
}
115+
}
116+
88117
func TestHugetlbStatsNoUsageFile(t *testing.T) {
89118
path := tempDir(t, "hugetlb")
90119
writeFileContents(t, path, map[string]string{

libcontainer/cgroups/fs2/hugetlb.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package fs2
22

33
import (
4+
"errors"
5+
"os"
46
"strconv"
57

68
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -16,8 +18,22 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
1618
if !isHugeTlbSet(r) {
1719
return nil
1820
}
21+
const suffix = ".max"
22+
skipRsvd := false
1923
for _, hugetlb := range r.HugetlbLimit {
20-
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
24+
prefix := "hugetlb." + hugetlb.Pagesize
25+
val := strconv.FormatUint(hugetlb.Limit, 10)
26+
if err := cgroups.WriteFile(dirPath, prefix+suffix, val); err != nil {
27+
return err
28+
}
29+
if skipRsvd {
30+
continue
31+
}
32+
if err := cgroups.WriteFile(dirPath, prefix+".rsvd"+suffix, val); err != nil {
33+
if errors.Is(err, os.ErrNotExist) {
34+
skipRsvd = true
35+
continue
36+
}
2137
return err
2238
}
2339
}
@@ -27,15 +43,21 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
2743

2844
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
2945
hugetlbStats := cgroups.HugetlbStats{}
46+
rsvd := ".rsvd"
3047
for _, pagesize := range cgroups.HugePageSizes() {
31-
value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
48+
again:
49+
prefix := "hugetlb." + pagesize + rsvd
50+
value, err := fscommon.GetCgroupParamUint(dirPath, prefix+".current")
3251
if err != nil {
52+
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
53+
rsvd = ""
54+
goto again
55+
}
3356
return err
3457
}
3558
hugetlbStats.Usage = value
3659

37-
fileName := "hugetlb." + pagesize + ".events"
38-
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
60+
value, err = fscommon.GetValueByKey(dirPath, prefix+".events", "max")
3961
if err != nil {
4062
return err
4163
}

tests/integration/cgroups.bats

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,70 @@ function setup() {
199199
check_cgroup_value "cpu.idle" "1"
200200
}
201201

202+
# Convert size in KB to hugetlb size suffix.
203+
convert_hugetlb_size() {
204+
# $1 is the size in KB.
local size=$1
205+
local units=("KB" "MB" "GB")
206+
local idx=0
207+
208+
# Integer-divide by 1024 until the value is below 1024, stepping to the
# next unit each time (e.g. 2048 -> 2 with unit MB).
while ((size >= 1024)); do
209+
((size /= 1024))
210+
((idx++))
211+
done
212+
213+
# Print the number immediately followed by its unit, e.g. "2MB".
echo "$size${units[$idx]}"
214+
}
215+
216+
@test "runc run (hugetlb limits)" {
217+
requires cgroups_hugetlb
218+
[ $EUID -ne 0 ] && requires rootless_cgroup
219+
# shellcheck disable=SC2012 # ls is fine here.
220+
mapfile -t sizes_kb < <(ls /sys/kernel/mm/hugepages/ | sed -e 's/.*hugepages-//' -e 's/kB$//') #
221+
if [ "${#sizes_kb[@]}" -lt 1 ]; then
222+
skip "requires hugetlb"
223+
fi
224+
225+
# Create two arrays:
226+
# - sizes: hugetlb cgroup file suffixes;
227+
# - limits: limits for each size.
228+
for size in "${sizes_kb[@]}"; do
229+
sizes+=("$(convert_hugetlb_size "$size")")
230+
# Limit to 1 page.
231+
limits+=("$((size * 1024))")
232+
done
233+
234+
# Set per-size limits.
235+
for ((i = 0; i < ${#sizes[@]}; i++)); do
236+
size="${sizes[$i]}"
237+
limit="${limits[$i]}"
238+
update_config '.linux.resources.hugepageLimits += [{ pagesize: "'"$size"'", limit: '"$limit"' }]'
239+
done
240+
241+
set_cgroups_path
242+
runc run -d --console-socket "$CONSOLE_SOCKET" test_hugetlb
243+
[ "$status" -eq 0 ]
244+
245+
# Name of the limit file's last component, WITHOUT a leading dot: the
# file names built below already insert the "." separator themselves.
lim="max"
246+
[ -v CGROUP_V1 ] && lim="limit_in_bytes"
247+
248+
optional=("")
249+
# Add rsvd, if available.
250+
if test -f "$(get_cgroup_path hugetlb)/hugetlb.${sizes[0]}.rsvd.$lim"; then
251+
optional+=(".rsvd")
252+
fi
253+
254+
# Check if the limits are as expected.
255+
for ((i = 0; i < ${#sizes[@]}; i++)); do
256+
size="${sizes[$i]}"
257+
limit="${limits[$i]}"
258+
for rsvd in "${optional[@]}"; do
259+
param="hugetlb.${size}${rsvd}.$lim"
260+
echo "checking $param"
261+
check_cgroup_value "$param" "$limit"
262+
done
263+
done
264+
}
265+
202266
@test "runc run (cgroup v2 resources.unified only)" {
203267
requires root cgroups_v2
204268

0 commit comments

Comments
 (0)