Skip to content

Commit a2d1a8a

Browse files
committed
oci.WithPrivileged: set the current caps, not the known caps
This change is needed to run the latest containerd inside a Docker version that is not aware of the recently added caps (BPF, PERFMON, CHECKPOINT_RESTORE). Without this change, containerd inside Docker fails to run containers with an "apply caps: operation not permitted" error. See kubernetes-sigs/kind#2058. NOTE: The caller process of this function is now assumed to be as privileged as possible. Signed-off-by: Akihiro Suda <[email protected]>
1 parent ddcc431 commit a2d1a8a

12 files changed

Lines changed: 424 additions & 35 deletions

oci/spec_opts.go

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ import (
3838
"github.com/opencontainers/runc/libcontainer/user"
3939
specs "github.com/opencontainers/runtime-spec/specs-go"
4040
"github.com/pkg/errors"
41-
"github.com/syndtr/gocapability/capability"
4241
)
4342

4443
// SpecOpts sets spec specific information to a newly generated OCI spec
@@ -776,29 +775,6 @@ func WithCapabilities(caps []string) SpecOpts {
776775
}
777776
}
778777

779-
// WithAllCapabilities sets all linux capabilities for the process
780-
var WithAllCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
781-
return WithCapabilities(GetAllCapabilities())(ctx, client, c, s)
782-
}
783-
784-
// GetAllCapabilities returns all caps up to CAP_LAST_CAP
785-
// or CAP_BLOCK_SUSPEND on RHEL6
786-
func GetAllCapabilities() []string {
787-
last := capability.CAP_LAST_CAP
788-
// hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap
789-
if last == capability.Cap(63) {
790-
last = capability.CAP_BLOCK_SUSPEND
791-
}
792-
var caps []string
793-
for _, cap := range capability.List() {
794-
if cap > last {
795-
continue
796-
}
797-
caps = append(caps, "CAP_"+strings.ToUpper(cap.String()))
798-
}
799-
return caps
800-
}
801-
802778
func capsContain(caps []string, s string) bool {
803779
for _, c := range caps {
804780
if c == s {
@@ -1132,7 +1108,7 @@ func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container
11321108

11331109
// WithPrivileged sets up options for a privileged container
11341110
var WithPrivileged = Compose(
1135-
WithAllCapabilities,
1111+
WithAllCurrentCapabilities,
11361112
WithMaskedPaths(nil),
11371113
WithReadonlyPaths(nil),
11381114
WithWriteableSysfs,

oci/spec_opts_linux.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"path/filepath"
2626

2727
"github.com/containerd/containerd/containers"
28+
"github.com/containerd/containerd/pkg/cap"
2829
specs "github.com/opencontainers/runtime-spec/specs-go"
2930
"golang.org/x/sys/unix"
3031
)
@@ -180,3 +181,19 @@ func WithCPUCFS(quota int64, period uint64) SpecOpts {
180181
return nil
181182
}
182183
}
184+
185+
// WithAllCurrentCapabilities propagates the effective capabilities of the caller process to the container process.
186+
// The capability set may differ from WithAllKnownCapabilities when running in a container.
187+
var WithAllCurrentCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
188+
caps, err := cap.Current()
189+
if err != nil {
190+
return err
191+
}
192+
return WithCapabilities(caps)(ctx, client, c, s)
193+
}
194+
195+
// WithAllKnownCapabilities sets all the the known linux capabilities for the container process
196+
var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
197+
caps := cap.Known()
198+
return WithCapabilities(caps)(ctx, client, c, s)
199+
}

oci/spec_opts_nonlinux.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// +build !linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package oci
20+
21+
import (
22+
"context"
23+
24+
"github.com/containerd/containerd/containers"
25+
)
26+
27+
// WithAllCurrentCapabilities propagates the effective capabilities of the caller process to the container process.
28+
// The capability set may differ from WithAllKnownCapabilities when running in a container.
29+
//nolint: deadcode, unused
30+
var WithAllCurrentCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
31+
return WithCapabilities(nil)(ctx, client, c, s)
32+
}
33+
34+
// WithAllKnownCapabilities sets all the the known linux capabilities for the container process
35+
//nolint: deadcode, unused
36+
var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
37+
return WithCapabilities(nil)(ctx, client, c, s)
38+
}

oci/spec_opts_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ func TestDropCaps(t *testing.T) {
574574

575575
var s specs.Spec
576576

577-
if err := WithAllCapabilities(context.Background(), nil, nil, &s); err != nil {
577+
if err := WithAllKnownCapabilities(context.Background(), nil, nil, &s); err != nil {
578578
t.Fatal(err)
579579
}
580580
if err := WithDroppedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil {
@@ -593,7 +593,7 @@ func TestDropCaps(t *testing.T) {
593593
}
594594

595595
// Add all capabilities back and drop a different cap.
596-
if err := WithAllCapabilities(context.Background(), nil, nil, &s); err != nil {
596+
if err := WithAllKnownCapabilities(context.Background(), nil, nil, &s); err != nil {
597597
t.Fatal(err)
598598
}
599599
if err := WithDroppedCapabilities([]string{"CAP_FOWNER"})(context.Background(), nil, nil, &s); err != nil {

oci/spec_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"github.com/containerd/containerd/containers"
2525
"github.com/containerd/containerd/namespaces"
26+
"github.com/containerd/containerd/pkg/testutil"
2627
specs "github.com/opencontainers/runtime-spec/specs-go"
2728
)
2829

@@ -251,6 +252,10 @@ func TestPopulateDefaultUnixSpec(t *testing.T) {
251252

252253
func TestWithPrivileged(t *testing.T) {
253254
t.Parallel()
255+
if runtime.GOOS == "linux" {
256+
// because WithPrivileged depends on CapEff in /proc/self/status
257+
testutil.RequiresRoot(t)
258+
}
254259

255260
ctx := namespaces.WithNamespace(context.Background(), "testing")
256261

@@ -272,6 +277,10 @@ func TestWithPrivileged(t *testing.T) {
272277
t.Fatal(err)
273278
}
274279

280+
if runtime.GOOS != "linux" {
281+
return
282+
}
283+
275284
if len(s.Process.Capabilities.Bounding) == 0 {
276285
t.Error("Expected capabilities to be set with privileged")
277286
}

pkg/cap/cap_linux.go

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package cap provides utilities for handling Linux capabilities.
18+
package cap
19+
20+
import (
21+
"bufio"
22+
"io"
23+
"os"
24+
"strconv"
25+
"strings"
26+
27+
"github.com/pkg/errors"
28+
"github.com/syndtr/gocapability/capability"
29+
)
30+
31+
// FromUint64 parses an integer into string slice like
32+
// []{"CAP_SYS_ADMIN", ...}.
33+
//
34+
// Unknown cap numbers are returned as []int.
35+
func FromUint64(v uint64) ([]string, []int) {
36+
var (
37+
res []string
38+
unknown []int
39+
)
40+
knownList := capability.List()
41+
known := make(map[string]struct{}, len(knownList))
42+
for _, f := range knownList {
43+
known[f.String()] = struct{}{}
44+
}
45+
for i := 0; i <= 63; i++ {
46+
if b := (v >> i) & 0x1; b == 0x1 {
47+
c := capability.Cap(i)
48+
sRaw := c.String()
49+
if _, ok := known[sRaw]; ok {
50+
s := "CAP_" + strings.ToUpper(sRaw)
51+
res = append(res, s)
52+
} else {
53+
unknown = append(unknown, i)
54+
}
55+
}
56+
}
57+
return res, unknown
58+
}
59+
60+
// ParseProcPIDStatus returns uint64 value from /proc/<PID>/status file
61+
func ParseProcPIDStatus(r io.Reader) (map[capability.CapType]uint64, error) {
62+
res := make(map[capability.CapType]uint64)
63+
scanner := bufio.NewScanner(r)
64+
for scanner.Scan() {
65+
line := scanner.Text()
66+
pair := strings.SplitN(line, ":", 2)
67+
if len(pair) != 2 {
68+
continue
69+
}
70+
k := strings.TrimSpace(pair[0])
71+
v := strings.TrimSpace(pair[1])
72+
switch k {
73+
case "CapInh", "CapPrm", "CapEff", "CapBnd", "CapAmb":
74+
ui64, err := strconv.ParseUint(v, 16, 64)
75+
if err != nil {
76+
return nil, errors.Errorf("failed to parse line %q", line)
77+
}
78+
switch k {
79+
case "CapInh":
80+
res[capability.INHERITABLE] = ui64
81+
case "CapPrm":
82+
res[capability.PERMITTED] = ui64
83+
case "CapEff":
84+
res[capability.EFFECTIVE] = ui64
85+
case "CapBnd":
86+
res[capability.BOUNDING] = ui64
87+
case "CapAmb":
88+
res[capability.AMBIENT] = ui64
89+
}
90+
}
91+
}
92+
if err := scanner.Err(); err != nil {
93+
return nil, err
94+
}
95+
return res, nil
96+
}
97+
98+
// Current returns the list of the effective and the known caps of
99+
// the current process.
100+
//
101+
// The result is like []string{"CAP_SYS_ADMIN", ...}.
102+
//
103+
// The result does not contain caps that are not recognized by
104+
// the "github.com/syndtr/gocapability" library.
105+
func Current() ([]string, error) {
106+
f, err := os.Open("/proc/self/status")
107+
if err != nil {
108+
return nil, err
109+
}
110+
defer f.Close()
111+
caps, err := ParseProcPIDStatus(f)
112+
if err != nil {
113+
return nil, err
114+
}
115+
capEff := caps[capability.EFFECTIVE]
116+
names, _ := FromUint64(capEff)
117+
return names, nil
118+
}
119+
120+
// Each table below extends the previous one. The three-index slice
// expressions clamp the capacity of the source slice so that every append
// allocates a fresh backing array; otherwise the tables would share storage
// and an append to a shorter table could overwrite an entry of a longer one.
var (
	// caps35 is the caps of kernel 3.5 (37 entries)
	caps35 = []string{
		"CAP_CHOWN",            // 2.2
		"CAP_DAC_OVERRIDE",     // 2.2
		"CAP_DAC_READ_SEARCH",  // 2.2
		"CAP_FOWNER",           // 2.2
		"CAP_FSETID",           // 2.2
		"CAP_KILL",             // 2.2
		"CAP_SETGID",           // 2.2
		"CAP_SETUID",           // 2.2
		"CAP_SETPCAP",          // 2.2
		"CAP_LINUX_IMMUTABLE",  // 2.2
		"CAP_NET_BIND_SERVICE", // 2.2
		"CAP_NET_BROADCAST",    // 2.2
		"CAP_NET_ADMIN",        // 2.2
		"CAP_NET_RAW",          // 2.2
		"CAP_IPC_LOCK",         // 2.2
		"CAP_IPC_OWNER",        // 2.2
		"CAP_SYS_MODULE",       // 2.2
		"CAP_SYS_RAWIO",        // 2.2
		"CAP_SYS_CHROOT",       // 2.2
		"CAP_SYS_PTRACE",       // 2.2
		"CAP_SYS_PACCT",        // 2.2
		"CAP_SYS_ADMIN",        // 2.2
		"CAP_SYS_BOOT",         // 2.2
		"CAP_SYS_NICE",         // 2.2
		"CAP_SYS_RESOURCE",     // 2.2
		"CAP_SYS_TIME",         // 2.2
		"CAP_SYS_TTY_CONFIG",   // 2.2
		"CAP_MKNOD",            // 2.4
		"CAP_LEASE",            // 2.4
		"CAP_AUDIT_WRITE",      // 2.6.11
		"CAP_AUDIT_CONTROL",    // 2.6.11
		"CAP_SETFCAP",          // 2.6.24
		"CAP_MAC_OVERRIDE",     // 2.6.25
		"CAP_MAC_ADMIN",        // 2.6.25
		"CAP_SYSLOG",           // 2.6.37
		"CAP_WAKE_ALARM",       // 3.0
		"CAP_BLOCK_SUSPEND",    // 3.5
	}
	// caps316 is the caps of kernel 3.16 (38 entries)
	caps316 = append(caps35[:len(caps35):len(caps35)], "CAP_AUDIT_READ")
	// caps58 is the caps of kernel 5.8 (40 entries)
	caps58 = append(caps316[:len(caps316):len(caps316)], "CAP_PERFMON", "CAP_BPF")
	// caps59 is the caps of kernel 5.9 (41 entries)
	caps59 = append(caps58[:len(caps58):len(caps58)], "CAP_CHECKPOINT_RESTORE")
)

// Known returns the known cap strings as of kernel 5.9.
//
// The returned slice is a fresh copy, so callers may modify it without
// affecting the package-level table or the result of later calls.
func Known() []string {
	return append([]string(nil), caps59...)
}

0 commit comments

Comments
 (0)