Skip to content

Commit cf07f28

Browse files
fuweidk8s-infra-cherrypick-robot
authored andcommitted
*: should align pipe's owner with init process
The containerd-shim creates pipes and passes them to the init container as stdin, stdout, and stderr for logging purposes. By default, these pipes are owned by the root user (UID/GID: 0/0). The init container can access them directly through inheritance. However, if the init container attempts to open any files pointing to these pipes (e.g., /proc/1/fd/2, /dev/stderr), it will encounter a permission issue since it is not the owner. To avoid this, we need to align the ownership of the pipes with the init process. Fixes: #10598 Signed-off-by: Wei Fu <[email protected]>
1 parent 6e51f71 commit cf07f28

6 files changed

Lines changed: 245 additions & 0 deletions

File tree

integration/images/image_list.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type ImageList struct {
3838
VolumeOwnership string
3939
ArgsEscaped string
4040
DockerSchema1 string
41+
Nginx string
4142
}
4243

4344
var (
@@ -57,6 +58,7 @@ func initImages(imageListFile string) {
5758
VolumeOwnership: "ghcr.io/containerd/volume-ownership:2.1",
5859
ArgsEscaped: "cplatpublic.azurecr.io/args-escaped-test-image-ns:1.0",
5960
DockerSchema1: "registry.k8s.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
61+
Nginx: "ghcr.io/containerd/nginx:1.27.0",
6062
}
6163

6264
if imageListFile != "" {
@@ -96,6 +98,8 @@ const (
9698
ArgsEscaped
9799
// DockerSchema1 image with docker schema 1
98100
DockerSchema1
101+
// Nginx image
102+
Nginx
99103
)
100104

101105
func initImageMap(imageList ImageList) map[int]string {
@@ -108,6 +112,7 @@ func initImageMap(imageList ImageList) map[int]string {
108112
images[VolumeOwnership] = imageList.VolumeOwnership
109113
images[ArgsEscaped] = imageList.ArgsEscaped
110114
images[DockerSchema1] = imageList.DockerSchema1
115+
images[Nginx] = imageList.Nginx
111116
return images
112117
}
113118

integration/main_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,24 @@ func WithDevice(containerPath, hostPath, permissions string) ContainerOpts {
471471
}
472472
}
473473

474+
// WithSELinuxOptions allows to set SELinux option for container.
475+
func WithSELinuxOptions(user, role, typ, level string) ContainerOpts {
476+
return func(c *runtime.ContainerConfig) {
477+
if c.Linux == nil {
478+
c.Linux = &runtime.LinuxContainerConfig{}
479+
}
480+
if c.Linux.SecurityContext == nil {
481+
c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
482+
}
483+
c.Linux.SecurityContext.SelinuxOptions = &runtime.SELinuxOption{
484+
User: user,
485+
Role: role,
486+
Type: typ,
487+
Level: level,
488+
}
489+
}
490+
}
491+
474492
// ContainerConfig creates a container config given a name and image name
475493
// and additional container config options
476494
func ContainerConfig(name, image string, opts ...ContainerOpts) *runtime.ContainerConfig {

integration/pod_userns_linux_test.go

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,125 @@ func TestPodUserNS(t *testing.T) {
304304
}
305305
}
306306

307+
// TestIssue10598 tests a case[1] that init processes in container should be able
308+
// to open /dev/stdout or /dev/stderr if init processes are running in their
309+
// user namespace instead of root user.
310+
//
311+
// The shim server creates pipe for init processes' standard output. By default,
312+
// the owner of pipe is the same to shim server (root user). Let's say, the init
313+
// process is running with uid=1000/gid=1000 user. Init processes inherits the
314+
// pipe created by shim server so that it can just write data into that pipe.
315+
// However, if that init process tries to open /dev/stderr, the kernel will
316+
// return no permission error.
317+
//
318+
// The following output is from retsnoop[2].
319+
//
320+
// → do_open
321+
// → inode_permission
322+
// → generic_permission
323+
// ↔ make_vfsuid [0] 0.500us
324+
// ↔ make_vfsuid [0] 6.501us
325+
// ↔ from_kuid [0xffffffff] 0.700us
326+
// ← generic_permission [-EACCES] 13.501us
327+
//
328+
// Since uid_map/gid_map doesn't cover uid=0/gid=0, the kernel can't convert
329+
// uid=0 into valid uid in that uid_map. So, `from_kuid` returns invalid uid
330+
// value and then `do_open` returns EACCES error.
331+
//
332+
// [1]: https://github.com/containerd/containerd/issues/10598
333+
// [2]: https://github.com/anakryiko/retsnoop
334+
func TestIssue10598(t *testing.T) {
335+
if !supportsUserNS() {
336+
t.Skip("User namespaces are not supported")
337+
}
338+
if !supportsIDMap(defaultRoot) {
339+
t.Skipf("ID mappings are not supported on: %v", defaultRoot)
340+
}
341+
if err := supportsRuncIDMap(); err != nil {
342+
t.Skipf("OCI runtime doesn't support idmap mounts: %v", err)
343+
}
344+
345+
testPodLogDir := t.TempDir()
346+
347+
containerID := uint32(0)
348+
hostID := uint32(65536)
349+
size := uint32(65536)
350+
351+
t.Log("Create a sandbox with userns")
352+
sandboxOpts := []PodSandboxOpts{
353+
WithPodUserNs(containerID, hostID, size),
354+
WithPodLogDirectory(testPodLogDir),
355+
}
356+
sbConfig := PodSandboxConfig("issue10598", "userns", sandboxOpts...)
357+
sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
358+
require.NoError(t, err)
359+
360+
// Make sure the sandbox is cleaned up.
361+
defer func() {
362+
assert.NoError(t, runtimeService.StopPodSandbox(sb))
363+
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
364+
}()
365+
366+
t.Log("Create a container for userns")
367+
368+
containerName := "nginx-userns"
369+
testImage := images.Get(images.Nginx)
370+
371+
EnsureImageExists(t, testImage)
372+
373+
containerOpts := []ContainerOpts{
374+
WithUserNamespace(containerID, hostID, size),
375+
WithLogPath(containerName),
376+
// The SELinux policy enforced by container-selinux prevents
377+
// NGINX from opening the /proc/self/fd/2 pipe. This scenario
378+
// is not intended to verify SELinux behavior in the user namespace
379+
// but rather to confirm the ownership of the standard output
380+
// file descriptor. The following option demonstrates how to
381+
// disable the restrictive SELinux rule for the NGINX process.
382+
WithSELinuxOptions(
383+
"unconfined_u",
384+
"unconfined_r",
385+
"container_runtime_t",
386+
"s0",
387+
),
388+
}
389+
390+
cnConfig := ContainerConfig(
391+
containerName,
392+
testImage,
393+
containerOpts...,
394+
)
395+
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
396+
require.NoError(t, err)
397+
398+
t.Log("Start the container")
399+
require.NoError(t, runtimeService.StartContainer(cn))
400+
401+
t.Log("Wait for container to start")
402+
require.NoError(t, Eventually(func() (bool, error) {
403+
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
404+
if err != nil {
405+
return false, err
406+
}
407+
408+
s, err := runtimeService.ContainerStatus(cn)
409+
if err != nil {
410+
return false, err
411+
}
412+
413+
if state := s.GetState(); state != runtime.ContainerState_CONTAINER_RUNNING {
414+
return false, fmt.Errorf("%s is not running\nstate: %s\nlog: %s",
415+
containerName, state, string(content))
416+
}
417+
418+
started := strings.Contains(string(content), "start worker processes")
419+
if started {
420+
t.Log(string(content))
421+
}
422+
return started, nil
423+
}, time.Second, 30*time.Second))
424+
}
425+
307426
func supportsRuncIDMap() error {
308427
var r runc.Runc
309428
features, err := r.Features(context.Background())

internal/cri/server/container_start.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
129129
containerd.WithTaskAPIEndpoint(endpoint.Address, endpoint.Version))
130130
}
131131

132+
ioOwnerTaskOpts, err := updateContainerIOOwner(ctx, container, config)
133+
if err != nil {
134+
return nil, fmt.Errorf("failed to update container IO owner: %w", err)
135+
}
136+
taskOpts = append(taskOpts, ioOwnerTaskOpts...)
137+
132138
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
133139
if err != nil {
134140
return nil, fmt.Errorf("failed to create containerd task: %w", err)
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package server
18+
19+
import (
20+
"context"
21+
"fmt"
22+
23+
containerd "github.com/containerd/containerd/v2/client"
24+
"github.com/containerd/containerd/v2/internal/userns"
25+
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
26+
)
27+
28+
// updateContainerIOOwner updates I/O files' owner to align with initial processe's UID/GID.
29+
func updateContainerIOOwner(ctx context.Context, cntr containerd.Container, config *runtime.ContainerConfig) ([]containerd.NewTaskOpts, error) {
30+
if config.GetLinux() == nil {
31+
return nil, nil
32+
}
33+
34+
// FIXME(fuweid): Ideally, the pipe owner should be aligned with process owner.
35+
// No matter what user namespace container uses, it should work well. However,
36+
// it breaks the sig-node conformance case - [when querying /stats/summary should report resource usage through the stats api].
37+
// In order to keep compatible, the change should apply to user namespace only.
38+
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() == nil {
39+
return nil, nil
40+
}
41+
42+
spec, err := cntr.Spec(ctx)
43+
if err != nil {
44+
return nil, fmt.Errorf("failed to get spec: %w", err)
45+
}
46+
47+
if spec.Linux == nil || spec.Process == nil {
48+
return nil, fmt.Errorf("invalid linux platform oci runtime spec")
49+
}
50+
51+
hostID, err := userns.IDMap{
52+
UidMap: spec.Linux.UIDMappings,
53+
GidMap: spec.Linux.GIDMappings,
54+
}.ToHost(userns.User{
55+
Uid: spec.Process.User.UID,
56+
Gid: spec.Process.User.GID,
57+
})
58+
if err != nil {
59+
return nil, fmt.Errorf("failed to do idmap to get host ID: %w", err)
60+
}
61+
62+
return []containerd.NewTaskOpts{
63+
containerd.WithUIDOwner(hostID.Uid),
64+
containerd.WithGIDOwner(hostID.Gid),
65+
}, nil
66+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//go:build !linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package server
20+
21+
import (
22+
"context"
23+
24+
containerd "github.com/containerd/containerd/v2/client"
25+
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
26+
)
27+
28+
// updateContainerIOOwner updates I/O files' owner to align with initial processe's UID/GID.
29+
func updateContainerIOOwner(ctx context.Context, cntr containerd.Container, config *runtime.ContainerConfig) ([]containerd.NewTaskOpts, error) {
30+
return nil, nil
31+
}

0 commit comments

Comments
 (0)