Skip to content

Commit 2a4ed3e

Browse files
committed
merge 1.1-GHSA-xr7r-f8xq-vfvv into release-1.1
This is a security fix for CVE-2024-21626. See the advisory[1] for more details. Aleksa Sarai (6): init: don't special-case logrus fds libcontainer: mark all non-stdio fds O_CLOEXEC before spawning init cgroup: plug leaks of /sys/fs/cgroup handle init: close internal fds before execve setns init: do explicit lookup of execve argument early init: verify after chdir that cwd is inside the container Hang Jiang (1): Fix File to Close [1]: GHSA-xr7r-f8xq-vfvv Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 LGTMs: cyphar AkihiroSuda kolyshkin lifubang
2 parents 099ff69 + e9665f4 commit 2a4ed3e

9 files changed

+179
-34
lines changed

libcontainer/cgroups/file.go

+16-15
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,16 @@ var (
7777
// TestMode is set to true by unit tests that need "fake" cgroupfs.
7878
TestMode bool
7979

80-
cgroupFd int = -1
81-
prepOnce sync.Once
82-
prepErr error
83-
resolveFlags uint64
80+
cgroupRootHandle *os.File
81+
prepOnce sync.Once
82+
prepErr error
83+
resolveFlags uint64
8484
)
8585

8686
func prepareOpenat2() error {
8787
prepOnce.Do(func() {
8888
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
89-
Flags: unix.O_DIRECTORY | unix.O_PATH,
89+
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
9090
})
9191
if err != nil {
9292
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
@@ -97,15 +97,16 @@ func prepareOpenat2() error {
9797
}
9898
return
9999
}
100+
file := os.NewFile(uintptr(fd), cgroupfsDir)
101+
100102
var st unix.Statfs_t
101-
if err = unix.Fstatfs(fd, &st); err != nil {
103+
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
102104
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
103105
logrus.Warnf("falling back to securejoin: %s", prepErr)
104106
return
105107
}
106108

107-
cgroupFd = fd
108-
109+
cgroupRootHandle = file
109110
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
110111
if st.Type == unix.CGROUP2_SUPER_MAGIC {
111112
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
@@ -132,28 +133,28 @@ func openFile(dir, file string, flags int) (*os.File, error) {
132133
return openFallback(path, flags, mode)
133134
}
134135

135-
fd, err := unix.Openat2(cgroupFd, relPath,
136+
fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
136137
&unix.OpenHow{
137138
Resolve: resolveFlags,
138139
Flags: uint64(flags) | unix.O_CLOEXEC,
139140
Mode: uint64(mode),
140141
})
141142
if err != nil {
142143
err = &os.PathError{Op: "openat2", Path: path, Err: err}
143-
// Check if cgroupFd is still opened to cgroupfsDir
144+
// Check if cgroupRootHandle is still opened to cgroupfsDir
144145
// (happens when this package is incorrectly used
145146
// across the chroot/pivot_root/mntns boundary, or
146147
// when /sys/fs/cgroup is remounted).
147148
//
148149
// TODO: if such usage will ever be common, amend this
149-
// to reopen cgroupFd and retry openat2.
150-
fdStr := strconv.Itoa(cgroupFd)
150+
// to reopen cgroupRootHandle and retry openat2.
151+
fdStr := strconv.Itoa(int(cgroupRootHandle.Fd()))
151152
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
152153
if fdDest != cgroupfsDir {
153-
// Wrap the error so it is clear that cgroupFd
154+
// Wrap the error so it is clear that cgroupRootHandle
154155
// is opened to an unexpected/wrong directory.
155-
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
156-
fdStr, fdDest, cgroupfsDir, err)
156+
err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
157+
cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
157158
}
158159
return nil, err
159160
}

libcontainer/cgroups/fs/paths.go

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string {
8383
if err != nil {
8484
return ""
8585
}
86+
defer dir.Close()
8687
names, err := dir.Readdirnames(1)
8788
if err != nil {
8889
return ""

libcontainer/container_linux.go

+9
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,15 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
353353
}()
354354
}
355355

356+
// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
357+
// to make sure we don't leak any files into "runc init". Any files to be
358+
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
359+
// runtime and thus their O_CLOEXEC flag will be cleared. This is some
360+
// additional protection against attacks like CVE-2024-21626, by making
361+
// sure we never leak files to "runc init" we didn't intend to.
362+
if err := utils.CloseExecFrom(3); err != nil {
363+
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
364+
}
356365
if err := parent.start(); err != nil {
357366
return fmt.Errorf("unable to start container process: %w", err)
358367
}

libcontainer/init_linux.go

+31
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"io"
99
"net"
1010
"os"
11+
"path/filepath"
1112
"strings"
1213
"unsafe"
1314

@@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error {
135136
return nil
136137
}
137138

139+
// verifyCwd ensures that the current directory is actually inside the mount
140+
// namespace root of the current process.
141+
func verifyCwd() error {
142+
// getcwd(2) on Linux detects if cwd is outside of the rootfs of the
143+
// current mount namespace root, and in that case prefixes "(unreachable)"
144+
// to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
145+
// when this happens and return ENOENT rather than returning a non-absolute
146+
// path. In both cases we can therefore easily detect if we have an invalid
147+
// cwd by checking the return value of getcwd(3). See getcwd(3) for more
148+
// details, and CVE-2024-21626 for the security issue that motivated this
149+
// check.
150+
//
151+
// We have to use unix.Getwd() here because os.Getwd() has a workaround for
152+
// $PWD which involves doing stat(.), which can fail if the current
153+
// directory is inaccessible to the container process.
154+
if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
155+
return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
156+
} else if err != nil {
157+
return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
158+
} else if !filepath.IsAbs(wd) {
159+
// We shouldn't ever hit this, but check just in case.
160+
return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
161+
}
162+
return nil
163+
}
164+
138165
// finalizeNamespace drops the caps, sets the correct user
139166
// and working dir, and closes any leaked file descriptors
140167
// before executing the command inside the namespace
@@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error {
193220
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
194221
}
195222
}
223+
// Make sure our final working directory is inside the container.
224+
if err := verifyCwd(); err != nil {
225+
return err
226+
}
196227
if err := system.ClearKeepCaps(); err != nil {
197228
return fmt.Errorf("unable to clear keep caps: %w", err)
198229
}

libcontainer/integration/seccomp_test.go

+10-10
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
libseccomp "github.com/seccomp/libseccomp-golang"
1414
)
1515

16-
func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
16+
func TestSeccompDenySyslogWithErrno(t *testing.T) {
1717
if testing.Short() {
1818
return
1919
}
@@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
2525
DefaultAction: configs.Allow,
2626
Syscalls: []*configs.Syscall{
2727
{
28-
Name: "getcwd",
28+
Name: "syslog",
2929
Action: configs.Errno,
3030
ErrnoRet: &errnoRet,
3131
},
@@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
3939
buffers := newStdBuffers()
4040
pwd := &libcontainer.Process{
4141
Cwd: "/",
42-
Args: []string{"pwd"},
42+
Args: []string{"dmesg"},
4343
Env: standardEnvironment,
4444
Stdin: buffers.Stdin,
4545
Stdout: buffers.Stdout,
@@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
6565
}
6666

6767
if exitCode == 0 {
68-
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
68+
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
6969
}
7070

71-
expected := "pwd: getcwd: No such process"
71+
expected := "dmesg: klogctl: No such process"
7272
actual := strings.Trim(buffers.Stderr.String(), "\n")
7373
if actual != expected {
7474
t.Fatalf("Expected output %s but got %s\n", expected, actual)
7575
}
7676
}
7777

78-
func TestSeccompDenyGetcwd(t *testing.T) {
78+
func TestSeccompDenySyslog(t *testing.T) {
7979
if testing.Short() {
8080
return
8181
}
@@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
8585
DefaultAction: configs.Allow,
8686
Syscalls: []*configs.Syscall{
8787
{
88-
Name: "getcwd",
88+
Name: "syslog",
8989
Action: configs.Errno,
9090
},
9191
},
@@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
9898
buffers := newStdBuffers()
9999
pwd := &libcontainer.Process{
100100
Cwd: "/",
101-
Args: []string{"pwd"},
101+
Args: []string{"dmesg"},
102102
Env: standardEnvironment,
103103
Stdin: buffers.Stdin,
104104
Stdout: buffers.Stdout,
@@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) {
124124
}
125125

126126
if exitCode == 0 {
127-
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
127+
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
128128
}
129129

130-
expected := "pwd: getcwd: Operation not permitted"
130+
expected := "dmesg: klogctl: Operation not permitted"
131131
actual := strings.Trim(buffers.Stderr.String(), "\n")
132132
if actual != expected {
133133
t.Fatalf("Expected output %s but got %s\n", expected, actual)

libcontainer/setns_init_linux.go

+36-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"os"
7+
"os/exec"
78
"strconv"
89

910
"github.com/opencontainers/selinux/go-selinux"
@@ -14,6 +15,7 @@ import (
1415
"github.com/opencontainers/runc/libcontainer/keys"
1516
"github.com/opencontainers/runc/libcontainer/seccomp"
1617
"github.com/opencontainers/runc/libcontainer/system"
18+
"github.com/opencontainers/runc/libcontainer/utils"
1719
)
1820

1921
// linuxSetnsInit performs the container's initialization for running a new process
@@ -82,6 +84,21 @@ func (l *linuxSetnsInit) Init() error {
8284
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
8385
return err
8486
}
87+
88+
// Check for the arg before waiting to make sure it exists and it is
89+
// returned as a create time error.
90+
name, err := exec.LookPath(l.config.Args[0])
91+
if err != nil {
92+
return err
93+
}
94+
// exec.LookPath in Go < 1.20 might return no error for an executable
95+
// residing on a file system mounted with noexec flag, so perform this
96+
// extra check now while we can still return a proper error.
97+
// TODO: remove this once go < 1.20 is not supported.
98+
if err := eaccess(name); err != nil {
99+
return &os.PathError{Op: "eaccess", Path: name, Err: err}
100+
}
101+
85102
// Set seccomp as close to execve as possible, so as few syscalls take
86103
// place afterward (reducing the amount of syscalls that users need to
87104
// enable in their seccomp profiles).
@@ -101,5 +118,23 @@ func (l *linuxSetnsInit) Init() error {
101118
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
102119
}
103120

104-
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
121+
// Close all file descriptors we are not passing to the container. This is
122+
// necessary because the execve target could use internal runc fds as the
123+
// execve path, potentially giving access to binary files from the host
124+
// (which can then be opened by container processes, leading to container
125+
// escapes). Note that because this operation will close any open file
126+
// descriptors that are referenced by (*os.File) handles from underneath
127+
// the Go runtime, we must not do any file operations after this point
128+
// (otherwise the (*os.File) finaliser could close the wrong file). See
129+
// CVE-2024-21626 for more information as to why this protection is
130+
// necessary.
131+
//
132+
// This is not needed for runc-dmz, because the extra execve(2) step means
133+
// that all O_CLOEXEC file descriptors have already been closed and thus
134+
// the second execve(2) from runc-dmz cannot access internal file
135+
// descriptors from runc.
136+
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
137+
return err
138+
}
139+
return system.Exec(name, l.config.Args[0:], os.Environ())
105140
}

libcontainer/standard_init_linux.go

+19
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/opencontainers/runc/libcontainer/keys"
1818
"github.com/opencontainers/runc/libcontainer/seccomp"
1919
"github.com/opencontainers/runc/libcontainer/system"
20+
"github.com/opencontainers/runc/libcontainer/utils"
2021
)
2122

2223
type linuxStandardInit struct {
@@ -258,5 +259,23 @@ func (l *linuxStandardInit) Init() error {
258259
return err
259260
}
260261

262+
// Close all file descriptors we are not passing to the container. This is
263+
// necessary because the execve target could use internal runc fds as the
264+
// execve path, potentially giving access to binary files from the host
265+
// (which can then be opened by container processes, leading to container
266+
// escapes). Note that because this operation will close any open file
267+
// descriptors that are referenced by (*os.File) handles from underneath
268+
// the Go runtime, we must not do any file operations after this point
269+
// (otherwise the (*os.File) finaliser could close the wrong file). See
270+
// CVE-2024-21626 for more information as to why this protection is
271+
// necessary.
272+
//
273+
// This is not needed for runc-dmz, because the extra execve(2) step means
274+
// that all O_CLOEXEC file descriptors have already been closed and thus
275+
// the second execve(2) from runc-dmz cannot access internal file
276+
// descriptors from runc.
277+
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
278+
return err
279+
}
261280
return system.Exec(name, l.config.Args[0:], os.Environ())
262281
}

0 commit comments

Comments
 (0)