Skip to content

Commit dae652e

Browse files
committed
Add default sysctls to allow ping sockets and privileged ports with no capabilities
Currently default capability CAP_NET_RAW allows users to open ICMP echo sockets, and CAP_NET_BIND_SERVICE allows binding to ports under 1024. Both of these are safe operations, and Linux now provides ways that these can be set, per container, to be allowed without any capabilities for non root users. Enable these by default. Users can revert to the previous behaviour by overriding the sysctl values explicitly. Signed-off-by: Justin Cormack <[email protected]>
1 parent 41ac6be commit dae652e

3 files changed

Lines changed: 67 additions & 3 deletions

File tree

daemon/oci_linux.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,14 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
716716
}
717717
}
718718

719+
// sysctlExists reports whether the named sysctl is present on this host.
// The dotted name (e.g. "net.ipv4.ping_group_range") is mapped onto its
// file under /proc/sys and that path is stat'ed; any stat error is treated
// as "does not exist". Callers use this to avoid handing runc a default
// sysctl that an older kernel does not provide, which would make runc fail.
func sysctlExists(s string) bool {
	relPath := strings.ReplaceAll(s, ".", "/")
	if _, statErr := os.Stat(filepath.Join("/proc", "sys", relPath)); statErr != nil {
		return false
	}
	return true
}
726+
719727
// WithCommonOptions sets common docker options
720728
func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
721729
return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
@@ -768,6 +776,23 @@ func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
768776
s.Hostname = c.Config.Hostname
769777
setLinuxDomainname(c, s)
770778

779+
// Add default sysctls that are generally safe and useful; currently we
780+
// grant the capabilities to allow these anyway. You can override if
781+
// you want to restore the original behaviour.
782+
// We do not set network sysctls if network namespace is host, or if we are
783+
// joining an existing namespace, only if we create a new net namespace.
784+
if c.HostConfig.NetworkMode.IsPrivate() {
785+
// We cannot set up ping socket support in a user namespace
786+
if !c.HostConfig.UsernsMode.IsPrivate() && sysctlExists("net.ipv4.ping_group_range") {
787+
// allow unprivileged ICMP echo sockets without CAP_NET_RAW
788+
s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
789+
}
790+
// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
791+
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
792+
s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"] = "0"
793+
}
794+
}
795+
771796
return nil
772797
}
773798
}

daemon/oci_linux_test.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ func TestSysctlOverride(t *testing.T) {
114114
Domainname: "baz.cyphar.com",
115115
},
116116
HostConfig: &containertypes.HostConfig{
117-
Sysctls: map[string]string{},
117+
NetworkMode: "bridge",
118+
Sysctls: map[string]string{},
119+
UsernsMode: "host",
118120
},
119121
}
120122
d := setupFakeDaemon(t, c)
@@ -125,15 +127,51 @@ func TestSysctlOverride(t *testing.T) {
125127
assert.NilError(t, err)
126128
assert.Equal(t, s.Hostname, "foobar")
127129
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname)
130+
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
131+
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
132+
}
133+
if sysctlExists("net.ipv4.ping_group_range") {
134+
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
135+
}
128136

129137
// Set an explicit sysctl.
130138
c.HostConfig.Sysctls["kernel.domainname"] = "foobar.net"
131139
assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname)
140+
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
132141

133142
s, err = d.createSpec(c)
134143
assert.NilError(t, err)
135144
assert.Equal(t, s.Hostname, "foobar")
136145
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"])
146+
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
147+
}
148+
149+
// TestSysctlOverrideHost ensures that any implicit network sysctls are not set
150+
// with host networking
151+
func TestSysctlOverrideHost(t *testing.T) {
152+
c := &container.Container{
153+
Config: &containertypes.Config{},
154+
HostConfig: &containertypes.HostConfig{
155+
NetworkMode: "host",
156+
Sysctls: map[string]string{},
157+
UsernsMode: "host",
158+
},
159+
}
160+
d := setupFakeDaemon(t, c)
161+
defer cleanupFakeContainer(c)
162+
163+
// Ensure that the implicit sysctl is not set
164+
s, err := d.createSpec(c)
165+
assert.NilError(t, err)
166+
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "")
167+
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "")
168+
169+
// Set an explicit sysctl.
170+
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
171+
172+
s, err = d.createSpec(c)
173+
assert.NilError(t, err)
174+
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
137175
}
138176

139177
func TestGetSourceMount(t *testing.T) {

integration-cli/docker_cli_run_unix_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,12 +1252,13 @@ func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetBindService(c *testing.T
12521252
// test that a root user has default capability CAP_NET_BIND_SERVICE
12531253
dockerCmd(c, "run", "syscall-test", "socket-test")
12541254
// test that non root user does not have default capability CAP_NET_BIND_SERVICE
1255-
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "socket-test").Assert(c, icmd.Expected{
1255+
// as we allow this via sysctl, also tweak the sysctl back to default
1256+
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
12561257
ExitCode: 1,
12571258
Err: "Permission denied",
12581259
})
12591260
// test that root user can drop default capability CAP_NET_BIND_SERVICE
1260-
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "syscall-test", "socket-test").Assert(c, icmd.Expected{
1261+
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
12611262
ExitCode: 1,
12621263
Err: "Permission denied",
12631264
})

0 commit comments

Comments
 (0)