Skip to content

Commit 27adcd5

Browse files
committed
libnet/d/bridge: port mappings: drop direct-access when gw_mode=nat
When a NAT-based port mapping is created, the daemon adds a DNAT rule in nat-DOCKER to replace the dest addr with the container IP. However, the daemon never sets up rules to filter packets destined directly to the container port. This allows a rogue neighbor (ie. a host that shares a L2 segment with the host) to send packets directly to the container on its container-side exposed port. For instance, if container port 5000 is mapped to host port 6000, a neighbor could send packets directly to the container on its port 5000. Since nat-DOCKER mangles the dest addr, and the nat table forbids DROP rules, this change adds a new rule in the raw-PREROUTING chain to filter ingress connections targeting the container's IP address. This filtering is only done when gw_mode=nat. For the unprotected variant, no filtering is done. Signed-off-by: Albin Kerouanton <[email protected]>
1 parent 8474153 commit 27adcd5

9 files changed

Lines changed: 314 additions & 11 deletions

File tree

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ RUN --mount=type=cache,sharing=locked,id=moby-dev-aptlib,target=/var/lib/apt \
542542
libprotobuf-c1 \
543543
libyajl2 \
544544
net-tools \
545+
netcat-openbsd \
545546
patch \
546547
pigz \
547548
sudo \

integration/internal/network/ops.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,22 @@ func WithIPv6() func(*network.CreateOptions) {
2727
}
2828
}
2929

30+
// WithIPv4Disabled makes sure IPv4 is disabled on the network.
31+
func WithIPv4Disabled() func(*network.CreateOptions) {
32+
return func(n *network.CreateOptions) {
33+
enable := false
34+
n.EnableIPv4 = &enable
35+
}
36+
}
37+
38+
// WithIPv6Disabled makes sure IPv6 is disabled on the network.
39+
func WithIPv6Disabled() func(*network.CreateOptions) {
40+
return func(n *network.CreateOptions) {
41+
enable := false
42+
n.EnableIPv6 = &enable
43+
}
44+
}
45+
3046
// WithInternal enables Internal flag on the create network request
3147
func WithInternal() func(*network.CreateOptions) {
3248
return func(n *network.CreateOptions) {

integration/network/bridge/iptablesdoc/generated/usernet-portmap.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ Note that:
9292
[1]: https://github.com/moby/moby/blob/675c2ac2db93e38bb9c5a6615d4155a969535fd9/libnetwork/drivers/bridge/port_mapping_linux.go#L795
9393
[2]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L252
9494

95-
And the corresponding nat table:
95+
The corresponding nat table:
9696

9797
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
9898
num pkts bytes target prot opt in out source destination
@@ -135,3 +135,27 @@ And the corresponding nat table:
135135

136136

137137
</details>
138+
139+
And the raw table:
140+
141+
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
142+
num pkts bytes target prot opt in out source destination
143+
1 0 0 DROP 6 -- !bridge1 * 0.0.0.0/0 192.0.2.2 tcp dpt:80
144+
145+
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
146+
num pkts bytes target prot opt in out source destination
147+
148+
149+
<details>
150+
<summary>iptables commands</summary>
151+
152+
-P PREROUTING ACCEPT
153+
-P OUTPUT ACCEPT
154+
-A PREROUTING -d 192.0.2.2/32 ! -i bridge1 -p tcp -m tcp --dport 80 -j DROP
155+
156+
157+
</details>
158+
159+
[filterDirectAccess][3] adds a DROP rule to the raw-PREROUTING chain to block direct remote access to the mapped port.
160+
161+
[3]: https://github.com/search?q=repo%3Amoby%2Fmoby%20filterDirectAccess&type=code

integration/network/bridge/iptablesdoc/iptablesdoc_linux_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ const (
188188
iptCmdSFilterDocker4 iptCmdType = "SFilterDocker4"
189189
iptCmdLNat4 iptCmdType = "LNat4"
190190
iptCmdSNat4 iptCmdType = "SNat4"
191+
iptCmdLRaw4 iptCmdType = "LRaw4"
192+
iptCmdSRaw4 iptCmdType = "SRaw4"
191193
)
192194

193195
var iptCmds = map[iptCmdType][]string{
@@ -198,6 +200,8 @@ var iptCmds = map[iptCmdType][]string{
198200
iptCmdSFilterDocker4: {"iptables", "-S", "DOCKER"},
199201
iptCmdLNat4: {"iptables", "-nvL", "--line-numbers", "-t", "nat"},
200202
iptCmdSNat4: {"iptables", "-S", "-t", "nat"},
203+
iptCmdLRaw4: {"iptables", "-nvL", "--line-numbers", "-t", "raw"},
204+
iptCmdSRaw4: {"iptables", "-S", "-t", "raw"},
201205
}
202206

203207
func TestBridgeIptablesDoc(t *testing.T) {

integration/network/bridge/iptablesdoc/templates/usernet-portmap.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Note that:
3737
[1]: https://github.com/moby/moby/blob/675c2ac2db93e38bb9c5a6615d4155a969535fd9/libnetwork/drivers/bridge/port_mapping_linux.go#L795
3838
[2]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L252
3939

40-
And the corresponding nat table:
40+
The corresponding nat table:
4141

4242
{{index . "LNat4"}}
4343

@@ -47,3 +47,18 @@ And the corresponding nat table:
4747
{{index . "SNat4"}}
4848

4949
</details>
50+
51+
And the raw table:
52+
53+
{{index . "LRaw4"}}
54+
55+
<details>
56+
<summary>iptables commands</summary>
57+
58+
{{index . "SRaw4"}}
59+
60+
</details>
61+
62+
[filterDirectAccess][3] adds a DROP rule to the raw-PREROUTING chain to block direct remote access to the mapped port.
63+
64+
[3]: https://github.com/search?q=repo%3Amoby%2Fmoby%20filterDirectAccess&type=code

integration/networking/port_mapping_linux_test.go

Lines changed: 202 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,11 @@ func TestDirectRoutingOpenPorts(t *testing.T) {
728728
"nat-unprotected": pingSuccess,
729729
"routed": pingSuccess,
730730
}
731+
expMappedPortHTTP := map[string]string{
732+
"nat": httpFail,
733+
"nat-unprotected": httpSuccess,
734+
"routed": httpSuccess,
735+
}
731736
expUnmappedPortHTTP := map[string]string{
732737
"nat": httpFail,
733738
"nat-unprotected": httpSuccess,
@@ -769,13 +774,13 @@ func TestDirectRoutingOpenPorts(t *testing.T) {
769774
testPing(t, "ping6", networks[gwMode].ipv6, expPingExit[gwMode])
770775
})
771776
t.Run(gwMode+"/v4/http/80", func(t *testing.T) {
772-
testHttp(t, networks[gwMode].ipv4, "80", httpSuccess)
777+
testHttp(t, networks[gwMode].ipv4, "80", expMappedPortHTTP[gwMode])
773778
})
774779
t.Run(gwMode+"/v4/http/81", func(t *testing.T) {
775780
testHttp(t, networks[gwMode].ipv4, "81", expUnmappedPortHTTP[gwMode])
776781
})
777782
t.Run(gwMode+"/v6/http/80", func(t *testing.T) {
778-
testHttp(t, networks[gwMode].ipv6, "80", httpSuccess)
783+
testHttp(t, networks[gwMode].ipv6, "80", expMappedPortHTTP[gwMode])
779784
})
780785
t.Run(gwMode+"/v6/http/81", func(t *testing.T) {
781786
testHttp(t, networks[gwMode].ipv6, "81", expUnmappedPortHTTP[gwMode])
@@ -874,6 +879,201 @@ func TestAccessPublishedPortFromAnotherNetwork(t *testing.T) {
874879
}
875880
}
876881

882+
// TestDirectRemoteAccessOnExposedPort checks that remote hosts can't directly
883+
// reach a container on one of its exposed port. That is, if container has the
884+
// IP address 172.17.24.2, and its port 443 is exposed on the host, no remote
885+
// host should be able to reach 172.17.24.2:443 directly.
886+
//
887+
// Regression test for https://github.com/moby/moby/issues/45610.
888+
func TestDirectRemoteAccessOnExposedPort(t *testing.T) {
889+
// This test checks iptables rules that live in dockerd's netns. In the case
890+
// of rootlesskit, this is not the same netns as the host, so they don't
891+
// have any effect.
892+
// TODO(aker): we need to figure out what we want to do for rootlesskit.
893+
// skip.If(t, testEnv.IsRootless, "rootlesskit has its own netns")
894+
895+
ctx := setupTest(t)
896+
897+
const (
898+
hostIPv4 = "192.168.120.2"
899+
hostIPv6 = "fdbc:277b:d40b::2"
900+
)
901+
902+
l3 := networking.NewL3Segment(t, "test-direct-remote-access",
903+
netip.MustParsePrefix("192.168.120.1/24"),
904+
netip.MustParsePrefix("fdbc:277b:d40b::1/64"))
905+
defer l3.Destroy(t)
906+
// "docker" is the host where dockerd is running.
907+
l3.AddHost(t, "docker", networking.CurrentNetns, "test-eth",
908+
netip.MustParsePrefix(hostIPv4+"/24"),
909+
netip.MustParsePrefix(hostIPv6+"/64"))
910+
l3.AddHost(t, "attacker", "test-direct-remote-access-attacker", "eth0",
911+
netip.MustParsePrefix("192.168.120.3/24"),
912+
netip.MustParsePrefix("fdbc:277b:d40b::3/64"))
913+
914+
d := daemon.New(t)
915+
d.StartWithBusybox(ctx, t)
916+
defer d.Stop(t)
917+
918+
c := d.NewClientT(t)
919+
defer c.Close()
920+
for _, tc := range []struct {
921+
name string
922+
gwMode string
923+
gwAddr netip.Prefix
924+
ipv4Disabled bool
925+
ipv6Disabled bool
926+
}{
927+
{
928+
name: "NAT/IPv4",
929+
gwMode: "nat",
930+
gwAddr: netip.MustParsePrefix("172.24.10.1/24"),
931+
},
932+
{
933+
name: "NAT/IPv6",
934+
gwMode: "nat",
935+
gwAddr: netip.MustParsePrefix("fda9:a651:db6d::1/64"),
936+
},
937+
{
938+
name: "NAT unprotected/IPv4",
939+
gwMode: "nat-unprotected",
940+
gwAddr: netip.MustParsePrefix("172.24.10.1/24"),
941+
},
942+
{
943+
name: "NAT unprotected/IPv6",
944+
gwMode: "nat-unprotected",
945+
gwAddr: netip.MustParsePrefix("fda9:a651:db6d::1/64"),
946+
},
947+
{
948+
name: "Proxy/IPv4",
949+
gwMode: "nat",
950+
gwAddr: netip.MustParsePrefix("fd05:b021:403f::1/64"),
951+
ipv4Disabled: true,
952+
},
953+
{
954+
name: "Proxy/IPv6",
955+
gwMode: "nat",
956+
gwAddr: netip.MustParsePrefix("172.24.11.1/24"),
957+
ipv6Disabled: true,
958+
},
959+
{
960+
name: "Routed/IPv4",
961+
gwMode: "routed",
962+
gwAddr: netip.MustParsePrefix("172.24.12.1/24"),
963+
},
964+
{
965+
name: "Routed/IPv6",
966+
gwMode: "routed",
967+
gwAddr: netip.MustParsePrefix("fd82:5787:b217::1/64"),
968+
},
969+
} {
970+
t.Run(tc.name, func(t *testing.T) {
971+
skip.If(t, tc.gwMode == "routed" && testEnv.IsRootless(), "rootlesskit doesn't support routed mode as it's running in a separate netns")
972+
973+
testutil.StartSpan(ctx, t)
974+
975+
nwOpts := []func(*networktypes.CreateOptions){
976+
network.WithIPAM(tc.gwAddr.Masked().String(), tc.gwAddr.Addr().String()),
977+
network.WithOption(bridge.IPv4GatewayMode, tc.gwMode),
978+
network.WithOption(bridge.IPv6GatewayMode, tc.gwMode),
979+
}
980+
981+
if tc.ipv4Disabled {
982+
nwOpts = append(nwOpts, network.WithIPv4Disabled())
983+
}
984+
if tc.ipv6Disabled {
985+
nwOpts = append(nwOpts, network.WithIPv6Disabled())
986+
}
987+
if tc.gwAddr.Addr().Is6() {
988+
nwOpts = append(nwOpts, network.WithIPv6())
989+
}
990+
991+
const bridgeName = "brattacked"
992+
network.CreateNoError(ctx, t, c, bridgeName, append(nwOpts,
993+
network.WithDriver("bridge"),
994+
network.WithOption(bridge.BridgeName, bridgeName),
995+
)...)
996+
defer network.RemoveNoError(ctx, t, c, bridgeName)
997+
998+
const hostPort = "5000"
999+
hostIP := hostIPv4
1000+
if tc.gwAddr.Addr().Is6() {
1001+
hostIP = hostIPv6
1002+
}
1003+
1004+
ctrIP := tc.gwAddr.Addr().Next()
1005+
1006+
test := func(t *testing.T, host networking.Host, daddr, payload string) bool {
1007+
serverID := container.Run(ctx, t, c,
1008+
container.WithName(sanitizeCtrName(t.Name()+"-server")),
1009+
container.WithCmd("nc", "-lup", "5000"),
1010+
container.WithExposedPorts("5000/udp"),
1011+
container.WithPortMap(nat.PortMap{"5000/udp": {{HostPort: hostPort}}}),
1012+
container.WithNetworkMode(bridgeName),
1013+
container.WithEndpointSettings(bridgeName, &networktypes.EndpointSettings{
1014+
IPAddress: ctrIP.String(),
1015+
IPPrefixLen: ctrIP.BitLen(),
1016+
}))
1017+
defer c.ContainerRemove(ctx, serverID, containertypes.RemoveOptions{Force: true})
1018+
1019+
return sendPayloadFromHost(t, host, daddr, hostPort, payload, func() bool {
1020+
logs := getContainerStdout(t, ctx, c, serverID)
1021+
return strings.Contains(logs, payload)
1022+
})
1023+
}
1024+
1025+
if tc.gwMode != "routed" {
1026+
// Send a payload to the port mapped on the host -- this should work
1027+
res := test(t, l3.Hosts["attacker"], hostIP, "foobar")
1028+
assert.Assert(t, res, "Remote host should have access to port published on the host, but no payload was received by the container")
1029+
}
1030+
1031+
// Now send a payload directly to the container. With gw_mode=routed,
1032+
// this should work. With gw_mode=nat, this should fail.
1033+
expDirectAccess := tc.gwMode == "routed" || (tc.gwMode == "nat-unprotected" && !testEnv.IsRootless())
1034+
1035+
l3.Hosts["attacker"].Run(t, "ip", "route", "add", tc.gwAddr.Masked().String(), "via", hostIP, "dev", "eth0")
1036+
defer l3.Hosts["attacker"].Run(t, "ip", "route", "delete", tc.gwAddr.Masked().String(), "via", hostIP, "dev", "eth0")
1037+
1038+
res := test(t, l3.Hosts["attacker"], ctrIP.String(), "bar baz")
1039+
if expDirectAccess {
1040+
assert.Assert(t, res, "Remote host should have direct access to the published port, but no payload was received by the container")
1041+
} else {
1042+
assert.Assert(t, !res, "Remote host should not have direct access to the published port, but payload was received by the container")
1043+
}
1044+
})
1045+
}
1046+
}
1047+
1048+
// Send a payload to daddr:dport a few times from the 'host' netns. Stop
1049+
// sending payloads when 'check' returns true. Return the result of 'check'.
1050+
//
1051+
// UDP is preferred here as it's a one-way, connectionless protocol. With TCP
1052+
// the three-way handshake has to be completed before sending a payload, but
1053+
// since some test cases try to spoof the loopback address, the 'attacker host'
1054+
// will drop the SYN-ACK by default (because the source addr will be considered
1055+
// invalid / non-routable). This would require further tuning to make it work.
1056+
// With UDP, this problem doesn't exist - the payload can be sent straight away.
1057+
// But UDP is inherently unreliable, so we need to send the payload multiple
1058+
// times.
1059+
func sendPayloadFromHost(t *testing.T, host networking.Host, daddr, dport, payload string, check func() bool) bool {
1060+
var res bool
1061+
host.Do(t, func() {
1062+
for i := 0; i < 10; i++ {
1063+
t.Logf("Sending probe #%d to %s:%s from host %s", i, daddr, dport, host.Name)
1064+
icmd.RunCommand("/bin/sh", "-c", fmt.Sprintf("echo '%s' | nc -w1 -u %s %s", payload, daddr, dport)).Assert(t, icmd.Success)
1065+
1066+
res = check()
1067+
if res {
1068+
return
1069+
}
1070+
1071+
time.Sleep(50 * time.Millisecond)
1072+
}
1073+
})
1074+
return res
1075+
}
1076+
8771077
func getContainerStdout(t *testing.T, ctx context.Context, c *client.Client, ctrID string) string {
8781078
logReader, err := c.ContainerLogs(ctx, ctrID, containertypes.LogsOptions{
8791079
ShowStdout: true,

internal/testutils/networking/l3_segment_linux.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func NewL3Segment(t *testing.T, nsName string, addrs ...netip.Prefix) *L3Segment
4545
Hosts: map[string]Host{},
4646
}
4747

48-
l3.bridge = newHost(t, nsName, "br0")
48+
l3.bridge = newHost(t, "bridge", nsName, "br0")
4949
defer func() {
5050
if t.Failed() {
5151
l3.Destroy(t)
@@ -70,7 +70,7 @@ func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addr
7070
t.Fatalf("hostname too long")
7171
}
7272

73-
host := newHost(t, nsName, ifname)
73+
host := newHost(t, hostname, nsName, ifname)
7474
l3.Hosts[hostname] = host
7575

7676
host.MustRun(t, "ip", "link", "add", hostname, "netns", l3.bridge.ns, "type", "veth", "peer", "name", host.Iface)
@@ -83,18 +83,20 @@ func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addr
8383
}
8484

8585
func (l3 *L3Segment) Destroy(t *testing.T) {
86+
t.Helper()
8687
for _, host := range l3.Hosts {
8788
host.Destroy(t)
8889
}
8990
l3.bridge.Destroy(t)
9091
}
9192

9293
type Host struct {
94+
Name string
9395
Iface string // Iface is the interface name in the host network namespace.
9496
ns string // ns is the network namespace name.
9597
}
9698

97-
func newHost(t *testing.T, nsName, ifname string) Host {
99+
func newHost(t *testing.T, hostname, nsName, ifname string) Host {
98100
t.Helper()
99101

100102
if len(ifname) >= syscall.IFNAMSIZ {
@@ -109,6 +111,7 @@ func newHost(t *testing.T, nsName, ifname string) Host {
109111
}
110112

111113
return Host{
114+
Name: hostname,
112115
Iface: ifname,
113116
ns: nsName,
114117
}

0 commit comments

Comments
 (0)