|
| 1 | +From f6f8b309322869c1e2cb7d7c847b1eb8fb67ed80 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Stefan Agner < [email protected]> |
| 3 | +Date: Tue, 10 Mar 2026 23:29:13 +0100 |
| 4 | +Subject: [PATCH] bridge: protect bridge subnet from direct external access in |
| 5 | + raw PREROUTING |
| 6 | + |
| 7 | +Add a per-network rule to the raw PREROUTING chain dropping packets |
| 8 | +destined to any address in the bridge subnet from interfaces other than |
| 9 | +the bridge itself, loopback, or configured trusted host interfaces. |
| 10 | + |
| 11 | +This covers both containers on unpublished ports and the bridge's own |
| 12 | +gateway address (the host-side bridge IP, e.g. 172.30.32.1), which is |
| 13 | +otherwise reachable from external hosts that have a route to the bridge |
| 14 | +subnet. A single network-level subnet rule is simpler than per-container |
| 15 | +rules and ensures the gateway address is covered by the same policy. |
| 16 | + |
| 17 | +For iptables, the following rules are added per bridge (plus one ACCEPT |
| 18 | +per trusted interface): |
| 19 | + -t raw -A PREROUTING -d <subnet> -i lo -j ACCEPT |
| 20 | + -t raw -A PREROUTING -d <subnet> ! -i <bridge> -j DROP |
| 21 | + |
| 22 | +For nftables, a single compound rule is used: |
| 23 | + <family> daddr <subnet> iifname != { <bridge>, lo, ... } drop |
| 24 | + |
| 25 | +Loopback is always permitted: the gateway IP is a valid host address |
| 26 | +reachable from local processes via lo. The rules are not added for |
| 27 | +gateway modes "routed" or "nat-unprotected", nor when |
| 28 | +--allow-direct-routing is set, consistent with the intent of those |
| 29 | +options. DOCKER_INSECURE_NO_IPTABLES_RAW=1 disables the iptables rules. |
| 30 | + |
| 31 | +Co-Authored-By: Claude Sonnet 4.6 < [email protected]> |
| 32 | +Signed-off-by: Stefan Agner < [email protected]> |
| 33 | +--- |
| 34 | + .../bridge/internal/iptabler/cleaner.go | 14 +--- |
| 35 | + .../bridge/internal/iptabler/endpoint.go | 68 +------------------ |
| 36 | + .../bridge/internal/iptabler/network.go | 48 +++++++++++++ |
| 37 | + .../bridge/internal/nftabler/endpoint.go | 64 +---------------- |
| 38 | + .../bridge/internal/nftabler/network.go | 18 +++++ |
| 39 | + 5 files changed, 74 insertions(+), 138 deletions(-) |
| 40 | + |
| 41 | +diff --git a/daemon/libnetwork/drivers/bridge/internal/iptabler/cleaner.go b/daemon/libnetwork/drivers/bridge/internal/iptabler/cleaner.go |
| 42 | +index 1de527b08b..9f702a5ec7 100644 |
| 43 | +--- a/daemon/libnetwork/drivers/bridge/internal/iptabler/cleaner.go |
| 44 | ++++ b/daemon/libnetwork/drivers/bridge/internal/iptabler/cleaner.go |
| 45 | +@@ -81,17 +81,9 @@ func (ic iptablesCleaner) DelNetwork(ctx context.Context, nc firewaller.NetworkC |
| 46 | + } |
| 47 | + } |
| 48 | + |
| 49 | +-func (ic iptablesCleaner) DelEndpoint(ctx context.Context, nc firewaller.NetworkConfig, epIPv4, epIPv6 netip.Addr) { |
| 50 | +- n := network{ |
| 51 | +- config: nc, |
| 52 | +- ipt: &Iptabler{config: ic.config}, |
| 53 | +- } |
| 54 | +- if n.ipt.config.IPv4 && epIPv4.IsValid() { |
| 55 | +- _ = n.filterDirectAccess(ctx, iptables.IPv4, n.config.Config4, epIPv4, false) |
| 56 | +- } |
| 57 | +- if n.ipt.config.IPv6 && epIPv6.IsValid() { |
| 58 | +- _ = n.filterDirectAccess(ctx, iptables.IPv6, n.config.Config6, epIPv6, false) |
| 59 | +- } |
| 60 | ++func (ic iptablesCleaner) DelEndpoint(_ context.Context, _ firewaller.NetworkConfig, _, _ netip.Addr) { |
| 61 | ++ // Direct access filtering is now done at the network (subnet) level, not |
| 62 | ++ // per-endpoint. There are no per-endpoint raw PREROUTING rules to clean up. |
| 63 | + } |
| 64 | + |
| 65 | + func (ic iptablesCleaner) DelPorts(ctx context.Context, nc firewaller.NetworkConfig, pbs []types.PortBinding) { |
| 66 | +diff --git a/daemon/libnetwork/drivers/bridge/internal/iptabler/endpoint.go b/daemon/libnetwork/drivers/bridge/internal/iptabler/endpoint.go |
| 67 | +index 69a5bbc4d2..af8ff3c83c 100644 |
| 68 | +--- a/daemon/libnetwork/drivers/bridge/internal/iptabler/endpoint.go |
| 69 | ++++ b/daemon/libnetwork/drivers/bridge/internal/iptabler/endpoint.go |
| 70 | +@@ -5,74 +5,12 @@ package iptabler |
| 71 | + import ( |
| 72 | + "context" |
| 73 | + "net/netip" |
| 74 | +- |
| 75 | +- "github.com/moby/moby/v2/daemon/libnetwork/drivers/bridge/internal/firewaller" |
| 76 | +- "github.com/moby/moby/v2/daemon/libnetwork/iptables" |
| 77 | + ) |
| 78 | + |
| 79 | +-func (n *network) AddEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr) error { |
| 80 | +- return n.modEndpoint(ctx, epIPv4, epIPv6, true) |
| 81 | +-} |
| 82 | +- |
| 83 | +-func (n *network) DelEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr) error { |
| 84 | +- return n.modEndpoint(ctx, epIPv4, epIPv6, false) |
| 85 | +-} |
| 86 | +- |
| 87 | +-func (n *network) modEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr, enable bool) error { |
| 88 | +- if n.ipt.config.IPv4 && epIPv4.IsValid() { |
| 89 | +- if err := n.filterDirectAccess(ctx, iptables.IPv4, n.config.Config4, epIPv4, enable); err != nil { |
| 90 | +- return err |
| 91 | +- } |
| 92 | +- } |
| 93 | +- if n.ipt.config.IPv6 && epIPv6.IsValid() { |
| 94 | +- if err := n.filterDirectAccess(ctx, iptables.IPv6, n.config.Config6, epIPv6, enable); err != nil { |
| 95 | +- return err |
| 96 | +- } |
| 97 | +- } |
| 98 | ++func (n *network) AddEndpoint(_ context.Context, _, _ netip.Addr) error { |
| 99 | + return nil |
| 100 | + } |
| 101 | + |
| 102 | +-// filterDirectAccess drops packets addressed directly to the container's IP address, |
| 103 | +-// when direct routing is not permitted by network configuration. |
| 104 | +-// |
| 105 | +-// It is a no-op if: |
| 106 | +-// - the network is internal |
| 107 | +-// - gateway mode is "nat-unprotected" or "routed". |
| 108 | +-// - direct routing is enabled at the daemon level. |
| 109 | +-// - "raw" rules are disabled (possibly because the host doesn't have the necessary |
| 110 | +-// kernel support). |
| 111 | +-// |
| 112 | +-// Packets originating on the bridge's own interface and addressed directly to the |
| 113 | +-// container are allowed - the host always has direct access to its own containers |
| 114 | +-// (it doesn't need to use the port mapped to its own addresses, although it can). |
| 115 | +-// |
| 116 | +-// "Trusted interfaces" are treated in the same way as the bridge itself. |
| 117 | +-func (n *network) filterDirectAccess(ctx context.Context, ipv iptables.IPVersion, config firewaller.NetworkConfigFam, epIP netip.Addr, enable bool) error { |
| 118 | +- if n.config.Internal || config.Unprotected || config.Routed { |
| 119 | +- return nil |
| 120 | +- } |
| 121 | +- // For config that may change between daemon restarts, make sure rules are |
| 122 | +- // removed - if the container was left running when the daemon stopped, and |
| 123 | +- // direct routing has since been disabled, the rules need to be deleted when |
| 124 | +- // cleanup happens on restart. This also means a change in config over a |
| 125 | +- // live-restore restart will take effect. |
| 126 | +- if n.ipt.config.AllowDirectRouting || rawRulesDisabled(ctx) { |
| 127 | +- enable = false |
| 128 | +- } |
| 129 | +- for _, ifName := range n.config.TrustedHostInterfaces { |
| 130 | +- accept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{ |
| 131 | +- "-d", epIP.String(), |
| 132 | +- "-i", ifName, |
| 133 | +- "-j", "ACCEPT", |
| 134 | +- }} |
| 135 | +- if err := appendOrDelChainRule(accept, "DIRECT ACCESS FILTERING - ACCEPT", enable); err != nil { |
| 136 | +- return err |
| 137 | +- } |
| 138 | +- } |
| 139 | +- accept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{ |
| 140 | +- "-d", epIP.String(), |
| 141 | +- "!", "-i", n.config.IfName, |
| 142 | +- "-j", "DROP", |
| 143 | +- }} |
| 144 | +- return appendOrDelChainRule(accept, "DIRECT ACCESS FILTERING - DROP", enable) |
| 145 | ++func (n *network) DelEndpoint(_ context.Context, _, _ netip.Addr) error { |
| 146 | ++ return nil |
| 147 | + } |
| 148 | +diff --git a/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go b/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go |
| 149 | +index 5eef4d6690..7ff03f4249 100644 |
| 150 | +--- a/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go |
| 151 | ++++ b/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go |
| 152 | +@@ -97,6 +97,13 @@ func (n *network) setupIPTables(ctx context.Context, ipVersion iptables.IPVersio |
| 153 | + return n.setupNonInternalNetworkRules(ctx, ipVersion, config, false) |
| 154 | + }) |
| 155 | + |
| 156 | ++ if err := n.setSubnetProtection(ctx, ipVersion, config, true); err != nil { |
| 157 | ++ return fmt.Errorf("Failed to setup subnet protection: %w", err) |
| 158 | ++ } |
| 159 | ++ n.registerCleanFunc(func() error { |
| 160 | ++ return n.setSubnetProtection(ctx, ipVersion, config, false) |
| 161 | ++ }) |
| 162 | ++ |
| 163 | + if err := deleteLegacyFilterRules(ipVersion, n.config.IfName); err != nil { |
| 164 | + return fmt.Errorf("failed to delete legacy rules in filter-FORWARD: %w", err) |
| 165 | + } |
| 166 | +@@ -360,6 +367,47 @@ func (n *network) setupNonInternalNetworkRules(ctx context.Context, ipVer iptabl |
| 167 | + return nil |
| 168 | + } |
| 169 | + |
| 170 | +// setSubnetProtection adds (enable=true) or removes (enable=false) the raw |
| 171 | +// PREROUTING rules that drop packets addressed to the bridge subnet unless they |
| 172 | +// arrive on the bridge, loopback, or a trusted host interface. This stops external |
| 173 | +// hosts with a route to the subnet from reaching unpublished container ports, or |
| 174 | +// services on the host bound to the gateway address. |
| 175 | +// |
| 176 | +// It is a no-op if gateway mode is "nat-unprotected" or "routed" (direct access is |
| 177 | +// intentional), or if config.Prefix is not valid. If direct routing is enabled at |
| 178 | +// the daemon level, or "raw" rules are disabled, the rules are always removed. |
| 179 | ++func (n *network) setSubnetProtection(ctx context.Context, ipv iptables.IPVersion, config firewaller.NetworkConfigFam, enable bool) error { |
| 180 | ++ if config.Unprotected || config.Routed || !config.Prefix.IsValid() { |
| 181 | ++ return nil |
| 182 | ++ } |
| 183 | ++ if n.ipt.config.AllowDirectRouting || rawRulesDisabled(ctx) { |
| 184 | ++ enable = false |
| 185 | ++ } |
| 186 | ++ subnet := config.Prefix.String() |
| 187 | ++ // Accept loopback traffic to the subnet. This is needed for host-local access |
| 188 | ++ // to the gateway IP, which is assigned to the bridge and reachable via lo. |
| 189 | ++ loAccept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{ |
| 190 | ++ "-d", subnet, "-i", "lo", "-j", "ACCEPT", |
| 191 | ++ }} |
| 192 | ++ if err := appendOrDelChainRule(loAccept, "SUBNET PROTECTION - ACCEPT LO", enable); err != nil { |
| 193 | ++ return err |
| 194 | ++ } |
| 195 | ++ // Accept traffic from trusted interfaces (e.g. flannel.1 in VXLAN setups). |
| 196 | ++ for _, ifName := range n.config.TrustedHostInterfaces { |
| 197 | ++ accept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{ |
| 198 | ++ "-d", subnet, "-i", ifName, "-j", "ACCEPT", |
| 199 | ++ }} |
| 200 | ++ if err := appendOrDelChainRule(accept, "SUBNET PROTECTION - ACCEPT TRUSTED", enable); err != nil { |
| 201 | ++ return err |
| 202 | ++ } |
| 203 | ++ } |
| 204 | ++ // Drop traffic from any other non-bridge interface to the subnet. |
| 205 | ++ extDrop := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{ |
| 206 | ++ "-d", subnet, "!", "-i", n.config.IfName, "-j", "DROP", |
| 207 | ++ }} |
| 208 | ++ return appendOrDelChainRule(extDrop, "SUBNET PROTECTION - DROP", enable) |
| 209 | ++} |
| 210 | ++ |
| 211 | + func setIcc(ctx context.Context, version iptables.IPVersion, bridgeIface string, iccEnable, internal, insert bool) error { |
| 212 | + args := []string{"-i", bridgeIface, "-o", bridgeIface, "-j"} |
| 213 | + acceptRule := iptables.Rule{IPVer: version, Table: iptables.Filter, Chain: DockerForwardChain, Args: append(args, "ACCEPT")} |
| 214 | +diff --git a/daemon/libnetwork/drivers/bridge/internal/nftabler/endpoint.go b/daemon/libnetwork/drivers/bridge/internal/nftabler/endpoint.go |
| 215 | +index cfb45186d2..02b8dbcda1 100644 |
| 216 | +--- a/daemon/libnetwork/drivers/bridge/internal/nftabler/endpoint.go |
| 217 | ++++ b/daemon/libnetwork/drivers/bridge/internal/nftabler/endpoint.go |
| 218 | +@@ -4,76 +4,16 @@ package nftabler |
| 219 | + |
| 220 | + import ( |
| 221 | + "context" |
| 222 | +- "fmt" |
| 223 | + "net/netip" |
| 224 | +- "strings" |
| 225 | +- |
| 226 | +- "github.com/moby/moby/v2/daemon/libnetwork/drivers/bridge/internal/firewaller" |
| 227 | +- "github.com/moby/moby/v2/daemon/libnetwork/internal/nftables" |
| 228 | + ) |
| 229 | + |
| 230 | + func (n *network) AddEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr) error { |
| 231 | + if n.fw.cleaner != nil { |
| 232 | + n.fw.cleaner.DelEndpoint(ctx, n.config, epIPv4, epIPv6) |
| 233 | + } |
| 234 | +- return n.modEndpoint(ctx, epIPv4, epIPv6, true) |
| 235 | +-} |
| 236 | +- |
| 237 | +-func (n *network) DelEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr) error { |
| 238 | +- return n.modEndpoint(ctx, epIPv4, epIPv6, false) |
| 239 | +-} |
| 240 | +- |
| 241 | +-func (n *network) modEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr, enable bool) error { |
| 242 | +- if n.fw.config.IPv4 && epIPv4.IsValid() { |
| 243 | +- tm := nftables.Modifier{} |
| 244 | +- updater := tm.Create |
| 245 | +- if !enable { |
| 246 | +- updater = tm.Delete |
| 247 | +- } |
| 248 | +- n.filterDirectAccess(updater, nftables.IPv4, n.config.Config4, epIPv4) |
| 249 | +- if err := n.fw.table4.Apply(ctx, tm); err != nil { |
| 250 | +- return fmt.Errorf("adding rules for bridge %s: %w", n.config.IfName, err) |
| 251 | +- } |
| 252 | +- } |
| 253 | +- if n.fw.config.IPv6 && epIPv6.IsValid() { |
| 254 | +- tm := nftables.Modifier{} |
| 255 | +- updater := tm.Create |
| 256 | +- if !enable { |
| 257 | +- updater = tm.Delete |
| 258 | +- } |
| 259 | +- n.filterDirectAccess(updater, nftables.IPv6, n.config.Config6, epIPv6) |
| 260 | +- if err := n.fw.table6.Apply(ctx, tm); err != nil { |
| 261 | +- return fmt.Errorf("adding rules for bridge %s: %w", n.config.IfName, err) |
| 262 | +- } |
| 263 | +- } |
| 264 | + return nil |
| 265 | + } |
| 266 | + |
| 267 | +-// filterDirectAccess drops packets addressed directly to the container's IP address, |
| 268 | +-// when direct routing is not permitted by network configuration. |
| 269 | +-// |
| 270 | +-// It is a no-op if: |
| 271 | +-// - gateway mode is "nat-unprotected" or "routed". |
| 272 | +-// - direct routing is enabled at the daemon level. |
| 273 | +-// - "raw" rules are disabled (possibly because the host doesn't have the necessary |
| 274 | +-// kernel support). |
| 275 | +-// |
| 276 | +-// Packets originating on the bridge's own interface and addressed directly to the |
| 277 | +-// container are allowed - the host always has direct access to its own containers. |
| 278 | +-// (It doesn't need to use the port mapped to its own addresses, although it can.) |
| 279 | +-// |
| 280 | +-// "Trusted interfaces" are treated in the same way as the bridge itself. |
| 281 | +-func (n *network) filterDirectAccess(updater func(nftables.Obj), fam nftables.Family, conf firewaller.NetworkConfigFam, epIP netip.Addr) { |
| 282 | +- if n.config.Internal || conf.Unprotected || conf.Routed || n.fw.config.AllowDirectRouting { |
| 283 | +- return |
| 284 | +- } |
| 285 | +- ifNames := strings.Join(n.config.TrustedHostInterfaces, ", ") |
| 286 | +- updater(nftables.Rule{ |
| 287 | +- Chain: rawPreroutingChain, |
| 288 | +- Group: rawPreroutingPortsRuleGroup, |
| 289 | +- Rule: []string{ |
| 290 | +- string(fam), "daddr", epIP.String(), |
| 291 | +- "iifname != {", n.config.IfName, ",", ifNames, `} counter drop comment "DROP DIRECT ACCESS"`, |
| 292 | +- }, |
| 293 | +- }) |
| 294 | ++func (n *network) DelEndpoint(_ context.Context, _, _ netip.Addr) error { |
| 295 | ++ return nil |
| 296 | + } |
| 297 | +diff --git a/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go b/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go |
| 298 | +index dc77c33c1a..9a10d491c2 100644 |
| 299 | +--- a/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go |
| 300 | ++++ b/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go |
| 301 | +@@ -174,6 +174,24 @@ func (n *network) configure(ctx context.Context, table nftables.Table, conf fire |
| 302 | + }) |
| 303 | + } |
| 304 | + |
| 305 | + // Drop packets destined to the bridge subnet that arrive on any interface |
| 306 | + // other than the bridge, loopback, or a trusted host interface. Skipped for |
| 307 | + // gateway modes "routed" and "nat-unprotected", and when direct routing is |
| 308 | + // allowed, because direct access to the subnet is intentional in those cases. |
| 309 | + if conf.Prefix.IsValid() && !conf.Unprotected && !conf.Routed && !n.fw.config.AllowDirectRouting { |
| 310 | + family := string(table.Family()) |
| 311 | + ifNames := strings.Join(append([]string{n.config.IfName, "lo"}, n.config.TrustedHostInterfaces...), ", ") |
| 312 | + tm.Create(nftables.Rule{ |
| 313 | + Chain: rawPreroutingChain, |
| 314 | + Group: initialRuleGroup, |
| 315 | + Rule: []string{ |
| 316 | + family, "daddr", conf.Prefix.String(), |
| 317 | + "iifname != {", ifNames, `}`, |
| 318 | + `counter drop comment "SUBNET DROP EXTERNAL"`, |
| 319 | + }, |
| 320 | + }) |
| 321 | + } |
| 322 | ++ |
| 323 | + // ICMP |
| 324 | + if conf.Routed { |
| 325 | + rule := "ip protocol icmp" |
| 326 | +-- |
| 327 | +2.53.0 |
| 328 | + |
0 commit comments