Skip to content

Commit fadc29b

Browse files
committed
libnetwork: send neighbor advertisements on restore
When the Docker daemon restarts with live-restore enabled, containers retain their network namespaces but neighboring hosts may have stale ARP/neighbor cache entries. This causes IPv6 connectivity issues because unlike IPv4, where gratuitous ARP is sent on interface setup, IPv6 relies on Neighbor Discovery Protocol which requires explicit Neighbor Advertisement messages to update caches.

This change adds unsolicited ARP (for IPv4) and Neighbor Advertisement (for IPv6) messages when restoring interfaces after a daemon restart, mirroring the behavior that already exists in AddInterface for new containers.

The fix also handles network drivers (such as SR-IOV and macvlan) that don't store the MAC address in the endpoint configuration by fetching it from the actual link when needed.

Signed-off-by: Paul Saab <[email protected]>
1 parent 3f04413 commit fadc29b

6 files changed

Lines changed: 180 additions & 14 deletions

File tree

daemon/libnetwork/osl/interface_linux.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,17 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
665665
defer span.End()
666666

667667
mac := i.MacAddress()
668+
// If MAC is not stored in the interface struct, get it from the actual link.
669+
// This can happen with some network drivers (e.g., SR-IOV, macvlan) that don't
670+
// store the MAC in the endpoint configuration.
671+
if len(mac) == 0 {
672+
link, err := nlh.LinkByIndex(ifIndex)
673+
if err != nil {
674+
log.G(ctx).WithFields(log.Fields{"error": err, "ifi": ifIndex}).Warn("Failed to lookup link by index to determine MAC address; treating as no MAC to advertise")
675+
} else if hw := link.Attrs().HardwareAddr; len(hw) > 0 {
676+
mac = hw
677+
}
678+
}
668679
address4 := i.Address()
669680
address6 := i.AddressIPv6()
670681
ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{
@@ -681,7 +692,7 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
681692
log.G(ctx).Debug("No IP addresses to advertise")
682693
return nil
683694
}
684-
if mac == nil {
695+
if len(mac) == 0 {
685696
// Nothing to do - for example, a layer-3 ipvlan.
686697
log.G(ctx).Debug("No MAC address to advertise")
687698
return nil
@@ -691,7 +702,7 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
691702
return nil
692703
}
693704

694-
arpSender, naSender := n.prepAdvertiseAddrs(ctx, i, ifIndex)
705+
arpSender, naSender := n.prepAdvertiseAddrs(ctx, i, ifIndex, mac)
695706
if arpSender == nil && naSender == nil {
696707
return nil
697708
}
@@ -740,9 +751,13 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
740751
return errors.Join(errs...)
741752
}
742753

743-
// Send an initial message. If it fails, skip the resends.
754+
// Send an initial message. If it fails, log a warning and skip the resends, but
755+
// don't fail the caller (container creation or restore) - unsolicited ARP/NA is
756+
// an optimization, neighbors will still resolve addresses via normal ARP/NDP
757+
// solicitation. This can happen with L3 ipvlan which doesn't support multicast.
744758
if err := send(ctx); err != nil {
745-
return err
759+
log.G(ctx).WithError(err).Warn("Failed to send initial neighbor advertisement")
760+
return nil
746761
}
747762
if i.advertiseAddrNMsgs == 1 {
748763
return nil
@@ -775,20 +790,20 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
775790
return nil
776791
}
777792

778-
func (n *Namespace) prepAdvertiseAddrs(ctx context.Context, i *Interface, ifIndex int) (*l2disco.UnsolARP, *l2disco.UnsolNA) {
793+
func (n *Namespace) prepAdvertiseAddrs(ctx context.Context, i *Interface, ifIndex int, mac net.HardwareAddr) (*l2disco.UnsolARP, *l2disco.UnsolNA) {
779794
var ua *l2disco.UnsolARP
780795
var un *l2disco.UnsolNA
781796
if err := n.InvokeFunc(func() {
782797
if address4 := i.Address(); address4 != nil {
783798
var err error
784-
ua, err = l2disco.NewUnsolARP(ctx, address4.IP, i.MacAddress(), ifIndex)
799+
ua, err = l2disco.NewUnsolARP(ctx, address4.IP, mac, ifIndex)
785800
if err != nil {
786801
log.G(ctx).WithError(err).Warn("Failed to prepare unsolicited ARP")
787802
}
788803
}
789804
if address6 := i.AddressIPv6(); address6 != nil {
790805
var err error
791-
un, err = l2disco.NewUnsolNA(ctx, address6.IP, i.MacAddress(), ifIndex)
806+
un, err = l2disco.NewUnsolNA(ctx, address6.IP, mac, ifIndex)
792807
if err != nil {
793808
log.G(ctx).WithError(err).Warn("Failed to prepare unsolicited NA")
794809
}

daemon/libnetwork/osl/namespace_linux.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,14 @@ func (n *Namespace) Destroy() error {
397397
return nil
398398
}
399399

400-
// RestoreInterfaces restores the network namespace's interfaces.
401-
func (n *Namespace) RestoreInterfaces(interfaces map[Iface][]IfaceOption) error {
400+
// RestoreInterfaces restores the network namespace's interfaces and sends
401+
// unsolicited ARP/NA messages to update neighbor caches.
402+
func (n *Namespace) RestoreInterfaces(ctx context.Context, interfaces map[Iface][]IfaceOption) error {
402403
// restore interfaces
403404
for iface, opts := range interfaces {
405+
if err := ctx.Err(); err != nil {
406+
return err
407+
}
404408
i, err := newInterface(n, iface.SrcName, iface.DstPrefix, iface.DstName, opts...)
405409
if err != nil {
406410
return err
@@ -459,6 +463,29 @@ func (n *Namespace) RestoreInterfaces(interfaces map[Iface][]IfaceOption) error
459463
n.iFaces = append(n.iFaces, i)
460464
n.mu.Unlock()
461465
}
466+
467+
// Send unsolicited ARP/NA messages to update neighbor caches with the
468+
// MAC address associated with the interface's IP addresses. This is
469+
// necessary after a daemon restart because other hosts may have stale
470+
// neighbor cache entries.
471+
if i.dstName != "" {
472+
log.G(ctx).WithFields(log.Fields{
473+
"interface": i.dstName,
474+
"ipv4": i.address,
475+
"ipv6": i.addressIPv6,
476+
}).Debug("Sending neighbor advertisements during restore")
477+
link, err := n.nlHandle.LinkByName(i.dstName)
478+
if err != nil {
479+
log.G(ctx).WithFields(log.Fields{"error": err, "interface": i.dstName}).Warn("Failed to get link for neighbor advertisement during restore")
480+
continue
481+
}
482+
ifIndex := link.Attrs().Index
483+
waitForBridgePort(ctx, ns.NlHandle(), link)
484+
mcastRouteOk := waitForMcastRoute(ctx, ifIndex, i, n.nlHandle)
485+
if err := n.advertiseAddrs(ctx, ifIndex, i, n.nlHandle, mcastRouteOk); err != nil {
486+
log.G(ctx).WithError(err).WithField("interface", i.dstName).Warn("Failed to send neighbor advertisement during restore")
487+
}
488+
}
462489
}
463490
return nil
464491
}

daemon/libnetwork/sandbox_linux.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ func (sb *Sandbox) releaseOSSbox() error {
259259
return osSbox.Destroy()
260260
}
261261

262-
func (sb *Sandbox) restoreOslSandbox() error {
262+
func (sb *Sandbox) restoreOslSandbox(ctx context.Context) error {
263263
var routes []*types.StaticRoute
264264

265265
// restore osl sandbox
@@ -271,7 +271,7 @@ func (sb *Sandbox) restoreOslSandbox() error {
271271
ep.mu.Unlock()
272272

273273
if i == nil {
274-
log.G(context.TODO()).Errorf("error restoring endpoint %s for container %s", ep.Name(), sb.ContainerID())
274+
log.G(ctx).Errorf("error restoring endpoint %s for container %s", ep.Name(), sb.ContainerID())
275275
continue
276276
}
277277

@@ -298,7 +298,9 @@ func (sb *Sandbox) restoreOslSandbox() error {
298298
}
299299
}
300300

301-
if err := sb.osSbox.RestoreInterfaces(interfaces); err != nil {
301+
// Use WithoutCancel so that restore completes even if the parent context is
302+
// cancelled - we don't want to leave containers with partially restored networking.
303+
if err := sb.osSbox.RestoreInterfaces(context.WithoutCancel(ctx), interfaces); err != nil {
302304
return err
303305
}
304306
if len(routes) > 0 {

daemon/libnetwork/sandbox_store.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ func (c *Controller) sandboxRestore(activeSandboxes map[string]any) error {
256256

257257
// reconstruct osl sandbox field
258258
if !sb.config.useDefaultSandBox {
259-
if err := sb.restoreOslSandbox(); err != nil {
259+
if err := sb.restoreOslSandbox(ctx); err != nil {
260260
log.G(ctx).WithError(err).Error("Failed to populate fields for osl sandbox")
261261
continue
262262
}

daemon/libnetwork/sandbox_windows.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func (sb *Sandbox) releaseOSSbox() error {
2424
return nil
2525
}
2626

27-
func (sb *Sandbox) restoreOslSandbox() error {
27+
func (sb *Sandbox) restoreOslSandbox(_ context.Context) error {
2828
// not implemented on Windows (Sandbox.osSbox is always nil)
2929
return nil
3030
}

integration/networking/bridge_linux_test.go

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,6 +1897,128 @@ func TestAdvertiseAddresses(t *testing.T) {
18971897
}
18981898
}
18991899

1900+
// TestAdvertiseAddressesLiveRestore verifies that unsolicited ARP/NA messages are
1901+
// sent when the daemon restarts with live-restore enabled. This ensures that
1902+
// neighbor caches on other hosts are updated with the container's MAC address
1903+
// after a daemon restart.
1904+
func TestAdvertiseAddressesLiveRestore(t *testing.T) {
1905+
skip.If(t, testEnv.IsRootless, "can't listen for ARP/NA messages in rootlesskit's namespace")
1906+
1907+
ctx := setupTest(t)
1908+
d := daemon.New(t)
1909+
d.StartWithBusybox(ctx, t, "--live-restore")
1910+
defer d.Stop(t)
1911+
c := d.NewClientT(t)
1912+
defer c.Close()
1913+
1914+
const netName = "dsnet-lr"
1915+
const brName = "br-advaddrlr"
1916+
network.CreateNoError(ctx, t, c, netName,
1917+
network.WithOption(bridge.BridgeName, brName),
1918+
network.WithIPv6(),
1919+
network.WithIPAM("172.23.23.0/24", "172.23.23.1"),
1920+
network.WithIPAM("fd4c:f70b:973d::/64", "fd4c:f70b:973d::1"),
1921+
)
1922+
defer network.RemoveNoError(ctx, t, c, netName)
1923+
1924+
// Create ctr1 which will be used to verify neighbor cache updates.
1925+
ctr1Id := container.Run(ctx, t, c, container.WithName("ctr1-lr"), container.WithNetworkMode(netName))
1926+
defer c.ContainerRemove(ctx, ctr1Id, client.ContainerRemoveOptions{Force: true})
1927+
1928+
// Create ctr2 with fixed IP addresses.
1929+
const ctr2Name = "ctr2-lr"
1930+
const ctr2Addr4 = "172.23.23.22"
1931+
const ctr2Addr6 = "fd4c:f70b:973d::2222"
1932+
ctr2Id := container.Run(ctx, t, c,
1933+
container.WithName(ctr2Name),
1934+
container.WithNetworkMode(netName),
1935+
container.WithIPv4(netName, ctr2Addr4),
1936+
container.WithIPv6(netName, ctr2Addr6),
1937+
)
1938+
defer c.ContainerRemove(ctx, ctr2Id, client.ContainerRemoveOptions{Force: true})
1939+
1940+
ctr2MAC := container.Inspect(ctx, t, c, ctr2Id).NetworkSettings.Networks[netName].MacAddress
1941+
1942+
// Ping from ctr1 to ctr2 to populate ctr1's neighbor caches.
1943+
pingRes := container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-4", "-c1", ctr2Name})
1944+
assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
1945+
pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-6", "-c1", ctr2Name})
1946+
assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
1947+
1948+
// Log ctr1's neighbor table (expected to hold entries for ctr2 after the pings) for debugging.
1949+
ctr1Neighs := container.ExecT(ctx, t, c, ctr1Id, []string{"ip", "neigh", "show"})
1950+
assert.Assert(t, is.Equal(ctr1Neighs.ExitCode, 0))
1951+
t.Logf("ctr1 neighbours before restart:\n%s", ctr1Neighs.Combined())
1952+
1953+
// Wait for initial ARP/NA retransmits from container creation to settle.
1954+
// The daemon sends unsolicited ARP/NA messages for a couple of seconds after
1955+
// AddInterface, so we need to wait before starting to listen to avoid counting
1956+
// those messages instead of the ones sent during restore.
1957+
t.Log("Waiting for initial ARP/NA retransmits to settle...")
1958+
time.Sleep(5 * time.Second)
1959+
1960+
// Now start listening for ARP/NA messages.
1961+
stopARPListen := network.CollectBcastARPs(t, brName)
1962+
defer stopARPListen()
1963+
stopICMP6Listen := network.CollectICMP6(t, brName)
1964+
defer stopICMP6Listen()
1965+
1966+
// Restart the daemon - this should trigger RestoreInterfaces which sends ARP/NA.
1967+
d.Restart(t, "--live-restore")
1968+
1969+
// Give time for ARP/NA messages to be sent after restart.
1970+
t.Log("Sleeping for 5s to collect ARP/NA messages after daemon restart...")
1971+
time.Sleep(5 * time.Second)
1972+
1973+
// Verify that ARP/NA messages were sent for ctr2's addresses.
1974+
arps := stopARPListen()
1975+
var arpCount int
1976+
for i, p := range arps {
1977+
ha, pa, err := network.UnpackUnsolARP(p)
1978+
if err != nil {
1979+
t.Logf("ARP %d: %s: %s: %s", i+1, p.ReceivedAt.Format("15:04:05.000"), hex.EncodeToString(p.Data), err)
1980+
continue
1981+
}
1982+
t.Logf("ARP %d: %s '%s' is at '%s'", i+1, p.ReceivedAt.Format("15:04:05.000"), pa, ha)
1983+
if pa == netip.MustParseAddr(ctr2Addr4) && slices.Compare(ha, net.HardwareAddr(ctr2MAC)) == 0 {
1984+
arpCount++
1985+
t.Logf("---> found ARP for ctr2")
1986+
}
1987+
}
1988+
assert.Check(t, arpCount >= 1, "expected at least 1 ARP message for ctr2 after live-restore, got %d", arpCount)
1989+
1990+
icmps := stopICMP6Listen()
1991+
var naCount int
1992+
for i, p := range icmps {
1993+
ha, pa, err := network.UnpackUnsolNA(p)
1994+
if err != nil {
1995+
t.Logf("ICMP6 %d: %s: %s: %s", i+1, p.ReceivedAt.Format("15:04:05.000"), hex.EncodeToString(p.Data), err)
1996+
continue
1997+
}
1998+
t.Logf("ICMP6 %d: %s '%s' is at '%s'", i+1, p.ReceivedAt.Format("15:04:05.000"), pa, ha)
1999+
if pa == netip.MustParseAddr(ctr2Addr6) && slices.Compare(ha, net.HardwareAddr(ctr2MAC)) == 0 {
2000+
naCount++
2001+
t.Logf("---> found NA for ctr2")
2002+
}
2003+
}
2004+
assert.Check(t, naCount >= 1, "expected at least 1 NA message for ctr2 after live-restore, got %d", naCount)
2005+
2006+
// Log ctr1's neighbor table after the restart for debugging; connectivity is checked by the pings below.
2007+
ctr1Neighs = container.ExecT(ctx, t, c, ctr1Id, []string{"ip", "neigh", "show"})
2008+
assert.Assert(t, is.Equal(ctr1Neighs.ExitCode, 0))
2009+
t.Logf("ctr1 neighbours after restart:\n%s", ctr1Neighs.Combined())
2010+
2011+
// Verify connectivity still works after restart.
2012+
pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-4", "-c1", ctr2Name})
2013+
assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
2014+
pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-6", "-c1", ctr2Name})
2015+
assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
2016+
2017+
if t.Failed() {
2018+
d.TailLogsT(t, 100)
2019+
}
2020+
}
2021+
19002022
// TestNetworkInspectGateway checks that gateways reported in inspect output are parseable as addresses.
19012023
func TestNetworkInspectGateway(t *testing.T) {
19022024
ctx := setupTest(t)

0 commit comments

Comments
 (0)