Skip to content

Commit 9a6e96f

Browse files
committed
Before sending ARPs/NAs, check the bridge is ready
We don't enable STP on bridges belonging to bridge networks, but bridge ports still need to transition from "disabled" to "forwarding", after the veth device comes "up". Until then, the bridge will just drop packets. So, if a container's network is a veth device, and its other end is slaved to a bridge - wait for the bridge port to be "forwarding". Signed-off-by: Rob Murray <[email protected]>
1 parent a5db428 commit 9a6e96f

1 file changed

Lines changed: 110 additions & 0 deletions

File tree

libnetwork/osl/interface_linux.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,9 @@ func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix string,
338338

339339
// If the interface is up, send unsolicited ARP/NA messages if necessary.
340340
if up {
341+
if err := waitForBridgePort(ctx, nlhHost, iface); err != nil {
342+
return fmt.Errorf("check bridge port state: %w", err)
343+
}
341344
if err := n.advertiseAddrs(ctx, iface.Attrs().Index, i, nlh); err != nil {
342345
return fmt.Errorf("failed to advertise addresses: %w", err)
343346
}
@@ -425,6 +428,113 @@ func waitForIfUpped(ctx context.Context, ns netns.NsHandle, ifIndex int) (bool,
425428
}
426429
}
427430

431+
// waitForBridgePort checks whether link iface is a veth. If it is, and the other
432+
// end of the veth is slaved to a bridge, waits for the bridge port's state to be
433+
// "forwarding". If STP is enabled on the bridge, it doesn't wait.
434+
func waitForBridgePort(ctx context.Context, nlh nlwrap.Handle, iface netlink.Link) error {
435+
if iface.Type() != "veth" {
436+
return nil
437+
}
438+
ctx, span := otel.Tracer("").Start(ctx, "libnetwork.osl.waitForBridgePort")
439+
defer span.End()
440+
ctx = log.WithLogger(ctx, log.G(ctx).WithField("veth", iface.Attrs().Name))
441+
442+
// The parent of a veth is the other end of the veth.
443+
parentIndex := iface.Attrs().ParentIndex
444+
if parentIndex <= 0 {
445+
log.G(ctx).Debug("veth has no parent index")
446+
return nil
447+
}
448+
parentIface, err := nlh.LinkByIndex(parentIndex)
449+
if err != nil {
450+
// The parent isn't in the host's netns, it's probably in a swarm load-balancer
451+
// sandbox, and we don't know where that is. But, swarm still uses IP-based MAC
452+
// addresses so the unsolicited ARPs aren't essential. If the first one goes
453+
// missing because the bridge's port isn't forwarding yet, it's ok.
454+
log.G(ctx).WithFields(log.Fields{"parentIndex": parentIndex, "error": err}).Debug("No parent interface")
455+
return nil
456+
}
457+
// If the other end of the veth has a MasterIndex, that's a bridge.
458+
if parentIface.Attrs().MasterIndex <= 0 {
459+
log.G(ctx).Debug("veth is not connected to a bridge")
460+
return nil
461+
}
462+
bridgeIface, err := nlh.LinkByIndex(parentIface.Attrs().MasterIndex)
463+
if err != nil {
464+
return fmt.Errorf("get bridge link by index %d: %w", parentIface.Attrs().MasterIndex, err)
465+
}
466+
467+
// Ideally, we'd read the port state via netlink. But, vishvananda/netlink needs a
468+
// patch to include state in its response.
469+
// - type Protinfo needs a "State uint8"
470+
// - parseProtinfo() needs "case nl.IFLA_BRPORT_STATE: pi.State = uint8(info.Value[0])"
471+
/*
472+
pi, err := nlh.LinkGetProtinfo(parentIface)
473+
if err != nil {
474+
return fmt.Errorf("get bridge protinfo: %w", err)
475+
}
476+
*/
477+
478+
// Check that STP is not enabled on the bridge. It won't be enabled on a
479+
// bridge network's own bridge. But, could be on a user-supplied bridge
480+
// and, if it is, it won't be forwarding within the timeout here.
481+
if stpEnabled(ctx, bridgeIface.Attrs().Name) {
482+
log.G(ctx).Debug("STP is enabled, not waiting for port to be forwarding")
483+
return nil
484+
}
485+
486+
// Read the port state from "/sys/class/net/<bridge>/brif/<veth>/state".
487+
var portStateFile *os.File
488+
path := filepath.Join("/sys/class/net", bridgeIface.Attrs().Name, "brif", parentIface.Attrs().Name, "state")
489+
portStateFile, err = os.Open(path)
490+
if err != nil {
491+
// In integration tests where the daemon is running in its own netns, the bridge
492+
// device isn't visible in "/sys/class/net". So, just wait for hopefully-long-enough
493+
// for the bridge's port to be ready.
494+
log.G(ctx).WithField("port", path).Debug("Failed to open port state file, waiting")
495+
time.Sleep(20 * time.Millisecond)
496+
return nil
497+
}
498+
defer portStateFile.Close()
499+
500+
// Poll the bridge port's state until it's "forwarding". (By now, it should be. So, poll
501+
// quickly, and not for long.)
502+
const pollInterval = 10 * time.Millisecond
503+
const maxWait = 200 * time.Millisecond
504+
for range int64(maxWait / pollInterval) {
505+
var stateFileContent [2]byte
506+
n, err := portStateFile.ReadAt(stateFileContent[:], 0)
507+
if err != nil {
508+
return fmt.Errorf("read %q: %w", path, err)
509+
}
510+
if n == 0 {
511+
return fmt.Errorf("empty file %q", path)
512+
}
513+
// Forwarding is state '3'.
514+
// https://elixir.bootlin.com/linux/v6.13/source/include/uapi/linux/if_bridge.h#L49-L53
515+
if stateFileContent[0] != '3' {
516+
log.G(ctx).WithField("portState", stateFileContent[0]).Debug("waiting for bridge port to be forwarding")
517+
time.Sleep(pollInterval)
518+
continue
519+
}
520+
log.G(ctx).Debug("Bridge port is forwarding")
521+
return nil
522+
}
523+
return fmt.Errorf("bridge port not forwarding after %v", maxWait)
524+
}
525+
526+
// stpEnabled returns true if "/sys/class/net/<name>/bridge/stp_state" can be read
527+
// and does not contain "0".
528+
func stpEnabled(ctx context.Context, name string) bool {
529+
stpStateFilename := filepath.Join("/sys/class/net", name, "bridge/stp_state")
530+
stpState, err := os.ReadFile(stpStateFilename)
531+
if err != nil {
532+
log.G(ctx).WithError(err).Warnf("Failed to read stp_state file %q", stpStateFilename)
533+
return false
534+
}
535+
return len(stpState) > 0 && stpState[0] != '0'
536+
}
537+
428538
// advertiseAddrs triggers send unsolicited ARP and Neighbour Advertisement
429539
// messages, so that caches are updated with the MAC address currently associated
430540
// with the interface's IP addresses.

0 commit comments

Comments
 (0)