@@ -338,6 +338,9 @@ func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix string,
338338
339339 // If the interface is up, send unsolicited ARP/NA messages if necessary.
340340 if up {
341+ if err := waitForBridgePort (ctx , nlhHost , iface ); err != nil {
342+ return fmt .Errorf ("check bridge port state: %w" , err )
343+ }
341344 if err := n .advertiseAddrs (ctx , iface .Attrs ().Index , i , nlh ); err != nil {
342345 return fmt .Errorf ("failed to advertise addresses: %w" , err )
343346 }
@@ -425,6 +428,113 @@ func waitForIfUpped(ctx context.Context, ns netns.NsHandle, ifIndex int) (bool,
425428 }
426429}
427430
431+ // waitForBridgePort checks whether link iface is a veth. If it is, and the other
432+ // end of the veth is slaved to a bridge, waits for the bridge port's state to be
433+ // "forwarding". If STP is enabled on the bridge, it doesn't wait.
434+ func waitForBridgePort (ctx context.Context , nlh nlwrap.Handle , iface netlink.Link ) error {
435+ if iface .Type () != "veth" {
436+ return nil
437+ }
438+ ctx , span := otel .Tracer ("" ).Start (ctx , "libnetwork.osl.waitForBridgePort" )
439+ defer span .End ()
440+ ctx = log .WithLogger (ctx , log .G (ctx ).WithField ("veth" , iface .Attrs ().Name ))
441+
442+ // The parent of a veth is the other end of the veth.
443+ parentIndex := iface .Attrs ().ParentIndex
444+ if parentIndex <= 0 {
445+ log .G (ctx ).Debug ("veth has no parent index" )
446+ return nil
447+ }
448+ parentIface , err := nlh .LinkByIndex (parentIndex )
449+ if err != nil {
450+ // The parent isn't in the host's netns, it's probably in a swarm load-balancer
451+ // sandbox, and we don't know where that is. But, swarm still uses IP-based MAC
452+ // addresses so the unsolicited ARPs aren't essential. If the first one goes
453+ // missing because the bridge's port isn't forwarding yet, it's ok.
454+ log .G (ctx ).WithFields (log.Fields {"parentIndex" : parentIndex , "error" : err }).Debug ("No parent interface" )
455+ return nil
456+ }
457+ // If the other end of the veth has a MasterIndex, that's a bridge.
458+ if parentIface .Attrs ().MasterIndex <= 0 {
459+ log .G (ctx ).Debug ("veth is not connected to a bridge" )
460+ return nil
461+ }
462+ bridgeIface , err := nlh .LinkByIndex (parentIface .Attrs ().MasterIndex )
463+ if err != nil {
464+ return fmt .Errorf ("get bridge link by index %d: %w" , parentIface .Attrs ().MasterIndex , err )
465+ }
466+
467+ // Ideally, we'd read the port state via netlink. But, vishvananda/netlink needs a
468+ // patch to include state in its response.
469+ // - type Protinfo needs a "State uint8"
470+ // - parseProtinfo() needs "case nl.IFLA_BRPORT_STATE: pi.State = uint8(info.Value[0])"
471+ /*
472+ pi, err := nlh.LinkGetProtinfo(parentIface)
473+ if err != nil {
474+ return fmt.Errorf("get bridge protinfo: %w", err)
475+ }
476+ */
477+
478+ // Check that STP is not enabled on the bridge. It won't be enabled on a
479+ // bridge network's own bridge. But, could be on a user-supplied bridge
480+ // and, if it is, it won't be forwarding within the timeout here.
481+ if stpEnabled (ctx , bridgeIface .Attrs ().Name ) {
482+ log .G (ctx ).Debug ("STP is enabled, not waiting for port to be forwarding" )
483+ return nil
484+ }
485+
486+ // Read the port state from "/sys/class/net/<bridge>/brif/<veth>/state".
487+ var portStateFile * os.File
488+ path := filepath .Join ("/sys/class/net" , bridgeIface .Attrs ().Name , "brif" , parentIface .Attrs ().Name , "state" )
489+ portStateFile , err = os .Open (path )
490+ if err != nil {
491+ // In integration tests where the daemon is running in its own netns, the bridge
492+ // device isn't visible in "/sys/class/net". So, just wait for hopefully-long-enough
493+ // for the bridge's port to be ready.
494+ log .G (ctx ).WithField ("port" , path ).Debug ("Failed to open port state file, waiting" )
495+ time .Sleep (20 * time .Millisecond )
496+ return nil
497+ }
498+ defer portStateFile .Close ()
499+
500+ // Poll the bridge port's state until it's "forwarding". (By now, it should be. So, poll
501+ // quickly, and not for long.)
502+ const pollInterval = 10 * time .Millisecond
503+ const maxWait = 200 * time .Millisecond
504+ for range int64 (maxWait / pollInterval ) {
505+ var stateFileContent [2 ]byte
506+ n , err := portStateFile .ReadAt (stateFileContent [:], 0 )
507+ if err != nil {
508+ return fmt .Errorf ("read %q: %w" , path , err )
509+ }
510+ if n == 0 {
511+ return fmt .Errorf ("empty file %q" , path )
512+ }
513+ // Forwarding is state '3'.
514+ // https://elixir.bootlin.com/linux/v6.13/source/include/uapi/linux/if_bridge.h#L49-L53
515+ if stateFileContent [0 ] != '3' {
516+ log .G (ctx ).WithField ("portState" , stateFileContent [0 ]).Debug ("waiting for bridge port to be forwarding" )
517+ time .Sleep (pollInterval )
518+ continue
519+ }
520+ log .G (ctx ).Debug ("Bridge port is forwarding" )
521+ return nil
522+ }
523+ return fmt .Errorf ("bridge port not forwarding after %v" , maxWait )
524+ }
525+
526+ // stpEnabled returns true if "/sys/class/net/<name>/bridge/stp_state" can be read
527+ // and does not contain "0".
528+ func stpEnabled (ctx context.Context , name string ) bool {
529+ stpStateFilename := filepath .Join ("/sys/class/net" , name , "bridge/stp_state" )
530+ stpState , err := os .ReadFile (stpStateFilename )
531+ if err != nil {
532+ log .G (ctx ).WithError (err ).Warnf ("Failed to read stp_state file %q" , stpStateFilename )
533+ return false
534+ }
535+ return len (stpState ) > 0 && stpState [0 ] != '0'
536+ }
537+
428538// advertiseAddrs triggers send unsolicited ARP and Neighbour Advertisement
429539// messages, so that caches are updated with the MAC address currently associated
430540// with the interface's IP addresses.
0 commit comments