Skip to content

Commit bace1b8

Browse files
committed
libnetwork/d/overlay: handle coalesced peer updates
The eventually-consistent nature of NetworkDB means we cannot depend on events being received in the same order that they were sent. Nor can we depend on receiving events for all intermediate states. It is possible for a series of entry UPDATEs, or a DELETE followed by a CREATE with the same key, to get coalesced into a single UPDATE event on the receiving node. Watchers of NetworkDB tables therefore need to be prepared to gracefully handle arbitrary UPDATEs of a key, including those where the new value may have nothing in common with the previous value. The overlay driver naively handled events for overlay_peer_table assuming that an endpoint leave followed by a rejoin of the same endpoint would always be expressed as a DELETE event followed by a CREATE. It would handle a coalesced UPDATE as a CREATE, inserting a new entry into peerDB without removing the old one. This would have various side effects, such as having the "transient state" of multiple entries in peerDB with the same peer IP never settle. Update driverapi to pass both the previous and new value of a table entry into the driver. Modify the overlay driver to handle an UPDATE by removing the previous peer entry from peerDB then adding the new one. Modify the Windows overlay driver to match. Signed-off-by: Cory Snider <[email protected]> (cherry picked from commit e1a586a) libn/d/overlay: don't deref nil PeerRecord on error If unmarshaling the peer record fails, there is no need to check if it's a record for a local peer. Attempting to do so anyway will result in a nil-dereference panic. Don't do that. The Windows overlay driver has a typo: prevPeer is being checked twice for whether it was a local-peer record. Check prevPeer once and newPeer once each, as intended. Signed-off-by: Cory Snider <[email protected]> (cherry picked from commit 12c6345) Signed-off-by: Cory Snider <[email protected]>
1 parent f9e5429 commit bace1b8

6 files changed

Lines changed: 128 additions & 107 deletions

File tree

libnetwork/agent.go

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -821,25 +821,8 @@ func (n *Network) handleDriverTableEvent(ev events.Event) {
821821
return
822822
}
823823

824-
var (
825-
etype driverapi.EventType
826-
value []byte
827-
)
828-
829824
event := ev.(networkdb.WatchEvent)
830-
switch {
831-
case event.IsCreate():
832-
value = event.Value
833-
etype = driverapi.Create
834-
case event.IsDelete():
835-
value = event.Prev
836-
etype = driverapi.Delete
837-
case event.IsUpdate():
838-
value = event.Value
839-
etype = driverapi.Update
840-
}
841-
842-
ed.EventNotify(etype, n.ID(), event.Table, event.Key, value)
825+
ed.EventNotify(n.ID(), event.Table, event.Key, event.Prev, event.Value)
843826
}
844827

845828
func (c *Controller) handleNodeTableEvent(ev events.Event) {

libnetwork/driverapi/driverapi.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ type TableWatcher interface {
7272
// happened on a table of its interest as soon as this node
7373
// receives such an event in the gossip layer. This method is
7474
// only invoked for the global scope driver.
75-
EventNotify(event EventType, nid string, tableName string, key string, value []byte)
75+
EventNotify(nid string, tableName string, key string, prev, value []byte)
7676

7777
// DecodeTableEntry passes the driver a key, value pair from table it registered
7878
// with libnetwork. Driver should return {object ID, map[string]string} tuple.
@@ -173,18 +173,6 @@ type IPAMData struct {
173173
AuxAddresses map[string]*net.IPNet
174174
}
175175

176-
// EventType defines a type for the CRUD event
177-
type EventType uint8
178-
179-
const (
180-
// Create event is generated when a table entry is created,
181-
Create EventType = 1 + iota
182-
// Update event is generated when a table entry is updated.
183-
Update
184-
// Delete event is generated when a table entry is deleted.
185-
Delete
186-
)
187-
188176
// ObjectType represents the type of object driver wants to store in libnetwork's networkDB
189177
type ObjectType int
190178

libnetwork/drivers/overlay/joinleave.go

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"github.com/containerd/log"
1313
"github.com/docker/docker/libnetwork/driverapi"
14-
"github.com/docker/docker/libnetwork/internal/hashable"
1514
"github.com/docker/docker/libnetwork/internal/netiputil"
1615
"github.com/docker/docker/libnetwork/ns"
1716
"github.com/docker/docker/libnetwork/osl"
@@ -151,41 +150,43 @@ func (d *driver) DecodeTableEntry(tablename string, key string, value []byte) (s
151150
}
152151
}
153152

154-
func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
153+
func (d *driver) EventNotify(nid, tableName, key string, prev, value []byte) {
155154
if tableName != OverlayPeerTable {
156155
log.G(context.TODO()).Errorf("Unexpected table notification for table %s received", tableName)
157156
return
158157
}
159158

160159
eid := key
161160

162-
var peer PeerRecord
163-
if err := proto.Unmarshal(value, &peer); err != nil {
164-
log.G(context.TODO()).Errorf("Failed to unmarshal peer record: %v", err)
165-
return
166-
}
167-
168-
// Ignore local peers. We already know about them and they
169-
// should not be added to vxlan fdb.
170-
if addr, _ := netip.ParseAddr(peer.TunnelEndpointIP); addr == d.advertiseAddress {
171-
return
161+
var prevPeer, newPeer *Peer
162+
if prev != nil {
163+
var err error
164+
prevPeer, err = UnmarshalPeerRecord(prev)
165+
if err != nil {
166+
log.G(context.TODO()).WithError(err).Error("Failed to unmarshal previous peer record")
167+
} else if prevPeer.TunnelEndpointIP == d.advertiseAddress {
168+
// Ignore local peers. We don't add them to the VXLAN
169+
// FDB so don't need to remove them.
170+
prevPeer = nil
171+
}
172172
}
173-
174-
addr, err := netip.ParsePrefix(peer.EndpointIP)
175-
if err != nil {
176-
log.G(context.TODO()).WithError(err).Errorf("Invalid peer IP %s received in event notify", peer.EndpointIP)
177-
return
173+
if value != nil {
174+
var err error
175+
newPeer, err = UnmarshalPeerRecord(value)
176+
if err != nil {
177+
log.G(context.TODO()).WithError(err).Error("Failed to unmarshal peer record")
178+
} else if newPeer.TunnelEndpointIP == d.advertiseAddress {
179+
newPeer = nil
180+
}
178181
}
179182

180-
mac, err := hashable.ParseMAC(peer.EndpointMAC)
181-
if err != nil {
182-
log.G(context.TODO()).WithError(err).Errorf("Invalid mac %s received in event notify", peer.EndpointMAC)
183+
if prevPeer == nil && newPeer == nil {
184+
// Nothing to do! Either the event was for a local peer,
185+
// or unmarshaling failed.
183186
return
184187
}
185-
186-
vtep, err := netip.ParseAddr(peer.TunnelEndpointIP)
187-
if err != nil {
188-
log.G(context.TODO()).WithError(err).Errorf("Invalid VTEP %s received in event notify", peer.TunnelEndpointIP)
188+
if prevPeer != nil && newPeer != nil && *prevPeer == *newPeer {
189+
// The update did not materially change the FDB entry.
189190
return
190191
}
191192

@@ -199,21 +200,23 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
199200
}
200201
defer unlock()
201202

202-
var opname string
203-
if etype == driverapi.Delete {
204-
opname = "delete"
205-
err = n.peerDelete(eid, addr, mac, vtep)
206-
} else {
207-
opname = "add"
208-
err = n.peerAdd(eid, addr, mac, vtep)
203+
if prevPeer != nil {
204+
if err := n.peerDelete(eid, prevPeer.EndpointIP, prevPeer.EndpointMAC, prevPeer.TunnelEndpointIP); err != nil {
205+
log.G(context.TODO()).WithFields(log.Fields{
206+
"error": err,
207+
"nid": n.id,
208+
"peer": prevPeer,
209+
}).Warn("overlay: failed to delete peer entry")
210+
}
209211
}
210-
if err != nil {
211-
log.G(context.TODO()).WithFields(log.Fields{
212-
"error": err,
213-
"nid": n.id,
214-
"peer": peer,
215-
"op": opname,
216-
}).Warn("Peer operation failed")
212+
if newPeer != nil {
213+
if err := n.peerAdd(eid, newPeer.EndpointIP, newPeer.EndpointMAC, newPeer.TunnelEndpointIP); err != nil {
214+
log.G(context.TODO()).WithFields(log.Fields{
215+
"error": err,
216+
"nid": n.id,
217+
"peer": newPeer,
218+
}).Warn("overlay: failed to add peer entry")
219+
}
217220
}
218221
}
219222

libnetwork/drivers/overlay/peer.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,42 @@
11
package overlay
22

3+
import (
4+
"fmt"
5+
"net/netip"
6+
7+
"github.com/docker/docker/libnetwork/internal/hashable"
8+
"github.com/gogo/protobuf/proto"
9+
)
10+
311
// OverlayPeerTable is the NetworkDB table for overlay network peer discovery.
412
const OverlayPeerTable = "overlay_peer_table"
13+
14+
type Peer struct {
15+
EndpointIP netip.Prefix
16+
EndpointMAC hashable.MACAddr
17+
TunnelEndpointIP netip.Addr
18+
}
19+
20+
func UnmarshalPeerRecord(data []byte) (*Peer, error) {
21+
var pr PeerRecord
22+
if err := proto.Unmarshal(data, &pr); err != nil {
23+
return nil, fmt.Errorf("failed to unmarshal peer record: %w", err)
24+
}
25+
var (
26+
p Peer
27+
err error
28+
)
29+
p.EndpointIP, err = netip.ParsePrefix(pr.EndpointIP)
30+
if err != nil {
31+
return nil, fmt.Errorf("invalid peer IP %q received: %w", pr.EndpointIP, err)
32+
}
33+
p.EndpointMAC, err = hashable.ParseMAC(pr.EndpointMAC)
34+
if err != nil {
35+
return nil, fmt.Errorf("invalid MAC %q received: %w", pr.EndpointMAC, err)
36+
}
37+
p.TunnelEndpointIP, err = netip.ParseAddr(pr.TunnelEndpointIP)
38+
if err != nil {
39+
return nil, fmt.Errorf("invalid VTEP %q received: %w", pr.TunnelEndpointIP, err)
40+
}
41+
return &p, nil
42+
}

libnetwork/drivers/windows/overlay/joinleave_windows.go

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@ package overlay
33
import (
44
"context"
55
"fmt"
6-
"net"
76

87
"github.com/containerd/log"
98
"github.com/docker/docker/libnetwork/driverapi"
109
"github.com/docker/docker/libnetwork/drivers/overlay"
11-
"github.com/docker/docker/libnetwork/types"
1210
"github.com/gogo/protobuf/proto"
1311
)
1412

@@ -48,57 +46,68 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
4846
return nil
4947
}
5048

51-
func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
49+
func (d *driver) EventNotify(nid, tableName, key string, prev, value []byte) {
5250
if tableName != overlay.OverlayPeerTable {
5351
log.G(context.TODO()).Errorf("Unexpected table notification for table %s received", tableName)
5452
return
5553
}
5654

5755
eid := key
5856

59-
var peer overlay.PeerRecord
60-
if err := proto.Unmarshal(value, &peer); err != nil {
61-
log.G(context.TODO()).Errorf("Failed to unmarshal peer record: %v", err)
62-
return
63-
}
64-
6557
n := d.network(nid)
6658
if n == nil {
6759
return
6860
}
6961

70-
// Ignore local peers. We already know about them and they
71-
// should not be added to vxlan fdb.
72-
if peer.TunnelEndpointIP == n.providerAddress {
73-
return
74-
}
75-
76-
addr, err := types.ParseCIDR(peer.EndpointIP)
77-
if err != nil {
78-
log.G(context.TODO()).Errorf("Invalid peer IP %s received in event notify", peer.EndpointIP)
79-
return
80-
}
81-
82-
mac, err := net.ParseMAC(peer.EndpointMAC)
83-
if err != nil {
84-
log.G(context.TODO()).Errorf("Invalid mac %s received in event notify", peer.EndpointMAC)
85-
return
86-
}
87-
88-
vtep := net.ParseIP(peer.TunnelEndpointIP)
89-
if vtep == nil {
90-
log.G(context.TODO()).Errorf("Invalid VTEP %s received in event notify", peer.TunnelEndpointIP)
62+
var prevPeer, newPeer *overlay.Peer
63+
if prev != nil {
64+
var err error
65+
prevPeer, err = overlay.UnmarshalPeerRecord(prev)
66+
if err != nil {
67+
log.G(context.TODO()).WithError(err).Error("Failed to unmarshal previous peer record")
68+
} else if prevPeer.TunnelEndpointIP.String() == n.providerAddress {
69+
// Ignore local peers. We don't add them to the VXLAN
70+
// FDB so don't need to remove them.
71+
prevPeer = nil
72+
}
73+
}
74+
if value != nil {
75+
var err error
76+
newPeer, err = overlay.UnmarshalPeerRecord(value)
77+
if err != nil {
78+
log.G(context.TODO()).WithError(err).Error("Failed to unmarshal peer record")
79+
} else if newPeer.TunnelEndpointIP.String() == n.providerAddress {
80+
newPeer = nil
81+
}
82+
}
83+
84+
if prevPeer == nil && newPeer == nil {
85+
// Nothing to do! Either the event was for a local peer,
86+
// or unmarshaling failed.
9187
return
9288
}
93-
94-
if etype == driverapi.Delete {
95-
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, true)
89+
if prevPeer != nil && newPeer != nil && *prevPeer == *newPeer {
90+
// The update did not materially change the FDB entry.
9691
return
9792
}
9893

99-
err = d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, true)
100-
if err != nil {
101-
log.G(context.TODO()).Errorf("peerAdd failed (%v) for ip %s with mac %s", err, addr.IP.String(), mac.String())
94+
if prevPeer != nil {
95+
if err := d.peerDelete(nid, eid, prevPeer.EndpointIP.Addr().AsSlice(), true); err != nil {
96+
log.G(context.TODO()).WithFields(log.Fields{
97+
"error": err,
98+
"nid": n.id,
99+
"peer": prevPeer,
100+
}).Warn("overlay: failed to delete peer entry")
101+
}
102+
}
103+
if newPeer != nil {
104+
if err := d.peerAdd(nid, eid, newPeer.EndpointIP.Addr().AsSlice(), newPeer.EndpointMAC.AsSlice(), newPeer.TunnelEndpointIP.AsSlice(), true); err != nil {
105+
log.G(context.TODO()).WithFields(log.Fields{
106+
"error": err,
107+
"nid": n.id,
108+
"peer": newPeer,
109+
}).Warn("overlay: failed to add peer entry")
110+
}
102111
}
103112
}
104113

libnetwork/drivers/windows/overlay/peerdb_windows.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
"github.com/docker/docker/libnetwork/types"
1212
)
1313

14-
func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, updateDb bool) error {
14+
func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerMac net.HardwareAddr, vtep net.IP, updateDb bool) error {
1515
log.G(context.TODO()).Debugf("WINOVERLAY: Enter peerAdd for ca ip %s with ca mac %s", peerIP.String(), peerMac.String())
1616

1717
if err := validateID(nid, eid); err != nil {
@@ -81,7 +81,7 @@ func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
8181
return nil
8282
}
8383

84-
func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, updateDb bool) error {
84+
func (d *driver) peerDelete(nid, eid string, peerIP net.IP, updateDb bool) error {
8585
log.G(context.TODO()).Infof("WINOVERLAY: Enter peerDelete for endpoint %s and peer ip %s", eid, peerIP.String())
8686

8787
if err := validateID(nid, eid); err != nil {

0 commit comments

Comments
 (0)