@@ -5,6 +5,7 @@ package overlay
55
66import (
77 "context"
8+ "errors"
89 "fmt"
910 "net"
1011 "net/netip"
@@ -156,63 +157,57 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP netip.Prefix, peerMac net.
156157// networkDB has already delivered some events of peers already available on remote nodes,
157158// these peers are saved into the peerDB and this function is used to properly configure
158159// the network sandbox with all those peers that got previously notified.
159- // Note also that this method sends a single message on the channel and the go routine on the
160- // other side, will atomically loop on the whole table of peers and will program their state
161- // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that
160+ // Note also that this method atomically loops on the whole table of peers
161+ // and programs their state in one single atomic operation.
162+ // This is fundamental to guarantee consistency, and avoid that
162163// new peerAdd or peerDelete gets reordered during the sandbox init.
163164func (d * driver ) initSandboxPeerDB (nid string ) {
164165 d .peerOpMu .Lock ()
165166 defer d .peerOpMu .Unlock ()
166- if err := d .peerInitOp (nid ); err != nil {
167- log .G (context .TODO ()).WithError (err ).Warn ("Peer init operation failed" )
168- }
169- }
170-
171- func (d * driver ) peerInitOp (nid string ) error {
172- return d .peerDbNetworkWalk (nid , func (peerIP netip.Addr , peerMac net.HardwareAddr , pEntry * peerEntry ) bool {
173- // Local entries do not need to be added
174- if pEntry .isLocal () {
175- return false
167+ err := d .peerDbNetworkWalk (nid , func (peerIP netip.Addr , peerMac net.HardwareAddr , pEntry * peerEntry ) bool {
168+ if ! pEntry .isLocal () {
169+ d .addNeighbor (nid , netip .PrefixFrom (peerIP , pEntry .prefixBits ), peerMac , pEntry .vtep )
176170 }
177-
178- d .peerAddOp (nid , pEntry .eid , netip .PrefixFrom (peerIP , pEntry .prefixBits ), peerMac , pEntry .vtep , false )
179- // return false to loop on all entries
180- return false
171+ return false // walk all entries
181172 })
173+
174+ if err != nil {
175+ log .G (context .TODO ()).WithError (err ).Warn ("Peer init operation failed" )
176+ }
182177}
183178
184179// peerAdd adds a new entry to the peer database.
185180//
186181// Local peers are signified by an invalid vtep (i.e. netip.Addr{}).
187182func (d * driver ) peerAdd (nid , eid string , peerIP netip.Prefix , peerMac net.HardwareAddr , vtep netip.Addr ) {
188- d .peerOpMu .Lock ()
189- defer d .peerOpMu .Unlock ()
190- err := d .peerAddOp (nid , eid , peerIP , peerMac , vtep , true )
191- if err != nil {
183+ if err := validateID (nid , eid ); err != nil {
192184 log .G (context .TODO ()).WithError (err ).Warn ("Peer add operation failed" )
185+ return
193186 }
194- }
195187
196- func (d * driver ) peerAddOp (nid , eid string , peerIP netip.Prefix , peerMac net.HardwareAddr , vtep netip.Addr , updateDB bool ) error {
197- if err := validateID (nid , eid ); err != nil {
198- return err
199- }
188+ d .peerOpMu .Lock ()
189+ defer d .peerOpMu .Unlock ()
200190
201- var dbEntries int
202- var inserted bool
203- if updateDB {
204- inserted , dbEntries = d .peerDbAdd (nid , eid , peerIP , peerMac , vtep )
205- if ! inserted {
206- log .G (context .TODO ()).Warnf ("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v" ,
207- nid , eid , peerIP , peerMac , vtep )
208- }
191+ inserted , dbEntries := d .peerDbAdd (nid , eid , peerIP , peerMac , vtep )
192+ if ! inserted {
193+ log .G (context .TODO ()).Warnf ("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v" ,
194+ nid , eid , peerIP , peerMac , vtep )
209195 }
210-
211- // Local peers do not need any further configuration
212- if ! vtep .IsValid () {
213- return nil
196+ if vtep .IsValid () {
197+ err := d .addNeighbor (nid , peerIP , peerMac , vtep )
198+ if err != nil {
199+ if dbEntries > 1 && errors .As (err , & osl.NeighborSearchError {}) {
200+ // We are in the transient case so only the first configuration is programmed into the kernel.
201+ // Upon deletion if the active configuration is deleted the next one from the database will be restored.
202+ return
203+ }
204+ log .G (context .TODO ()).WithFields (log.Fields {"nid" : nid , "eid" : eid }).WithError (err ).Warn ("Peer add operation failed" )
205+ }
214206 }
207+ }
215208
209+ // addNeighbor programs the kernel so the given peer is reachable through the VXLAN tunnel.
210+ func (d * driver ) addNeighbor (nid string , peerIP netip.Prefix , peerMac net.HardwareAddr , vtep netip.Addr ) error {
216211 n := d .network (nid )
217212 if n == nil {
218213 return nil
@@ -243,18 +238,12 @@ func (d *driver) peerAddOp(nid, eid string, peerIP netip.Prefix, peerMac net.Har
243238
244239 // Add neighbor entry for the peer IP
245240 if err := sbox .AddNeighbor (peerIP .Addr ().AsSlice (), peerMac , osl .WithLinkName (s .vxlanName )); err != nil {
246- if _ , ok := err .(osl.NeighborSearchError ); ok && dbEntries > 1 {
247- // We are in the transient case so only the first configuration is programmed into the kernel
248- // Upon deletion if the active configuration is deleted the next one from the database will be restored
249- // Note we are skipping also the next configuration
250- return nil
251- }
252- return fmt .Errorf ("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v" , nid , eid , err )
241+ return fmt .Errorf ("could not add neighbor entry into the sandbox: %w" , err )
253242 }
254243
255244 // Add fdb entry to the bridge for the peer mac
256245 if err := sbox .AddNeighbor (vtep .AsSlice (), peerMac , osl .WithLinkName (s .vxlanName ), osl .WithFamily (syscall .AF_BRIDGE )); err != nil {
257- return fmt .Errorf ("could not add fdb entry for nid:%s eid:%s into the sandbox:%v" , nid , eid , err )
246+ return fmt .Errorf ("could not add fdb entry into the sandbox: %w" , err )
258247 }
259248
260249 return nil
@@ -264,25 +253,67 @@ func (d *driver) peerAddOp(nid, eid string, peerIP netip.Prefix, peerMac net.Har
264253//
265254// Local peers are signified by an invalid vtep (i.e. netip.Addr{}).
266255func (d * driver ) peerDelete (nid , eid string , peerIP netip.Prefix , peerMac net.HardwareAddr , vtep netip.Addr ) {
267- d .peerOpMu .Lock ()
268- defer d .peerOpMu .Unlock ()
269- err := d .peerDeleteOp (nid , eid , peerIP , peerMac , vtep )
270- if err != nil {
271- log .G (context .TODO ()).WithError (err ).Warn ("Peer delete operation failed" )
272- }
273- }
274-
275- func (d * driver ) peerDeleteOp (nid , eid string , peerIP netip.Prefix , peerMac net.HardwareAddr , vtep netip.Addr ) error {
256+ logger := log .G (context .TODO ()).WithFields (log.Fields {
257+ "nid" : nid ,
258+ "eid" : eid ,
259+ "ip" : peerIP ,
260+ "mac" : peerMac ,
261+ "vtep" : vtep ,
262+ })
276263 if err := validateID (nid , eid ); err != nil {
277- return err
264+ logger .WithError (err ).Warn ("Peer delete operation failed" )
265+ return
278266 }
279267
268+ d .peerOpMu .Lock ()
269+ defer d .peerOpMu .Unlock ()
270+
280271 deleted , dbEntries := d .peerDbDelete (nid , eid , peerIP , peerMac , vtep )
281272 if ! deleted {
282- log .G (context .TODO ()).Warnf ("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v" ,
283- nid , eid , peerIP , peerMac , vtep )
273+ logger .Warn ("Peer entry was not in db" )
274+ }
275+
276+ if vtep .IsValid () {
277+ err := d .deleteNeighbor (nid , peerIP , peerMac , vtep )
278+ if err != nil {
279+ if dbEntries > 0 && errors .As (err , & osl.NeighborSearchError {}) {
280+ // We fall in here if there is a transient state and if the neighbor that is being deleted
281+ // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
282+ return
283+ }
284+ logger .WithError (err ).Warn ("Peer delete operation failed" )
285+ }
286+
287+ if dbEntries > 0 {
288+ // If there is still an entry into the database and the deletion went through without errors means that there is now no
289+ // configuration active in the kernel.
290+ // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
291+ peerIPAddr , peerMac , peerEntry , err := d .peerDbSearch (nid , peerIP .Addr ())
292+ if err != nil {
293+ log .G (context .TODO ()).WithFields (log.Fields {
294+ "nid" : nid ,
295+ "ip" : peerIP ,
296+ }).WithError (err ).Error ("peerDelete unable to restore a configuration" )
297+ return
298+ }
299+ peerIP = netip .PrefixFrom (peerIPAddr , peerEntry .prefixBits )
300+ err = d .addNeighbor (nid , peerIP , peerMac , peerEntry .vtep )
301+ if err != nil {
302+ log .G (context .TODO ()).WithFields (log.Fields {
303+ "nid" : nid ,
304+ "eid" : eid ,
305+ "ip" : peerIP ,
306+ "mac" : peerMac ,
307+ "vtep" : peerEntry .vtep ,
308+ }).WithError (err ).Error ("Peer delete operation failed" )
309+ }
310+ }
284311 }
312+ }
285313
314+ // deleteNeighbor removes programming from the kernel for the given peer to be
315+ // reachable through the VXLAN tunnel. It is the inverse of [driver.addNeighbor].
316+ func (d * driver ) deleteNeighbor (nid string , peerIP netip.Prefix , peerMac net.HardwareAddr , vtep netip.Addr ) error {
286317 n := d .network (nid )
287318 if n == nil {
288319 return nil
@@ -299,58 +330,32 @@ func (d *driver) peerDeleteOp(nid, eid string, peerIP netip.Prefix, peerMac net.
299330 }
300331 }
301332
302- // Local peers do not have any local configuration to delete
303- if vtep .IsValid () {
304- s := n .getSubnetforIP (peerIP )
305- if s == nil {
306- return fmt .Errorf ("could not find the subnet %q in network %q" , peerIP .String (), n .id )
307- }
308- // Remove fdb entry to the bridge for the peer mac
309- if err := sbox .DeleteNeighbor (vtep .AsSlice (), peerMac , osl .WithLinkName (s .vxlanName ), osl .WithFamily (syscall .AF_BRIDGE )); err != nil {
310- if _ , ok := err .(osl.NeighborSearchError ); ok && dbEntries > 0 {
311- // We fall in here if there is a transient state and if the neighbor that is being deleted
312- // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
313- return nil
314- }
315- return fmt .Errorf ("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v" , nid , eid , err )
316- }
317-
318- // Delete neighbor entry for the peer IP
319- if err := sbox .DeleteNeighbor (peerIP .Addr ().AsSlice (), peerMac , osl .WithLinkName (s .vxlanName )); err != nil {
320- return fmt .Errorf ("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v" , nid , eid , err )
321- }
333+ s := n .getSubnetforIP (peerIP )
334+ if s == nil {
335+ return fmt .Errorf ("could not find the subnet %q in network %q" , peerIP .String (), n .id )
322336 }
323-
324- if dbEntries == 0 {
325- return nil
337+ // Remove fdb entry to the bridge for the peer mac
338+ if err := sbox . DeleteNeighbor ( vtep . AsSlice (), peerMac , osl . WithLinkName ( s . vxlanName ), osl . WithFamily ( syscall . AF_BRIDGE )); err != nil {
339+ return fmt . Errorf ( "could not delete fdb entry in the sandbox: %w" , err )
326340 }
327341
328- // If there is still an entry into the database and the deletion went through without errors means that there is now no
329- // configuration active in the kernel.
330- // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
331- peerIPAddr , peerMac , peerEntry , err := d .peerDbSearch (nid , peerIP .Addr ())
332- if err != nil {
333- log .G (context .TODO ()).Errorf ("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s" , nid , peerIP , peerMac , err )
334- return err
342+ // Delete neighbor entry for the peer IP
343+ if err := sbox .DeleteNeighbor (peerIP .Addr ().AsSlice (), peerMac , osl .WithLinkName (s .vxlanName )); err != nil {
344+ return fmt .Errorf ("could not delete neighbor entry in the sandbox:%v" , err )
335345 }
336- return d .peerAddOp (nid , peerEntry .eid , netip .PrefixFrom (peerIPAddr , peerEntry .prefixBits ), peerMac , peerEntry .vtep , false )
346+
347+ return nil
337348}
338349
339350func (d * driver ) peerFlush (nid string ) {
340351 d .peerOpMu .Lock ()
341352 defer d .peerOpMu .Unlock ()
342- if err := d .peerFlushOp (nid ); err != nil {
343- log .G (context .TODO ()).WithError (err ).Warn ("Peer flush operation failed" )
344- }
345- }
346-
347- func (d * driver ) peerFlushOp (nid string ) error {
348353 d .peerDb .Lock ()
349354 defer d .peerDb .Unlock ()
350355 _ , ok := d .peerDb .mp [nid ]
351356 if ! ok {
352- return fmt .Errorf ("Unable to find the peerDB for nid:%s" , nid )
357+ log .G (context .TODO ()).Warnf ("Peer flush operation failed: unable to find the peerDB for nid:%s" , nid )
358+ return
353359 }
354360 delete (d .peerDb .mp , nid )
355- return nil
356361}
0 commit comments