Skip to content

Commit 843cd96

Browse files
committed
libn/d/overlay: fix encryption race conditions
There is a dedicated mutex for synchronizing access to the encrMap. Separately, the main driver mutex is used for synchronizing access to the encryption keys. Their use is sufficient to prevent data races (if used correctly, which is not the case) but not logical race conditions. Programming the encryption parameters for a peer can race with encryption keys being updated, which could lead to inconsistencies between the parameters programmed into the kernel and the desired state. Introduce a new mutex for synchronizing encryption operations. Use that mutex to synchronize access to both encrMap and keys. Handle encryption key updates in a critical section so they can no longer be interleaved with kernel programming of encryption parameters. Signed-off-by: Cory Snider <[email protected]>
1 parent a1d2997 commit 843cd96

3 files changed

Lines changed: 51 additions & 44 deletions

File tree

libnetwork/drivers/overlay/encryption.go

Lines changed: 36 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"net"
1414
"net/netip"
1515
"strconv"
16-
"sync"
1716
"syscall"
1817

1918
"github.com/containerd/log"
@@ -96,16 +95,14 @@ type encrNode struct {
9695
count int
9796
}
9897

99-
type encrMap struct {
100-
nodes map[netip.Addr]encrNode
101-
mu sync.Mutex
102-
}
98+
// encrMap is a map of node IP addresses to their encryption parameters.
99+
//
100+
// Like all Go maps, it is not safe for concurrent use.
101+
type encrMap map[netip.Addr]encrNode
103102

104-
func (e *encrMap) String() string {
105-
e.mu.Lock()
106-
defer e.mu.Unlock()
103+
func (e encrMap) String() string {
107104
b := new(bytes.Buffer)
108-
for k, v := range e.nodes {
105+
for k, v := range e {
109106
b.WriteString("\n")
110107
b.WriteString(k.String())
111108
b.WriteString(":")
@@ -124,16 +121,19 @@ func (e *encrMap) String() string {
124121
func (d *driver) setupEncryption(remoteIP netip.Addr) error {
125122
log.G(context.TODO()).Debugf("setupEncryption(%s)", remoteIP)
126123

127-
localIP, advIP := d.bindAddress, d.advertiseAddress
128-
keys := d.keys // FIXME: data race
129-
if len(keys) == 0 {
124+
d.encrMu.Lock()
125+
defer d.encrMu.Unlock()
126+
if len(d.keys) == 0 {
130127
return types.ForbiddenErrorf("encryption key is not present")
131128
}
129+
d.mu.Lock()
130+
localIP, advIP := d.bindAddress, d.advertiseAddress
131+
d.mu.Unlock()
132132
log.G(context.TODO()).Debugf("Programming encryption between %s and %s", localIP, remoteIP)
133133

134-
indices := make([]spi, 0, len(keys))
134+
indices := make([]spi, 0, len(d.keys))
135135

136-
for i, k := range keys {
136+
for i, k := range d.keys {
137137
spis := spi{buildSPI(advIP.AsSlice(), remoteIP.AsSlice(), k.tag), buildSPI(remoteIP.AsSlice(), advIP.AsSlice(), k.tag)}
138138
dir := reverse
139139
if i == 0 {
@@ -153,31 +153,29 @@ func (d *driver) setupEncryption(remoteIP netip.Addr) error {
153153
}
154154
}
155155

156-
d.secMap.mu.Lock()
157-
node := d.secMap.nodes[remoteIP]
156+
node := d.secMap[remoteIP]
158157
node.spi = indices
159158
node.count++
160-
d.secMap.nodes[remoteIP] = node
161-
d.secMap.mu.Unlock()
159+
d.secMap[remoteIP] = node
162160

163161
return nil
164162
}
165163

166164
func (d *driver) removeEncryption(remoteIP netip.Addr) error {
167165
log.G(context.TODO()).Debugf("removeEncryption(%s)", remoteIP)
168166

169-
spi := func() []spi {
170-
d.secMap.mu.Lock()
171-
defer d.secMap.mu.Unlock()
172-
node := d.secMap.nodes[remoteIP]
173-
if node.count == 1 {
174-
delete(d.secMap.nodes, remoteIP)
175-
return node.spi
176-
}
167+
d.encrMu.Lock()
168+
defer d.encrMu.Unlock()
169+
170+
var spi []spi
171+
node := d.secMap[remoteIP]
172+
if node.count == 1 {
173+
delete(d.secMap, remoteIP)
174+
spi = node.spi
175+
} else {
177176
node.count--
178-
d.secMap.nodes[remoteIP] = node
179-
return nil
180-
}()
177+
d.secMap[remoteIP] = node
178+
}
181179

182180
for i, idxs := range spi {
183181
dir := reverse
@@ -452,20 +450,24 @@ func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
452450
}
453451

454452
func (d *driver) setKeys(keys []*key) error {
453+
d.encrMu.Lock()
454+
defer d.encrMu.Unlock()
455+
455456
// Remove any stale policy, state
456457
clearEncryptionStates()
457458
// Accept the encryption keys and clear any stale encryption map
458-
d.mu.Lock()
459+
d.secMap = encrMap{}
459460
d.keys = keys
460-
d.secMap = &encrMap{nodes: map[netip.Addr]encrNode{}}
461-
d.mu.Unlock()
462461
log.G(context.TODO()).Debugf("Initial encryption keys: %v", keys)
463462
return nil
464463
}
465464

466465
// updateKeys allows to add a new key and/or change the primary key and/or prune an existing key
467466
// The primary key is the key used in transmission and will go in first position in the list.
468467
func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
468+
d.encrMu.Lock()
469+
defer d.encrMu.Unlock()
470+
469471
log.G(context.TODO()).Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey)
470472

471473
log.G(context.TODO()).Debugf("Current: %v", d.keys)
@@ -478,9 +480,6 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
478480
aIP = d.advertiseAddress
479481
)
480482

481-
d.mu.Lock()
482-
defer d.mu.Unlock()
483-
484483
// add new
485484
if newKey != nil {
486485
d.keys = append(d.keys, newKey)
@@ -506,14 +505,12 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
506505
return types.InvalidParameterErrorf("attempting to both make a key (index %d) primary and delete it", priIdx)
507506
}
508507

509-
d.secMap.mu.Lock()
510-
for rIP, node := range d.secMap.nodes {
508+
for rIP, node := range d.secMap {
511509
idxs := updateNodeKey(lIP.AsSlice(), aIP.AsSlice(), rIP.AsSlice(), node.spi, d.keys, newIdx, priIdx, delIdx)
512510
if idxs != nil {
513-
d.secMap.nodes[rIP] = encrNode{idxs, node.count}
511+
d.secMap[rIP] = encrNode{idxs, node.count}
514512
}
515513
}
516-
d.secMap.mu.Unlock()
517514

518515
// swap primary
519516
if priIdx != -1 {

libnetwork/drivers/overlay/joinleave.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,13 @@ func (d *driver) Join(ctx context.Context, nid, eid string, sboxKey string, jinf
4545
return fmt.Errorf("could not find endpoint with id %s", eid)
4646
}
4747

48-
if n.secure && len(d.keys) == 0 {
49-
return errors.New("cannot join secure network: encryption keys not present")
48+
if n.secure {
49+
d.encrMu.Lock()
50+
nkeys := len(d.keys)
51+
d.encrMu.Unlock()
52+
if nkeys == 0 {
53+
return errors.New("cannot join secure network: encryption keys not present")
54+
}
5055
}
5156

5257
nlh := ns.NlHandle()

libnetwork/drivers/overlay/overlay.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,17 @@ var _ discoverapi.Discover = (*driver)(nil)
3131
type driver struct {
3232
bindAddress, advertiseAddress netip.Addr
3333

34+
// encrMu guards secMap and keys,
35+
// and synchronizes the application of encryption parameters
36+
// to the kernel.
37+
encrMu sync.Mutex
38+
secMap encrMap
39+
keys []*key
40+
3441
config map[string]interface{}
3542
peerDb peerNetworkMap
36-
secMap *encrMap
3743
networks networkTable
3844
initOS sync.Once
39-
keys []*key
4045
peerOpMu sync.Mutex
4146
mu sync.Mutex
4247
}
@@ -48,7 +53,7 @@ func Register(r driverapi.Registerer, config map[string]interface{}) error {
4853
peerDb: peerNetworkMap{
4954
mp: map[string]*peerMap{},
5055
},
51-
secMap: &encrMap{nodes: map[netip.Addr]encrNode{}},
56+
secMap: encrMap{},
5257
config: config,
5358
}
5459
return r.RegisterDriver(NetworkType, d, driverapi.Capability{

0 commit comments

Comments
 (0)