Skip to content

Commit ad54b8f

Browse files
committed
libn/d/overlay: fix encryption race conditions
There is a dedicated mutex for synchronizing access to the encrMap. Separately, the main driver mutex is used for synchronizing access to the encryption keys. Their use is sufficient to prevent data races (if used correctly, which is not the case) but not logical race conditions. Programming the encryption parameters for a peer can race with encryption keys being updated, which could lead to inconsistencies between the parameters programmed into the kernel and the desired state. Introduce a new mutex for synchronizing encryption operations. Use that mutex to synchronize access to both encrMap and keys. Handle encryption key updates in a critical section so they can no longer be interleaved with kernel programming of encryption parameters. Signed-off-by: Cory Snider <[email protected]> (cherry picked from commit 843cd96) Signed-off-by: Cory Snider <[email protected]>
1 parent 8075689 commit ad54b8f

3 files changed

Lines changed: 52 additions & 44 deletions

File tree

libnetwork/drivers/overlay/encryption.go

Lines changed: 36 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"net"
1313
"net/netip"
1414
"strconv"
15-
"sync"
1615
"syscall"
1716

1817
"github.com/containerd/log"
@@ -95,16 +94,14 @@ type encrNode struct {
9594
count int
9695
}
9796

98-
type encrMap struct {
99-
nodes map[netip.Addr]encrNode
100-
mu sync.Mutex
101-
}
97+
// encrMap is a map of node IP addresses to their encryption parameters.
98+
//
99+
// Like all Go maps, it is not safe for concurrent use.
100+
type encrMap map[netip.Addr]encrNode
102101

103-
func (e *encrMap) String() string {
104-
e.mu.Lock()
105-
defer e.mu.Unlock()
102+
func (e encrMap) String() string {
106103
b := new(bytes.Buffer)
107-
for k, v := range e.nodes {
104+
for k, v := range e {
108105
b.WriteString("\n")
109106
b.WriteString(k.String())
110107
b.WriteString(":")
@@ -123,16 +120,19 @@ func (e *encrMap) String() string {
123120
func (d *driver) setupEncryption(remoteIP netip.Addr) error {
124121
log.G(context.TODO()).Debugf("setupEncryption(%s)", remoteIP)
125122

126-
localIP, advIP := d.bindAddress, d.advertiseAddress
127-
keys := d.keys // FIXME: data race
128-
if len(keys) == 0 {
123+
d.encrMu.Lock()
124+
defer d.encrMu.Unlock()
125+
if len(d.keys) == 0 {
129126
return types.ForbiddenErrorf("encryption key is not present")
130127
}
128+
d.mu.Lock()
129+
localIP, advIP := d.bindAddress, d.advertiseAddress
130+
d.mu.Unlock()
131131
log.G(context.TODO()).Debugf("Programming encryption between %s and %s", localIP, remoteIP)
132132

133-
indices := make([]spi, 0, len(keys))
133+
indices := make([]spi, 0, len(d.keys))
134134

135-
for i, k := range keys {
135+
for i, k := range d.keys {
136136
spis := spi{buildSPI(advIP.AsSlice(), remoteIP.AsSlice(), k.tag), buildSPI(remoteIP.AsSlice(), advIP.AsSlice(), k.tag)}
137137
dir := reverse
138138
if i == 0 {
@@ -152,31 +152,29 @@ func (d *driver) setupEncryption(remoteIP netip.Addr) error {
152152
}
153153
}
154154

155-
d.secMap.mu.Lock()
156-
node := d.secMap.nodes[remoteIP]
155+
node := d.secMap[remoteIP]
157156
node.spi = indices
158157
node.count++
159-
d.secMap.nodes[remoteIP] = node
160-
d.secMap.mu.Unlock()
158+
d.secMap[remoteIP] = node
161159

162160
return nil
163161
}
164162

165163
func (d *driver) removeEncryption(remoteIP netip.Addr) error {
166164
log.G(context.TODO()).Debugf("removeEncryption(%s)", remoteIP)
167165

168-
spi := func() []spi {
169-
d.secMap.mu.Lock()
170-
defer d.secMap.mu.Unlock()
171-
node := d.secMap.nodes[remoteIP]
172-
if node.count == 1 {
173-
delete(d.secMap.nodes, remoteIP)
174-
return node.spi
175-
}
166+
d.encrMu.Lock()
167+
defer d.encrMu.Unlock()
168+
169+
var spi []spi
170+
node := d.secMap[remoteIP]
171+
if node.count == 1 {
172+
delete(d.secMap, remoteIP)
173+
spi = node.spi
174+
} else {
176175
node.count--
177-
d.secMap.nodes[remoteIP] = node
178-
return nil
179-
}()
176+
d.secMap[remoteIP] = node
177+
}
180178

181179
for i, idxs := range spi {
182180
dir := reverse
@@ -451,20 +449,24 @@ func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
451449
}
452450

453451
func (d *driver) setKeys(keys []*key) error {
452+
d.encrMu.Lock()
453+
defer d.encrMu.Unlock()
454+
454455
// Remove any stale policy, state
455456
clearEncryptionStates()
456457
// Accept the encryption keys and clear any stale encryption map
457-
d.mu.Lock()
458+
d.secMap = encrMap{}
458459
d.keys = keys
459-
d.secMap = &encrMap{nodes: map[netip.Addr]encrNode{}}
460-
d.mu.Unlock()
461460
log.G(context.TODO()).Debugf("Initial encryption keys: %v", keys)
462461
return nil
463462
}
464463

465464
// updateKeys allows to add a new key and/or change the primary key and/or prune an existing key
466465
// The primary key is the key used in transmission and will go in first position in the list.
467466
func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
467+
d.encrMu.Lock()
468+
defer d.encrMu.Unlock()
469+
468470
log.G(context.TODO()).Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey)
469471

470472
log.G(context.TODO()).Debugf("Current: %v", d.keys)
@@ -477,9 +479,6 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
477479
aIP = d.advertiseAddress
478480
)
479481

480-
d.mu.Lock()
481-
defer d.mu.Unlock()
482-
483482
// add new
484483
if newKey != nil {
485484
d.keys = append(d.keys, newKey)
@@ -505,14 +504,12 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
505504
return types.InvalidParameterErrorf("attempting to both make a key (index %d) primary and delete it", priIdx)
506505
}
507506

508-
d.secMap.mu.Lock()
509-
for rIP, node := range d.secMap.nodes {
507+
for rIP, node := range d.secMap {
510508
idxs := updateNodeKey(lIP.AsSlice(), aIP.AsSlice(), rIP.AsSlice(), node.spi, d.keys, newIdx, priIdx, delIdx)
511509
if idxs != nil {
512-
d.secMap.nodes[rIP] = encrNode{idxs, node.count}
510+
d.secMap[rIP] = encrNode{idxs, node.count}
513511
}
514512
}
515-
d.secMap.mu.Unlock()
516513

517514
// swap primary
518515
if priIdx != -1 {

libnetwork/drivers/overlay/joinleave.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package overlay
44

55
import (
66
"context"
7+
"errors"
78
"fmt"
89
"net"
910
"net/netip"
@@ -34,8 +35,13 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
3435
return fmt.Errorf("could not find endpoint with id %s", eid)
3536
}
3637

37-
if n.secure && len(d.keys) == 0 {
38-
return fmt.Errorf("cannot join secure network: encryption keys not present")
38+
if n.secure {
39+
d.encrMu.Lock()
40+
nkeys := len(d.keys)
41+
d.encrMu.Unlock()
42+
if nkeys == 0 {
43+
return errors.New("cannot join secure network: encryption keys not present")
44+
}
3945
}
4046

4147
nlh := ns.NlHandle()

libnetwork/drivers/overlay/overlay.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,17 @@ var _ discoverapi.Discover = (*driver)(nil)
3030
type driver struct {
3131
bindAddress, advertiseAddress netip.Addr
3232

33+
// encrMu guards secMap and keys,
34+
// and synchronizes the application of encryption parameters
35+
// to the kernel.
36+
encrMu sync.Mutex
37+
secMap encrMap
38+
keys []*key
39+
3340
config map[string]interface{}
3441
peerDb peerNetworkMap
35-
secMap *encrMap
3642
networks networkTable
3743
initOS sync.Once
38-
keys []*key
3944
peerOpMu sync.Mutex
4045
mu sync.Mutex
4146
}
@@ -47,7 +52,7 @@ func Register(r driverapi.Registerer, config map[string]interface{}) error {
4752
peerDb: peerNetworkMap{
4853
mp: map[string]*peerMap{},
4954
},
50-
secMap: &encrMap{nodes: map[netip.Addr]encrNode{}},
55+
secMap: encrMap{},
5156
config: config,
5257
}
5358
return r.RegisterDriver(NetworkType, d, driverapi.Capability{

0 commit comments

Comments
 (0)