Skip to content

Commit ac5f464

Browse files
committed
libnetwork/networkdb: improve quality of randomness
The property test for the mRandomNodes function revealed that it may sometimes pick out a sample of fewer than m nodes even when the number of nodes to pick from (excluding the local node) is >= m. Rewrite it using a random shuffle or permutation so that it always picks a uniformly-distributed sample of the requested size whenever the population is large enough. Signed-off-by: Cory Snider <[email protected]>
1 parent 5799deb commit ac5f464

3 files changed

Lines changed: 37 additions & 40 deletions

File tree

daemon/libnetwork/networkdb/cluster.go

Lines changed: 28 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1+
// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
2+
//go:build go1.23
3+
14
package networkdb
25

36
import (
47
"bytes"
58
"context"
6-
"crypto/rand"
79
"encoding/hex"
810
"fmt"
911
golog "log"
10-
"math/big"
1112
rnd "math/rand"
1213
"net"
1314
"net/netip"
@@ -339,7 +340,10 @@ func (nDB *NetworkDB) reconnectNode() {
339340
}
340341
nDB.RUnlock()
341342

342-
node := nodes[randomOffset(len(nodes))]
343+
nDB.rngMu.Lock()
344+
offset := nDB.rng.IntN(len(nodes))
345+
nDB.rngMu.Unlock()
346+
node := nodes[offset]
343347
addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)}
344348

345349
if _, err := nDB.memberlist.Join([]string{addr.String()}); err != nil {
@@ -699,49 +703,35 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b
699703
return nil
700704
}
701705

702-
// Returns a random offset between 0 and n
703-
func randomOffset(n int) int {
704-
if n == 0 {
705-
return 0
706-
}
707-
708-
val, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
709-
if err != nil {
710-
log.G(context.TODO()).Errorf("Failed to get a random offset: %v", err)
711-
return 0
712-
}
713-
714-
return int(val.Int64())
715-
}
716-
717706
// mRandomNodes is used to select up to m random nodes. It is possible
718707
// that less than m nodes are returned.
719708
func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
720-
n := len(nodes)
721-
mNodes := make([]string, 0, m)
722-
OUTER:
723-
// Probe up to 3*n times, with large n this is not necessary
724-
// since k << n, but with small n we want search to be
725-
// exhaustive
726-
for i := 0; i < 3*n && len(mNodes) < m; i++ {
727-
// Get random node
728-
idx := randomOffset(n)
729-
node := nodes[idx]
730-
709+
mNodes := make([]string, 0, max(0, len(nodes)-1))
710+
for _, node := range nodes {
731711
if node == nDB.config.NodeID {
712+
// Skip myself
732713
continue
733714
}
715+
mNodes = append(mNodes, node)
716+
}
734717

735-
// Check if we have this node already
736-
for j := 0; j < len(mNodes); j++ {
737-
if node == mNodes[j] {
738-
continue OUTER
739-
}
740-
}
718+
if len(mNodes) < m {
719+
nDB.rngMu.Lock()
720+
nDB.rng.Shuffle(len(mNodes), func(i, j int) {
721+
mNodes[i], mNodes[j] = mNodes[j], mNodes[i]
722+
})
723+
nDB.rngMu.Unlock()
724+
return mNodes
725+
}
741726

742-
// Append the node
743-
mNodes = append(mNodes, node)
727+
nDB.rngMu.Lock()
728+
perm := nDB.rng.Perm(len(mNodes))
729+
nDB.rngMu.Unlock()
730+
731+
sample := make([]string, 0, m)
732+
for _, idx := range perm[:m] {
733+
sample = append(sample, mNodes[idx])
744734
}
745735

746-
return mNodes
736+
return sample
747737
}

daemon/libnetwork/networkdb/cluster_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
//go:build xfail
2-
31
package networkdb
42

53
import (

daemon/libnetwork/networkdb/networkdb.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ package networkdb
77

88
import (
99
"context"
10+
cryptorand "crypto/rand"
1011
"fmt"
12+
"math/rand/v2"
1113
"os"
1214
"strings"
1315
"sync"
@@ -113,6 +115,9 @@ type NetworkDB struct {
113115

114116
// lastHealthTimestamp is the last timestamp when the health score got printed
115117
lastHealthTimestamp time.Time
118+
119+
rngMu sync.Mutex
120+
rng *rand.Rand
116121
}
117122

118123
// PeerInfo represents the peer (gossip cluster) nodes of a network
@@ -291,6 +296,9 @@ func newNetworkDB(c *Config) *NetworkDB {
291296
// there is at least 5 extra cycle to make sure that all the entries are properly deleted before deleting the network.
292297
c.reapNetworkInterval = c.reapEntryInterval + 5*reapPeriod
293298

299+
var rngSeed [32]byte
300+
_, _ = cryptorand.Read(rngSeed[:]) // Documented never to return an error
301+
294302
return &NetworkDB{
295303
config: c,
296304
indexes: map[int]*iradix.Tree[*entry]{
@@ -305,6 +313,7 @@ func newNetworkDB(c *Config) *NetworkDB {
305313
networkNodes: make(map[string][]string),
306314
bulkSyncAckTbl: make(map[string]chan struct{}),
307315
broadcaster: events.NewBroadcaster(),
316+
rng: rand.New(rand.NewChaCha8(rngSeed)), //gosec:disable G404 -- not used in a security sensitive context
308317
}
309318
}
310319

0 commit comments

Comments
 (0)