Skip to content

Commit 467d717

Browse files
committed
Do not tolerate ENOENT while reconnecting socket
Containerd may get stuck for hours at startup while reconnecting to shim sockets that are unexpectedly missing. This fix adds a new ContextDialerFunc that lets you customize error handling during dialing. Signed-off-by: Ethan Chen <[email protected]>
1 parent 3f42de4 commit 467d717

2 files changed

Lines changed: 10 additions & 1 deletion

File tree

pkg/dialer/dialer.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ func ContextDialer(ctx context.Context, address string) (net.Conn, error) {
4545
return contextDialer(ctx, address, isNoent)
4646
}
4747

48+
// ContextDialerFunc returns a GRPC net.Conn connected to the provided address,
49+
// but may tolerate certain dial errors and keep retrying until the context is canceled.
50+
// If tolerateErr is nil, no error is tolerated and the dial result is returned immediately.
51+
func ContextDialerFunc(ctx context.Context, address string, tolerateErr func(error) bool) (net.Conn, error) {
52+
return contextDialer(ctx, address, tolerateErr)
53+
}
54+
4855
// Dialer returns a GRPC net.Conn connected to the provided address
4956
// Deprecated: use ContextDialer and grpc.WithContextDialer.
5057
var Dialer = timeoutDialer

runtime/v2/shim/util_unix.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,9 @@ func AnonDialer(address string, timeout time.Duration) (net.Conn, error) {
8181

8282
// AnonReconnectDialer returns a dialer for an existing socket on reconnection
8383
func AnonReconnectDialer(address string, timeout time.Duration) (net.Conn, error) {
84-
return AnonDialer(address, timeout)
84+
ctx, cancel := context.WithTimeout(context.TODO(), timeout)
85+
defer cancel()
86+
return dialer.ContextDialerFunc(ctx, address, nil)
8587
}
8688

8789
// NewSocket returns a new socket

0 commit comments

Comments
 (0)