@@ -136,8 +136,23 @@ func (c *criService) recover(ctx context.Context) error {
136
136
return nil
137
137
}
138
138
139
+ // loadContainerTimeout is the default timeout for loading a container/sandbox.
140
+ // A single hung container/sandbox (e.g. containerd#2438) should not affect other
141
+ // containers/sandboxes.
142
+ // Most CRI container/sandbox related operations are per container; the ones
143
+ // which handle multiple containers at a time are:
144
+ // * ListPodSandboxes: Doesn't talk to containerd services.
145
+ // * ListContainers: Doesn't talk to containerd services.
146
+ // * ListContainerStats: Not in critical code path, a default timeout will
147
+ // be applied at CRI level.
148
+ // * Recovery logic: We should set a timeout for each container/sandbox recovery.
149
+ // * Event monitor: We should set a timeout for each container/sandbox event handling.
150
+ const loadContainerTimeout = 10 * time .Second
151
+
139
152
// loadContainer loads container from containerd and status checkpoint.
140
153
func (c * criService ) loadContainer (ctx context.Context , cntr containerd.Container ) (containerstore.Container , error ) {
154
+ ctx , cancel := context .WithTimeout (ctx , loadContainerTimeout )
155
+ defer cancel ()
141
156
id := cntr .ID ()
142
157
containerDir := c .getContainerRootDir (id )
143
158
volatileContainerDir := c .getVolatileContainerRootDir (id )
@@ -290,16 +305,18 @@ const (
290
305
// unknownContainerStatus returns the default container status when its status is unknown.
291
306
func unknownContainerStatus () containerstore.Status {
292
307
return containerstore.Status {
293
- CreatedAt : time . Now (). UnixNano () ,
294
- StartedAt : time . Now (). UnixNano () ,
295
- FinishedAt : time . Now (). UnixNano () ,
308
+ CreatedAt : 0 ,
309
+ StartedAt : 0 ,
310
+ FinishedAt : 0 ,
296
311
ExitCode : unknownExitCode ,
297
312
Reason : unknownExitReason ,
298
313
}
299
314
}
300
315
301
316
// loadSandbox loads sandbox from containerd.
302
317
func loadSandbox (ctx context.Context , cntr containerd.Container ) (sandboxstore.Sandbox , error ) {
318
+ ctx , cancel := context .WithTimeout (ctx , loadContainerTimeout )
319
+ defer cancel ()
303
320
var sandbox sandboxstore.Sandbox
304
321
// Load sandbox metadata.
305
322
exts , err := cntr .Extensions (ctx )
0 commit comments