Commit 8dcb2a6

pkg/cri/sbserver: fix leaked shim issue for podsandbox mode

Fixes: containerd#7496 containerd#8931

Signed-off-by: Wei Fu <[email protected]>
1 parent 72bc63d commit 8dcb2a6

File tree

2 files changed: +111 −0 lines changed
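For context before the diffs: the leak happens when a bounded task.Delete is canceled mid-flight while the shim keeps working. Below is a minimal sketch of that pre-fix behavior, not taken from this commit — the socket path, the "k8s.io" namespace, the container ID, and the 10-second bound are illustrative assumptions:

package main

import (
	"context"
	"log"
	"time"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/namespaces"
)

// stopTask mimics the pre-fix CRI behavior: a deadline-bounded Delete
// that can be canceled while the shim is still unmounting the rootfs.
func stopTask(client *containerd.Client, id string) error {
	ctx := namespaces.WithNamespace(context.Background(), "k8s.io")
	cntr, err := client.LoadContainer(ctx, id)
	if err != nil {
		return err
	}
	task, err := cntr.Task(ctx, nil)
	if err != nil {
		return err
	}

	// Hypothetical 10s bound, matching the timeout discussed in #7496.
	dctx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	if _, err := task.Delete(dctx, containerd.WithProcessKill); err != nil {
		// Under I/O pressure the shim's umount2 can outlive the deadline:
		// the client observes a cancellation, but the shim completes the
		// deletion on its side. A later retry then gets ErrNotFound even
		// though the shim process is still running — the leak this commit
		// fixes by also deleting via the tasks service.
		if errdefs.IsNotFound(err) {
			return nil // looks clean, but the shim may be leaked
		}
		return err
	}
	return nil
}

func main() {
	client, err := containerd.New("/run/containerd/containerd.sock")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()
	if err := stopTask(client, "example-sandbox"); err != nil {
		log.Fatal(err)
	}
}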

pkg/cri/sbserver/events.go

Lines changed: 46 additions & 0 deletions
@@ -25,6 +25,7 @@ import (
 
 	"github.com/containerd/containerd"
 	eventtypes "github.com/containerd/containerd/api/events"
+	apitasks "github.com/containerd/containerd/api/services/tasks/v1"
 	containerdio "github.com/containerd/containerd/cio"
 	"github.com/containerd/containerd/errdefs"
 	"github.com/containerd/containerd/events"
@@ -404,6 +405,51 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta
 			// Move on to make sure container status is updated.
 		}
 	}
+
+	// NOTE: Both the sb.Container.Task and task.Delete interfaces verify
+	// the status of the target task, but they can return ErrNotFound even
+	// when the shim instance still exists.
+	//
+	// There are two caches for tasks in containerd:
+	//
+	//   1. io.containerd.service.v1.tasks-service
+	//   2. io.containerd.runtime.v2.task
+	//
+	// The first maintains the shim connection and shuts the shim down in
+	// its Delete API; the second maintains the lifecycle of the task in
+	// the shim server.
+	//
+	// So, if the shim instance is running but the task has already been
+	// deleted in the shim server, sb.Container.Task and task.Delete will
+	// receive ErrNotFound. If we don't also delete the shim instance in
+	// io.containerd.service.v1.tasks-service, the shim is leaked.
+	//
+	// As reported in containerd/containerd#7496, when the host is under
+	// I/O pressure the umount2 syscall can take more than 10 seconds, so
+	// the CRI plugin cancels this task.Delete call. The shim server isn't
+	// aware of the cancellation: after umount2 returns, it continues to
+	// delete the task record. The CRI plugin then retries the deletion,
+	// receives ErrNotFound, and marks the task as stopped. As a result,
+	// the shim is leaked.
+	//
+	// It's hard to handle lost connections or canceled requests in the
+	// shim server, so we call the Delete API of
+	// io.containerd.service.v1.tasks-service to ensure that the shim
+	// instance is shut down.
+	//
+	// REF:
+	// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
+	// 2. https://github.com/containerd/containerd/issues/8931
+	if errdefs.IsNotFound(err) {
+		_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: cntr.Container.ID()})
+		if err != nil {
+			err = errdefs.FromGRPC(err)
+			if !errdefs.IsNotFound(err) {
+				return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err)
+			}
+		}
+		log.L.Infof("Ensured that container %s in task-service has been cleaned up successfully", cntr.Container.ID())
+	}
+
 	err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
 		if status.FinishedAt == 0 {
 			status.Pid = 0
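The NOTE above distinguishes two lookup paths for the same task. A minimal standalone sketch of those two paths, not taken from the commit — the socket path, the "k8s.io" namespace, and the container ID are illustrative assumptions:

package main

import (
	"context"
	"log"

	"github.com/containerd/containerd"
	apitasks "github.com/containerd/containerd/api/services/tasks/v1"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/namespaces"
)

func main() {
	client, err := containerd.New("/run/containerd/containerd.sock")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	ctx := namespaces.WithNamespace(context.Background(), "k8s.io")
	const id = "example-sandbox" // hypothetical container ID

	cntr, err := client.LoadContainer(ctx, id)
	if err != nil {
		log.Fatal(err)
	}

	// Lookup path 1: the runtime-side cache (io.containerd.runtime.v2.task).
	_, err = cntr.Task(ctx, nil)
	if errdefs.IsNotFound(err) {
		// Lookup path 2: the tasks service (io.containerd.service.v1.tasks-service).
		// Deleting here shuts the shim down even though path 1 reported NotFound.
		_, derr := client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: id})
		if derr != nil && !errdefs.IsNotFound(errdefs.FromGRPC(derr)) {
			log.Fatalf("tasks-service delete: %v", derr)
		}
	} else if err != nil {
		log.Fatal(err)
	}
}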

pkg/cri/sbserver/podsandbox/sandbox_delete.go

Lines changed: 65 additions & 0 deletions
@@ -21,6 +21,7 @@ import (
 	"fmt"
 
 	"github.com/containerd/containerd"
+	apitasks "github.com/containerd/containerd/api/services/tasks/v1"
 	"github.com/containerd/containerd/errdefs"
 	"github.com/containerd/containerd/log"
 )
@@ -49,6 +50,10 @@ func (c *Controller) Shutdown(ctx context.Context, sandboxID string) error {
 
 	// Delete sandbox container.
 	if sandbox.Container != nil {
+		if err := c.cleanupSandboxTask(ctx, sandbox.Container); err != nil {
+			return fmt.Errorf("failed to delete sandbox task %q: %w", sandboxID, err)
+		}
+
 		if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
 			if !errdefs.IsNotFound(err) {
 				return fmt.Errorf("failed to delete sandbox container %q: %w", sandboxID, err)
@@ -59,3 +64,63 @@ func (c *Controller) Shutdown(ctx context.Context, sandboxID string) error {
 
 	return nil
 }
+
+func (c *Controller) cleanupSandboxTask(ctx context.Context, sbCntr containerd.Container) error {
+	task, err := sbCntr.Task(ctx, nil)
+	if err != nil {
+		if !errdefs.IsNotFound(err) {
+			return fmt.Errorf("failed to load task for sandbox: %w", err)
+		}
+	} else {
+		if _, err = task.Delete(ctx, containerd.WithProcessKill); err != nil {
+			if !errdefs.IsNotFound(err) {
+				return fmt.Errorf("failed to stop sandbox: %w", err)
+			}
+		}
+	}
+
+	// NOTE: Both the sbCntr.Task and task.Delete interfaces verify the
+	// status of the target task, but they can return ErrNotFound even
+	// when the shim instance still exists.
+	//
+	// There are two caches for tasks in containerd:
+	//
+	//   1. io.containerd.service.v1.tasks-service
+	//   2. io.containerd.runtime.v2.task
+	//
+	// The first maintains the shim connection and shuts the shim down in
+	// its Delete API; the second maintains the lifecycle of the task in
+	// the shim server.
+	//
+	// So, if the shim instance is running but the task has already been
+	// deleted in the shim server, sbCntr.Task and task.Delete will
+	// receive ErrNotFound. If we don't also delete the shim instance in
+	// io.containerd.service.v1.tasks-service, the shim is leaked.
+	//
+	// As reported in containerd/containerd#7496, when the host is under
+	// I/O pressure the umount2 syscall can take more than 10 seconds, so
+	// the CRI plugin cancels this task.Delete call. The shim server isn't
+	// aware of the cancellation: after umount2 returns, it continues to
+	// delete the task record. The CRI plugin then retries the deletion,
+	// receives ErrNotFound, and marks the task as stopped. As a result,
+	// the shim is leaked.
+	//
+	// It's hard to handle lost connections or canceled requests in the
+	// shim server, so we call the Delete API of
+	// io.containerd.service.v1.tasks-service to ensure that the shim
+	// instance is shut down.
+	//
+	// REF:
+	// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
+	// 2. https://github.com/containerd/containerd/issues/8931
+	if errdefs.IsNotFound(err) {
+		_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: sbCntr.ID()})
+		if err != nil {
+			err = errdefs.FromGRPC(err)
+			if !errdefs.IsNotFound(err) {
+				return fmt.Errorf("failed to cleanup sandbox %s in task-service: %w", sbCntr.ID(), err)
+			}
+		}
+		log.G(ctx).Infof("Ensured that sandbox %s in task-service has been cleaned up successfully", sbCntr.ID())
+	}
+	return nil
+}
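One detail in both hunks is worth isolating: TaskService() returns the raw gRPC stub, so its errors arrive as gRPC status errors and must be mapped back with errdefs.FromGRPC before errdefs.IsNotFound can match them; the typed Container/Task helpers do that conversion internally, which is why the surrounding code doesn't need it elsewhere. A small sketch of just that pattern — the package, function name, and ID are illustrative:

package cleanup

import (
	"context"
	"fmt"

	"github.com/containerd/containerd"
	apitasks "github.com/containerd/containerd/api/services/tasks/v1"
	"github.com/containerd/containerd/errdefs"
)

// deleteFromTasksService deletes the task record in
// io.containerd.service.v1.tasks-service, translating the raw gRPC error
// before classifying it, and treats NotFound as success.
func deleteFromTasksService(ctx context.Context, client *containerd.Client, id string) error {
	if _, err := client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: id}); err != nil {
		if errdefs.IsNotFound(errdefs.FromGRPC(err)) {
			return nil // already gone from the tasks service: nothing to shut down
		}
		return fmt.Errorf("failed to cleanup %s in task-service: %w", id, err)
	}
	return nil
}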
