@@ -25,6 +25,7 @@ import (
 
 	"github.com/containerd/containerd"
 	eventtypes "github.com/containerd/containerd/api/events"
+	apitasks "github.com/containerd/containerd/api/services/tasks/v1"
 	containerdio "github.com/containerd/containerd/cio"
 	"github.com/containerd/containerd/errdefs"
 	"github.com/containerd/containerd/events"
@@ -393,6 +394,51 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta
 			// Move on to make sure container status is updated.
 		}
 	}
+
+	// NOTE: Both cntr.Container.Task and task.Delete report the status of
+	// the target task, but an ErrNotFound from either call does not mean
+	// that the shim instance is gone.
+	//
+	// There are two caches for tasks in containerd:
+	//
+	//   1. io.containerd.service.v1.tasks-service
+	//   2. io.containerd.runtime.v2.task
+	//
+	// The first maintains the shim connection and shuts the shim down in
+	// its Delete API; the second maintains the lifecycle of the task in
+	// the shim server.
+	//
+	// So if the shim instance is still running but the task has already
+	// been deleted in the shim server, cntr.Container.Task and task.Delete
+	// return ErrNotFound. If we don't also delete the shim instance from
+	// io.containerd.service.v1.tasks-service, the shim is leaked.
+	//
+	// Per containerd/containerd#7496, when the host is under IO pressure
+	// the umount2 syscall can take more than 10 seconds, so the CRI plugin
+	// cancels this task.Delete call. The shim server is not aware of the
+	// cancellation: after umount2 returns, it goes on to delete the task
+	// record. The CRI plugin then retries the delete, receives ErrNotFound,
+	// and marks the container as stopped, leaking the shim.
+	//
+	// It is hard to handle lost connections or canceled requests in the
+	// shim server, so we call the Delete API of
+	// io.containerd.service.v1.tasks-service to ensure the shim instance is
+	// shut down.
+	//
+	// REF:
+	// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
+	// 2. https://github.com/containerd/containerd/issues/8931
+	if errdefs.IsNotFound(err) {
+		_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: cntr.Container.ID()})
+		if err != nil {
+			err = errdefs.FromGRPC(err)
+			if !errdefs.IsNotFound(err) {
+				return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err)
+			}
+		}
+		logrus.Infof("Ensured that container %s in task-service has been cleaned up", cntr.Container.ID())
+	}
+
 	err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
 		if status.FinishedAt == 0 {
 			status.Pid = 0
@@ -434,6 +480,50 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst
 			// Move on to make sure container status is updated.
 		}
 	}
+
+	// NOTE: Both sb.Container.Task and task.Delete report the status of
+	// the target task, but an ErrNotFound from either call does not mean
+	// that the shim instance is gone.
+	//
+	// There are two caches for tasks in containerd:
+	//
+	//   1. io.containerd.service.v1.tasks-service
+	//   2. io.containerd.runtime.v2.task
+	//
+	// The first maintains the shim connection and shuts the shim down in
+	// its Delete API; the second maintains the lifecycle of the task in
+	// the shim server.
+	//
+	// So if the shim instance is still running but the task has already
+	// been deleted in the shim server, sb.Container.Task and task.Delete
+	// return ErrNotFound. If we don't also delete the shim instance from
+	// io.containerd.service.v1.tasks-service, the shim is leaked.
+	//
+	// Per containerd/containerd#7496, when the host is under IO pressure
+	// the umount2 syscall can take more than 10 seconds, so the CRI plugin
+	// cancels this task.Delete call. The shim server is not aware of the
+	// cancellation: after umount2 returns, it goes on to delete the task
+	// record. The CRI plugin then retries the delete, receives ErrNotFound,
+	// and marks the sandbox as stopped, leaking the shim.
+	//
+	// It is hard to handle lost connections or canceled requests in the
+	// shim server, so we call the Delete API of
+	// io.containerd.service.v1.tasks-service to ensure the shim instance is
+	// shut down.
+	//
+	// REF:
+	// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
+	// 2. https://github.com/containerd/containerd/issues/8931
+	if errdefs.IsNotFound(err) {
+		_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: sb.Container.ID()})
+		if err != nil {
+			err = errdefs.FromGRPC(err)
+			if !errdefs.IsNotFound(err) {
+				return fmt.Errorf("failed to cleanup sandbox %s in task-service: %w", sb.Container.ID(), err)
+			}
+		}
+		logrus.Infof("Ensured that sandbox %s in task-service has been cleaned up", sb.Container.ID())
+	}
 	err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
 		status.State = sandboxstore.StateNotReady
 		status.Pid = 0
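
For readers who want to see the two-step cleanup without the CRI plumbing, here is a minimal standalone sketch of the same pattern. It is an illustration, not part of the commit: the socket path `/run/containerd/containerd.sock`, the `k8s.io` namespace, the container ID, and the `ensureTaskCleanup` helper are all assumed placeholders.

```go
package main

import (
	"context"
	"fmt"

	"github.com/containerd/containerd"
	apitasks "github.com/containerd/containerd/api/services/tasks/v1"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/namespaces"
)

// ensureTaskCleanup mirrors the pattern in the diff above: delete the task,
// and if the runtime cache already reports ErrNotFound, still call the
// tasks-service Delete API so the shim connection is torn down and the shim
// instance is shut down rather than leaked.
func ensureTaskCleanup(ctx context.Context, client *containerd.Client, id string) error {
	container, err := client.LoadContainer(ctx, id)
	if err == nil {
		var task containerd.Task
		if task, err = container.Task(ctx, nil); err == nil {
			_, err = task.Delete(ctx, containerd.WithProcessKill)
		}
	}
	if err != nil && !errdefs.IsNotFound(err) {
		return fmt.Errorf("failed to delete task %s: %w", id, err)
	}
	if errdefs.IsNotFound(err) {
		// The runtime cache (io.containerd.runtime.v2.task) no longer knows
		// the task, but io.containerd.service.v1.tasks-service may still
		// hold the shim connection; delete there too to avoid a leaked shim.
		if _, derr := client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: id}); derr != nil {
			if derr = errdefs.FromGRPC(derr); !errdefs.IsNotFound(derr) {
				return fmt.Errorf("failed to cleanup %s in tasks-service: %w", id, derr)
			}
		}
	}
	return nil
}

func main() {
	client, err := containerd.New("/run/containerd/containerd.sock") // assumed socket path
	if err != nil {
		panic(err)
	}
	defer client.Close()

	// "k8s.io" and "example-container" are placeholder values.
	ctx := namespaces.WithNamespace(context.Background(), "k8s.io")
	if err := ensureTaskCleanup(ctx, client, "example-container"); err != nil {
		fmt.Println("cleanup failed:", err)
	}
}
```

Note the `errdefs.FromGRPC` call: `TaskService()` returns a raw gRPC client, so its errors must be mapped back to containerd error types before `errdefs.IsNotFound` can recognize them, which is also why the diff does the same conversion.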