@@ -24,6 +24,7 @@ import (
2424
2525 "github.com/containerd/containerd"
2626 eventtypes "github.com/containerd/containerd/api/events"
27+ apitasks "github.com/containerd/containerd/api/services/tasks/v1"
2728 containerdio "github.com/containerd/containerd/cio"
2829 "github.com/containerd/containerd/errdefs"
2930 "github.com/containerd/containerd/events"
@@ -391,6 +392,51 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta
391392 // Move on to make sure container status is updated.
392393 }
393394 }
395+
396+	// NOTE: Both cntr.Container.Task and task.Delete always verify the
397+	// status of the target task before returning. However, they may return
398+	// ErrNotFound, which doesn't mean that the shim instance doesn't exist.
399+ //
400+ // There are two caches for task in containerd:
401+ //
402+ // 1. io.containerd.service.v1.tasks-service
403+ // 2. io.containerd.runtime.v2.task
404+ //
405+ // First one is to maintain the shim connection and shutdown the shim
406+ // in Delete API. And the second one is to maintain the lifecycle of
407+ // task in shim server.
408+ //
409+	// So, if the shim instance is still running but the task has already
410+	// been deleted in the shim server, cntr.Container.Task and task.Delete
411+	// will receive ErrNotFound. If we don't delete the shim instance in
412+	// io.containerd.service.v1.tasks-service, the shim will be leaked.
413+ //
414+	// Based on the containerd/containerd#7496 issue, when the host is under
415+	// IO pressure, the umount2 syscall can take more than 10 seconds, so the
416+	// CRI plugin cancels this task.Delete call. However, the shim server
417+	// isn't aware of the cancellation. After returning from the umount2
418+	// syscall, the shim server continues to delete the task record. The CRI
419+	// plugin then retries the deletion, receives ErrNotFound, and marks the
420+	// container as stopped. Therefore, the shim is leaked.
421+ //
422+	// It's hard to handle lost-connection or canceled-request cases in the
423+	// shim server, so we call the Delete API of io.containerd.service.v1.tasks-service
424+	// to ensure that the shim instance is shut down.
425+ //
426+ // REF:
427+ // 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
428+ // 2. https://github.com/containerd/containerd/issues/8931
429+	if errdefs.IsNotFound(err) {
430+		_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: cntr.Container.ID()})
431+		if err != nil {
432+			err = errdefs.FromGRPC(err)
433+			if !errdefs.IsNotFound(err) {
434+				return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err)
435+			}
436+		}
437+		logrus.Infof("Ensure that container %s in task-service has been cleanup successfully", cntr.Container.ID())
438+	}
439+
394440 err = cntr .Status .UpdateSync (func (status containerstore.Status ) (containerstore.Status , error ) {
395441 if status .FinishedAt == 0 {
396442 status .Pid = 0
@@ -431,6 +477,50 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst
431477 // Move on to make sure container status is updated.
432478 }
433479 }
480+
481+	// NOTE: Both sb.Container.Task and task.Delete always verify the
482+	// status of the target task before returning. However, they may return
483+	// ErrNotFound, which doesn't mean that the shim instance doesn't exist.
484+ //
485+ // There are two caches for task in containerd:
486+ //
487+ // 1. io.containerd.service.v1.tasks-service
488+ // 2. io.containerd.runtime.v2.task
489+ //
490+ // First one is to maintain the shim connection and shutdown the shim
491+ // in Delete API. And the second one is to maintain the lifecycle of
492+ // task in shim server.
493+ //
494+	// So, if the shim instance is still running but the task has already
495+	// been deleted in the shim server, sb.Container.Task and task.Delete
496+	// will receive ErrNotFound. If we don't delete the shim instance in
497+	// io.containerd.service.v1.tasks-service, the shim will be leaked.
498+ //
499+	// Based on the containerd/containerd#7496 issue, when the host is under
500+	// IO pressure, the umount2 syscall can take more than 10 seconds, so the
501+	// CRI plugin cancels this task.Delete call. However, the shim server
502+	// isn't aware of the cancellation. After returning from the umount2
503+	// syscall, the shim server continues to delete the task record. The CRI
504+	// plugin then retries the deletion, receives ErrNotFound, and marks the
505+	// sandbox as stopped. Therefore, the shim is leaked.
506+ //
507+	// It's hard to handle lost-connection or canceled-request cases in the
508+	// shim server, so we call the Delete API of io.containerd.service.v1.tasks-service
509+	// to ensure that the shim instance is shut down.
510+ //
511+ // REF:
512+ // 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
513+ // 2. https://github.com/containerd/containerd/issues/8931
514+	if errdefs.IsNotFound(err) {
515+		_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: sb.Container.ID()})
516+		if err != nil {
517+			err = errdefs.FromGRPC(err)
518+			if !errdefs.IsNotFound(err) {
519+				return fmt.Errorf("failed to cleanup sandbox %s in task-service: %w", sb.Container.ID(), err)
520+			}
521+		}
522+		logrus.Infof("Ensure that sandbox %s in task-service has been cleanup successfully", sb.Container.ID())
523+	}
434524 err = sb .Status .Update (func (status sandboxstore.Status ) (sandboxstore.Status , error ) {
435525 status .State = sandboxstore .StateNotReady
436526 status .Pid = 0
0 commit comments