Skip to content

Containerd fails to stop containers #6080

@haslersn

Description

@haslersn

Description

When I delete Pods in my Kubernetes cluster, which uses containerd, the Pods on one node remain stuck in the Terminating status. This happens with both runc and Kata Containers. containerd logs the following errors, which seem to be related:

Oct 02 12:37:56 kube03 containerd[18499]: time="2021-10-02T12:37:56.639566670+02:00" level=error msg="StopContainer for \"6134f5baf2ea672cc180359ec2527ff6936942e437857999cdaefe374805413b\" failed" error="rpc error: code = Canceled desc = an error occurs during waiting for container \"6134f5baf2ea672cc180359ec2527ff6936942e437857999cdaefe374805413b\" to be killed: wait container \"6134f5baf2ea672cc180359ec2527ff6936942e437857999cdaefe374805413b\": context canceled"
Oct 02 12:38:22 kube03 containerd[18499]: time="2021-10-02T12:38:22.534116584+02:00" level=error msg="Failed to handle backOff event &TaskExit{ContainerID:6134f5baf2ea672cc180359ec2527ff6936942e437857999cdaefe374805413b,ID:6134f5baf2ea672cc180359ec2527ff6936942e437857999cdaefe374805413b,Pid:10355,ExitStatus:9,ExitedAt:2021-08-11 00:14:39.884690995 +0000 UTC,XXX_unrecognized:[],} for 6134f5baf2ea672cc180359ec2527ff6936942e437857999cdaefe374805413b" error="failed to handle container TaskExit event: failed to stop container: Container not ready or stopped, impossible to delete: unknown"

Describe the results you expected

containerd should successfully delete the container.

Version

Kubernetes v1.19.15 via kubeadm on bare-metal

$ containerd --version
containerd containerd.io 1.4.9 e25210fe30a0a703442421b0f60afac609f950a3

Configuration

$ cat /etc/containerd/config.toml
"disabled_plugins" = []
"oom_score" = 0
"plugin_dir" = ""
"required_plugins" = []
"root" = "/var/lib/containerd"
"state" = "/run/containerd"
"version" = 2

["cgroup"]
  "path" = ""

["debug"]
  "address" = ""
  "gid" = 0
  "level" = "warn"
  "uid" = 0

["grpc"]
  "address" = "/run/containerd/containerd.sock"
  "gid" = 0
  "max_recv_message_size" = 16777216
  "max_send_message_size" = 16777216
  "tcp_address" = ""
  "tcp_tls_cert" = ""
  "tcp_tls_key" = ""
  "uid" = 0

["metrics"]
  "address" = ""
  "grpc_histogram" = false

["plugins"]

  ["plugins"."io.containerd.gc.v1.scheduler"]
    "deletion_threshold" = 0
    "mutation_threshold" = 100
    "pause_threshold" = 0.02
    "schedule_delay" = "0s"
    "startup_delay" = "100ms"

  ["plugins"."io.containerd.grpc.v1.cri"]
    "disable_apparmor" = false
    "disable_cgroup" = false
    "disable_proc_mount" = false
    "disable_tcp_service" = true
    "enable_selinux" = false
    "enable_tls_streaming" = false
    "max_concurrent_downloads" = 3
    "max_container_log_line_size" = 16384
    "restrict_oom_score_adj" = false
    "sandbox_image" = "k8s.gcr.io/pause:3.1"
    "stats_collect_period" = 10
    "stream_idle_timeout" = "4h0m0s"
    "stream_server_address" = "127.0.0.1"
    "stream_server_port" = "0"

    ["plugins"."io.containerd.grpc.v1.cri"."cni"]
      "bin_dir" = "/opt/cni/bin"
      "conf_dir" = "/etc/cni/net.d"
      "conf_template" = ""
      "max_conf_num" = 1

    ["plugins"."io.containerd.grpc.v1.cri"."containerd"]
      "default_runtime_name" = "runc"
      "no_pivot" = false
      "snapshotter" = "overlayfs"

      ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"]

        ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"."kata"]
          "privileged_without_host_devices" = true
          "runtime_type" = "io.containerd.kata.v2"

          ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"."kata"."options"]
            "ConfigPath" = "/opt/kata/share/defaults/kata-containers/configuration.toml"

        ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"."runc"]
          "privileged_without_host_devices" = false
          "runtime_engine" = ""
          "runtime_root" = ""
          "runtime_type" = "io.containerd.runc.v2"

          ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"."runc"."options"]
            "SystemdCgroup" = true

        ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"."untrusted"]
          "privileged_without_host_devices" = true
          "runtime_type" = "io.containerd.kata.v2"

          ["plugins"."io.containerd.grpc.v1.cri"."containerd"."runtimes"."untrusted"."options"]
            "ConfigPath" = "/opt/kata/share/defaults/kata-containers/configuration.toml"

    ["plugins"."io.containerd.grpc.v1.cri"."registry"]

      ["plugins"."io.containerd.grpc.v1.cri"."registry"."mirrors"]

        ["plugins"."io.containerd.grpc.v1.cri"."registry"."mirrors"."docker.io"]
          "endpoint" = [
            "https://registry-1.docker.io"
          ]

    ["plugins"."io.containerd.grpc.v1.cri"."x509_key_pair_streaming"]
      "tls_cert_file" = ""
      "tls_key_file" = ""

  ["plugins"."io.containerd.internal.v1.opt"]
    "path" = "/opt/containerd"

  ["plugins"."io.containerd.internal.v1.restart"]
    "interval" = "10s"

  ["plugins"."io.containerd.metadata.v1.bolt"]
    "content_sharing_policy" = "shared"

  ["plugins"."io.containerd.monitor.v1.cgroups"]
    "no_prometheus" = false

  ["plugins"."io.containerd.runtime.v1.linux"]
    "no_shim" = false
    "runtime" = "runc"
    "runtime_root" = ""
    "shim" = "containerd-shim"
    "shim_debug" = false

  ["plugins"."io.containerd.runtime.v2.task"]
    "platforms" = [
      "linux/amd64"
    ]

  ["plugins"."io.containerd.service.v1.diff-service"]
    "default" = [
      "walking"
    ]

  ["plugins"."io.containerd.snapshotter.v1.devmapper"]
    "base_image_size" = ""
    "pool_name" = ""
    "root_path" = ""

["timeouts"]
  "io.containerd.timeout.shim.cleanup" = "5s"
  "io.containerd.timeout.shim.load" = "5s"
  "io.containerd.timeout.shim.shutdown" = "3s"
  "io.containerd.timeout.task.state" = "2s"

["ttrpc"]
  "address" = ""
  "gid" = 0
  "uid" = 0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    Projects

    Status

    Todo

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions