Skip to content

Container stuck, can't be stopped or killed, can't exec into it either #32827

@garceri

Description

@garceri

Description

Steps to reproduce the issue:
1. Stop a container from Rancher.
2. The status is reported as "Stopping".
3. Log in to the Rancher host and use docker commands to try to stop or kill the container.

Describe the results you received:

# docker ps |grep robin
6fde7857082f        brightpowersoftware/robin-statement:27       "/.r/r java -XX:+U..."   20 hours ago        Up 15 hours                             r-bp-robin-statement-robin-statement-1-389a5813

[root@ip-10-30-0-193 log]# docker stop 6fde7857082f
6fde7857082f
[root@ip-10-30-0-193 log]# docker kill 6fde7857082f
6fde7857082f
# docker ps |grep robin
6fde7857082f        brightpowersoftware/robin-statement:27       "/.r/r java -XX:+U..."   20 hours ago        Up 15 hours                             r-bp-robin-statement-robin-statement-1-389a5813

[root@ip-10-30-0-193 log]# docker exec -ti 6fde7857082f bash
rpc error: code = 2 desc = containerd: container not found


docker.log:
time="2017-04-25T22:32:58.671506327Z" level=warning msg="container kill failed because of 'container not found' or 'no such process': Cannot kill container 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:03.005621630Z" level=info msg="Container 6fde7857082f failed to exit within 10 seconds of kill - trying direct SIGKILL" 
time="2017-04-25T22:33:03.005961036Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:03.013659966Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:03.851538781Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:04.653255006Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:05.767981020Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:06.279304801Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:08.189192027Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:09.692933913Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:10.511364640Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:11.134226201Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:33:12.225999176Z" level=error msg="collecting stats for 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:36:15.829833261Z" level=warning msg="libcontainerd: client is out of sync, restore was called on a fully synced container (6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a)." 
time="2017-04-25T22:36:15.830257050Z" level=warning msg="libcontainerd: failed to retrieve container 6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a state: rpc error: code = 2 desc = containerd: container not found" 
time="2017-04-25T22:36:28.544300083Z" level=info msg="Removing stale sandbox 70784eabf5de958d6930a4d951db3f8b18d08cd581a238d705c0c9ff2c00216e (6fde7857082f06185279934dadc939749584d8aac911156b6fae5b962a05010a)" 

Describe the results you expected:
Container should be stopped/killed

Additional information you deem important (e.g. issue happens only occasionally):
Happens occasionally, almost daily as of this week, with several unrelated containers

Output of docker version:

# docker version
Client:
 Version:      17.03.1-ce
 API version:  1.27
 Go version:   go1.7.5
 Git commit:   c6d412e
 Built:        Tue Mar 28 00:40:02 2017
 OS/Arch:      linux/amd64

Server:
 Version:      17.03.1-ce
 API version:  1.27 (minimum version 1.12)
 Go version:   go1.7.5
 Git commit:   c6d412e
 Built:        Tue Mar 28 00:40:02 2017
 OS/Arch:      linux/amd64
 Experimental: false

Output of docker info:

# docker info
Containers: 79
 Running: 33
 Paused: 0
 Stopped: 46
Images: 137
Server Version: 17.03.1-ce
Storage Driver: overlay
 Backing Filesystem: extfs
 Supports d_type: true
Logging Driver: json-file
Cgroup Driver: cgroupfs
Plugins: 
 Volume: local rancher-nfs
 Network: bridge host macvlan null overlay
Swarm: inactive
Runtimes: runc
Default Runtime: runc
Init Binary: docker-init
containerd version: 4ab9917febca54791c5f071a9d1f404867857fcc
runc version: 54296cf40ad8143b62dbcaa1d90e520a2136ddfe
init version: N/A (expected: 949e6facb77383876aeff8a6944dde66b3089574)
Security Options:
 seccomp
  Profile: default
Kernel Version: 4.9.21-rancher
Operating System: RancherOS v1.0.0
OSType: linux
Architecture: x86_64
CPUs: 2
Total Memory: 14.94 GiB
Name: ip-10-30-0-193.ec2.internal
ID: DVSV:NLW5:VD5U:UOYY:FIOB:4RTH:7NGQ:GIBO:KKTR:VPYO:J6A2:4SZE
Docker Root Dir: /var/lib/docker
Debug Mode (client): false
Debug Mode (server): false
Registry: https://index.docker.io/v1/
Experimental: false
Insecure Registries:
 127.0.0.0/8
Live Restore Enabled: false

Additional environment details (AWS, VirtualBox, physical, etc.):
AWS

Metadata

Metadata

Assignees

No one assigned

    Labels

    area/runtime (Runtime)
    priority/P1 (Important: P1 issues are a top priority and a must-have for the next release.)
    version/17.03

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions