Description
As of f266f13, TestHealthKillContainer fails intermittently on cgroup v2 hosts
(EDIT: irrelevant to cgroup version)
|
// GitHub #37263 |
|
// Do not stop healthchecks just because we sent a signal to the container |
|
func TestHealthKillContainer(t *testing.T) { |
|
skip.If(t, testEnv.OSType == "windows", "Windows only supports SIGKILL and SIGTERM? See https://github.com/moby/moby/issues/39574") |
|
defer setupTest(t)() |
|
|
|
ctx := context.Background() |
|
client := testEnv.APIClient() |
|
|
|
id := container.Run(ctx, t, client, func(c *container.TestContainerConfig) { |
|
c.Config.Healthcheck = &containertypes.HealthConfig{ |
|
Test: []string{"CMD-SHELL", "sleep 1"}, |
|
Interval: time.Second, |
|
Retries: 5, |
|
} |
|
}) |
|
|
|
ctxPoll, cancel := context.WithTimeout(ctx, 30*time.Second) |
|
defer cancel() |
|
poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond)) |
|
|
|
err := client.ContainerKill(ctx, id, "SIGUSR1") |
|
assert.NilError(t, err) |
|
|
|
ctxPoll, cancel = context.WithTimeout(ctx, 30*time.Second) |
|
defer cancel() |
|
poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond)) |
|
} |
|
|
|
func pollForHealthStatus(ctx context.Context, client client.APIClient, containerID string, healthStatus string) func(log poll.LogT) poll.Result { |
|
return func(log poll.LogT) poll.Result { |
|
inspect, err := client.ContainerInspect(ctx, containerID) |
|
|
|
switch { |
|
case err != nil: |
|
return poll.Error(err) |
|
case inspect.State.Health.Status == healthStatus: |
|
return poll.Success() |
|
default: |
|
return poll.Continue("waiting for container to become %s", healthStatus) |
|
} |
|
} |
|
} |
Steps to reproduce the issue:
$ make TEST_SKIP_INTEGRATION_CLI=1 TESTFLAGS="-test.run TestHealthKillContainer -test.count 10" test-integration
Describe the results you received:
4 PASS, 6 FAIL
=== RUN TestHealthKillContainer
--- FAIL: TestHealthKillContainer (13.03s)
health_test.go:62: timeout hit after 10s: waiting for container to become healthy
=== RUN TestHealthKillContainer
--- PASS: TestHealthKillContainer (2.56s)
=== RUN TestHealthKillContainer
--- PASS: TestHealthKillContainer (2.49s)
=== RUN TestHealthKillContainer
--- FAIL: TestHealthKillContainer (12.42s)
health_test.go:62: timeout hit after 10s: waiting for container to become healthy
=== RUN TestHealthKillContainer
--- PASS: TestHealthKillContainer (2.50s)
=== RUN TestHealthKillContainer
--- FAIL: TestHealthKillContainer (12.42s)
health_test.go:62: timeout hit after 10s: waiting for container to become healthy
=== RUN TestHealthKillContainer
--- FAIL: TestHealthKillContainer (12.46s)
health_test.go:62: timeout hit after 10s: waiting for container to become healthy
=== RUN TestHealthKillContainer
--- FAIL: TestHealthKillContainer (12.39s)
health_test.go:62: timeout hit after 10s: waiting for container to become healthy
=== RUN TestHealthKillContainer
--- PASS: TestHealthKillContainer (2.48s)
=== RUN TestHealthKillContainer
--- FAIL: TestHealthKillContainer (12.43s)
health_test.go:62: timeout hit after 10s: waiting for container to become healthy
FAIL
Describe the results you expected:
10 PASS
Additional information you deem important (e.g. issue happens only occasionally):
- kernel cmdline:
systemd.unified_cgroup_hierarchy=1 cgroup_enable=memory swapaccount=1
Output of docker version:
Client:
Version: 20.10.0-dev
API version: 1.41
Go version: go1.13.15
Git commit: c6bb56136
Built: Tue Jan 26 08:46:11 2021
OS/Arch: linux/amd64
Context: default
Experimental: true
Server:
Engine:
Version: dev
API version: 1.41 (minimum version 1.12)
Go version: go1.13.15
Git commit: f266f13965
Built: Tue Jan 26 08:45:56 2021
OS/Arch: linux/amd64
Experimental: true
containerd:
Version: v1.4.0-2523-g1230bd630
GitCommit: 1230bd63031ba4b65709103b5cb8f5be78a43b75
runc:
Version: 1.0.0-rc92+dev
GitCommit: c69ae759fbf5acf6e8ef805471b99feee8246c3c
docker-init:
Version: 0.19.0
GitCommit: de40ad0
Output of docker info:
Client:
Context: default
Debug Mode: false
Plugins:
buildx: Build with BuildKit (Docker Inc., v0.5.1-3-g8b8725d)
Server:
Containers: 0
Running: 0
Paused: 0
Stopped: 0
Images: 3
Server Version: dev
Storage Driver: overlay2
Backing Filesystem: extfs
Supports d_type: true
Native Overlay Diff: true
Logging Driver: json-file
Cgroup Driver: systemd
Cgroup Version: 2
Plugins:
Volume: local
Network: bridge host ipvlan macvlan null overlay
Log: awslogs fluentd gcplogs gelf journald json-file local logentries splunk syslog
Swarm: inactive
Runtimes: io.containerd.runtime.v1.linux kata runsc-kvm sysbox-runc crun io.containerd.runc.v2 runsc runc runc-rc92
Default Runtime: runc
Init Binary: docker-init
containerd version: 1230bd63031ba4b65709103b5cb8f5be78a43b75
runc version: c69ae759fbf5acf6e8ef805471b99feee8246c3c
init version: de40ad0
Security Options:
apparmor
seccomp
Profile: default
cgroupns
Kernel Version: 5.8.0-40-generic
Operating System: Ubuntu 20.10
OSType: linux
Architecture: x86_64
CPUs: 4
Total Memory: 15.61GiB
Name: suda-ws01
ID: E2YB:EGZO:6BNW:EPHS:4WFQ:EIDV:ZZ6D:QBZK:6673:CIOR:DLZ6:SI3D
Docker Root Dir: /var/lib/docker
Debug Mode: true
File Descriptors: 22
Goroutines: 34
System Time: 2021-01-26T17:58:54.968084742+09:00
EventsListeners: 0
Username: akihirosuda
Registry: https://index.docker.io/v1/
Labels:
Experimental: true
Insecure Registries:
127.0.0.0/8
Live Restore Enabled: true
WARNING: Support for cgroup v2 is experimental
Additional environment details (AWS, VirtualBox, physical, etc.):
VMware Fusion
Description
As of f266f13,
TestHealthKillContainer fails intermittently on cgroup v2 hosts (EDIT: irrelevant to cgroup version)
moby/integration/container/health_test.go
Lines 36 to 78 in f266f13
Steps to reproduce the issue:
$ make TEST_SKIP_INTEGRATION_CLI=1 TESTFLAGS="-test.run TestHealthKillContainer -test.count 10" test-integration
Describe the results you received:
4 PASS, 6 FAIL
Describe the results you expected:
10 PASS
Additional information you deem important (e.g. issue happens only occasionally):
systemd.unified_cgroup_hierarchy=1 cgroup_enable=memory swapaccount=1
Output of docker version:
Output of docker info:
Additional environment details (AWS, VirtualBox, physical, etc.):
VMware Fusion