Skip to content

Commit 05d7386

Browse files
committed
daemon: daemon.containerRestart: don't cancel restart on context cancel
commit def549c passed through the context to the daemon.ContainerStart function. As a result, restarting containers no longer is an atomic operation, because a context cancellation could interrupt the restart (between "stopping" and "(re)starting"), resulting in the container being stopped, but not restarted. Restarting a container, or more factually; making a successful request on the `/containers/{id]/restart` endpoint, should be an atomic operation. This patch uses a context.WithoutCancel for restart requests. It's worth noting that daemon.containerStop already uses context.WithoutCancel, so in that function, we'll be wrapping the context twice, but this should likely not cause issues (just redundant for this code-path). Before this patch, starting a container that bind-mounts the docker socket, then restarting itself from within the container would cancel the restart operation. The container would be stopped, but not started after that: docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh docker exec myself sh -c 'docker restart myself' docker ps -a CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 3a2a741c65ff docker:cli "docker-entrypoint.s…" 26 seconds ago Exited (128) 7 seconds ago myself With this patch: the stop still cancels the exec, but does not cancel the restart operation, and the container is started again: docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh docker exec myself sh -c 'docker restart myself' docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 4393a01f7c75 docker:cli "docker-entrypoint.s…" About a minute ago Up 4 seconds myself Signed-off-by: Sebastiaan van Stijn <[email protected]> (cherry picked from commit aeb8972) Signed-off-by: Sebastiaan van Stijn <[email protected]>
1 parent 9b20b1a commit 05d7386

2 files changed

Lines changed: 77 additions & 0 deletions

File tree

daemon/restart.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
containertypes "github.com/docker/docker/api/types/container"
88
"github.com/docker/docker/container"
9+
"github.com/docker/docker/internal/compatcontext"
910
)
1011

1112
// ContainerRestart stops and starts a container. It attempts to
@@ -31,6 +32,10 @@ func (daemon *Daemon) ContainerRestart(ctx context.Context, name string, options
3132
// gracefully stop, before forcefully terminating the container. If
3233
// given a negative duration, wait forever for a graceful stop.
3334
func (daemon *Daemon) containerRestart(ctx context.Context, container *container.Container, options containertypes.StopOptions) error {
35+
// Restarting is expected to be an atomic operation, and cancelling
36+
// the request should not cancel the stop -> start sequence.
37+
ctx = compatcontext.WithoutCancel(ctx)
38+
3439
// Determine isolation. If not specified in the hostconfig, use daemon default.
3540
actualIsolation := container.HostConfig.Isolation
3641
if containertypes.Isolation.IsDefault(actualIsolation) {

integration/container/restart_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,18 @@ package container // import "github.com/docker/docker/integration/container"
33
import (
44
"context"
55
"fmt"
6+
"runtime"
67
"testing"
78
"time"
89

910
"github.com/docker/docker/api/types"
1011
"github.com/docker/docker/api/types/container"
12+
"github.com/docker/docker/api/types/filters"
1113
"github.com/docker/docker/client"
1214
testContainer "github.com/docker/docker/integration/internal/container"
1315
"github.com/docker/docker/testutil/daemon"
1416
"gotest.tools/v3/assert"
17+
is "gotest.tools/v3/assert/cmp"
1518
"gotest.tools/v3/poll"
1619
"gotest.tools/v3/skip"
1720
)
@@ -208,3 +211,72 @@ func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
208211
})
209212
}
210213
}
214+
215+
// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
216+
// request does not cancel the restart operation, and still starts the container
217+
// after it was stopped.
218+
//
219+
// Regression test for https://github.com/moby/moby/discussions/46682
220+
func TestContainerRestartWithCancelledRequest(t *testing.T) {
221+
ctx := context.TODO()
222+
defer setupTest(t)()
223+
apiClient := testEnv.APIClient()
224+
225+
// Create a container that ignores SIGTERM and doesn't stop immediately,
226+
// giving us time to cancel the request.
227+
//
228+
// Restarting a container is "stop" (and, if needed, "kill"), then "start"
229+
// the container. We're trying to create the scenario where the "stop" is
230+
// handled, but the request was cancelled and therefore the "start" not
231+
// taking place.
232+
cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
233+
defer func() {
234+
err := apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true})
235+
if t.Failed() && err != nil {
236+
t.Logf("Cleaning up test container failed with error: %v", err)
237+
}
238+
}()
239+
240+
// Start listening for events.
241+
messages, errs := apiClient.Events(ctx, types.EventsOptions{
242+
Filters: filters.NewArgs(
243+
filters.Arg("container", cID),
244+
filters.Arg("event", "restart"),
245+
),
246+
})
247+
248+
// Make restart request, but cancel the request before the container
249+
// is (forcibly) killed.
250+
ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
251+
stopTimeout := 1
252+
err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
253+
Timeout: &stopTimeout,
254+
})
255+
assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
256+
cancel()
257+
258+
// Validate that the restart event occurred, which is emitted
259+
// after the restart (stop (kill) start) finished.
260+
//
261+
// Note that we cannot use RestartCount for this, as that's only
262+
// used for restart-policies.
263+
restartTimeout := 2 * time.Second
264+
if runtime.GOOS == "windows" {
265+
// hcs can sometimes take a long time to stop container.
266+
restartTimeout = StopContainerWindowsPollTimeout
267+
}
268+
select {
269+
case m := <-messages:
270+
assert.Check(t, is.Equal(m.Actor.ID, cID))
271+
assert.Check(t, is.Equal(m.Action, "restart"))
272+
case err := <-errs:
273+
assert.NilError(t, err)
274+
case <-time.After(restartTimeout):
275+
t.Errorf("timeout waiting for restart event")
276+
}
277+
278+
// Container should be restarted (running).
279+
inspect, err := apiClient.ContainerInspect(ctx, cID)
280+
assert.NilError(t, err)
281+
assert.Check(t, is.Equal(inspect.State.Status, "running"))
282+
}

0 commit comments

Comments
 (0)