Skip to content

Commit aeb8972

Browse files
committed
daemon: daemon.containerRestart: don't cancel restart on context cancel
commit def549c passed through the context to the daemon.ContainerStart function. As a result, restarting containers no longer is an atomic operation, because a context cancellation could interrupt the restart (between "stopping" and "(re)starting"), resulting in the container being stopped, but not restarted. Restarting a container, or more factually; making a successful request on the `/containers/{id]/restart` endpoint, should be an atomic operation. This patch uses a context.WithoutCancel for restart requests. It's worth noting that daemon.containerStop already uses context.WithoutCancel, so in that function, we'll be wrapping the context twice, but this should likely not cause issues (just redundant for this code-path). Before this patch, starting a container that bind-mounts the docker socket, then restarting itself from within the container would cancel the restart operation. The container would be stopped, but not started after that: docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh docker exec myself sh -c 'docker restart myself' docker ps -a CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 3a2a741c65ff docker:cli "docker-entrypoint.s…" 26 seconds ago Exited (128) 7 seconds ago myself With this patch: the stop still cancels the exec, but does not cancel the restart operation, and the container is started again: docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh docker exec myself sh -c 'docker restart myself' docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 4393a01f7c75 docker:cli "docker-entrypoint.s…" About a minute ago Up 4 seconds myself Signed-off-by: Sebastiaan van Stijn <[email protected]>
1 parent b4a08b3 commit aeb8972

2 files changed

Lines changed: 80 additions & 0 deletions

File tree

daemon/restart.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
containertypes "github.com/docker/docker/api/types/container"
88
"github.com/docker/docker/api/types/events"
99
"github.com/docker/docker/container"
10+
"github.com/docker/docker/internal/compatcontext"
1011
)
1112

1213
// ContainerRestart stops and starts a container. It attempts to
@@ -32,6 +33,10 @@ func (daemon *Daemon) ContainerRestart(ctx context.Context, name string, options
3233
// gracefully stop, before forcefully terminating the container. If
3334
// given a negative duration, wait forever for a graceful stop.
3435
func (daemon *Daemon) containerRestart(ctx context.Context, daemonCfg *configStore, container *container.Container, options containertypes.StopOptions) error {
36+
// Restarting is expected to be an atomic operation, and cancelling
37+
// the request should not cancel the stop -> start sequence.
38+
ctx = compatcontext.WithoutCancel(ctx)
39+
3540
// Determine isolation. If not specified in the hostconfig, use daemon default.
3641
actualIsolation := container.HostConfig.Isolation
3742
if containertypes.Isolation.IsDefault(actualIsolation) {

integration/container/restart_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@ package container // import "github.com/docker/docker/integration/container"
33
import (
44
"context"
55
"fmt"
6+
"runtime"
67
"testing"
78
"time"
89

10+
"github.com/docker/docker/api/types"
911
"github.com/docker/docker/api/types/container"
12+
"github.com/docker/docker/api/types/events"
13+
"github.com/docker/docker/api/types/filters"
1014
"github.com/docker/docker/client"
1115
testContainer "github.com/docker/docker/integration/internal/container"
1216
"github.com/docker/docker/testutil"
1317
"github.com/docker/docker/testutil/daemon"
1418
"gotest.tools/v3/assert"
19+
is "gotest.tools/v3/assert/cmp"
1520
"gotest.tools/v3/poll"
1621
"gotest.tools/v3/skip"
1722
)
@@ -213,3 +218,73 @@ func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
213218
})
214219
}
215220
}
221+
222+
// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
223+
// request does not cancel the restart operation, and still starts the container
224+
// after it was stopped.
225+
//
226+
// Regression test for https://github.com/moby/moby/discussions/46682
227+
func TestContainerRestartWithCancelledRequest(t *testing.T) {
228+
ctx := setupTest(t)
229+
apiClient := testEnv.APIClient()
230+
231+
testutil.StartSpan(ctx, t)
232+
233+
// Create a container that ignores SIGTERM and doesn't stop immediately,
234+
// giving us time to cancel the request.
235+
//
236+
// Restarting a container is "stop" (and, if needed, "kill"), then "start"
237+
// the container. We're trying to create the scenario where the "stop" is
238+
// handled, but the request was cancelled and therefore the "start" not
239+
// taking place.
240+
cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
241+
defer func() {
242+
err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
243+
if t.Failed() && err != nil {
244+
t.Logf("Cleaning up test container failed with error: %v", err)
245+
}
246+
}()
247+
248+
// Start listening for events.
249+
messages, errs := apiClient.Events(ctx, types.EventsOptions{
250+
Filters: filters.NewArgs(
251+
filters.Arg("container", cID),
252+
filters.Arg("event", string(events.ActionRestart)),
253+
),
254+
})
255+
256+
// Make restart request, but cancel the request before the container
257+
// is (forcibly) killed.
258+
ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
259+
stopTimeout := 1
260+
err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
261+
Timeout: &stopTimeout,
262+
})
263+
assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
264+
cancel()
265+
266+
// Validate that the restart event occurred, which is emitted
267+
// after the restart (stop (kill) start) finished.
268+
//
269+
// Note that we cannot use RestartCount for this, as that's only
270+
// used for restart-policies.
271+
restartTimeout := 2 * time.Second
272+
if runtime.GOOS == "windows" {
273+
// hcs can sometimes take a long time to stop container.
274+
restartTimeout = StopContainerWindowsPollTimeout
275+
}
276+
select {
277+
case m := <-messages:
278+
assert.Check(t, is.Equal(m.Actor.ID, cID))
279+
assert.Check(t, is.Equal(m.Action, events.ActionRestart))
280+
case err := <-errs:
281+
assert.NilError(t, err)
282+
case <-time.After(restartTimeout):
283+
t.Errorf("timeout waiting for restart event")
284+
}
285+
286+
// Container should be restarted (running).
287+
inspect, err := apiClient.ContainerInspect(ctx, cID)
288+
assert.NilError(t, err)
289+
assert.Check(t, is.Equal(inspect.State.Status, "running"))
290+
}

0 commit comments

Comments
 (0)