Skip to content

Commit 9503d72

Browse files
authored
Merge pull request #6744 from Junnplus/restart-policy
Add restart policy for enhanced restart manager
2 parents 184883b + 3df7674 commit 9503d72

5 files changed

Lines changed: 473 additions & 23 deletions

File tree

integration/client/restart_monitor_test.go

Lines changed: 115 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,24 @@ package client
1919
import (
2020
"bytes"
2121
"context"
22+
"errors"
2223
"fmt"
2324
"os"
2425
"path/filepath"
2526
"runtime"
27+
"strconv"
2628
"syscall"
2729
"testing"
2830
"time"
2931

3032
. "github.com/containerd/containerd"
31-
"github.com/containerd/containerd/containers"
33+
eventtypes "github.com/containerd/containerd/api/events"
3234
"github.com/containerd/containerd/oci"
3335
"github.com/containerd/containerd/pkg/testutil"
36+
"github.com/containerd/containerd/runtime/restart"
3437
srvconfig "github.com/containerd/containerd/services/server/config"
3538
"github.com/containerd/containerd/sys"
39+
"github.com/containerd/typeurl"
3640
exec "golang.org/x/sys/execabs"
3741
)
3842

@@ -148,7 +152,7 @@ version = 2
148152
oci.WithImageConfig(image),
149153
longCommand,
150154
),
151-
withRestartStatus(Running),
155+
restart.WithStatus(Running),
152156
)
153157
if err != nil {
154158
t.Fatal(err)
@@ -229,14 +233,115 @@ version = 2
229233
t.Logf("%v: the task was restarted since %v", time.Now(), lastCheck)
230234
}
231235

232-
// withRestartStatus is a copy of "github.com/containerd/containerd/runtime/restart".WithStatus.
233-
// This copy is needed because `go test` refuses circular imports.
234-
func withRestartStatus(status ProcessStatus) func(context.Context, *Client, *containers.Container) error {
235-
return func(_ context.Context, _ *Client, c *containers.Container) error {
236-
if c.Labels == nil {
237-
c.Labels = make(map[string]string)
236+
func TestRestartMonitorWithOnFailurePolicy(t *testing.T) {
237+
const (
238+
interval = 5 * time.Second
239+
)
240+
configTOML := fmt.Sprintf(`
241+
version = 2
242+
[plugins]
243+
[plugins."io.containerd.internal.v1.restart"]
244+
interval = "%s"
245+
`, interval.String())
246+
client, _, cleanup := newDaemonWithConfig(t, configTOML)
247+
defer cleanup()
248+
249+
var (
250+
ctx, cancel = testContext(t)
251+
id = t.Name()
252+
)
253+
defer cancel()
254+
255+
image, err := client.Pull(ctx, testImage, WithPullUnpack)
256+
if err != nil {
257+
t.Fatal(err)
258+
}
259+
260+
policy, _ := restart.NewPolicy("on-failure:1")
261+
container, err := client.NewContainer(ctx, id,
262+
WithNewSnapshot(id, image),
263+
WithNewSpec(
264+
oci.WithImageConfig(image),
265+
// always exited with 1
266+
withExitStatus(1),
267+
),
268+
restart.WithStatus(Running),
269+
restart.WithPolicy(policy),
270+
)
271+
if err != nil {
272+
t.Fatal(err)
273+
}
274+
defer func() {
275+
if err := container.Delete(ctx, WithSnapshotCleanup); err != nil {
276+
t.Logf("failed to delete container: %v", err)
238277
}
239-
c.Labels["containerd.io/restart.status"] = string(status)
240-
return nil
278+
}()
279+
280+
task, err := container.NewTask(ctx, empty())
281+
if err != nil {
282+
t.Fatal(err)
283+
}
284+
defer func() {
285+
if _, err := task.Delete(ctx, WithProcessKill); err != nil {
286+
t.Logf("failed to delete task: %v", err)
287+
}
288+
}()
289+
290+
if err := task.Start(ctx); err != nil {
291+
t.Fatal(err)
292+
}
293+
294+
statusCh, err := task.Wait(ctx)
295+
if err != nil {
296+
t.Fatal(err)
297+
}
298+
299+
eventCh, eventErrCh := client.Subscribe(ctx, `topic=="/tasks/create"`)
300+
301+
select {
302+
case <-statusCh:
303+
case <-time.After(30 * time.Second):
304+
t.Fatal("should receive exit event in time")
305+
}
306+
307+
select {
308+
case e := <-eventCh:
309+
cid, err := convertTaskCreateEvent(e.Event)
310+
if err != nil {
311+
t.Fatal(err)
312+
}
313+
if cid != id {
314+
t.Fatalf("expected task id = %s, but got %s", id, cid)
315+
}
316+
case err := <-eventErrCh:
317+
t.Fatalf("unexpected error from event channel: %v", err)
318+
case <-time.After(1 * time.Minute):
319+
t.Fatal("should receive create event in time")
320+
}
321+
322+
labels, err := container.Labels(ctx)
323+
if err != nil {
324+
t.Fatal(err)
325+
}
326+
restartCount, _ := strconv.Atoi(labels[restart.CountLabel])
327+
if restartCount != 1 {
328+
t.Fatalf("expected restart count to be 1, got %d", restartCount)
329+
}
330+
}
331+
332+
func convertTaskCreateEvent(e typeurl.Any) (string, error) {
333+
id := ""
334+
335+
evt, err := typeurl.UnmarshalAny(e)
336+
if err != nil {
337+
return "", fmt.Errorf("failed to unmarshalany: %w", err)
338+
}
339+
340+
switch e := evt.(type) {
341+
case *eventtypes.TaskCreate:
342+
id = e.ContainerID
343+
default:
344+
return "", errors.New("unsupported event")
241345
}
346+
return id, nil
242347
}

runtime/restart/monitor/change.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ import (
2020
"context"
2121
"fmt"
2222
"net/url"
23+
"strconv"
2324
"syscall"
2425

2526
"github.com/containerd/containerd"
2627
"github.com/containerd/containerd/cio"
28+
"github.com/containerd/containerd/runtime/restart"
2729
"github.com/sirupsen/logrus"
2830
)
2931

@@ -38,6 +40,7 @@ func (s *stopChange) apply(ctx context.Context, client *containerd.Client) error
3840
type startChange struct {
3941
container containerd.Container
4042
logURI string
43+
count int
4144

4245
// Deprecated(in release 1.5): but recognized now, prefer to use logURI
4346
logPath string
@@ -61,6 +64,15 @@ func (s *startChange) apply(ctx context.Context, client *containerd.Client) erro
6164
s.logPath, s.logURI)
6265
}
6366

67+
if s.count > 0 {
68+
labels := map[string]string{
69+
restart.CountLabel: strconv.Itoa(s.count),
70+
}
71+
opt := containerd.WithAdditionalContainerLabels(labels)
72+
if err := s.container.Update(ctx, containerd.UpdateContainerOpts(opt)); err != nil {
73+
return err
74+
}
75+
}
6476
killTask(ctx, s.container)
6577
task, err := s.container.NewTask(ctx, log)
6678
if err != nil {

runtime/restart/monitor/monitor.go

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package monitor
1919
import (
2020
"context"
2121
"fmt"
22+
"strconv"
2223
"sync"
2324
"time"
2425

@@ -72,6 +73,7 @@ func init() {
7273
},
7374
},
7475
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
76+
ic.Meta.Capabilities = []string{"no", "always", "on-failure", "unless-stopped"}
7577
opts, err := getServicesOpts(ic)
7678
if err != nil {
7779
return nil, err
@@ -217,15 +219,29 @@ func (m *monitor) monitor(ctx context.Context) ([]change, error) {
217219
return nil, err
218220
}
219221
desiredStatus := containerd.ProcessStatus(labels[restart.StatusLabel])
220-
if m.isSameStatus(ctx, desiredStatus, c) {
222+
task, err := c.Task(ctx, nil)
223+
if err != nil && desiredStatus == containerd.Stopped {
221224
continue
222225
}
226+
status, err := task.Status(ctx)
227+
if err != nil && desiredStatus == containerd.Stopped {
228+
continue
229+
}
230+
if desiredStatus == status.Status {
231+
continue
232+
}
233+
223234
switch desiredStatus {
224235
case containerd.Running:
236+
if !restart.Reconcile(status, labels) {
237+
continue
238+
}
239+
restartCount, _ := strconv.Atoi(labels[restart.CountLabel])
225240
changes = append(changes, &startChange{
226241
container: c,
227242
logPath: labels[restart.LogPathLabel],
228243
logURI: labels[restart.LogURILabel],
244+
count: restartCount + 1,
229245
})
230246
case containerd.Stopped:
231247
changes = append(changes, &stopChange{
@@ -235,15 +251,3 @@ func (m *monitor) monitor(ctx context.Context) ([]change, error) {
235251
}
236252
return changes, nil
237253
}
238-
239-
func (m *monitor) isSameStatus(ctx context.Context, desired containerd.ProcessStatus, container containerd.Container) bool {
240-
task, err := container.Task(ctx, nil)
241-
if err != nil {
242-
return desired == containerd.Stopped
243-
}
244-
state, err := task.Status(ctx)
245-
if err != nil {
246-
return desired == containerd.Stopped
247-
}
248-
return desired == state.Status
249-
}

0 commit comments

Comments
 (0)