Skip to content

Commit 11af051

Browse files
committed Feb 13, 2025
cri,nri: block NRI plugin sync. during event processing.
Block the synchronization of registering NRI plugins during CRI events to avoid the plugin ending up in an inconsistent starting state after initial sync (missing pods, containers or missed events for some pods or containers). Signed-off-by: Krisztian Litkey <krisztian.litkey@intel.com>
1 parent d4036cd commit 11af051

19 files changed

+65
-0
lines changed
 

‎pkg/cri/nri/nri_api_linux.go

+9
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,15 @@ func (a *API) WithContainerExit(criCtr *cstore.Container) containerd.ProcessDele
361361
}
362362
}
363363

364+
// PluginSyncBlock is an alias for nri.PluginSyncBlock, the handle returned by
// BlockPluginSync, so that callers of this package need not import nri.
type PluginSyncBlock = nri.PluginSyncBlock
365+
366+
// BlockPluginSync blocks NRI plugin synchronization by delegating to the
// wrapped NRI interface and returns the resulting block handle. When NRI is
// disabled (a.IsDisabled() reports true) nothing is blocked and nil is
// returned, so callers must be prepared to receive a nil *PluginSyncBlock.
func (a *API) BlockPluginSync() *PluginSyncBlock {
367+
if a.IsDisabled() {
368+
return nil
369+
}
370+
return a.nri.BlockPluginSync()
371+
}
372+
364373
//
365374
// NRI-CRI 'domain' interface
366375
//

‎pkg/cri/nri/nri_api_other.go

+8
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ func (*API) WithContainerExit(*cstore.Container) containerd.ProcessDeleteOpts {
108108
}
109109
}
110110

111+
// PluginSyncBlock is an empty stand-in for the plugin sync block handle in
// this no-op NRI implementation; it carries no state.
type PluginSyncBlock struct{}
112+
113+
// BlockPluginSync is the no-op variant: it blocks nothing and always
// returns nil.
func (*API) BlockPluginSync() *PluginSyncBlock {
114+
return nil
115+
}
116+
117+
// Unblock does nothing in this no-op implementation. The receiver is never
// dereferenced, so it is safe to call on the nil pointer that BlockPluginSync
// returns here.
func (*PluginSyncBlock) Unblock() {}
118+
111119
//
112120
// NRI-CRI no-op 'domain' interface
113121
//

‎pkg/cri/sbserver/container_create.go

+2
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
285285
}
286286
}()
287287

288+
defer c.nri.BlockPluginSync().Unblock()
289+
288290
var cntr containerd.Container
289291
if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
290292
return nil, fmt.Errorf("failed to create containerd container: %w", err)

‎pkg/cri/sbserver/container_remove.go

+3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
4444
log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID)
4545
return &runtime.RemoveContainerResponse{}, nil
4646
}
47+
48+
defer c.nri.BlockPluginSync().Unblock()
49+
4750
id := container.ID
4851
i, err := container.Container.Info(ctx)
4952
if err != nil {

‎pkg/cri/sbserver/container_start.go

+2
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
146146
return nil, fmt.Errorf("failed to wait for containerd task: %w", err)
147147
}
148148

149+
defer c.nri.BlockPluginSync().Unblock()
150+
149151
defer func() {
150152
if retErr != nil {
151153
deferCtx, deferCancel := ctrdutil.DeferContext()

‎pkg/cri/sbserver/container_stop.go

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
5050
return &runtime.StopContainerResponse{}, nil
5151
}
5252

53+
defer c.nri.BlockPluginSync().Unblock()
54+
5355
if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil {
5456
return nil, err
5557
}

‎pkg/cri/sbserver/container_update_resources.go

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
4747
return nil, err
4848
}
4949

50+
defer c.nri.BlockPluginSync().Unblock()
51+
5052
resources := r.GetLinux()
5153
updated, err := c.nri.UpdateContainerResources(ctx, &sandbox, &container, resources)
5254
if err != nil {

‎pkg/cri/sbserver/sandbox_remove.go

+3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
4444
r.GetPodSandboxId())
4545
return &runtime.RemovePodSandboxResponse{}, nil
4646
}
47+
48+
defer c.nri.BlockPluginSync().Unblock()
49+
4750
// Use the full sandbox id.
4851
id := sandbox.ID
4952

‎pkg/cri/sbserver/sandbox_run.go

+2
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
263263

264264
sandbox.ProcessLabel = labels["selinux_label"]
265265

266+
defer c.nri.BlockPluginSync().Unblock()
267+
266268
err = c.nri.RunPodSandbox(ctx, &sandbox)
267269
if err != nil {
268270
return nil, fmt.Errorf("NRI RunPodSandbox failed: %w", err)

‎pkg/cri/sbserver/sandbox_stop.go

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
4545
return &runtime.StopPodSandboxResponse{}, nil
4646
}
4747

48+
defer c.nri.BlockPluginSync().Unblock()
49+
4850
if err := c.stopPodSandbox(ctx, sandbox); err != nil {
4951
return nil, err
5052
}

‎pkg/cri/server/container_create.go

+2
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
259259
}
260260
}()
261261

262+
defer c.nri.BlockPluginSync().Unblock()
263+
262264
var cntr containerd.Container
263265
if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
264266
return nil, fmt.Errorf("failed to create containerd container: %w", err)

‎pkg/cri/server/container_remove.go

+3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
4444
log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID)
4545
return &runtime.RemoveContainerResponse{}, nil
4646
}
47+
48+
defer c.nri.BlockPluginSync().Unblock()
49+
4750
id := container.ID
4851
i, err := container.Container.Info(ctx)
4952
if err != nil {

‎pkg/cri/server/container_start.go

+2
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
146146
return nil, fmt.Errorf("failed to wait for containerd task: %w", err)
147147
}
148148

149+
defer c.nri.BlockPluginSync().Unblock()
150+
149151
defer func() {
150152
if retErr != nil {
151153
deferCtx, deferCancel := ctrdutil.DeferContext()

‎pkg/cri/server/container_stop.go

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
5050
return &runtime.StopContainerResponse{}, nil
5151
}
5252

53+
defer c.nri.BlockPluginSync().Unblock()
54+
5355
if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil {
5456
return nil, err
5557
}

‎pkg/cri/server/container_update_resources.go

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
4747
return nil, err
4848
}
4949

50+
defer c.nri.BlockPluginSync().Unblock()
51+
5052
resources := r.GetLinux()
5153
updated, err := c.nri.UpdateContainerResources(ctx, &sandbox, &container, resources)
5254
if err != nil {

‎pkg/cri/server/sandbox_remove.go

+3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
4545
r.GetPodSandboxId())
4646
return &runtime.RemovePodSandboxResponse{}, nil
4747
}
48+
49+
defer c.nri.BlockPluginSync().Unblock()
50+
4851
// Use the full sandbox id.
4952
id := sandbox.ID
5053

‎pkg/cri/server/sandbox_run.go

+2
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
463463
sandboxCreateNetworkTimer.UpdateSince(netStart)
464464
}
465465

466+
defer c.nri.BlockPluginSync().Unblock()
467+
466468
err = c.nri.RunPodSandbox(ctx, &sandbox)
467469
if err != nil {
468470
return nil, fmt.Errorf("NRI RunPodSandbox failed: %w", err)

‎pkg/cri/server/sandbox_stop.go

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
5050
return &runtime.StopPodSandboxResponse{}, nil
5151
}
5252

53+
defer c.nri.BlockPluginSync().Unblock()
54+
5355
if err := c.stopPodSandbox(ctx, sandbox); err != nil {
5456
return nil, err
5557
}

‎pkg/nri/nri.go

+12
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ type API interface {
8282

8383
// RemoveContainer relays container removal events to NRI.
8484
RemoveContainer(context.Context, PodSandbox, Container) error
85+
86+
// BlockPluginSync blocks plugin synchronization until it is Unblock()ed.
87+
BlockPluginSync() *PluginSyncBlock
8588
}
8689

8790
type State int
@@ -436,6 +439,15 @@ func (l *local) RemoveContainer(ctx context.Context, pod PodSandbox, ctr Contain
436439
return err
437440
}
438441

442+
// PluginSyncBlock is an alias for nri.PluginSyncBlock, the handle returned by
// BlockPluginSync.
type PluginSyncBlock = nri.PluginSyncBlock
443+
444+
// BlockPluginSync blocks plugin synchronization by delegating to the
// underlying l.nri and returns the resulting block handle. If NRI is not
// enabled (l.IsEnabled() reports false) nothing is blocked and nil is
// returned, so callers must be prepared to receive a nil *PluginSyncBlock.
func (l *local) BlockPluginSync() *PluginSyncBlock {
445+
if !l.IsEnabled() {
446+
return nil
447+
}
448+
return l.nri.BlockPluginSync()
449+
}
450+
439451
func (l *local) syncPlugin(ctx context.Context, syncFn nri.SyncCB) error {
440452
l.Lock()
441453
defer l.Unlock()

0 commit comments

Comments
 (0)