Skip to content

Commit 7f5d3c5

Browse files
committed
cri: ensure NRI API never has nil CRI
A nil CRIImplementation field can cause a nil pointer dereference and panic during startup recovery. Prior to this change, the nri.API struct would have a nil cri (CRIImplementation) field after nri.NewAPI until nri.Register was called. Register is called mid-way through initialization of the CRI plugin, but recovery for containers occurs prior to that. Container recovery includes establishing new exit monitors for existing containers that were discovered. When a container exits, NRI plugins are given the opportunity to be notified about the lifecycle event, and this is done by accessing that CRIImplementation field inside the nri.API. If a container exits prior to nri.Register being called, access to the CRIImplementation field can cause a panic. Here's the call-path: * The CRI plugin starts running [here](https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L222) * It then [calls into](https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L227) `recover()` to recover state from previous runs of containerd * `recover()` then attempts to recover all containers through [`loadContainer()`](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/restart.go#L175) * When `loadContainer()` finds a container that is still running, it waits for the task (internal containerd object) to exit and sets up [exit monitoring](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/restart.go#L391) * Any exit that then happens must be [handled](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/events.go#L145) * Handling an exit includes [deleting the Task](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/events.go#L188) and specifying [`nri.WithContainerExit`](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L348) to [notify](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L356) any subscribed NRI plugins * NRI plugins need to know information about the pod (not just the sandbox), so before a plugin is notified the NRI API package [queries the Sandbox Store](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L232) through the CRI implementation * The `cri` implementation member field in the `nri.API` struct is set as part of the [`Register()`](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L66) method * The `nri.Register()` method is only called [much further down in the CRI `Run()` method](https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L279) (manually backported from commit 10aec35) Signed-off-by: Samuel Karp <[email protected]>
1 parent 0794797 commit 7f5d3c5

6 files changed

Lines changed: 16 additions & 15 deletions

File tree

pkg/cri/cri.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"github.com/containerd/containerd"
2929
criconfig "github.com/containerd/containerd/pkg/cri/config"
3030
"github.com/containerd/containerd/pkg/cri/constants"
31-
"github.com/containerd/containerd/pkg/cri/nri"
3231
"github.com/containerd/containerd/pkg/cri/sbserver"
3332
"github.com/containerd/containerd/pkg/cri/server"
3433
nriservice "github.com/containerd/containerd/pkg/nri"
@@ -145,7 +144,7 @@ func setGLogLevel() error {
145144
}
146145

147146
// Get the NRI plugin, and set up our NRI API for it.
148-
func getNRIAPI(ic *plugin.InitContext) *nri.API {
147+
func getNRIAPI(ic *plugin.InitContext) nriservice.API {
149148
const (
150149
pluginType = plugin.NRIApiPlugin
151150
pluginName = "nri"
@@ -168,5 +167,5 @@ func getNRIAPI(ic *plugin.InitContext) *nri.API {
168167

169168
log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this")
170169

171-
return nri.NewAPI(api)
170+
return api
172171
}

pkg/cri/nri/nri_api_linux.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,10 @@ type API struct {
4747
nri nri.API
4848
}
4949

50-
func NewAPI(nri nri.API) *API {
50+
func NewAPI(nri nri.API, cri CRIImplementation) *API {
5151
return &API{
5252
nri: nri,
53+
cri: cri,
5354
}
5455
}
5556

@@ -59,12 +60,11 @@ func (a *API) IsDisabled() bool {
5960

6061
func (a *API) IsEnabled() bool { return !a.IsDisabled() }
6162

62-
func (a *API) Register(cri CRIImplementation) error {
63+
func (a *API) Register() error {
6364
if a.IsDisabled() {
6465
return nil
6566
}
6667

67-
a.cri = cri
6868
nri.RegisterDomain(a)
6969

7070
return a.nri.Start()

pkg/cri/nri/nri_api_other.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ import (
3737
type API struct {
3838
}
3939

40-
func NewAPI(nri.API) *API {
40+
func NewAPI(nri.API, CRIImplementation) *API {
4141
return nil
4242
}
4343

44-
func (a *API) Register(CRIImplementation) error {
44+
func (a *API) Register() error {
4545
return nil
4646
}
4747

pkg/cri/sbserver/service.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"github.com/containerd/containerd/pkg/cri/sbserver/podsandbox"
3434
"github.com/containerd/containerd/pkg/cri/streaming"
3535
"github.com/containerd/containerd/pkg/kmutex"
36+
nriservice "github.com/containerd/containerd/pkg/nri"
3637
"github.com/containerd/containerd/plugin"
3738
"github.com/containerd/containerd/sandbox"
3839
"github.com/containerd/containerd/services/warning"
@@ -128,7 +129,7 @@ type criService struct {
128129
}
129130

130131
// NewCRIService returns a new instance of CRIService
131-
func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.API, warn warning.Service) (CRIService, error) {
132+
func NewCRIService(config criconfig.Config, client *containerd.Client, nriservice nriservice.API, warn warning.Service) (CRIService, error) {
132133
var err error
133134
labels := label.NewStore()
134135
c := &criService{
@@ -197,7 +198,7 @@ func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.
197198
c.sandboxControllers[criconfig.ModePodSandbox] = podsandbox.New(config, client, c.sandboxStore, c.os, c, c.baseOCISpecs)
198199
c.sandboxControllers[criconfig.ModeShim] = client.SandboxController()
199200

200-
c.nri = nri
201+
c.nri = nri.NewAPI(nriservice, &criImplementation{c})
201202

202203
return c, nil
203204
}
@@ -281,7 +282,7 @@ func (c *criService) Run(ready func()) error {
281282
}()
282283

283284
// register CRI domain with NRI
284-
if err := c.nri.Register(&criImplementation{c}); err != nil {
285+
if err := c.nri.Register(); err != nil {
285286
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
286287
}
287288

pkg/cri/server/service.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/containerd/containerd/pkg/cri/nri"
3535
"github.com/containerd/containerd/pkg/cri/streaming"
3636
"github.com/containerd/containerd/pkg/kmutex"
37+
nriservice "github.com/containerd/containerd/pkg/nri"
3738
"github.com/containerd/containerd/plugin"
3839
"github.com/containerd/containerd/services/warning"
3940
runtime_alpha "github.com/containerd/containerd/third_party/k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -125,7 +126,7 @@ type criService struct {
125126
}
126127

127128
// NewCRIService returns a new instance of CRIService
128-
func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.API, warn warning.Service) (CRIService, error) {
129+
func NewCRIService(config criconfig.Config, client *containerd.Client, nriservice nriservice.API, warn warning.Service) (CRIService, error) {
129130
var err error
130131
labels := label.NewStore()
131132

@@ -198,7 +199,7 @@ func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.
198199
return nil, err
199200
}
200201

201-
c.nri = nri
202+
c.nri = nri.NewAPI(nriservice, &criImplementation{c})
202203

203204
return c, nil
204205
}
@@ -276,7 +277,7 @@ func (c *criService) Run(ready func()) error {
276277
}()
277278

278279
// register CRI domain with NRI
279-
if err := c.nri.Register(&criImplementation{c}); err != nil {
280+
if err := c.nri.Register(); err != nil {
280281
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
281282
}
282283

pkg/nri/nri.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ type API interface {
3939
// IsEnabled returns true if the NRI interface is enabled and initialized.
4040
IsEnabled() bool
4141

42-
// Start start the NRI interface, allowing external NRI plugins to
42+
// Start starts the NRI interface, allowing external NRI plugins to
4343
// connect, register, and hook themselves into the lifecycle events
4444
// of pods and containers.
4545
Start() error

0 commit comments

Comments
 (0)