Skip to content

Commit 10aec35

Browse files
committed
cri: ensure NRI API never has nil CRI
A nil CRIImplementation field can cause a nil pointer dereference and panic during startup recovery. Prior to this change, the nri.API struct would have a nil cri (CRIImplementation) field after nri.NewAPI until nri.Register was called. Register is called mid-way through initialization of the CRI plugin, but recovery for containers occurs prior to that. Container recovery includes establishing new exit monitors for existing containers that were discovered. When a container exits, NRI plugins are given the opportunity to be notified about the lifecycle event, and this is done by accessing that CRIImplementation field inside the nri.API. If a container exits prior to nri.Register being called, access to the CRIImplementation field can cause a panic. Here's the call-path: * The CRI plugin starts running [here](https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L222) * It then [calls into](https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L227) `recover()` to recover state from previous runs of containerd * `recover()` then attempts to recover all containers through [`loadContainer()`](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/restart.go#L175) * When `loadContainer()` finds a container that is still running, it waits for the task (internal containerd object) to exit and sets up [exit monitoring](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/restart.go#L391) * Any exit that then happens must be [handled](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/events.go#L145) * Handling an exit includes [deleting the Task](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/events.go#L188) and specifying [`nri.WithContainerExit`](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L348) to [notify](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L356) any subscribed NRI plugins * NRI plugins need to know information about the pod (not just the sandbox), so before a plugin is notified the NRI API package [queries the Sandbox Store](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L232) through the CRI implementation * The `cri` implementation member field in the `nri.API` struct is set as part of the [`Register()`](https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L66) method * The `nri.Register()` method is only called [much further down in the CRI `Run()` method](https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L279) Signed-off-by: Samuel Karp <[email protected]>
1 parent ae7d74b commit 10aec35

5 files changed

Lines changed: 13 additions & 13 deletions

File tree

internal/cri/nri/nri_api_linux.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,10 @@ type API struct {
4646
nri nri.API
4747
}
4848

49-
func NewAPI(nri nri.API) *API {
49+
func NewAPI(nri nri.API, cri CRIImplementation) *API {
5050
return &API{
5151
nri: nri,
52+
cri: cri,
5253
}
5354
}
5455

@@ -58,12 +59,11 @@ func (a *API) IsDisabled() bool {
5859

5960
func (a *API) IsEnabled() bool { return !a.IsDisabled() }
6061

61-
func (a *API) Register(cri CRIImplementation) error {
62+
func (a *API) Register() error {
6263
if a.IsDisabled() {
6364
return nil
6465
}
6566

66-
a.cri = cri
6767
nri.RegisterDomain(a)
6868

6969
return a.nri.Start()

internal/cri/nri/nri_api_other.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ import (
3737
type API struct {
3838
}
3939

40-
func NewAPI(nri.API) *API {
40+
func NewAPI(nri.API, CRIImplementation) *API {
4141
return nil
4242
}
4343

44-
func (a *API) Register(CRIImplementation) error {
44+
func (a *API) Register() error {
4545
return nil
4646
}
4747

internal/cri/server/service.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"k8s.io/kubelet/pkg/cri/streaming"
3636

3737
apitypes "github.com/containerd/containerd/api/types"
38+
3839
containerd "github.com/containerd/containerd/v2/client"
3940
"github.com/containerd/containerd/v2/core/introspection"
4041
_ "github.com/containerd/containerd/v2/core/runtime" // for typeurl init
@@ -50,6 +51,7 @@ import (
5051
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
5152
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
5253
"github.com/containerd/containerd/v2/internal/eventq"
54+
nriservice "github.com/containerd/containerd/v2/internal/nri"
5355
"github.com/containerd/containerd/v2/internal/registrar"
5456
"github.com/containerd/containerd/v2/pkg/oci"
5557
osinterface "github.com/containerd/containerd/v2/pkg/os"
@@ -164,7 +166,7 @@ type CRIServiceOptions struct {
164166

165167
StreamingConfig streaming.Config
166168

167-
NRI *nri.API
169+
NRI nriservice.API
168170

169171
// SandboxControllers is a map of all the loaded sandbox controllers
170172
SandboxControllers map[string]sandbox.Controller
@@ -236,7 +238,7 @@ func NewCRIService(options *CRIServiceOptions) (CRIService, runtime.RuntimeServi
236238
}
237239
}
238240

239-
c.nri = options.NRI
241+
c.nri = nri.NewAPI(options.NRI, &criImplementation{c})
240242

241243
c.runtimeHandlers, err = c.introspectRuntimeHandlers(ctx)
242244
if err != nil {
@@ -297,7 +299,7 @@ func (c *criService) Run(ready func()) error {
297299
}()
298300

299301
// register CRI domain with NRI
300-
if err := c.nri.Register(&criImplementation{c}); err != nil {
302+
if err := c.nri.Register(); err != nil {
301303
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
302304
}
303305

internal/nri/nri.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type API interface {
3838
// IsEnabled returns true if the NRI interface is enabled and initialized.
3939
IsEnabled() bool
4040

41-
// Start start the NRI interface, allowing external NRI plugins to
41+
// Start starts the NRI interface, allowing external NRI plugins to
4242
// connect, register, and hook themselves into the lifecycle events
4343
// of pods and containers.
4444
Start() error

plugins/cri/cri.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
3333
"github.com/containerd/containerd/v2/internal/cri/constants"
3434
"github.com/containerd/containerd/v2/internal/cri/instrument"
35-
"github.com/containerd/containerd/v2/internal/cri/nri"
3635
"github.com/containerd/containerd/v2/internal/cri/server"
3736
nriservice "github.com/containerd/containerd/v2/internal/nri"
3837
"github.com/containerd/containerd/v2/plugins"
@@ -212,7 +211,7 @@ func (c criGRPCServerWithTCP) RegisterTCP(s *grpc.Server) error {
212211
}
213212

214213
// Get the NRI plugin, and set up our NRI API for it.
215-
func getNRIAPI(ic *plugin.InitContext) *nri.API {
214+
func getNRIAPI(ic *plugin.InitContext) nriservice.API {
216215
const (
217216
pluginType = plugins.NRIApiPlugin
218217
pluginName = "nri"
@@ -234,8 +233,7 @@ func getNRIAPI(ic *plugin.InitContext) *nri.API {
234233
}
235234

236235
log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this")
237-
238-
return nri.NewAPI(api)
236+
return api
239237
}
240238

241239
func getSandboxControllers(ic *plugin.InitContext) (map[string]sandbox.Controller, error) {

0 commit comments

Comments
 (0)