Skip to content

Commit 22499fe

Browse files
elezarclaude
andcommitted
Factor device discovery strategy resolution out of plugin and cdi packages
Resolve the device discovery strategy once in GetPlugins() using a local resolveStrategy() helper and pass the concrete value ("nvml", "tegra", etc.) to both cdi.New() and plugin.New() via new WithDeviceDiscoveryStrategy options. This eliminates duplicate infolib.ResolvePlatform() calls and makes the dependency on platform detection explicit at the call site. Both constructors now return an error when the strategy is not set rather than falling back to their own resolution logic. The CDI handler uses the strategy directly to gate CSV-mode behaviour instead of re-checking the platform independently. Co-Authored-By: Claude Sonnet 4.6 <[email protected]> Signed-off-by: Evan Lezar <[email protected]>
1 parent f27fd14 commit 22499fe

5 files changed

Lines changed: 56 additions & 21 deletions

File tree

cmd/nvidia-device-plugin/plugin-manager.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
2424
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
2525
"github.com/NVIDIA/go-nvml/pkg/nvml"
26+
"k8s.io/klog/v2"
2627

2728
spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
2829
"github.com/NVIDIA/k8s-device-plugin/internal/cdi"
@@ -45,6 +46,9 @@ func GetPlugins(ctx context.Context, infolib info.Interface, nvmllib nvml.Interf
4546
return nil, fmt.Errorf("error querying IMEX channels: %w", err)
4647
}
4748

49+
resolvedStrategy := resolveStrategy(*config.Flags.DeviceDiscoveryStrategy, infolib)
50+
klog.Infof("Using device discovery strategy: %s", resolvedStrategy)
51+
4852
cdiHandler, err := cdi.New(infolib, nvmllib, devicelib,
4953
cdi.WithDeviceListStrategies(deviceListStrategies),
5054
cdi.WithDriverRoot(string(driverRoot)),
@@ -59,6 +63,7 @@ func GetPlugins(ctx context.Context, infolib info.Interface, nvmllib nvml.Interf
5963
cdi.WithMofedEnabled(*config.Flags.MOFEDEnabled),
6064
cdi.WithImexChannels(imexChannels),
6165
cdi.WithFeatureFlags(o.cdiFeatureFlags.Value()...),
66+
cdi.WithDeviceDiscoveryStrategy(resolvedStrategy),
6267
)
6368
if err != nil {
6469
return nil, fmt.Errorf("unable to create cdi handler: %v", err)
@@ -70,6 +75,7 @@ func GetPlugins(ctx context.Context, infolib info.Interface, nvmllib nvml.Interf
7075
plugin.WithDeviceListStrategies(deviceListStrategies),
7176
plugin.WithFailOnInitError(*config.Flags.FailOnInitError),
7277
plugin.WithImexChannels(imexChannels),
78+
plugin.WithDeviceDiscoveryStrategy(resolvedStrategy),
7379
)
7480
if err != nil {
7581
return nil, fmt.Errorf("unable to create plugins: %w", err)
@@ -81,3 +87,22 @@ func GetPlugins(ctx context.Context, infolib info.Interface, nvmllib nvml.Interf
8187

8288
return plugins, nil
8389
}
90+
91+
// resolveStrategy resolves an "auto" device discovery strategy to a concrete
92+
// value based on the detected platform. Non-auto values are returned unchanged.
93+
func resolveStrategy(strategy string, infolib info.Interface) string {
94+
if strategy != "" && strategy != "auto" {
95+
klog.Infof("Using requested device discovery strategy: %s", strategy)
96+
return strategy
97+
}
98+
99+
platform := infolib.ResolvePlatform()
100+
klog.Infof("Detected platform: %s", platform)
101+
switch platform {
102+
case info.PlatformNVML, info.PlatformWSL:
103+
return "nvml"
104+
case info.PlatformTegra:
105+
return "tegra"
106+
}
107+
return strategy
108+
}

internal/cdi/cdi.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ type cdiHandler struct {
5656
vendor string
5757
deviceIDStrategy string
5858

59-
deviceListStrategies spec.DeviceListStrategies
59+
deviceListStrategies spec.DeviceListStrategies
60+
deviceDiscoveryStrategy string
6061

6162
// nvcdiFeatureFlags allows finer control over CDI spec generation.
6263
nvcdiFeatureFlags []string
@@ -87,9 +88,11 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
8788
if !c.deviceListStrategies.AnyCDIEnabled() {
8889
return &null{}, nil
8990
}
91+
if c.deviceDiscoveryStrategy == "" {
92+
return nil, fmt.Errorf("device discovery strategy not set")
93+
}
9094
hasNVML, _ := infolib.HasNvml()
91-
platform := infolib.ResolvePlatform()
92-
if !hasNVML && platform != info.PlatformTegra {
95+
if !hasNVML && c.deviceDiscoveryStrategy != "tegra" {
9396
klog.Warning("No valid resources detected, creating a null CDI handler")
9497
return &null{}, nil
9598
}
@@ -133,7 +136,7 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
133136
// On Tegra (CSV mode), the default CSV files live under the driver root.
134137
// Inject driver-root-aware paths explicitly so the nvcdi library does not
135138
// fall back to the hardcoded absolute paths returned by csv.DefaultFileList().
136-
if platform == info.PlatformTegra {
139+
if c.deviceDiscoveryStrategy == "tegra" {
137140
commonOptions = append(commonOptions, nvcdi.WithCSVFiles(csvFilesForRoot(c.driverRoot)))
138141
}
139142

internal/cdi/options.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,12 @@ func WithImexChannels(imexChannels imex.Channels) Option {
115115
c.imexChannels = imexChannels
116116
}
117117
}
118+
119+
// WithDeviceDiscoveryStrategy sets a pre-resolved device discovery strategy.
120+
// When "tegra", the CDI handler uses CSV-based spec generation with
121+
// driver-root-aware file paths.
122+
func WithDeviceDiscoveryStrategy(strategy string) Option {
123+
return func(c *cdiHandler) {
124+
c.deviceDiscoveryStrategy = strategy
125+
}
126+
}

internal/plugin/factory.go

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ type options struct {
4141
cdiHandler cdi.Interface
4242
config *spec.Config
4343

44-
deviceListStrategies spec.DeviceListStrategies
44+
deviceListStrategies spec.DeviceListStrategies
45+
deviceDiscoveryStrategy string
4546

4647
imexChannels imex.Channels
4748
}
@@ -66,6 +67,10 @@ func New(ctx context.Context, infolib info.Interface, nvmllib nvml.Interface, de
6667
o.cdiHandler = cdi.NewNullHandler()
6768
}
6869

70+
if o.deviceDiscoveryStrategy == "" {
71+
return nil, fmt.Errorf("device discovery strategy not set")
72+
}
73+
6974
resourceManagers, err := o.getResourceManagers()
7075
if err != nil {
7176
return nil, fmt.Errorf("failed to construct resource managers: %w", err)
@@ -86,7 +91,7 @@ func New(ctx context.Context, infolib info.Interface, nvmllib nvml.Interface, de
8691
// Each resource manager maps to a specific named extended resource and may
8792
// include full GPUs or MIG devices.
8893
func (o *options) getResourceManagers() ([]rm.ResourceManager, error) {
89-
strategy := o.resolveStrategy(*o.config.Flags.DeviceDiscoveryStrategy)
94+
strategy := o.deviceDiscoveryStrategy
9095
switch strategy {
9196
case "nvml":
9297
ret := o.nvmllib.Init()
@@ -121,18 +126,3 @@ func (o *options) getResourceManagers() ([]rm.ResourceManager, error) {
121126
return nil, nil
122127
}
123128
}
124-
125-
func (o *options) resolveStrategy(strategy string) string {
126-
if strategy != "" && strategy != "auto" {
127-
return strategy
128-
}
129-
130-
platform := o.infolib.ResolvePlatform()
131-
switch platform {
132-
case info.PlatformNVML, info.PlatformWSL:
133-
return "nvml"
134-
case info.PlatformTegra:
135-
return "tegra"
136-
}
137-
return strategy
138-
}

internal/plugin/options.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,11 @@ func WithImexChannels(imexChannels imex.Channels) Option {
7676
m.imexChannels = imexChannels
7777
}
7878
}
79+
80+
// WithDeviceDiscoveryStrategy sets a pre-resolved device discovery strategy,
81+
// bypassing the automatic platform-based resolution.
82+
func WithDeviceDiscoveryStrategy(strategy string) Option {
83+
return func(m *options) {
84+
m.deviceDiscoveryStrategy = strategy
85+
}
86+
}

0 commit comments

Comments
 (0)