Skip to content

Commit f27fd14

Browse files
elezar and claude committed
Fix CDI spec generation to respect driver root for Tegra CSV files
On Tegra-based systems the nvcdi library resolves CSV mount specs from /etc/nvidia-container-runtime/host-files-for-container.d using hardcoded absolute paths, ignoring the configured driver root. When the device plugin runs in a container with the host filesystem mounted at a non-default driver root, the CSV files are unreachable at their absolute paths, causing CDI spec generation to fail.

Inject driver-root-aware CSV file paths via nvcdi.WithCSVFiles() when the platform is detected as Tegra. The helper csvFilesForRoot() checks for the CSV directory under the driver root first, falling back to the absolute path, and returns nil (letting nvcdi use its own defaults) if neither location exists.

Also extend the NVML guard so that pure iGPU Tegra systems without NVML are not incorrectly reduced to a null CDI handler.

Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
Signed-off-by: Evan Lezar <[email protected]>
1 parent 51f7ece commit f27fd14

2 files changed

Lines changed: 108 additions & 11 deletions

File tree

internal/cdi/cdi.go

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package cdi
1818

1919
import (
2020
"fmt"
21+
"os"
2122
"path/filepath"
2223
"strings"
2324

@@ -87,7 +88,8 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
8788
return &null{}, nil
8889
}
8990
hasNVML, _ := infolib.HasNvml()
90-
if !hasNVML {
91+
platform := infolib.ResolvePlatform()
92+
if !hasNVML && platform != info.PlatformTegra {
9193
klog.Warning("No valid resources detected, creating a null CDI handler")
9294
return &null{}, nil
9395
}
@@ -128,6 +130,13 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
128130
nvcdi.WithVendor(c.vendor),
129131
}
130132

133+
// On Tegra (CSV mode), the default CSV files live under the driver root.
134+
// Inject driver-root-aware paths explicitly so the nvcdi library does not
135+
// fall back to the hardcoded absolute paths returned by csv.DefaultFileList().
136+
if platform == info.PlatformTegra {
137+
commonOptions = append(commonOptions, nvcdi.WithCSVFiles(csvFilesForRoot(c.driverRoot)))
138+
}
139+
131140
c.cdilibs = make(map[string]nvcdi.SpecGenerator)
132141

133142
c.cdilibs["gpu"], err = nvcdi.New(
@@ -177,16 +186,6 @@ func (cdi *cdiHandler) CreateSpecFile() error {
177186
for class, cdilib := range cdi.cdilibs {
178187
cdi.logger.Infof("Generating CDI spec for resource: %s/%s", cdi.vendor, class)
179188

180-
if class == "gpu" {
181-
ret := cdi.nvmllib.Init()
182-
if ret != nvml.SUCCESS {
183-
return fmt.Errorf("failed to initialize NVML: %v", ret)
184-
}
185-
defer func() {
186-
_ = cdi.nvmllib.Shutdown()
187-
}()
188-
}
189-
190189
spec, err := cdilib.GetSpec()
191190
if err != nil {
192191
return fmt.Errorf("failed to get CDI spec: %v", err)
@@ -268,3 +267,33 @@ func (cdi *cdiHandler) AdditionalDevices() []string {
268267
}
269268
return devices
270269
}
270+
271+
// defaultCSVMountSpecPath mirrors the constant defined in the nvidia-container-toolkit's
// internal/platform-support/tegra/csv package.
const defaultCSVMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d"

// defaultCSVFileNames lists the CSV file names that are looked up inside
// defaultCSVMountSpecPath.
var defaultCSVFileNames = []string{"devices.csv", "drivers.csv", "l4t.csv"}

// csvFilesForRoot returns the Tegra CSV file paths to use for CDI spec generation.
//
// The CSV directory is looked up under driverRoot first and, when driverRoot is
// not the filesystem root, under "/" as a fallback. The first location that
// exists and is a directory wins, and one path per entry of defaultCSVFileNames
// is returned for it; the individual files are not checked for existence.
//
// A nil slice is returned when the directory is found at neither location, so
// that the caller can leave nvcdi to apply its own defaults.
func csvFilesForRoot(driverRoot string) []string {
	// Normalise the root once so that equivalent spellings of the filesystem
	// root ("", "//", "/.") are recognised and "/" is never probed twice.
	// The empty string is handled explicitly: filepath.Clean("") is ".", which
	// would turn the lookup into a relative one.
	root := "/"
	if driverRoot != "" {
		root = filepath.Clean(driverRoot)
	}

	roots := []string{root}
	if root != "/" {
		roots = append(roots, "/")
	}

	for _, r := range roots {
		csvDir := filepath.Join(r, defaultCSVMountSpecPath)
		// Any stat failure (missing path, permission error, ...) or a
		// non-directory at this location means the candidate is unusable.
		fi, err := os.Stat(csvDir)
		if err != nil || !fi.IsDir() {
			continue
		}
		paths := make([]string, 0, len(defaultCSVFileNames))
		for _, name := range defaultCSVFileNames {
			paths = append(paths, filepath.Join(csvDir, name))
		}
		return paths
	}
	return nil
}

internal/cdi/csv_test.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/**
2+
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package cdi
18+
19+
import (
20+
"os"
21+
"path/filepath"
22+
"testing"
23+
24+
"github.com/stretchr/testify/require"
25+
)
26+
27+
func TestCSVFilesForRoot(t *testing.T) {
28+
testCases := []struct {
29+
description string
30+
setup func(t *testing.T) string // returns driverRoot
31+
expectedPaths func(driverRoot string) []string
32+
}{
33+
{
34+
description: "directory exists at driver root",
35+
setup: func(t *testing.T) string {
36+
root := t.TempDir()
37+
csvDir := filepath.Join(root, defaultCSVMountSpecPath)
38+
require.NoError(t, os.MkdirAll(csvDir, 0755))
39+
return root
40+
},
41+
expectedPaths: func(driverRoot string) []string {
42+
csvDir := filepath.Join(driverRoot, defaultCSVMountSpecPath)
43+
return []string{
44+
filepath.Join(csvDir, "devices.csv"),
45+
filepath.Join(csvDir, "drivers.csv"),
46+
filepath.Join(csvDir, "l4t.csv"),
47+
}
48+
},
49+
},
50+
{
51+
description: "directory does not exist at driver root or absolute path",
52+
setup: func(t *testing.T) string {
53+
return t.TempDir()
54+
},
55+
expectedPaths: func(_ string) []string {
56+
return nil
57+
},
58+
},
59+
}
60+
61+
for _, tc := range testCases {
62+
t.Run(tc.description, func(t *testing.T) {
63+
driverRoot := tc.setup(t)
64+
got := csvFilesForRoot(driverRoot)
65+
require.Equal(t, tc.expectedPaths(driverRoot), got)
66+
})
67+
}
68+
}

0 commit comments

Comments
 (0)