Skip to content

Commit b949697

Browse files
committed
Add nvidia gpu support via libnvidia-container
This adds nvidia gpu support via the libnvidia-container project and `nvidia-container-cli`. Signed-off-by: Michael Crosby <[email protected]>
1 parent 5445572 commit b949697

File tree

4 files changed

+199
-1
lines changed

4 files changed

+199
-1
lines changed

cmd/containerd/command/oci-hook.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131

3232
var ociHook = cli.Command{
3333
Name: "oci-hook",
34-
Usage: "provides a base for OCI runtime hooks that allow arguements to be templated.",
34+
Usage: "provides a base for OCI runtime hooks to allow arguments to be injected.",
3535
Action: func(context *cli.Context) error {
3636
state, err := loadHookState(os.Stdin)
3737
if err != nil {
@@ -70,6 +70,7 @@ func newTemplateContext(state *specs.State) *templateContext {
7070
"rootfs": t.rootfs,
7171
"pid": t.pid,
7272
"annotation": t.annotation,
73+
"status": t.status,
7374
}
7475
return t
7576
}
@@ -99,6 +100,10 @@ func (t *templateContext) annotation(k string) string {
99100
return t.state.Annotations[k]
100101
}
101102

103+
func (t *templateContext) status() string {
104+
return t.state.Status
105+
}
106+
102107
func render(ctx *templateContext, source string, out io.Writer) error {
103108
t, err := template.New("oci-hook").Funcs(ctx.funcs).Parse(source)
104109
if err != nil {

cmd/ctr/commands/run/run.go

+4
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ var ContainerFlags = []cli.Flag{
9393
Name: "pid-file",
9494
Usage: "file path to write the task's pid",
9595
},
96+
cli.IntFlag{
97+
Name: "gpus",
98+
Usage: "add gpus to the container",
99+
},
96100
}
97101

98102
func loadSpec(path string, s *specs.Spec) error {

cmd/ctr/commands/run/run_unix.go

+4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/containerd/containerd"
2626
"github.com/containerd/containerd/cmd/ctr/commands"
27+
"github.com/containerd/containerd/contrib/nvidia"
2728
"github.com/containerd/containerd/oci"
2829
specs "github.com/opencontainers/runtime-spec/specs-go"
2930
"github.com/pkg/errors"
@@ -123,6 +124,9 @@ func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli
123124
Path: parts[1],
124125
}))
125126
}
127+
if context.IsSet("gpus") {
128+
opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities))
129+
}
126130
if context.IsSet("config") {
127131
var s specs.Spec
128132
if err := loadSpec(context.String("config"), &s); err != nil {

contrib/nvidia/nvidia.go

+185
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package nvidia
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"os"
23+
"os/exec"
24+
"strconv"
25+
"strings"
26+
27+
"github.com/containerd/containerd/containers"
28+
"github.com/containerd/containerd/oci"
29+
specs "github.com/opencontainers/runtime-spec/specs-go"
30+
)
31+
32+
const nvidiaCLI = "nvidia-container-cli"
33+
34+
// Capability identifies a driver capability to enable for the gpu inside the
// container.
//
// A detailed explanation of the options can be found at:
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
type Capability int

// Supported capabilities. Values start at 1, so the zero value of Capability
// does not name any capability.
const (
	// Compute is rendered as the "compute" flag.
	Compute Capability = iota + 1
	// Compat32 is rendered as the "compat32" flag.
	Compat32
	// Graphics is rendered as the "graphics" flag.
	Graphics
	// Utility is rendered as the "utility" flag.
	Utility
	// Video is rendered as the "video" flag.
	Video
	// Display is rendered as the "display" flag.
	Display
)
53+
54+
// WithGPUs adds NVIDIA gpu support to a container
55+
func WithGPUs(opts ...Opts) oci.SpecOpts {
56+
return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
57+
c := &config{}
58+
for _, o := range opts {
59+
if err := o(c); err != nil {
60+
return err
61+
}
62+
}
63+
path, err := exec.LookPath("containerd")
64+
if err != nil {
65+
return err
66+
}
67+
nvidiaPath, err := exec.LookPath(nvidiaCLI)
68+
if err != nil {
69+
return err
70+
}
71+
if s.Hooks == nil {
72+
s.Hooks = &specs.Hooks{}
73+
}
74+
s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
75+
Path: path,
76+
Args: append([]string{
77+
"containerd",
78+
"oci-hook",
79+
"--",
80+
nvidiaPath,
81+
}, c.args()...),
82+
Env: os.Environ(),
83+
})
84+
return nil
85+
}
86+
}
87+
88+
type config struct {
89+
Devices []int
90+
DeviceUUID string
91+
Capabilities []Capability
92+
LoadKmods bool
93+
LDCache string
94+
LDConfig string
95+
Requirements []string
96+
}
97+
98+
func (c *config) args() []string {
99+
var args []string
100+
101+
if c.LoadKmods {
102+
args = append(args, "--load-kmods")
103+
}
104+
if c.LDCache != "" {
105+
args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
106+
}
107+
args = append(args,
108+
"configure",
109+
)
110+
if len(c.Devices) > 0 {
111+
args = append(args, fmt.Sprintf("--device=%s", strings.Join(toStrings(c.Devices), ",")))
112+
}
113+
if c.DeviceUUID != "" {
114+
args = append(args, fmt.Sprintf("--device=%s", c.DeviceUUID))
115+
}
116+
for _, c := range c.Capabilities {
117+
args = append(args, fmt.Sprintf("--%s", capFlags[c]))
118+
}
119+
if c.LDConfig != "" {
120+
args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
121+
}
122+
for _, r := range c.Requirements {
123+
args = append(args, fmt.Sprintf("--require=%s", r))
124+
}
125+
args = append(args, "--pid={{pid}}", "{{rootfs}}")
126+
return args
127+
}
128+
129+
var capFlags = map[Capability]string{
130+
Compute: "compute",
131+
Compat32: "compat32",
132+
Graphics: "graphics",
133+
Utility: "utility",
134+
Video: "video",
135+
Display: "display",
136+
}
137+
138+
// toStrings formats every integer in ints as base 10 text, keeping the input
// order. An empty or nil input yields a nil slice.
func toStrings(ints []int) []string {
	var out []string
	for idx := 0; idx < len(ints); idx++ {
		out = append(out, strconv.FormatInt(int64(ints[idx]), 10))
	}
	return out
}
145+
146+
// Opts are options for configuring gpu support
147+
type Opts func(*config) error
148+
149+
// WithDevices adds the provided device indexes to the container
150+
func WithDevices(ids ...int) Opts {
151+
return func(c *config) error {
152+
c.Devices = ids
153+
return nil
154+
}
155+
}
156+
157+
// WithDeviceUUID adds the specific device UUID to the container
158+
func WithDeviceUUID(guid string) Opts {
159+
return func(c *config) error {
160+
c.DeviceUUID = guid
161+
return nil
162+
}
163+
}
164+
165+
// WithAllDevices adds all gpus to the container
166+
func WithAllDevices(c *config) error {
167+
c.DeviceUUID = "all"
168+
return nil
169+
}
170+
171+
// WithAllCapabilities adds all capabilities to the container for the gpus
172+
func WithAllCapabilities(c *config) error {
173+
for k := range capFlags {
174+
c.Capabilities = append(c.Capabilities, k)
175+
}
176+
return nil
177+
}
178+
179+
// WithRequiredCUDAVersion sets the required cuda version
180+
func WithRequiredCUDAVersion(major, minor int) Opts {
181+
return func(c *config) error {
182+
c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
183+
return nil
184+
}
185+
}

0 commit comments

Comments
 (0)