Skip to content

Commit 6e9fb18

Browse files
committed
Move most of OCI spec options to common builder
Signed-off-by: Maksym Pavlenko <[email protected]>
1 parent ac5112f commit 6e9fb18

5 files changed

Lines changed: 175 additions & 209 deletions

File tree

pkg/cri/sbserver/container_create.go

Lines changed: 163 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@ import (
2121
"errors"
2222
"fmt"
2323
"path/filepath"
24+
"strconv"
2425
"time"
2526

2627
"github.com/containerd/containerd"
2728
"github.com/containerd/containerd/api/types"
2829
"github.com/containerd/containerd/containers"
2930
"github.com/containerd/containerd/log"
3031
"github.com/containerd/containerd/oci"
32+
"github.com/containerd/containerd/pkg/cri/annotations"
3133
"github.com/containerd/containerd/pkg/cri/config"
3234
criconfig "github.com/containerd/containerd/pkg/cri/config"
3335
cio "github.com/containerd/containerd/pkg/cri/io"
@@ -394,6 +396,13 @@ func (c *criService) runtimeSnapshotter(ctx context.Context, ociRuntime criconfi
394396
return ociRuntime.Snapshotter
395397
}
396398

399+
const (
400+
// relativeRootfsPath is the rootfs path relative to bundle path.
401+
relativeRootfsPath = "rootfs"
402+
// hostnameEnv is the key for HOSTNAME env.
403+
hostnameEnv = "HOSTNAME"
404+
)
405+
397406
// buildContainerSpec build container's OCI spec depending on controller's target platform OS.
398407
func (c *criService) buildContainerSpec(
399408
platform *types.Platform,
@@ -410,8 +419,49 @@ func (c *criService) buildContainerSpec(
410419
ociRuntime config.Runtime,
411420
) (_ *runtimespec.Spec, retErr error) {
412421
var (
413-
specOpts []oci.SpecOpts
414-
isLinux = platform.OS == "linux"
422+
specOpts []oci.SpecOpts
423+
isLinux = platform.OS == "linux"
424+
isWindows = platform.OS == "windows"
425+
)
426+
427+
specOpts = append(specOpts, customopts.WithProcessArgs(config, imageConfig))
428+
429+
if config.GetWorkingDir() != "" {
430+
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
431+
} else if imageConfig.WorkingDir != "" {
432+
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
433+
}
434+
435+
if config.GetTty() {
436+
specOpts = append(specOpts, oci.WithTTY)
437+
}
438+
439+
// Apply envs from image config first, so that envs from container config
440+
// can override them.
441+
env := append([]string{}, imageConfig.Env...)
442+
for _, e := range config.GetEnvs() {
443+
env = append(env, e.GetKey()+"="+e.GetValue())
444+
}
445+
specOpts = append(specOpts, oci.WithEnv(env))
446+
447+
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
448+
ociRuntime.PodAnnotations) {
449+
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
450+
}
451+
452+
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
453+
ociRuntime.ContainerAnnotations) {
454+
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
455+
}
456+
457+
specOpts = append(specOpts,
458+
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
459+
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
460+
customopts.WithAnnotation(annotations.SandboxNamespace, sandboxConfig.GetMetadata().GetNamespace()),
461+
customopts.WithAnnotation(annotations.SandboxUID, sandboxConfig.GetMetadata().GetUid()),
462+
customopts.WithAnnotation(annotations.SandboxName, sandboxConfig.GetMetadata().GetName()),
463+
customopts.WithAnnotation(annotations.ContainerName, containerName),
464+
customopts.WithAnnotation(annotations.ImageName, imageName),
415465
)
416466

417467
if isLinux {
@@ -423,6 +473,117 @@ func (c *criService) buildContainerSpec(
423473
if ociRuntime.BaseRuntimeSpec == "" {
424474
specOpts = append(specOpts, customopts.WithoutDefaultSecuritySettings)
425475
}
476+
477+
specOpts = append(specOpts,
478+
customopts.WithRelativeRoot(relativeRootfsPath),
479+
oci.WithDefaultPathEnv,
480+
// this will be set based on the security context below
481+
oci.WithNewPrivileges,
482+
)
483+
484+
// Add HOSTNAME env.
485+
var (
486+
err error
487+
hostname = sandboxConfig.GetHostname()
488+
)
489+
if hostname == "" {
490+
if hostname, err = c.os.Hostname(); err != nil {
491+
return nil, err
492+
}
493+
}
494+
specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
495+
496+
securityContext := config.GetLinux().GetSecurityContext()
497+
498+
// Clear all ambient capabilities. The implication of non-root + caps
499+
// is not clearly defined in Kubernetes.
500+
// See https://github.com/kubernetes/kubernetes/issues/56374
501+
// Keep docker's behavior for now.
502+
specOpts = append(specOpts, customopts.WithoutAmbientCaps)
503+
504+
if securityContext.GetPrivileged() {
505+
if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
506+
return nil, errors.New("no privileged container allowed in sandbox")
507+
}
508+
specOpts = append(specOpts, oci.WithPrivileged)
509+
if !ociRuntime.PrivilegedWithoutHostDevices {
510+
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
511+
} else if ociRuntime.PrivilegedWithoutHostDevicesAllDevicesAllowed {
512+
// allow rwm on all devices for the container
513+
specOpts = append(specOpts, oci.WithAllDevicesAllowed)
514+
}
515+
}
516+
517+
// TODO: Figure out whether we should set no new privilege for sandbox container by default
518+
if securityContext.GetNoNewPrivs() {
519+
specOpts = append(specOpts, oci.WithNoNewPrivileges)
520+
}
521+
// TODO(random-liu): [P1] Set selinux options (privileged or not).
522+
if securityContext.GetReadonlyRootfs() {
523+
specOpts = append(specOpts, oci.WithRootFSReadonly())
524+
}
525+
526+
if !c.config.DisableProcMount {
527+
// Change the default masked/readonly paths to empty slices
528+
// See https://github.com/containerd/containerd/issues/5029
529+
// TODO: Provide an option to set default paths to the ones in oci.populateDefaultUnixSpec()
530+
specOpts = append(specOpts, oci.WithMaskedPaths([]string{}), oci.WithReadonlyPaths([]string{}))
531+
532+
// Apply masked paths if specified.
533+
// If the container is privileged, this will be cleared later on.
534+
if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil {
535+
specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths))
536+
}
537+
538+
// Apply readonly paths if specified.
539+
// If the container is privileged, this will be cleared later on.
540+
if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil {
541+
specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths))
542+
}
543+
}
544+
545+
supplementalGroups := securityContext.GetSupplementalGroups()
546+
specOpts = append(specOpts, customopts.WithSupplementalGroups(supplementalGroups))
547+
548+
// Default target PID namespace is the sandbox PID.
549+
targetPid := sandboxPid
550+
// If the container targets another container's PID namespace,
551+
// set targetPid to the PID of that container.
552+
nsOpts := securityContext.GetNamespaceOptions()
553+
if nsOpts.GetPid() == runtime.NamespaceMode_TARGET {
554+
targetContainer, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId)
555+
if err != nil {
556+
return nil, fmt.Errorf("invalid target container: %w", err)
557+
}
558+
559+
status := targetContainer.Status.Get()
560+
targetPid = status.Pid
561+
}
562+
563+
specOpts = append(specOpts,
564+
// TODO: This is a hack to make this compile. We should move userns support to sbserver.
565+
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
566+
)
567+
568+
} else if isWindows {
569+
specOpts = append(specOpts,
570+
// Clear the root location since hcsshim expects it.
571+
// NOTE: readonly rootfs doesn't work on windows.
572+
customopts.WithoutRoot,
573+
oci.WithWindowsNetworkNamespace(netNSPath),
574+
oci.WithHostname(sandboxConfig.GetHostname()),
575+
)
576+
577+
// All containers in a pod need to have HostProcess set if it was set on the pod,
578+
// and vice versa no containers in the pod can be HostProcess if the pods spec
579+
// didn't have the field set. The only case that is valid is if these are the same value.
580+
cntrHpc := config.GetWindows().GetSecurityContext().GetHostProcess()
581+
sandboxHpc := sandboxConfig.GetWindows().GetSecurityContext().GetHostProcess()
582+
if cntrHpc != sandboxHpc {
583+
return nil, errors.New("pod spec and all containers inside must have the HostProcess field set to be valid")
584+
}
585+
586+
specOpts = append(specOpts, customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(sandboxHpc)))
426587
}
427588

428589
// Get spec opts that depend on features offered by the platform containerd daemon is running on.

pkg/cri/sbserver/container_create_linux.go

Lines changed: 6 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ import (
3636
"github.com/opencontainers/selinux/go-selinux/label"
3737
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
3838

39-
"github.com/containerd/containerd/pkg/cri/annotations"
4039
"github.com/containerd/containerd/pkg/cri/config"
4140
customopts "github.com/containerd/containerd/pkg/cri/opts"
4241
)
@@ -126,51 +125,7 @@ func (c *criService) platformSpec(
126125
extraMounts []*runtime.Mount,
127126
ociRuntime config.Runtime,
128127
) (_ []oci.SpecOpts, retErr error) {
129-
specOpts := []oci.SpecOpts{
130-
oci.WithoutRunMount,
131-
}
132-
// only clear the default security settings if the runtime does not have a custom
133-
// base runtime spec spec. Admins can use this functionality to define
134-
// default ulimits, seccomp, or other default settings.
135-
if ociRuntime.BaseRuntimeSpec == "" {
136-
specOpts = append(specOpts, customopts.WithoutDefaultSecuritySettings)
137-
}
138-
specOpts = append(specOpts,
139-
customopts.WithRelativeRoot(relativeRootfsPath),
140-
customopts.WithProcessArgs(config, imageConfig),
141-
oci.WithDefaultPathEnv,
142-
// this will be set based on the security context below
143-
oci.WithNewPrivileges,
144-
)
145-
if config.GetWorkingDir() != "" {
146-
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
147-
} else if imageConfig.WorkingDir != "" {
148-
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
149-
}
150-
151-
if config.GetTty() {
152-
specOpts = append(specOpts, oci.WithTTY)
153-
}
154-
155-
// Add HOSTNAME env.
156-
var (
157-
err error
158-
hostname = sandboxConfig.GetHostname()
159-
)
160-
if hostname == "" {
161-
if hostname, err = c.os.Hostname(); err != nil {
162-
return nil, err
163-
}
164-
}
165-
specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
166-
167-
// Apply envs from image config first, so that envs from container config
168-
// can override them.
169-
env := append([]string{}, imageConfig.Env...)
170-
for _, e := range config.GetEnvs() {
171-
env = append(env, e.GetKey()+"="+e.GetValue())
172-
}
173-
specOpts = append(specOpts, oci.WithEnv(env))
128+
specOpts := []oci.SpecOpts{}
174129

175130
securityContext := config.GetLinux().GetSecurityContext()
176131
labelOptions, err := toLabel(securityContext.GetSelinuxOptions())
@@ -197,61 +152,13 @@ func (c *criService) platformSpec(
197152
}
198153
}()
199154

200-
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
201-
202-
if !c.config.DisableProcMount {
203-
// Change the default masked/readonly paths to empty slices
204-
// See https://github.com/containerd/containerd/issues/5029
205-
// TODO: Provide an option to set default paths to the ones in oci.populateDefaultUnixSpec()
206-
specOpts = append(specOpts, oci.WithMaskedPaths([]string{}), oci.WithReadonlyPaths([]string{}))
207-
208-
// Apply masked paths if specified.
209-
// If the container is privileged, this will be cleared later on.
210-
if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil {
211-
specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths))
212-
}
213-
214-
// Apply readonly paths if specified.
215-
// If the container is privileged, this will be cleared later on.
216-
if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil {
217-
specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths))
218-
}
219-
}
220-
221-
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
222-
customopts.WithCapabilities(securityContext, c.allCaps))
223-
224-
if securityContext.GetPrivileged() {
225-
if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
226-
return nil, errors.New("no privileged container allowed in sandbox")
227-
}
228-
specOpts = append(specOpts, oci.WithPrivileged)
229-
if !ociRuntime.PrivilegedWithoutHostDevices {
230-
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
231-
} else if ociRuntime.PrivilegedWithoutHostDevicesAllDevicesAllowed {
232-
// allow rwm on all devices for the container
233-
specOpts = append(specOpts, oci.WithAllDevicesAllowed)
234-
}
235-
}
236-
237-
// Clear all ambient capabilities. The implication of non-root + caps
238-
// is not clearly defined in Kubernetes.
239-
// See https://github.com/kubernetes/kubernetes/issues/56374
240-
// Keep docker's behavior for now.
241155
specOpts = append(specOpts,
242-
customopts.WithoutAmbientCaps,
243156
customopts.WithSelinuxLabels(processLabel, mountLabel),
157+
customopts.WithMounts(c.os, config, extraMounts, mountLabel),
158+
customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
159+
customopts.WithCapabilities(securityContext, c.allCaps),
244160
)
245161

246-
// TODO: Figure out whether we should set no new privilege for sandbox container by default
247-
if securityContext.GetNoNewPrivs() {
248-
specOpts = append(specOpts, oci.WithNoNewPrivileges)
249-
}
250-
// TODO(random-liu): [P1] Set selinux options (privileged or not).
251-
if securityContext.GetReadonlyRootfs() {
252-
specOpts = append(specOpts, oci.WithRootFSReadonly())
253-
}
254-
255162
if c.config.DisableCgroup {
256163
specOpts = append(specOpts, customopts.WithDisabledCgroups)
257164
} else {
@@ -262,8 +169,6 @@ func (c *criService) platformSpec(
262169
}
263170
}
264171

265-
supplementalGroups := securityContext.GetSupplementalGroups()
266-
267172
// Get blockio class
268173
blockIOClass, err := c.blockIOClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations)
269174
if err != nil {
@@ -286,53 +191,14 @@ func (c *criService) platformSpec(
286191
specOpts = append(specOpts, oci.WithRdt(rdtClass, "", ""))
287192
}
288193

289-
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
290-
ociRuntime.PodAnnotations) {
291-
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
292-
}
293-
294-
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
295-
ociRuntime.ContainerAnnotations) {
296-
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
297-
}
298-
299-
// Default target PID namespace is the sandbox PID.
300-
targetPid := sandboxPid
301-
// If the container targets another container's PID namespace,
302-
// set targetPid to the PID of that container.
303-
nsOpts := securityContext.GetNamespaceOptions()
304-
if nsOpts.GetPid() == runtime.NamespaceMode_TARGET {
305-
targetContainer, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId)
306-
if err != nil {
307-
return nil, fmt.Errorf("invalid target container: %w", err)
308-
}
309-
310-
status := targetContainer.Status.Get()
311-
targetPid = status.Pid
312-
}
194+
specOpts = append(specOpts, customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj))
313195

314-
specOpts = append(specOpts,
315-
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
316-
// TODO: This is a hack to make this compile. We should move userns support to sbserver.
317-
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
318-
customopts.WithSupplementalGroups(supplementalGroups),
319-
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
320-
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
321-
customopts.WithAnnotation(annotations.SandboxNamespace, sandboxConfig.GetMetadata().GetNamespace()),
322-
customopts.WithAnnotation(annotations.SandboxUID, sandboxConfig.GetMetadata().GetUid()),
323-
customopts.WithAnnotation(annotations.SandboxName, sandboxConfig.GetMetadata().GetName()),
324-
customopts.WithAnnotation(annotations.ContainerName, containerName),
325-
customopts.WithAnnotation(annotations.ImageName, imageName),
326-
)
327196
// cgroupns is used for hiding /sys/fs/cgroup from containers.
328197
// For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged.
329198
// https://github.com/containers/libpod/issues/4363
330199
// https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace
331200
if cgroups.Mode() == cgroups.Unified && !securityContext.GetPrivileged() {
332-
specOpts = append(specOpts, oci.WithLinuxNamespace(
333-
runtimespec.LinuxNamespace{
334-
Type: runtimespec.CgroupNamespace,
335-
}))
201+
specOpts = append(specOpts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.CgroupNamespace}))
336202
}
337203

338204
return specOpts, nil

pkg/cri/sbserver/container_create_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ func TestGeneralContainerSpec(t *testing.T) {
6262
testID := "test-id"
6363
testPid := uint32(1234)
6464
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
65+
containerConfig.Command = []string{"/bin/true"}
6566
ociRuntime := config.Runtime{}
6667
c := newTestCRIService()
6768
testSandboxID := "sandbox-id"

0 commit comments

Comments
 (0)