Skip to content

Commit a7adeb6

Browse files
committed
cri: Support pods with user namespaces
This patch requests the OCI runtime to create a userns when the CRI message includes such a request. Signed-off-by: Rodrigo Campos <[email protected]>
1 parent 31a6449 commit a7adeb6

6 files changed

Lines changed: 268 additions & 3 deletions

File tree

pkg/cri/opts/spec_linux.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts {
661661
}
662662

663663
// WithPodNamespaces sets the pod namespaces for the container
664-
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts {
664+
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
665665
namespaces := config.GetNamespaceOptions()
666666

667667
opts := []oci.SpecOpts{
@@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid
672672
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
673673
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
674674
}
675+
676+
if namespaces.GetUsernsOptions() != nil {
677+
switch namespaces.GetUsernsOptions().GetMode() {
678+
case runtime.NamespaceMode_NODE:
679+
// Nothing to do. Not adding userns field uses the node userns.
680+
case runtime.NamespaceMode_POD:
681+
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
682+
opts = append(opts, oci.WithUserNamespace(uids, gids))
683+
}
684+
}
685+
675686
return oci.Compose(opts...)
676687
}
677688

@@ -745,6 +756,8 @@ const (
745756
utsNSFormat = "/proc/%v/ns/uts"
746757
// pidNSFormat is the format of pid namespace of a process.
747758
pidNSFormat = "/proc/%v/ns/pid"
759+
// userNSFormat is the format of user namespace of a process.
760+
userNSFormat = "/proc/%v/ns/user"
748761
)
749762

750763
// GetNetworkNamespace returns the network namespace of a process.
@@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string {
767780
return fmt.Sprintf(pidNSFormat, pid)
768781
}
769782

783+
// GetUserNamespace returns the user namespace of a process.
784+
func GetUserNamespace(pid uint32) string {
785+
return fmt.Sprintf(userNSFormat, pid)
786+
}
787+
770788
// WithCDI updates OCI spec with CDI content
771789
func WithCDI(annotations map[string]string) oci.SpecOpts {
772790
return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error {

pkg/cri/sbserver/container_create_linux.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,8 @@ func (c *criService) containerSpec(
313313

314314
specOpts = append(specOpts,
315315
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
316-
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
316+
// TODO: This is a hack to make this compile. We should move userns support to sbserver.
317+
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
317318
customopts.WithSupplementalGroups(supplementalGroups),
318319
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
319320
customopts.WithAnnotation(annotations.SandboxID, sandboxID),

pkg/cri/server/container_create_linux.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,14 @@ func (c *criService) containerSpec(
311311
targetPid = status.Pid
312312
}
313313

314+
uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions())
315+
if err != nil {
316+
return nil, fmt.Errorf("user namespace configuration: %w", err)
317+
}
318+
314319
specOpts = append(specOpts,
315320
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
316-
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
321+
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids),
317322
customopts.WithSupplementalGroups(supplementalGroups),
318323
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
319324
customopts.WithAnnotation(annotations.SandboxID, sandboxID),

pkg/cri/server/container_create_linux_test.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) {
804804
}
805805
}
806806

807+
func TestUserNamespace(t *testing.T) {
808+
testID := "test-id"
809+
testPid := uint32(1234)
810+
testSandboxID := "sandbox-id"
811+
testContainerName := "container-name"
812+
idMap := runtime.IDMapping{
813+
HostId: 1000,
814+
ContainerId: 1000,
815+
Length: 10,
816+
}
817+
expIDMap := runtimespec.LinuxIDMapping{
818+
HostID: 1000,
819+
ContainerID: 1000,
820+
Size: 10,
821+
}
822+
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
823+
ociRuntime := config.Runtime{}
824+
c := newTestCRIService()
825+
for desc, test := range map[string]struct {
826+
userNS *runtime.UserNamespace
827+
expNS *runtimespec.LinuxNamespace
828+
expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace
829+
expUIDMapping []runtimespec.LinuxIDMapping
830+
expGIDMapping []runtimespec.LinuxIDMapping
831+
err bool
832+
}{
833+
"node namespace mode": {
834+
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE},
835+
// Expect userns to NOT be present.
836+
expNotNS: &runtimespec.LinuxNamespace{
837+
Type: runtimespec.UserNamespace,
838+
Path: opts.GetUserNamespace(testPid),
839+
},
840+
},
841+
"node namespace mode with mappings": {
842+
userNS: &runtime.UserNamespace{
843+
Mode: runtime.NamespaceMode_NODE,
844+
Uids: []*runtime.IDMapping{&idMap},
845+
Gids: []*runtime.IDMapping{&idMap},
846+
},
847+
err: true,
848+
},
849+
"container namespace mode": {
850+
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER},
851+
err: true,
852+
},
853+
"target namespace mode": {
854+
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET},
855+
err: true,
856+
},
857+
"unknown namespace mode": {
858+
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)},
859+
err: true,
860+
},
861+
"pod namespace mode": {
862+
userNS: &runtime.UserNamespace{
863+
Mode: runtime.NamespaceMode_POD,
864+
Uids: []*runtime.IDMapping{&idMap},
865+
Gids: []*runtime.IDMapping{&idMap},
866+
},
867+
expNS: &runtimespec.LinuxNamespace{
868+
Type: runtimespec.UserNamespace,
869+
Path: opts.GetUserNamespace(testPid),
870+
},
871+
expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
872+
expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
873+
},
874+
"pod namespace mode with several mappings": {
875+
userNS: &runtime.UserNamespace{
876+
Mode: runtime.NamespaceMode_POD,
877+
Uids: []*runtime.IDMapping{&idMap, &idMap},
878+
Gids: []*runtime.IDMapping{&idMap, &idMap},
879+
},
880+
err: true,
881+
},
882+
"pod namespace mode with uneven mappings": {
883+
userNS: &runtime.UserNamespace{
884+
Mode: runtime.NamespaceMode_POD,
885+
Uids: []*runtime.IDMapping{&idMap, &idMap},
886+
Gids: []*runtime.IDMapping{&idMap},
887+
},
888+
err: true,
889+
},
890+
} {
891+
t.Run(desc, func(t *testing.T) {
892+
containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS}
893+
spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
894+
895+
if test.err {
896+
assert.Error(t, err)
897+
assert.Nil(t, spec)
898+
return
899+
}
900+
assert.NoError(t, err)
901+
assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping)
902+
assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping)
903+
904+
if test.expNS != nil {
905+
assert.Contains(t, spec.Linux.Namespaces, *test.expNS)
906+
}
907+
if test.expNotNS != nil {
908+
assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS)
909+
}
910+
})
911+
}
912+
}
913+
807914
func TestNoDefaultRunMount(t *testing.T) {
808915
testID := "test-id"
809916
testPid := uint32(1234)

pkg/cri/server/sandbox_run_linux.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC
9696
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
9797
}
9898

99+
usernsOpts := nsOptions.GetUsernsOptions()
100+
uids, gids, err := parseUsernsIDs(usernsOpts)
101+
if err != nil {
102+
return nil, fmt.Errorf("user namespace configuration: %w", err)
103+
}
104+
105+
if usernsOpts != nil {
106+
switch mode := usernsOpts.GetMode(); mode {
107+
case runtime.NamespaceMode_NODE:
108+
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
109+
case runtime.NamespaceMode_POD:
110+
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
111+
default:
112+
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
113+
}
114+
}
115+
99116
// It's fine to generate the spec before the sandbox /dev/shm
100117
// is actually created.
101118
sandboxDevShm := c.getSandboxDevShm(id)

pkg/cri/server/sandbox_run_linux_test.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
9898
func TestLinuxSandboxContainerSpec(t *testing.T) {
9999
testID := "test-id"
100100
nsPath := "test-cni"
101+
idMap := runtime.IDMapping{
102+
HostId: 1000,
103+
ContainerId: 1000,
104+
Length: 10,
105+
}
106+
expIDMap := runtimespec.LinuxIDMapping{
107+
HostID: 1000,
108+
ContainerID: 1000,
109+
Size: 10,
110+
}
111+
101112
for desc, test := range map[string]struct {
102113
configChange func(*runtime.PodSandboxConfig)
103114
specCheck func(*testing.T, *runtimespec.Spec)
@@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
122133
})
123134
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
124135
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
136+
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
137+
Type: runtimespec.UserNamespace,
138+
})
125139
},
126140
},
127141
"host namespace": {
@@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
149163
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
150164
Type: runtimespec.IPCNamespace,
151165
})
166+
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
167+
Type: runtimespec.UserNamespace,
168+
})
152169
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
153170
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
154171
},
155172
},
173+
"user namespace": {
174+
configChange: func(c *runtime.PodSandboxConfig) {
175+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
176+
NamespaceOptions: &runtime.NamespaceOption{
177+
UsernsOptions: &runtime.UserNamespace{
178+
Mode: runtime.NamespaceMode_POD,
179+
Uids: []*runtime.IDMapping{&idMap},
180+
Gids: []*runtime.IDMapping{&idMap},
181+
},
182+
},
183+
}
184+
},
185+
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
186+
require.NotNil(t, spec.Linux)
187+
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
188+
Type: runtimespec.UserNamespace,
189+
})
190+
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
191+
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
192+
193+
},
194+
},
195+
"user namespace mode node and mappings": {
196+
configChange: func(c *runtime.PodSandboxConfig) {
197+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
198+
NamespaceOptions: &runtime.NamespaceOption{
199+
UsernsOptions: &runtime.UserNamespace{
200+
Mode: runtime.NamespaceMode_NODE,
201+
Uids: []*runtime.IDMapping{&idMap},
202+
Gids: []*runtime.IDMapping{&idMap},
203+
},
204+
},
205+
}
206+
},
207+
expectErr: true,
208+
},
209+
"user namespace with several mappings": {
210+
configChange: func(c *runtime.PodSandboxConfig) {
211+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
212+
NamespaceOptions: &runtime.NamespaceOption{
213+
UsernsOptions: &runtime.UserNamespace{
214+
Mode: runtime.NamespaceMode_NODE,
215+
Uids: []*runtime.IDMapping{&idMap, &idMap},
216+
Gids: []*runtime.IDMapping{&idMap, &idMap},
217+
},
218+
},
219+
}
220+
},
221+
expectErr: true,
222+
},
223+
"user namespace with uneven mappings": {
224+
configChange: func(c *runtime.PodSandboxConfig) {
225+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
226+
NamespaceOptions: &runtime.NamespaceOption{
227+
UsernsOptions: &runtime.UserNamespace{
228+
Mode: runtime.NamespaceMode_NODE,
229+
Uids: []*runtime.IDMapping{&idMap, &idMap},
230+
Gids: []*runtime.IDMapping{&idMap},
231+
},
232+
},
233+
}
234+
},
235+
expectErr: true,
236+
},
237+
"user namespace mode container": {
238+
configChange: func(c *runtime.PodSandboxConfig) {
239+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
240+
NamespaceOptions: &runtime.NamespaceOption{
241+
UsernsOptions: &runtime.UserNamespace{
242+
Mode: runtime.NamespaceMode_CONTAINER,
243+
},
244+
},
245+
}
246+
},
247+
expectErr: true,
248+
},
249+
"user namespace mode target": {
250+
configChange: func(c *runtime.PodSandboxConfig) {
251+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
252+
NamespaceOptions: &runtime.NamespaceOption{
253+
UsernsOptions: &runtime.UserNamespace{
254+
Mode: runtime.NamespaceMode_TARGET,
255+
},
256+
},
257+
}
258+
},
259+
expectErr: true,
260+
},
261+
"user namespace unknown mode": {
262+
configChange: func(c *runtime.PodSandboxConfig) {
263+
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
264+
NamespaceOptions: &runtime.NamespaceOption{
265+
UsernsOptions: &runtime.UserNamespace{
266+
Mode: runtime.NamespaceMode(100),
267+
},
268+
},
269+
}
270+
},
271+
expectErr: true,
272+
},
156273
"should set supplemental groups correctly": {
157274
configChange: func(c *runtime.PodSandboxConfig) {
158275
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{

0 commit comments

Comments
 (0)