@@ -329,26 +329,41 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
329329 if err := os .MkdirAll (dest , 0o755 ); err != nil {
330330 return err
331331 }
332- return utils .WithProcfd (c .root , m .Destination , func (procfd string ) error {
333- if err := mount (m .Source , m .Destination , procfd , "cgroup2" , uintptr (m .Flags ), m .Data ); err != nil {
334- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
335- if errors .Is (err , unix .EPERM ) || errors .Is (err , unix .EBUSY ) {
336- src := fs2 .UnifiedMountpoint
337- if c .cgroupns && c .cgroup2Path != "" {
338- // Emulate cgroupns by bind-mounting
339- // the container cgroup path rather than
340- // the whole /sys/fs/cgroup.
341- src = c .cgroup2Path
342- }
343- err = mount (src , m .Destination , procfd , "" , uintptr (m .Flags )| unix .MS_BIND , "" )
344- if c .rootlessCgroups && errors .Is (err , unix .ENOENT ) {
345- err = nil
346- }
347- }
348- return err
349- }
350- return nil
332+ err = utils .WithProcfd (c .root , m .Destination , func (procfd string ) error {
333+ return mount (m .Source , m .Destination , procfd , "cgroup2" , uintptr (m .Flags ), m .Data )
351334 })
335+ if err == nil || ! (errors .Is (err , unix .EPERM ) || errors .Is (err , unix .EBUSY )) {
336+ return err
337+ }
338+
339+ // When we are in UserNS but CgroupNS is not unshared, we cannot mount
340+ // cgroup2 (#2158), so fall back to bind mount.
341+ bindM := & configs.Mount {
342+ Device : "bind" ,
343+ Source : fs2 .UnifiedMountpoint ,
344+ Destination : m .Destination ,
345+ Flags : unix .MS_BIND | m .Flags ,
346+ PropagationFlags : m .PropagationFlags ,
347+ }
348+ if c .cgroupns && c .cgroup2Path != "" {
349+ // Emulate cgroupns by bind-mounting the container cgroup path
350+ // rather than the whole /sys/fs/cgroup.
351+ bindM .Source = c .cgroup2Path
352+ }
353+ // mountToRootfs() handles remounting for MS_RDONLY.
354+ // No need to set c.fd here, because mountToRootfs() calls utils.WithProcfd() by itself in mountPropagate().
355+ err = mountToRootfs (bindM , c )
356+ if c .rootlessCgroups && errors .Is (err , unix .ENOENT ) {
357+ // ENOENT (for `bindM.Source = c.cgroup2Path`) happens when rootless runc is being executed
358+ // outside the userns+mountns.
359+ //
360+ // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted
361+ // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`).
362+ err = utils .WithProcfd (c .root , m .Destination , func (procfd string ) error {
363+ return maskPath (procfd , c .label )
364+ })
365+ }
366+ return err
352367}
353368
354369func doTmpfsCopyUp (m * configs.Mount , rootfs , mountLabel string ) (Err error ) {
0 commit comments