-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Description
Description
After PR #8617, the Create handler of containerd-shim-runc-v2 calls handleStarted() to record the init process and handle its exit.
containerd/runtime/v2/runc/task/service.go
Lines 206 to 243 in 31b6cdf
| func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { | |
| s.mu.Lock() | |
| defer s.mu.Unlock() | |
| handleStarted, cleanup := s.preStart(nil) | |
| defer cleanup() | |
| container, err := runc.NewContainer(ctx, s.platform, r) | |
| if err != nil { | |
| return nil, err | |
| } | |
| s.containers[r.ID] = container | |
| s.send(&eventstypes.TaskCreate{ | |
| ContainerID: r.ID, | |
| Bundle: r.Bundle, | |
| Rootfs: r.Rootfs, | |
| IO: &eventstypes.TaskIO{ | |
| Stdin: r.Stdin, | |
| Stdout: r.Stdout, | |
| Stderr: r.Stderr, | |
| Terminal: r.Terminal, | |
| }, | |
| Checkpoint: r.Checkpoint, | |
| Pid: uint32(container.Pid()), | |
| }) | |
| // The following line cannot return an error as the only state in which that | |
| // could happen would also cause the container.Pid() call above to | |
| // nil-deference panic. | |
| proc, _ := container.Process("") | |
| handleStarted(container, proc) | |
| return &taskAPI.CreateTaskResponse{ | |
| Pid: uint32(container.Pid()), | |
| }, nil | |
| } |
Under normal circumstances the init process would not exit this early. However, if the init process has already exited by the time handleStarted() runs (the `exited` branch below), handleStarted() calls handleProcessExit().
containerd/runtime/v2/runc/task/service.go
Lines 168 to 192 in 31b6cdf
| handleStarted = func(c *runc.Container, p process.Process) { | |
| var pid int | |
| if p != nil { | |
| pid = p.Pid() | |
| } | |
| s.lifecycleMu.Lock() | |
| ees, exited := exits[pid] | |
| delete(s.exitSubscribers, &exits) | |
| exits = nil | |
| if pid == 0 { // no-op | |
| s.lifecycleMu.Unlock() | |
| } else if exited { | |
| s.lifecycleMu.Unlock() | |
| for _, ee := range ees { | |
| s.handleProcessExit(ee, c, p) | |
| } | |
| } else { | |
| s.running[pid] = append(s.running[pid], containerProcess{ | |
| Container: c, | |
| Process: p, | |
| }) | |
| s.lifecycleMu.Unlock() | |
| } | |
| } |
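This causes a deadlock: Create() has already acquired s.mu (and holds it until it returns), and handleProcessExit() tries to lock s.mu again on the same goroutine. Go's sync.Mutex is not reentrant, so the second Lock() never returns.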
containerd/runtime/v2/runc/task/service.go
Lines 206 to 207 in 31b6cdf
| func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { | |
| s.mu.Lock() |
containerd/runtime/v2/runc/task/service.go
Lines 643 to 644 in 31b6cdf
| func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) { | |
| s.mu.Lock() |
Steps to reproduce the issue
Describe the results you received and expected
If the above scenario occurs, the goroutine serving the Create request blocks forever inside handleProcessExit(), as the following stack trace shows:
goroutine 36 [semacquire, 202 minutes]:
sync.runtime_SemacquireMutex(0x7ffc9cac9e28?, 0x0?, 0x203000?)
/usr/local/go/src/runtime/sema.go:77 +0x25
sync.(*Mutex).lockSlow(0xc000024a80)
/usr/local/go/src/sync/mutex.go:171 +0x165
sync.(*Mutex).Lock(...)
/usr/local/go/src/sync/mutex.go:90
github.com/containerd/containerd/runtime/v2/runc/task.(*service).handleProcessExit(0xc000024a80, {{0xc137d350cc8cc553, 0x18ca659b, 0xd15720}, 0x286ab8, 0x89}, 0xc000200960, {0x9f7840, 0xc000218420?})
/go/src/github.com/containerd/containerd/runtime/v2/runc/task/service.go:643 +0xa7
github.com/containerd/containerd/runtime/v2/runc/task.(*service).preStart.func1(0xc000200960, {0x9f7840, 0xc000218420})
/go/src/github.com/containerd/containerd/runtime/v2/runc/task/service.go:182 +0x347
github.com/containerd/containerd/runtime/v2/runc/task.(*service).Create(0xc000024a80, {0x9f47b8, 0xc0002161e0}, 0xc000240000)
/go/src/github.com/containerd/containerd/runtime/v2/runc/task/service.go:237 +0x347
github.com/containerd/containerd/runtime/v2/task.RegisterTaskService.func2({0x9f47b8, 0xc0002161e0}, 0xc00021a0e0)
/go/src/github.com/containerd/containerd/runtime/v2/task/shim.pb.go:3463 +0x98
github.com/containerd/ttrpc.defaultServerInterceptor({0x9f47b8?, 0xc0002161e0?}, 0x17?, 0xc00023e040?, 0x6?)
/go/src/github.com/containerd/containerd/vendor/github.com/containerd/ttrpc/interceptor.go:45 +0x26
github.com/containerd/ttrpc.(*serviceSet).dispatch(0xc00005be10, {0x9f47b8, 0xc0002161e0}, {0xc00023a000, 0x17}, {0xc00022a038, 0x6}, {0xc00023c000, 0x1e3, 0x200})
/go/src/github.com/containerd/containerd/vendor/github.com/containerd/ttrpc/services.go:95 +0x1be
github.com/containerd/ttrpc.(*serviceSet).call(0xc000204050?, {0x9f47b8?, 0xc0002161e0?}, {0xc00023a000?, 0xc0002001e0?}, {0xc00022a038?, 0x7ffc9cac9e28?}, {0xc00023c000, 0x1e3, 0x200})
/go/src/github.com/containerd/containerd/vendor/github.com/containerd/ttrpc/services.go:64 +0x71
github.com/containerd/ttrpc.(*serverConn).run.func2(0x1)
/go/src/github.com/containerd/containerd/vendor/github.com/containerd/ttrpc/server.go:439 +0xe5
created by github.com/containerd/ttrpc.(*serverConn).run
/go/src/github.com/containerd/containerd/vendor/github.com/containerd/ttrpc/server.go:435 +0x6b1
What version of containerd are you using?
1.6.22
Any other relevant information
No response
Show configuration if it is related to CRI plugin.
No response