Skip to content

Commit 68dd47e

Browse files
committed
containerd-shim-runc-v2: avoid potential deadlock in create handler
After pr #8617, create handler of containerd-shim-runc-v2 will call handleStarted() to record the init process and handle its exit. Init process wouldn't quit so early in normal circumstances. But if this screnario occurs, handleStarted() will call handleProcessExit(), which will cause deadlock because create() had acquired s.mu, and handleProcessExit() will try to lock it again. So, I added a parameter muLocked to handleStarted to indicate whether or not s.mu is currently locked, and thus deciding whether or not to lock it when calling handleProcessExit. Fix: #9103 Signed-off-by: Chen Yiyang <[email protected]>
1 parent 6604ff6 commit 68dd47e

1 file changed

Lines changed: 18 additions & 9 deletions

File tree

runtime/v2/runc/task/service.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,14 @@ type containerProcess struct {
137137
// that its exit can be handled efficiently. If the process has already exited,
138138
// it handles the exit immediately. handleStarted should be called after the
139139
// event announcing the start of the process has been published.
140+
// Note that handleStarted needs to be aware of whether s.mu is already held
141+
// when it is called. If s.mu has been held, we don't need to lock it when
142+
// calling handleProcessExit.
140143
//
141144
// The returned cleanup closure releases resources used to handle early exits.
142145
// It must be called before the caller of preStart returns, otherwise severe
143146
// memory leaks will occur.
144-
func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process), cleanup func()) {
147+
func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process, bool), cleanup func()) {
145148
exits := make(map[int][]runcC.Exit)
146149

147150
s.lifecycleMu.Lock()
@@ -165,7 +168,7 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
165168
}
166169
}
167170

168-
handleStarted = func(c *runc.Container, p process.Process) {
171+
handleStarted = func(c *runc.Container, p process.Process, muLocked bool) {
169172
var pid int
170173
if p != nil {
171174
pid = p.Pid()
@@ -180,7 +183,13 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
180183
} else if exited {
181184
s.lifecycleMu.Unlock()
182185
for _, ee := range ees {
183-
s.handleProcessExit(ee, c, p)
186+
if muLocked {
187+
s.handleProcessExit(ee, c, p)
188+
} else {
189+
s.mu.Lock()
190+
s.handleProcessExit(ee, c, p)
191+
s.mu.Unlock()
192+
}
184193
}
185194
} else {
186195
s.running[pid] = append(s.running[pid], containerProcess{
@@ -235,7 +244,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
235244
// could happen would also cause the container.Pid() call above to
236245
// nil-deference panic.
237246
proc, _ := container.Process("")
238-
handleStarted(container, proc)
247+
handleStarted(container, proc, true)
239248

240249
return &taskAPI.CreateTaskResponse{
241250
Pid: uint32(container.Pid()),
@@ -262,7 +271,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
262271
defer cleanup()
263272
p, err := container.Start(ctx, r)
264273
if err != nil {
265-
handleStarted(container, p)
274+
handleStarted(container, p, false)
266275
return nil, errdefs.ToGRPC(err)
267276
}
268277

@@ -302,7 +311,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
302311
Pid: uint32(p.Pid()),
303312
})
304313
}
305-
handleStarted(container, p)
314+
handleStarted(container, p, false)
306315
return &taskAPI.StartResponse{
307316
Pid: uint32(p.Pid()),
308317
}, nil
@@ -631,7 +640,9 @@ func (s *service) processExits() {
631640
s.lifecycleMu.Unlock()
632641

633642
for _, cp := range cps {
643+
s.mu.Lock()
634644
s.handleProcessExit(e, cp.Container, cp.Process)
645+
s.mu.Unlock()
635646
}
636647
}
637648
}
@@ -640,10 +651,8 @@ func (s *service) send(evt interface{}) {
640651
s.events <- evt
641652
}
642653

654+
// s.mu must be locked when calling handleProcessExit
643655
func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) {
644-
s.mu.Lock()
645-
defer s.mu.Unlock()
646-
647656
if ip, ok := p.(*process.Init); ok {
648657
// Ensure all children are killed
649658
if runc.ShouldKillAllOnExit(s.context, c.Bundle) {

0 commit comments

Comments
 (0)