Skip to content

Commit 0449124

Browse files
cyyzerothaJeztah
authored andcommitted
containerd-shim-runc-v2: avoid potential deadlock in create handler
After pr #8617, create handler of containerd-shim-runc-v2 will call handleStarted() to record the init process and handle its exit. Init process wouldn't quit so early in normal circumstances. But if this screnario occurs, handleStarted() will call handleProcessExit(), which will cause deadlock because create() had acquired s.mu, and handleProcessExit() will try to lock it again. So, I added a parameter muLocked to handleStarted to indicate whether or not s.mu is currently locked, and thus deciding whether or not to lock it when calling handleProcessExit. Fix: #9103 Signed-off-by: Chen Yiyang <[email protected]> (cherry picked from commit 68dd47e) Signed-off-by: Sebastiaan van Stijn <[email protected]>
1 parent 6982a0d commit 0449124

1 file changed

Lines changed: 18 additions & 9 deletions

File tree

runtime/v2/runc/task/service.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,14 @@ type containerProcess struct {
136136
// that its exit can be handled efficiently. If the process has already exited,
137137
// it handles the exit immediately. handleStarted should be called after the
138138
// event announcing the start of the process has been published.
139+
// Note that handleStarted needs to be aware of whether s.mu is already held
140+
// when it is called. If s.mu has been held, we don't need to lock it when
141+
// calling handleProcessExit.
139142
//
140143
// The returned cleanup closure releases resources used to handle early exits.
141144
// It must be called before the caller of preStart returns, otherwise severe
142145
// memory leaks will occur.
143-
func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process), cleanup func()) {
146+
func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process, bool), cleanup func()) {
144147
exits := make(map[int][]runcC.Exit)
145148

146149
s.lifecycleMu.Lock()
@@ -164,7 +167,7 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
164167
}
165168
}
166169

167-
handleStarted = func(c *runc.Container, p process.Process) {
170+
handleStarted = func(c *runc.Container, p process.Process, muLocked bool) {
168171
var pid int
169172
if p != nil {
170173
pid = p.Pid()
@@ -179,7 +182,13 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
179182
} else if exited {
180183
s.lifecycleMu.Unlock()
181184
for _, ee := range ees {
182-
s.handleProcessExit(ee, c, p)
185+
if muLocked {
186+
s.handleProcessExit(ee, c, p)
187+
} else {
188+
s.mu.Lock()
189+
s.handleProcessExit(ee, c, p)
190+
s.mu.Unlock()
191+
}
183192
}
184193
} else {
185194
s.running[pid] = append(s.running[pid], containerProcess{
@@ -234,7 +243,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
234243
// could happen would also cause the container.Pid() call above to
235244
// nil-deference panic.
236245
proc, _ := container.Process("")
237-
handleStarted(container, proc)
246+
handleStarted(container, proc, true)
238247

239248
return &taskAPI.CreateTaskResponse{
240249
Pid: uint32(container.Pid()),
@@ -261,7 +270,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
261270
defer cleanup()
262271
p, err := container.Start(ctx, r)
263272
if err != nil {
264-
handleStarted(container, p)
273+
handleStarted(container, p, false)
265274
return nil, errdefs.ToGRPC(err)
266275
}
267276

@@ -301,7 +310,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
301310
Pid: uint32(p.Pid()),
302311
})
303312
}
304-
handleStarted(container, p)
313+
handleStarted(container, p, false)
305314
return &taskAPI.StartResponse{
306315
Pid: uint32(p.Pid()),
307316
}, nil
@@ -630,7 +639,9 @@ func (s *service) processExits() {
630639
s.lifecycleMu.Unlock()
631640

632641
for _, cp := range cps {
642+
s.mu.Lock()
633643
s.handleProcessExit(e, cp.Container, cp.Process)
644+
s.mu.Unlock()
634645
}
635646
}
636647
}
@@ -639,10 +650,8 @@ func (s *service) send(evt interface{}) {
639650
s.events <- evt
640651
}
641652

653+
// s.mu must be locked when calling handleProcessExit
642654
func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) {
643-
s.mu.Lock()
644-
defer s.mu.Unlock()
645-
646655
if ip, ok := p.(*process.Init); ok {
647656
// Ensure all children are killed
648657
if runc.ShouldKillAllOnExit(s.context, c.Bundle) {

0 commit comments

Comments
 (0)