Skip to content

Commit a6edb25

Browse files
Wait for waitInitExit() to return
This change gives waitInitExit() a chance to cleanup resource when DeleteExec() is called, before returning. This should fix situations where the shim exits before releasing container resources. Signed-off-by: Gabriel Adrian Samfira <[email protected]>
1 parent 76881a2 commit a6edb25

2 files changed

Lines changed: 47 additions & 0 deletions

File tree

cmd/containerd-shim-runhcs-v1/task_hcs.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,41 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
590590
case shimExecStateRunning:
591591
return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete")
592592
}
593+
594+
if eid == "" {
595+
// We are killing the init task, so we expect the container to be
596+
// stopped after this.
597+
//
598+
// The task process may have already exited, and the status set to
599+
// shimExecStateExited, but resources may still be in the process
600+
// of being cleaned up. Wait for ht.closed to be closed. This signals
601+
// that waitInitExit() has finished destroying container resources,
602+
// and layers were umounted.
603+
// If the shim exits before resources are cleaned up, those resources
604+
// will remain locked and untracked, which leads to lingering sandboxes
605+
// and container resources like base vhdx.
606+
select {
607+
case <-time.After(30 * time.Second):
608+
log.G(ctx).Error("timed out waiting for resource cleanup")
609+
return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup")
610+
case <-ht.closed:
611+
}
612+
613+
// The init task has now exited. A ForceExit() has already been sent to
614+
// execs. Cleanup execs and continue.
615+
ht.execs.Range(func(key, value interface{}) bool {
616+
if key == "" {
617+
// Iterate next.
618+
return true
619+
}
620+
ht.execs.Delete(key)
621+
622+
// Iterate all. Returning false stops the iteration. See:
623+
// https://pkg.go.dev/sync#Map.Range
624+
return true
625+
})
626+
}
627+
593628
status := e.Status()
594629
if eid != "" {
595630
ht.execs.Delete(eid)

cmd/containerd-shim-runhcs-v1/task_hcs_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ func Test_hcsTask_DeleteExec_InitExecID_CreatedState_Success(t *testing.T) {
161161
// remove the 2nd exec so we just check without it.
162162
lt.execs.Delete(second.id)
163163

164+
// Simulate waitInitExit() closing the host
165+
close(lt.closed)
164166
// try to delete the init exec
165167
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
166168

@@ -178,6 +180,8 @@ func Test_hcsTask_DeleteExec_InitExecID_RunningState_Error(t *testing.T) {
178180
// Start the init exec
179181
_ = init.Start(context.TODO())
180182

183+
// Simulate waitInitExit() closing the host
184+
close(lt.closed)
181185
// try to delete the init exec
182186
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
183187

@@ -192,6 +196,8 @@ func Test_hcsTask_DeleteExec_InitExecID_ExitedState_Success(t *testing.T) {
192196

193197
_ = init.Kill(context.TODO(), 0xf)
194198

199+
// Simulate waitInitExit() closing the host
200+
close(lt.closed)
195201
// try to delete the init exec
196202
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
197203

@@ -207,6 +213,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_CreatedState_Error(t *testing.T)
207213
// start the init exec (required to have 2nd exec)
208214
_ = init.Start(context.TODO())
209215

216+
// Simulate waitInitExit() closing the host
217+
close(lt.closed)
210218
// try to delete the init exec
211219
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
212220

@@ -226,6 +234,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_RunningState_Error(t *testing.T)
226234
// put the 2nd exec into the running state
227235
_ = second.Start(context.TODO())
228236

237+
// Simulate waitInitExit() closing the host
238+
close(lt.closed)
229239
// try to delete the init exec
230240
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
231241

@@ -244,6 +254,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_ExitedState_Success(t *testing.T
244254
// put the 2nd exec into the exited state
245255
_ = second.Kill(context.TODO(), 0xf)
246256

257+
// Simulate waitInitExit() closing the host
258+
close(lt.closed)
247259
// try to delete the init exec
248260
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
249261

0 commit comments

Comments
 (0)