Skip to content

Commit 6241c53

Browse files
authored
Merge pull request microsoft#1249 from gabriel-samfira/wait-for-init-exit
Wait for waitInitExit() to return
2 parents 422eb31 + a6edb25 commit 6241c53

3 files changed

Lines changed: 62 additions & 10 deletions

File tree

cmd/containerd-shim-runhcs-v1/pod.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -380,8 +380,9 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all
380380
return wt.KillExec(ctx, eid, signal, all)
381381
})
382382

383-
// iterate all
384-
return false
383+
// Iterate all. Returning false stops the iteration. See:
384+
// https://pkg.go.dev/sync#Map.Range
385+
return true
385386
})
386387
}
387388
eg.Go(func() error {

cmd/containerd-shim-runhcs-v1/task_hcs.go

Lines changed: 47 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -536,8 +536,9 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all
536536
}).Warn("failed to kill exec in task")
537537
}
538538

539-
// iterate all
540-
return false
539+
// Iterate all. Returning false stops the iteration. See:
540+
// https://pkg.go.dev/sync#Map.Range
541+
return true
541542
})
542543
}
543544
if signal == 0x9 && eid == "" && ht.host != nil {
@@ -578,8 +579,9 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
578579
ex.ForceExit(ctx, 1)
579580
}
580581

581-
// iterate next
582-
return false
582+
// Iterate all. Returning false stops the iteration. See:
583+
// https://pkg.go.dev/sync#Map.Range
584+
return true
583585
})
584586
}
585587
switch state := e.State(); state {
@@ -588,6 +590,41 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
588590
case shimExecStateRunning:
589591
return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete")
590592
}
593+
594+
if eid == "" {
595+
// We are killing the init task, so we expect the container to be
596+
// stopped after this.
597+
//
598+
// The task process may have already exited, and the status set to
599+
// shimExecStateExited, but resources may still be in the process
600+
// of being cleaned up. Wait for ht.closed to be closed. This signals
601+
// that waitInitExit() has finished destroying container resources,
602+
// and layers were unmounted.
603+
// If the shim exits before resources are cleaned up, those resources
604+
// will remain locked and untracked, which leads to lingering sandboxes
605+
// and container resources like base vhdx.
606+
select {
607+
case <-time.After(30 * time.Second):
608+
log.G(ctx).Error("timed out waiting for resource cleanup")
609+
return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup")
610+
case <-ht.closed:
611+
}
612+
613+
// The init task has now exited. A ForceExit() has already been sent to
614+
// execs. Cleanup execs and continue.
615+
ht.execs.Range(func(key, value interface{}) bool {
616+
if key == "" {
617+
// Iterate next.
618+
return true
619+
}
620+
ht.execs.Delete(key)
621+
622+
// Iterate all. Returning false stops the iteration. See:
623+
// https://pkg.go.dev/sync#Map.Range
624+
return true
625+
})
626+
}
627+
591628
status := e.Status()
592629
if eid != "" {
593630
ht.execs.Delete(eid)
@@ -617,8 +654,9 @@ func (ht *hcsTask) Pids(ctx context.Context) ([]runhcsopts.ProcessDetails, error
617654
ex := value.(shimExec)
618655
pidMap[ex.Pid()] = ex.ID()
619656

620-
// Iterate all
621-
return false
657+
// Iterate all. Returning false stops the iteration. See:
658+
// https://pkg.go.dev/sync#Map.Range
659+
return true
622660
})
623661
pidMap[ht.init.Pid()] = ht.init.ID()
624662

@@ -699,8 +737,9 @@ func (ht *hcsTask) waitForHostExit() {
699737
ex := value.(shimExec)
700738
ex.ForceExit(ctx, 1)
701739

702-
// iterate all
703-
return false
740+
// Iterate all. Returning false stops the iteration. See:
741+
// https://pkg.go.dev/sync#Map.Range
742+
return true
704743
})
705744
ht.init.ForceExit(ctx, 1)
706745
ht.closeHost(ctx)

cmd/containerd-shim-runhcs-v1/task_hcs_test.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,8 @@ func Test_hcsTask_DeleteExec_InitExecID_CreatedState_Success(t *testing.T) {
161161
// remove the 2nd exec so we just check without it.
162162
lt.execs.Delete(second.id)
163163

164+
// Simulate waitInitExit() closing the host
165+
close(lt.closed)
164166
// try to delete the init exec
165167
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
166168

@@ -178,6 +180,8 @@ func Test_hcsTask_DeleteExec_InitExecID_RunningState_Error(t *testing.T) {
178180
// Start the init exec
179181
_ = init.Start(context.TODO())
180182

183+
// Simulate waitInitExit() closing the host
184+
close(lt.closed)
181185
// try to delete the init exec
182186
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
183187

@@ -192,6 +196,8 @@ func Test_hcsTask_DeleteExec_InitExecID_ExitedState_Success(t *testing.T) {
192196

193197
_ = init.Kill(context.TODO(), 0xf)
194198

199+
// Simulate waitInitExit() closing the host
200+
close(lt.closed)
195201
// try to delete the init exec
196202
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
197203

@@ -207,6 +213,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_CreatedState_Error(t *testing.T)
207213
// start the init exec (required to have 2nd exec)
208214
_ = init.Start(context.TODO())
209215

216+
// Simulate waitInitExit() closing the host
217+
close(lt.closed)
210218
// try to delete the init exec
211219
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
212220

@@ -226,6 +234,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_RunningState_Error(t *testing.T)
226234
// put the 2nd exec into the running state
227235
_ = second.Start(context.TODO())
228236

237+
// Simulate waitInitExit() closing the host
238+
close(lt.closed)
229239
// try to delete the init exec
230240
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
231241

@@ -244,6 +254,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_ExitedState_Success(t *testing.T
244254
// put the 2nd exec into the exited state
245255
_ = second.Kill(context.TODO(), 0xf)
246256

257+
// Simulate waitInitExit() closing the host
258+
close(lt.closed)
247259
// try to delete the init exec
248260
pid, status, at, err := lt.DeleteExec(context.TODO(), "")
249261

0 commit comments

Comments
 (0)