Skip to content

Commit 5d5b210

Browse files
kiashokKirtana Ashok
andauthored
Remove blocking wait on container exit for every exec created (#1604)
Commit fixes the memory leak seen in the shim. It removes creation of channel that waits on container exit for every new exec. Instead, the container wait channel is exposed through WaitChannel() function which callers can use to decide if container has exited or not. Signed-off-by: Kirtana Ashok <[email protected]> (cherry picked from commit 5fc00c5) Signed-off-by: Kirtana Ashok <[email protected]> Signed-off-by: Kirtana Ashok <[email protected]> Co-authored-by: Kirtana Ashok <[email protected]>
1 parent e0b7d33 commit 5d5b210

8 files changed

Lines changed: 112 additions & 44 deletions

File tree

cmd/containerd-shim-runhcs-v1/exec_hcs.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -481,13 +481,9 @@ func (he *hcsExec) waitForContainerExit() {
481481
trace.StringAttribute("tid", he.tid),
482482
trace.StringAttribute("eid", he.id))
483483

484-
cexit := make(chan struct{})
485-
go func() {
486-
_ = he.c.Wait()
487-
close(cexit)
488-
}()
484+
// wait for container or process to exit and ckean up resrources
489485
select {
490-
case <-cexit:
486+
case <-he.c.WaitChannel():
491487
// Container exited first. We need to force the process into the exited
492488
// state and cleanup any resources
493489
he.sl.Lock()

internal/cow/cow.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ type Container interface {
8686
// container to be terminated by some error condition (including calling
8787
// Close).
8888
Wait() error
89+
// WaitChannel returns the wait channel of the container
90+
WaitChannel() <-chan struct{}
91+
// WaitError returns the container termination error.
92+
// This function should only be called after the channel in WaitChannel()
93+
// is closed. Otherwise it is not thread safe.
94+
WaitError() error
8995
// Modify sends a request to modify container resources
9096
Modify(ctx context.Context, config interface{}) error
9197
}

internal/gcs/container.go

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ type Container struct {
2424
notifyCh chan struct{}
2525
closeCh chan struct{}
2626
closeOnce sync.Once
27+
// waitBlock is the channel used to wait for container shutdown or termination
28+
waitBlock chan struct{}
29+
// waitError indicates the container termination error if any
30+
waitError error
2731
}
2832

2933
var _ cow.Container = &Container{}
@@ -37,10 +41,11 @@ func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, conf
3741
span.AddAttributes(trace.StringAttribute("cid", cid))
3842

3943
c := &Container{
40-
gc: gc,
41-
id: cid,
42-
notifyCh: make(chan struct{}),
43-
closeCh: make(chan struct{}),
44+
gc: gc,
45+
id: cid,
46+
notifyCh: make(chan struct{}),
47+
closeCh: make(chan struct{}),
48+
waitBlock: make(chan struct{}),
4449
}
4550
err = gc.requestNotify(cid, c.notifyCh)
4651
if err != nil {
@@ -63,10 +68,11 @@ func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, conf
6368
// container that is already running inside the UVM (after cloning).
6469
func (gc *GuestConnection) CloneContainer(ctx context.Context, cid string) (_ *Container, err error) {
6570
c := &Container{
66-
gc: gc,
67-
id: cid,
68-
notifyCh: make(chan struct{}),
69-
closeCh: make(chan struct{}),
71+
gc: gc,
72+
id: cid,
73+
notifyCh: make(chan struct{}),
74+
closeCh: make(chan struct{}),
75+
waitBlock: make(chan struct{}),
7076
}
7177
err = gc.requestNotify(cid, c.notifyCh)
7278
if err != nil {
@@ -93,6 +99,8 @@ func (c *Container) Close() error {
9399
_, span := trace.StartSpan(context.Background(), "gcs::Container::Close")
94100
defer span.End()
95101
span.AddAttributes(trace.StringAttribute("cid", c.id))
102+
103+
close(c.closeCh)
96104
})
97105
return nil
98106
}
@@ -222,23 +230,33 @@ func (c *Container) Terminate(ctx context.Context) (err error) {
222230
return c.shutdown(ctx, rpcShutdownForced)
223231
}
224232

233+
func (c *Container) WaitChannel() <-chan struct{} {
234+
return c.waitBlock
235+
}
236+
237+
func (c *Container) WaitError() error {
238+
return c.waitError
239+
}
240+
225241
// Wait waits for the container to terminate (or Close to be called, or the
226242
// guest connection to terminate).
227243
func (c *Container) Wait() error {
228-
select {
229-
case <-c.notifyCh:
230-
return nil
231-
case <-c.closeCh:
232-
return errors.New("container closed")
233-
}
244+
<-c.WaitChannel()
245+
return c.WaitError()
234246
}
235247

236248
func (c *Container) waitBackground() {
237249
ctx, span := trace.StartSpan(context.Background(), "gcs::Container::waitBackground")
238250
defer span.End()
239251
span.AddAttributes(trace.StringAttribute("cid", c.id))
240252

241-
err := c.Wait()
253+
select {
254+
case <-c.notifyCh:
255+
case <-c.closeCh:
256+
c.waitError = errors.New("container closed")
257+
}
258+
close(c.waitBlock)
259+
242260
log.G(ctx).Debug("container exited")
243-
oc.SetSpanStatus(span, err)
261+
oc.SetSpanStatus(span, c.waitError)
244262
}

internal/hcs/system.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,11 +287,19 @@ func (computeSystem *System) waitBackground() {
287287
oc.SetSpanStatus(span, err)
288288
}
289289

290+
func (computeSystem *System) WaitChannel() <-chan struct{} {
291+
return computeSystem.waitBlock
292+
}
293+
294+
func (computeSystem *System) WaitError() error {
295+
return computeSystem.waitError
296+
}
297+
290298
// Wait synchronously waits for the compute system to shutdown or terminate. If
291299
// the compute system has already exited returns the previous error (if any).
292300
func (computeSystem *System) Wait() error {
293-
<-computeSystem.waitBlock
294-
return computeSystem.waitError
301+
<-computeSystem.WaitChannel()
302+
return computeSystem.WaitError()
295303
}
296304

297305
// ExitError returns an error describing the reason the compute system terminated.

internal/jobcontainers/jobcontainer.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -471,11 +471,19 @@ func (c *JobContainer) Terminate(ctx context.Context) error {
471471
return nil
472472
}
473473

474+
func (c *JobContainer) WaitChannel() <-chan struct{} {
475+
return c.waitBlock
476+
}
477+
478+
func (c *JobContainer) WaitError() error {
479+
return c.waitError
480+
}
481+
474482
// Wait synchronously waits for the container to shutdown or terminate. If
475483
// the container has already exited returns the previous error (if any).
476484
func (c *JobContainer) Wait() error {
477-
<-c.waitBlock
478-
return c.waitError
485+
<-c.WaitChannel()
486+
return c.WaitError()
479487
}
480488

481489
func (c *JobContainer) waitBackground(ctx context.Context) {

test/vendor/github.com/Microsoft/hcsshim/internal/cow/cow.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/vendor/github.com/Microsoft/hcsshim/internal/gcs/container.go

Lines changed: 34 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go

Lines changed: 10 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)