Skip to content

Commit d895118

Browse files
committed
runtime/v2/runc: fix leaking socket path
When runC shimv2 starts, the StartShim interface will re-exec itself as long-running process, which will read the `address` during initializing. ```happycase Process containerd-shim-runc-v1/v2 start containerd-shim-runc-v1/v2 initializing socket reexec containerd-shim-runc-v1/v2 write address into file initializing read address write back to containerd daemon serving ... remove address in Shutdown call ``` However, there is no synchronization after reexec. Then the data race is like: ```leaking-case Process containerd-shim-runc-v1/v2 start containerd-shim-runc-v1/v2 initializing socket reexec containerd-shim-runc-v1/v2 initializing read address write address into file write back to containerd daemon serving ... fail to remove address because of empty address ``` The `address` should be writen into file first before reexec. And if shutdown the whole service before cleanup temporary resource (like socket file), the Shutdown caller will receive `ttrpc: closed` sometime, which depends on go runtime scheduler. Then it also causes leaking socket files. Since the shimV2-Delete binary API must be called to cleanup shim temporary resource and shimV2-runC-v1 doesn't support grouping multi containers in one, it is safe to remove the socket file in the binary call for shimV2-runC-v1. But for the shimV2-runC-v2 shim, we still cleanup socket in Shutdown. Hopefully we can find a way to cleanup socket in shimV2-Delete binary call. Fix: #5173 Signed-off-by: Wei Fu <[email protected]>
1 parent d8208e2 commit d895118

3 files changed

Lines changed: 34 additions & 16 deletions

File tree

runtime/v2/runc/v1/service.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
153153
_ = shim.RemoveSocket(address)
154154
}
155155
}()
156+
// make sure that reexec shim-v2 binary use the value if need
157+
if err := shim.WriteAddress("address", address); err != nil {
158+
return "", err
159+
}
160+
156161
f, err := socket.File()
157162
if err != nil {
158163
return "", err
@@ -174,9 +179,6 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
174179
if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil {
175180
return "", err
176181
}
177-
if err := shim.WriteAddress("address", address); err != nil {
178-
return "", err
179-
}
180182
if data, err := ioutil.ReadAll(os.Stdin); err == nil {
181183
if len(data) > 0 {
182184
var any ptypes.Any
@@ -209,6 +211,12 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
209211
}
210212

211213
func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) {
214+
if address, err := shim.ReadAddress("address"); err == nil {
215+
if err = shim.RemoveSocket(address); err != nil {
216+
return nil, err
217+
}
218+
}
219+
212220
path, err := os.Getwd()
213221
if err != nil {
214222
return nil, err
@@ -562,11 +570,10 @@ func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*task
562570
}
563571

564572
func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) {
573+
// please make sure that temporary resource has been cleanup
574+
// before shutdown service.
565575
s.cancel()
566576
close(s.events)
567-
if address, err := shim.ReadAddress("address"); err == nil {
568-
_ = shim.RemoveSocket(address)
569-
}
570577
return empty, nil
571578
}
572579

runtime/v2/runc/v2/service.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,12 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
220220
_ = shim.RemoveSocket(address)
221221
}
222222
}()
223+
224+
// make sure that reexec shim-v2 binary use the value if need
225+
if err := shim.WriteAddress("address", address); err != nil {
226+
return "", err
227+
}
228+
223229
f, err := socket.File()
224230
if err != nil {
225231
return "", err
@@ -238,9 +244,6 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
238244
}()
239245
// make sure to wait after start
240246
go cmd.Wait()
241-
if err := shim.WriteAddress("address", address); err != nil {
242-
return "", err
243-
}
244247
if data, err := ioutil.ReadAll(os.Stdin); err == nil {
245248
if len(data) > 0 {
246249
var any ptypes.Any
@@ -290,6 +293,7 @@ func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error)
290293
if err != nil {
291294
return nil, err
292295
}
296+
293297
path := filepath.Join(filepath.Dir(cwd), s.id)
294298
ns, err := namespaces.NamespaceRequired(ctx)
295299
if err != nil {
@@ -668,15 +672,19 @@ func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*pt
668672
if len(s.containers) > 0 {
669673
return empty, nil
670674
}
671-
s.cancel()
672-
close(s.events)
673675

674676
if s.platform != nil {
675677
s.platform.Close()
676678
}
679+
677680
if s.shimAddress != "" {
678681
_ = shim.RemoveSocket(s.shimAddress)
679682
}
683+
684+
// please make sure that temporary resource has been cleanup
685+
// before shutdown service.
686+
s.cancel()
687+
close(s.events)
680688
return empty, nil
681689
}
682690

runtime/v2/shim/shim.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,13 @@ func run(id string, initFunc Init, config Config) error {
239239
return err
240240
}
241241
}
242+
243+
// NOTE: If the shim server is down(like oom killer), the address
244+
// socket might be leaking.
245+
if address, err := ReadAddress("address"); err == nil {
246+
_ = RemoveSocket(address)
247+
}
248+
242249
select {
243250
case <-publisher.Done():
244251
return nil
@@ -299,15 +306,11 @@ func serve(ctx context.Context, server *ttrpc.Server, path string) error {
299306
return err
300307
}
301308
go func() {
309+
defer l.Close()
302310
if err := server.Serve(ctx, l); err != nil &&
303311
!strings.Contains(err.Error(), "use of closed network connection") {
304312
logrus.WithError(err).Fatal("containerd-shim: ttrpc server failure")
305313
}
306-
l.Close()
307-
if address, err := ReadAddress("address"); err == nil {
308-
_ = RemoveSocket(address)
309-
}
310-
311314
}()
312315
return nil
313316
}

0 commit comments

Comments
 (0)