Skip to content

Name resolution stuck due to deadlock between different network struct methods #2632

@EdMcBane

Description

@EdMcBane

As per the title, a bug is present in the latest version of libnetwork (both master and the version used by Docker 20.10.6) that causes name resolution and most docker commands (e.g. docker run) to stop working and hang indefinitely.

The bug is caused by different methods of network acquiring the locks on the controller and network instances in an inconsistent order.

Specifically, network.ResolveName (and others) lock (in order):

  • controller
  • network (self), via network.ID()

network.getSvcRecords (called by endpoint.Leave via endpoint.sbLeave) instead locks:

  • network (self)
  • controller

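This creates a race condition in which a deadlock occasionally occurs, as shown by the following two stack traces: goroutine 37031 is blocked in network.ID (called from network.ResolveName) waiting for the network lock, while goroutine 36782 is blocked in network.getSvcRecords waiting for the controller lock: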

goroutine 37031 [semacquire, 664 minutes]:
sync.runtime_SemacquireMutex(0xc0015f48b4, 0x56222028e400, 0x1)
	/usr/local/go/src/runtime/sema.go:71 +0x49
sync.(*Mutex).lockSlow(0xc0015f48b0)
	/usr/local/go/src/sync/mutex.go:138 +0xfe
sync.(*Mutex).Lock(...)
	/usr/local/go/src/sync/mutex.go:81
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*network).ID(0xc0015f4700, 0x0, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/network.go:254 +0xbd
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*network).ResolveName(0xc0015f4700, 0xc001a45910, 0xc, 0x2, 0x0, 0x0, 0x0, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/network.go:1977 +0xdb
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*sandbox).resolveName(0xc002ccc240, 0xc001a45910, 0xc, 0x56221da98540, 0x0, 0xc00067f558, 0x1, 0x1, 0xc002165d00, 0x2, ...)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/sandbox.go:624 +0x194
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*sandbox).ResolveName(0xc002ccc240, 0xc001a45910, 0xd, 0x2, 0xc0019fbbc0, 0xc, 0x56221c00faea, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/sandbox.go:579 +0x6c1
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*resolver).handleIPQuery(0xc000e780e0, 0xc001a45910, 0xd, 0xc002846000, 0x2, 0x0, 0x0, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/resolver.go:246 +0x74
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*resolver).ServeDNS(0xc000e780e0, 0x56221eed5fa0, 0xc0022683c0, 0xc002846000)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/resolver.go:375 +0x2027
github.com/docker/docker/vendor/github.com/miekg/dns.(*Server).serveDNS(0xc000e6a000, 0xc0028a9e00, 0x1e, 0x200, 0xc0022683c0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/miekg/dns/server.go:609 +0x2e2
github.com/docker/docker/vendor/github.com/miekg/dns.(*Server).serveUDPPacket(0xc000e6a000, 0xc0024f5650, 0xc0028a9e00, 0x1e, 0x200, 0xc001016838, 0xc002817a80)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/miekg/dns/server.go:549 +0xb4
created by github.com/docker/docker/vendor/github.com/miekg/dns.(*Server).serveUDP
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/miekg/dns/server.go:479 +0x2ae

goroutine 36782 [semacquire, 664 minutes]:
sync.runtime_SemacquireMutex(0xc0001bb0fc, 0x56221ce39100, 0x1)
	/usr/local/go/src/runtime/sema.go:71 +0x49
sync.(*Mutex).lockSlow(0xc0001bb0f8)
	/usr/local/go/src/sync/mutex.go:138 +0xfe
sync.(*Mutex).Lock(...)
	/usr/local/go/src/sync/mutex.go:81
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*network).getSvcRecords(0xc0015f4700, 0xc000a78000, 0x0, 0x0, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/network.go:1487 +0x6b5
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*endpoint).sbLeave(0xc000a78000, 0xc0019fc900, 0x0, 0x0, 0x0, 0x0, 0xc0019606f8, 0xc001960740)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/endpoint.go:785 +0x67c
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*endpoint).Leave(0xc0018d7b80, 0x56221eee8d00, 0xc0019fc900, 0x0, 0x0, 0x0, 0x0, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/endpoint.go:707 +0x1b5
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*sandbox).delete(0xc0019fc900, 0xc0001bb000, 0x5622201ef5f0, 0xc001a9a820)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/sandbox.go:224 +0x2cb
github.com/docker/docker/vendor/github.com/docker/libnetwork.(*sandbox).Delete(0xc0019fc900, 0xc002c4acc0, 0x40)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/vendor/github.com/docker/libnetwork/sandbox.go:185 +0x32
github.com/docker/docker/daemon.(*Daemon).releaseNetwork(0xc00000c1e0, 0xc000f28f00)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/daemon/container_operations.go:1048 +0x539
github.com/docker/docker/daemon.(*Daemon).Cleanup(0xc00000c1e0, 0xc000f28f00)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/daemon/start.go:229 +0x52
github.com/docker/docker/daemon.(*Daemon).handleContainerExit(0xc00000c1e0, 0xc000f28f00, 0xc0019612f8, 0x0, 0x0)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/daemon/monitor.go:72 +0x725
github.com/docker/docker/daemon.(*Daemon).ProcessEvent(0xc00000c1e0, 0xc002c4bdc0, 0x40, 0x56221da99b03, 0x4, 0xc002c4bdc0, 0x40, 0xc002c4be00, 0x40, 0x4ddf0, ...)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/daemon/monitor.go:129 +0x6c6
github.com/docker/docker/libcontainerd/remote.(*client).processEvent.func1()
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/libcontainerd/remote/client.go:677 +0x109
github.com/docker/docker/libcontainerd/queue.(*Queue).Append.func1(0xc002c4bd00, 0x0, 0xc001efba70, 0xc0011dd4a0, 0xc0008b8270, 0xc002c4bdc0, 0x40)
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/libcontainerd/queue/queue.go:28 +0x3a
created by github.com/docker/docker/libcontainerd/queue.(*Queue).Append
	/root/rpmbuild/BUILD/src/engine/.gopath/src/github.com/docker/docker/libcontainerd/queue/queue.go:24 +0x1cf

This in turn blocks every subsequent access to the network and the controller, because any caller that needs either lock waits forever.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions