Skip to content

Commit 517ba44

Browse files
committed
Merge Container and State mutexes
Resolved all deadlocks and fixed race between kill and monitor.resetContainer Fixes moby#7600 Signed-off-by: Alexandr Morozov <[email protected]>
1 parent d9f8d3e commit 517ba44

4 files changed

Lines changed: 47 additions & 50 deletions

File tree

daemon/container.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"path"
1111
"path/filepath"
1212
"strings"
13-
"sync"
1413
"syscall"
1514
"time"
1615

@@ -43,7 +42,7 @@ var (
4342
)
4443

4544
type Container struct {
46-
sync.Mutex
45+
*State
4746
root string // Path to the "home" of the container, including metadata.
4847
basefs string // Path to the graphdriver mountpoint
4948

@@ -55,7 +54,6 @@ type Container struct {
5554
Args []string
5655

5756
Config *runconfig.Config
58-
State *State
5957
Image string
6058

6159
NetworkSettings *NetworkSettings
@@ -276,7 +274,7 @@ func (container *Container) Start() (err error) {
276274
container.Lock()
277275
defer container.Unlock()
278276

279-
if container.State.IsRunning() {
277+
if container.State.Running {
280278
return nil
281279
}
282280

@@ -526,11 +524,11 @@ func (container *Container) KillSig(sig int) error {
526524
defer container.Unlock()
527525

528526
// We could unpause the container for them rather than returning this error
529-
if container.State.IsPaused() {
527+
if container.State.Paused {
530528
return fmt.Errorf("Container %s is paused. Unpause the container before stopping", container.ID)
531529
}
532530

533-
if !container.State.IsRunning() {
531+
if !container.State.Running {
534532
return nil
535533
}
536534

@@ -541,7 +539,7 @@ func (container *Container) KillSig(sig int) error {
541539
// if the container is currently restarting we do not need to send the signal
542540
// to the process. Telling the monitor that it should exit on it's next event
543541
// loop is enough
544-
if container.State.IsRestarting() {
542+
if container.State.Restarting {
545543
return nil
546544
}
547545

daemon/list.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func (daemon *Daemon) Containers(job *engine.Job) engine.Status {
7070
writeCont := func(container *Container) error {
7171
container.Lock()
7272
defer container.Unlock()
73-
if !container.State.IsRunning() && !all && n <= 0 && since == "" && before == "" {
73+
if !container.State.Running && !all && n <= 0 && since == "" && before == "" {
7474
return nil
7575
}
7676
if before != "" && !foundBefore {
@@ -87,7 +87,7 @@ func (daemon *Daemon) Containers(job *engine.Job) engine.Status {
8787
return errLast
8888
}
8989
}
90-
if len(filt_exited) > 0 && !container.State.IsRunning() {
90+
if len(filt_exited) > 0 && !container.State.Running {
9191
should_skip := true
9292
for _, code := range filt_exited {
9393
if code == container.State.GetExitCode() {

daemon/monitor.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ func (m *containerMonitor) Start() error {
110110
defer func() {
111111
if afterRun {
112112
m.container.Lock()
113-
m.container.State.SetStopped(exitStatus)
113+
m.container.State.setStopped(exitStatus)
114114
defer m.container.Unlock()
115115
}
116116
m.Close()
@@ -123,7 +123,7 @@ func (m *containerMonitor) Start() error {
123123
m.container.RestartCount++
124124

125125
if err := m.container.startLoggingToDisk(); err != nil {
126-
m.resetContainer()
126+
m.resetContainer(false)
127127

128128
return err
129129
}
@@ -138,7 +138,7 @@ func (m *containerMonitor) Start() error {
138138
// if we receive an internal error from the initial start of a container then lets
139139
// return it instead of entering the restart loop
140140
if m.container.RestartCount == 0 {
141-
m.resetContainer()
141+
m.resetContainer(false)
142142

143143
return err
144144
}
@@ -154,7 +154,7 @@ func (m *containerMonitor) Start() error {
154154
if m.shouldRestart(exitStatus) {
155155
m.container.State.SetRestarting(exitStatus)
156156
m.container.LogEvent("die")
157-
m.resetContainer()
157+
m.resetContainer(true)
158158

159159
// sleep with a small time increment between each restart to help avoid issues cased by quickly
160160
// restarting the container because of some types of errors ( networking cut out, etc... )
@@ -168,7 +168,7 @@ func (m *containerMonitor) Start() error {
168168
continue
169169
}
170170
m.container.LogEvent("die")
171-
m.resetContainer()
171+
m.resetContainer(true)
172172
return err
173173
}
174174
}
@@ -243,7 +243,7 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
243243
}
244244
}
245245

246-
m.container.State.SetRunning(command.Pid())
246+
m.container.State.setRunning(command.Pid())
247247

248248
// signal that the process has started
249249
// close channel only if not closed
@@ -260,8 +260,13 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
260260

261261
// resetContainer resets the container's IO and ensures that the command is able to be executed again
262262
// by copying the data into a new struct
263-
func (m *containerMonitor) resetContainer() {
263+
// if lock is true, then container locked during reset
264+
func (m *containerMonitor) resetContainer(lock bool) {
264265
container := m.container
266+
if lock {
267+
container.Lock()
268+
defer container.Unlock()
269+
}
265270

266271
if container.Config.OpenStdin {
267272
if err := container.stdin.Close(); err != nil {

daemon/state.go

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package daemon
22

33
import (
4-
"encoding/json"
54
"fmt"
65
"sync"
76
"time"
@@ -10,7 +9,7 @@ import (
109
)
1110

1211
type State struct {
13-
sync.RWMutex
12+
sync.Mutex
1413
Running bool
1514
Paused bool
1615
Restarting bool
@@ -29,9 +28,6 @@ func NewState() *State {
2928

3029
// String returns a human-readable description of the state
3130
func (s *State) String() string {
32-
s.RLock()
33-
defer s.RUnlock()
34-
3531
if s.Running {
3632
if s.Paused {
3733
return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
@@ -50,16 +46,6 @@ func (s *State) String() string {
5046
return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
5147
}
5248

53-
type jState State
54-
55-
// MarshalJSON for state is needed to avoid race conditions on inspect
56-
func (s *State) MarshalJSON() ([]byte, error) {
57-
s.RLock()
58-
b, err := json.Marshal(jState(*s))
59-
s.RUnlock()
60-
return b, err
61-
}
62-
6349
func wait(waitChan <-chan struct{}, timeout time.Duration) error {
6450
if timeout < 0 {
6551
<-waitChan
@@ -77,14 +63,14 @@ func wait(waitChan <-chan struct{}, timeout time.Duration) error {
7763
// immediatly. If you want wait forever you must supply negative timeout.
7864
// Returns pid, that was passed to SetRunning
7965
func (s *State) WaitRunning(timeout time.Duration) (int, error) {
80-
s.RLock()
81-
if s.IsRunning() {
66+
s.Lock()
67+
if s.Running {
8268
pid := s.Pid
83-
s.RUnlock()
69+
s.Unlock()
8470
return pid, nil
8571
}
8672
waitChan := s.waitChan
87-
s.RUnlock()
73+
s.Unlock()
8874
if err := wait(waitChan, timeout); err != nil {
8975
return -1, err
9076
}
@@ -95,43 +81,48 @@ func (s *State) WaitRunning(timeout time.Duration) (int, error) {
9581
// immediatly. If you want wait forever you must supply negative timeout.
9682
// Returns exit code, that was passed to SetStopped
9783
func (s *State) WaitStop(timeout time.Duration) (int, error) {
98-
s.RLock()
84+
s.Lock()
9985
if !s.Running {
10086
exitCode := s.ExitCode
101-
s.RUnlock()
87+
s.Unlock()
10288
return exitCode, nil
10389
}
10490
waitChan := s.waitChan
105-
s.RUnlock()
91+
s.Unlock()
10692
if err := wait(waitChan, timeout); err != nil {
10793
return -1, err
10894
}
10995
return s.GetExitCode(), nil
11096
}
11197

11298
func (s *State) IsRunning() bool {
113-
s.RLock()
99+
s.Lock()
114100
res := s.Running
115-
s.RUnlock()
101+
s.Unlock()
116102
return res
117103
}
118104

119105
func (s *State) GetPid() int {
120-
s.RLock()
106+
s.Lock()
121107
res := s.Pid
122-
s.RUnlock()
108+
s.Unlock()
123109
return res
124110
}
125111

126112
func (s *State) GetExitCode() int {
127-
s.RLock()
113+
s.Lock()
128114
res := s.ExitCode
129-
s.RUnlock()
115+
s.Unlock()
130116
return res
131117
}
132118

133119
func (s *State) SetRunning(pid int) {
134120
s.Lock()
121+
s.setRunning(pid)
122+
s.Unlock()
123+
}
124+
125+
func (s *State) setRunning(pid int) {
135126
s.Running = true
136127
s.Paused = false
137128
s.Restarting = false
@@ -140,19 +131,22 @@ func (s *State) SetRunning(pid int) {
140131
s.StartedAt = time.Now().UTC()
141132
close(s.waitChan) // fire waiters for start
142133
s.waitChan = make(chan struct{})
143-
s.Unlock()
144134
}
145135

146136
func (s *State) SetStopped(exitCode int) {
147137
s.Lock()
138+
s.setStopped(exitCode)
139+
s.Unlock()
140+
}
141+
142+
func (s *State) setStopped(exitCode int) {
148143
s.Running = false
149144
s.Restarting = false
150145
s.Pid = 0
151146
s.FinishedAt = time.Now().UTC()
152147
s.ExitCode = exitCode
153148
close(s.waitChan) // fire waiters for stop
154149
s.waitChan = make(chan struct{})
155-
s.Unlock()
156150
}
157151

158152
// SetRestarting is when docker hanldes the auto restart of containers when they are
@@ -172,9 +166,9 @@ func (s *State) SetRestarting(exitCode int) {
172166
}
173167

174168
func (s *State) IsRestarting() bool {
175-
s.RLock()
169+
s.Lock()
176170
res := s.Restarting
177-
s.RUnlock()
171+
s.Unlock()
178172
return res
179173
}
180174

@@ -191,8 +185,8 @@ func (s *State) SetUnpaused() {
191185
}
192186

193187
func (s *State) IsPaused() bool {
194-
s.RLock()
188+
s.Lock()
195189
res := s.Paused
196-
s.RUnlock()
190+
s.Unlock()
197191
return res
198192
}

0 commit comments

Comments
 (0)