Skip to content

Commit 79f4c65

Browse files
Merge pull request #3755 from thaJeztah/1.2_backport_avoid_unnecessary_runc_state
[release/1.2 backport] backport exec fixes
2 parents ec48c95 + 0877136 commit 79f4c65

7 files changed

Lines changed: 127 additions & 34 deletions

File tree

runtime/v1/linux/proc/deleted_state.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,7 @@ func (s *deletedState) SetExited(status int) {
7070
func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
7171
return nil, errors.Errorf("cannot exec in a deleted state")
7272
}
73+
74+
func (s *deletedState) Status(ctx context.Context) (string, error) {
75+
return "stopped", nil
76+
}

runtime/v1/linux/proc/exec.go

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"golang.org/x/sys/unix"
3232

3333
"github.com/containerd/console"
34+
"github.com/containerd/containerd/errdefs"
3435
"github.com/containerd/containerd/runtime/proc"
3536
"github.com/containerd/fifo"
3637
runc "github.com/containerd/go-runc"
@@ -49,7 +50,7 @@ type execProcess struct {
4950
io runc.IO
5051
status int
5152
exited time.Time
52-
pid *safePid
53+
pid safePid
5354
closers []io.Closer
5455
stdin io.Closer
5556
stdio proc.Stdio
@@ -95,6 +96,7 @@ func (e *execProcess) setExited(status int) {
9596
e.status = status
9697
e.exited = time.Now()
9798
e.parent.Platform.ShutdownConsole(context.Background(), e.console)
99+
e.pid.set(StoppedPID)
98100
close(e.waitBlock)
99101
}
100102

@@ -106,7 +108,7 @@ func (e *execProcess) Delete(ctx context.Context) error {
106108
}
107109

108110
func (e *execProcess) delete(ctx context.Context) error {
109-
e.wg.Wait()
111+
waitTimeout(ctx, &e.wg, 2*time.Second)
110112
if e.io != nil {
111113
for _, c := range e.closers {
112114
c.Close()
@@ -142,7 +144,12 @@ func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error {
142144

143145
func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error {
144146
pid := e.pid.get()
145-
if pid != 0 {
147+
switch {
148+
case pid == 0:
149+
return errors.Wrap(errdefs.ErrFailedPrecondition, "process not created")
150+
case pid < 0:
151+
return errors.Wrapf(errdefs.ErrNotFound, "process already finished")
152+
default:
146153
if err := unix.Kill(pid, syscall.Signal(sig)); err != nil {
147154
return errors.Wrapf(checkKillError(err), "exec kill error")
148155
}
@@ -248,14 +255,5 @@ func (e *execProcess) Status(ctx context.Context) (string, error) {
248255
}
249256
e.mu.Lock()
250257
defer e.mu.Unlock()
251-
// if we don't have a pid then the exec process has just been created
252-
if e.pid.get() == 0 {
253-
return "created", nil
254-
}
255-
// if we have a pid and it can be signaled, the process is running
256-
if err := unix.Kill(e.pid.get(), 0); err == nil {
257-
return "running", nil
258-
}
259-
// else if we have a pid but it can nolonger be signaled, it has stopped
260-
return "stopped", nil
258+
return e.execState.Status(ctx)
261259
}

runtime/v1/linux/proc/exec_state.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type execState interface {
3131
Delete(context.Context) error
3232
Kill(context.Context, uint32, bool) error
3333
SetExited(int)
34+
Status(context.Context) (string, error)
3435
}
3536

3637
type execCreatedState struct {
@@ -82,6 +83,10 @@ func (s *execCreatedState) SetExited(status int) {
8283
}
8384
}
8485

86+
func (s *execCreatedState) Status(ctx context.Context) (string, error) {
87+
return "created", nil
88+
}
89+
8590
type execRunningState struct {
8691
p *execProcess
8792
}
@@ -120,6 +125,10 @@ func (s *execRunningState) SetExited(status int) {
120125
}
121126
}
122127

128+
func (s *execRunningState) Status(ctx context.Context) (string, error) {
129+
return "running", nil
130+
}
131+
123132
type execStoppedState struct {
124133
p *execProcess
125134
}
@@ -157,3 +166,7 @@ func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error
157166
func (s *execStoppedState) SetExited(status int) {
158167
// no op
159168
}
169+
170+
func (s *execStoppedState) Status(ctx context.Context) (string, error) {
171+
return "stopped", nil
172+
}

runtime/v1/linux/proc/init.go

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,17 @@ type Init struct {
5959

6060
WorkDir string
6161

62-
id string
63-
Bundle string
64-
console console.Console
65-
Platform proc.Platform
66-
io runc.IO
67-
runtime *runc.Runc
62+
id string
63+
Bundle string
64+
console console.Console
65+
Platform proc.Platform
66+
io runc.IO
67+
runtime *runc.Runc
68+
// pausing preserves the pausing state.
69+
pausing *atomicBool
6870
status int
6971
exited time.Time
70-
pid int
72+
pid safePid
7173
closers []io.Closer
7274
stdin io.Closer
7375
stdio proc.Stdio
@@ -99,6 +101,7 @@ func New(id string, runtime *runc.Runc, stdio proc.Stdio) *Init {
99101
p := &Init{
100102
id: id,
101103
runtime: runtime,
104+
pausing: new(atomicBool),
102105
stdio: stdio,
103106
status: 0,
104107
waitBlock: make(chan struct{}),
@@ -113,6 +116,9 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
113116
err error
114117
socket *runc.Socket
115118
)
119+
p.pid.Lock()
120+
defer p.pid.Unlock()
121+
116122
if r.Terminal {
117123
if socket, err = runc.NewTempConsoleSocket(); err != nil {
118124
return errors.Wrap(err, "failed to create OCI runtime console socket")
@@ -191,7 +197,7 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
191197
if err != nil {
192198
return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
193199
}
194-
p.pid = pid
200+
p.pid.pid = pid
195201
return nil
196202
}
197203

@@ -207,7 +213,7 @@ func (p *Init) ID() string {
207213

208214
// Pid of the process
209215
func (p *Init) Pid() int {
210-
return p.pid
216+
return p.pid.get()
211217
}
212218

213219
// ExitStatus of the process
@@ -228,17 +234,14 @@ func (p *Init) ExitedAt() time.Time {
228234

229235
// Status of the process
230236
func (p *Init) Status(ctx context.Context) (string, error) {
237+
if p.pausing.get() {
238+
return "pausing", nil
239+
}
240+
231241
p.mu.Lock()
232242
defer p.mu.Unlock()
233243

234-
c, err := p.runtime.State(ctx, p.id)
235-
if err != nil {
236-
if strings.Contains(err.Error(), "does not exist") {
237-
return "stopped", nil
238-
}
239-
return "", p.runtimeError(err, "OCI runtime state failed")
240-
}
241-
return c.Status, nil
244+
return p.initState.Status(ctx)
242245
}
243246

244247
// Start the init process
@@ -266,6 +269,7 @@ func (p *Init) setExited(status int) {
266269
p.exited = time.Now()
267270
p.status = status
268271
p.Platform.ShutdownConsole(context.Background(), p.console)
272+
p.pid.set(StoppedPID)
269273
close(p.waitBlock)
270274
}
271275

@@ -278,7 +282,7 @@ func (p *Init) Delete(ctx context.Context) error {
278282
}
279283

280284
func (p *Init) delete(ctx context.Context) error {
281-
p.wg.Wait()
285+
waitTimeout(ctx, &p.wg, 2*time.Second)
282286
err := p.runtime.Delete(ctx, p.id, nil)
283287
// ignore errors if a runtime has already deleted the process
284288
// but we still hold metadata and pipes
@@ -408,7 +412,6 @@ func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Proce
408412
Terminal: r.Terminal,
409413
},
410414
waitBlock: make(chan struct{}),
411-
pid: &safePid{},
412415
}
413416
e.execState = &execCreatedState{p: e}
414417
return e, nil

runtime/v1/linux/proc/init_state.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ type initState interface {
4343
Exec(context.Context, string, *ExecConfig) (proc.Process, error)
4444
Kill(context.Context, uint32, bool) error
4545
SetExited(int)
46+
Status(context.Context) (string, error)
4647
}
4748

4849
type createdState struct {
@@ -113,6 +114,10 @@ func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (pr
113114
return s.p.exec(ctx, path, r)
114115
}
115116

117+
func (s *createdState) Status(ctx context.Context) (string, error) {
118+
return "created", nil
119+
}
120+
116121
type createdCheckpointState struct {
117122
p *Init
118123
opts *runc.RestoreOpts
@@ -156,6 +161,9 @@ func (s *createdCheckpointState) Start(ctx context.Context) error {
156161
p := s.p
157162
sio := p.stdio
158163

164+
p.pid.Lock()
165+
defer p.pid.Unlock()
166+
159167
var (
160168
err error
161169
socket *runc.Socket
@@ -201,7 +209,7 @@ func (s *createdCheckpointState) Start(ctx context.Context) error {
201209
if err != nil {
202210
return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
203211
}
204-
p.pid = pid
212+
p.pid.pid = pid
205213
return s.transition("running")
206214
}
207215

@@ -228,6 +236,10 @@ func (s *createdCheckpointState) Exec(ctx context.Context, path string, r *ExecC
228236
return nil, errors.Errorf("cannot exec in a created state")
229237
}
230238

239+
func (s *createdCheckpointState) Status(ctx context.Context) (string, error) {
240+
return "created", nil
241+
}
242+
231243
type runningState struct {
232244
p *Init
233245
}
@@ -245,6 +257,13 @@ func (s *runningState) transition(name string) error {
245257
}
246258

247259
func (s *runningState) Pause(ctx context.Context) error {
260+
s.p.pausing.set(true)
261+
// NOTE "pausing" will be returned in the short window
262+
// after `transition("paused")`, before `pausing` is reset
263+
// to false. That doesn't break the state machine, just
264+
// delays the "paused" state a little bit.
265+
defer s.p.pausing.set(false)
266+
248267
if err := s.p.runtime.Pause(ctx, s.p.id); err != nil {
249268
return s.p.runtimeError(err, "OCI runtime pause failed")
250269
}
@@ -292,6 +311,10 @@ func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (pr
292311
return s.p.exec(ctx, path, r)
293312
}
294313

314+
func (s *runningState) Status(ctx context.Context) (string, error) {
315+
return "running", nil
316+
}
317+
295318
type pausedState struct {
296319
p *Init
297320
}
@@ -360,6 +383,10 @@ func (s *pausedState) Exec(ctx context.Context, path string, r *ExecConfig) (pro
360383
return nil, errors.Errorf("cannot exec in a paused state")
361384
}
362385

386+
func (s *pausedState) Status(ctx context.Context) (string, error) {
387+
return "paused", nil
388+
}
389+
363390
type stoppedState struct {
364391
p *Init
365392
}
@@ -416,3 +443,7 @@ func (s *stoppedState) SetExited(status int) {
416443
func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
417444
return nil, errors.Errorf("cannot exec in a stopped state")
418445
}
446+
447+
func (s *stoppedState) Status(ctx context.Context) (string, error) {
448+
return "stopped", nil
449+
}

runtime/v1/linux/proc/process.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ import (
2323
)
2424

2525
// RuncRoot is the path to the root runc state directory
26-
const RuncRoot = "/run/containerd/runc"
26+
const (
27+
RuncRoot = "/run/containerd/runc"
28+
// StoppedPID is the pid assigned after a container has run and stopped
29+
StoppedPID = -1
30+
)
2731

2832
func stateName(v interface{}) string {
2933
switch v.(type) {

runtime/v1/linux/proc/utils.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919
package proc
2020

2121
import (
22+
"context"
2223
"encoding/json"
2324
"io"
2425
"os"
2526
"strings"
2627
"sync"
28+
"sync/atomic"
2729
"time"
2830

2931
"github.com/containerd/containerd/errdefs"
@@ -44,6 +46,26 @@ func (s *safePid) get() int {
4446
return s.pid
4547
}
4648

49+
func (s *safePid) set(pid int) {
50+
s.Lock()
51+
s.pid = pid
52+
s.Unlock()
53+
}
54+
55+
type atomicBool int32
56+
57+
func (ab *atomicBool) set(b bool) {
58+
if b {
59+
atomic.StoreInt32((*int32)(ab), 1)
60+
} else {
61+
atomic.StoreInt32((*int32)(ab), 0)
62+
}
63+
}
64+
65+
func (ab *atomicBool) get() bool {
66+
return atomic.LoadInt32((*int32)(ab)) == 1
67+
}
68+
4769
// TODO(mlaventure): move to runc package?
4870
func getLastRuntimeError(r *runc.Runc) (string, error) {
4971
if r.Log == "" {
@@ -117,3 +139,21 @@ func checkKillError(err error) error {
117139
func hasNoIO(r *CreateConfig) bool {
118140
return r.Stdin == "" && r.Stdout == "" && r.Stderr == ""
119141
}
142+
143+
// waitTimeout handles waiting on a waitgroup with a specified timeout.
144+
// this is commonly used for waiting on IO to finish after a process has exited
145+
func waitTimeout(ctx context.Context, wg *sync.WaitGroup, timeout time.Duration) error {
146+
ctx, cancel := context.WithTimeout(ctx, timeout)
147+
defer cancel()
148+
done := make(chan struct{}, 1)
149+
go func() {
150+
wg.Wait()
151+
close(done)
152+
}()
153+
select {
154+
case <-done:
155+
return nil
156+
case <-ctx.Done():
157+
return ctx.Err()
158+
}
159+
}

0 commit comments

Comments
 (0)