@@ -38,64 +38,48 @@ import (
3838
3939// StartContainer starts the container.
4040func (c * criService ) StartContainer (ctx context.Context , r * runtime.StartContainerRequest ) (retRes * runtime.StartContainerResponse , retErr error ) {
41- container , err := c .containerStore .Get (r .GetContainerId ())
41+ cntr , err := c .containerStore .Get (r .GetContainerId ())
4242 if err != nil {
4343 return nil , errors .Wrapf (err , "an error occurred when try to find container %q" , r .GetContainerId ())
4444 }
4545
46- var startErr error
47- // update container status in one transaction to avoid race with event monitor.
48- if err := container .Status .UpdateSync (func (status containerstore.Status ) (containerstore.Status , error ) {
49- // Always apply status change no matter startContainer fails or not. Because startContainer
50- // may change container state no matter it fails or succeeds.
51- startErr = c .startContainer (ctx , container , & status )
52- return status , nil
53- }); startErr != nil {
54- return nil , startErr
55- } else if err != nil {
56- return nil , errors .Wrapf (err , "failed to update container %q metadata" , container .ID )
57- }
58- return & runtime.StartContainerResponse {}, nil
59- }
60-
61- // startContainer actually starts the container. The function needs to be run in one transaction. Any updates
62- // to the status passed in will be applied no matter the function returns error or not.
63- func (c * criService ) startContainer (ctx context.Context ,
64- cntr containerstore.Container ,
65- status * containerstore.Status ) (retErr error ) {
6646 id := cntr .ID
6747 meta := cntr .Metadata
6848 container := cntr .Container
6949 config := meta .Config
7050
71- // Return error if container is not in created state.
72- if status .State () != runtime .ContainerState_CONTAINER_CREATED {
73- return errors .Errorf ("container %q is in %s state" , id , criContainerStateToString (status .State ()))
74- }
75- // Do not start the container when there is a removal in progress.
76- if status .Removing {
77- return errors .Errorf ("container %q is in removing state" , id )
51+ // Set starting state to prevent other start/remove operations against this container
52+ // while it's being started.
53+ if err := setContainerStarting (cntr ); err != nil {
54+ return nil , errors .Wrapf (err , "failed to set starting state for container %q" , id )
7855 }
79-
8056 defer func () {
8157 if retErr != nil {
8258 // Set container to exited if fail to start.
83- status .Pid = 0
84- status .FinishedAt = time .Now ().UnixNano ()
85- status .ExitCode = errorStartExitCode
86- status .Reason = errorStartReason
87- status .Message = retErr .Error ()
59+ if err := cntr .Status .UpdateSync (func (status containerstore.Status ) (containerstore.Status , error ) {
60+ status .Pid = 0
61+ status .FinishedAt = time .Now ().UnixNano ()
62+ status .ExitCode = errorStartExitCode
63+ status .Reason = errorStartReason
64+ status .Message = retErr .Error ()
65+ return status , nil
66+ }); err != nil {
67+ logrus .WithError (err ).Errorf ("failed to set start failure state for container %q" , id )
68+ }
69+ }
70+ if err := resetContainerStarting (cntr ); err != nil {
71+ logrus .WithError (err ).Errorf ("failed to reset starting state for container %q" , id )
8872 }
8973 }()
9074
9175 // Get sandbox config from sandbox store.
9276 sandbox , err := c .sandboxStore .Get (meta .SandboxID )
9377 if err != nil {
94- return errors .Wrapf (err , "sandbox %q not found" , meta .SandboxID )
78+ return nil , errors .Wrapf (err , "sandbox %q not found" , meta .SandboxID )
9579 }
9680 sandboxID := meta .SandboxID
9781 if sandbox .Status .Get ().State != sandboxstore .StateReady {
98- return errors .Errorf ("sandbox container %q is not running" , sandboxID )
82+ return nil , errors .Errorf ("sandbox container %q is not running" , sandboxID )
9983 }
10084
10185 ioCreation := func (id string ) (_ containerdio.IO , err error ) {
@@ -110,7 +94,7 @@ func (c *criService) startContainer(ctx context.Context,
11094
11195 ctrInfo , err := container .Info (ctx )
11296 if err != nil {
113- return errors .Wrap (err , "failed to get container info" )
97+ return nil , errors .Wrap (err , "failed to get container info" )
11498 }
11599
116100 var taskOpts []containerd.NewTaskOpts
@@ -120,7 +104,7 @@ func (c *criService) startContainer(ctx context.Context,
120104 }
121105 task , err := container .NewTask (ctx , ioCreation , taskOpts ... )
122106 if err != nil {
123- return errors .Wrap (err , "failed to create containerd task" )
107+ return nil , errors .Wrap (err , "failed to create containerd task" )
124108 }
125109 defer func () {
126110 if retErr != nil {
@@ -133,15 +117,61 @@ func (c *criService) startContainer(ctx context.Context,
133117 }
134118 }()
135119
120+ // wait is a long running background request, no timeout needed.
121+ exitCh , err := task .Wait (ctrdutil .NamespacedContext ())
122+ if err != nil {
123+ return nil , errors .Wrap (err , "failed to wait for containerd task" )
124+ }
125+
136126 // Start containerd task.
137127 if err := task .Start (ctx ); err != nil {
138- return errors .Wrapf (err , "failed to start containerd task %q" , id )
128+ return nil , errors .Wrapf (err , "failed to start containerd task %q" , id )
139129 }
140130
141131 // Update container start timestamp.
142- status .Pid = task .Pid ()
143- status .StartedAt = time .Now ().UnixNano ()
144- return nil
132+ if err := cntr .Status .UpdateSync (func (status containerstore.Status ) (containerstore.Status , error ) {
133+ status .Pid = task .Pid ()
134+ status .StartedAt = time .Now ().UnixNano ()
135+ return status , nil
136+ }); err != nil {
137+ return nil , errors .Wrapf (err , "failed to update container %q state" , id )
138+ }
139+
140+ // start the monitor after updating container state, this ensures that
141+ // event monitor receives the TaskExit event and update container state
142+ // after this.
143+ c .eventMonitor .startExitMonitor (context .Background (), id , task .Pid (), exitCh )
144+
145+ return & runtime.StartContainerResponse {}, nil
146+ }
147+
148+ // setContainerStarting sets the container into starting state. In starting state, the
149+ // container will not be removed or started again.
150+ func setContainerStarting (container containerstore.Container ) error {
151+ return container .Status .Update (func (status containerstore.Status ) (containerstore.Status , error ) {
152+ // Return error if container is not in created state.
153+ if status .State () != runtime .ContainerState_CONTAINER_CREATED {
154+ return status , errors .Errorf ("container is in %s state" , criContainerStateToString (status .State ()))
155+ }
156+ // Do not start the container when there is a removal in progress.
157+ if status .Removing {
158+ return status , errors .New ("container is in removing state, can't be started" )
159+ }
160+ if status .Starting {
161+ return status , errors .New ("container is already in starting state" )
162+ }
163+ status .Starting = true
164+ return status , nil
165+ })
166+ }
167+
168+ // resetContainerStarting resets the container starting state on start failure. So
169+ // that we could remove the container later.
170+ func resetContainerStarting (container containerstore.Container ) error {
171+ return container .Status .Update (func (status containerstore.Status ) (containerstore.Status , error ) {
172+ status .Starting = false
173+ return status , nil
174+ })
145175}
146176
147177// createContainerLoggers creates container loggers and return write closer for stdout and stderr.
0 commit comments