@@ -70,14 +70,22 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
7070 }
7171 name := makeSandboxName (metadata )
7272 log .G (ctx ).WithField ("podsandboxid" , id ).Debugf ("generated id for sandbox name %q" , name )
73+
74+ // cleanupErr records the last error returned by the critical cleanup operations in deferred functions,
75+ // like CNI teardown and stopping the running sandbox task.
76+ // If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox
77+ // in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow.
78+ var cleanupErr error
79+
7380 // Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
7481 // same sandbox.
7582 if err := c .sandboxNameIndex .Reserve (name , id ); err != nil {
7683 return nil , fmt .Errorf ("failed to reserve sandbox name %q: %w" , name , err )
7784 }
7885 defer func () {
79- // Release the name if the function returns with an error.
80- if retErr != nil {
86+ // Release the name if the function returns with an error and all the resource cleanup is done.
87+ // When cleanupErr != nil, the name will be cleaned in sandbox_remove.
88+ if retErr != nil && cleanupErr == nil {
8189 c .sandboxNameIndex .ReleaseByName (name )
8290 }
8391 }()
@@ -111,70 +119,13 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
111119 }
112120 log .G (ctx ).WithField ("podsandboxid" , id ).Debugf ("use OCI runtime %+v" , ociRuntime )
113121
114- podNetwork := true
115-
116- if goruntime .GOOS != "windows" &&
117- config .GetLinux ().GetSecurityContext ().GetNamespaceOptions ().GetNetwork () == runtime .NamespaceMode_NODE {
118- // Pod network is not needed on linux with host network.
119- podNetwork = false
120- }
121- if goruntime .GOOS == "windows" &&
122- config .GetWindows ().GetSecurityContext ().GetHostProcess () {
123- //Windows HostProcess pods can only run on the host network
124- podNetwork = false
125- }
126-
127- if podNetwork {
128- netStart := time .Now ()
129- // If it is not in host network namespace then create a namespace and set the sandbox
130- // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
131- // namespaces. If the pod is in host network namespace then both are empty and should not
132- // be used.
133- var netnsMountDir = "/var/run/netns"
134- if c .config .NetNSMountsUnderStateDir {
135- netnsMountDir = filepath .Join (c .config .StateDir , "netns" )
136- }
137- sandbox .NetNS , err = netns .NewNetNS (netnsMountDir )
138- if err != nil {
139- return nil , fmt .Errorf ("failed to create network namespace for sandbox %q: %w" , id , err )
140- }
141- sandbox .NetNSPath = sandbox .NetNS .GetPath ()
142- defer func () {
143- if retErr != nil {
144- deferCtx , deferCancel := ctrdutil .DeferContext ()
145- defer deferCancel ()
146- // Teardown network if an error is returned.
147- if err := c .teardownPodNetwork (deferCtx , sandbox ); err != nil {
148- log .G (ctx ).WithError (err ).Errorf ("Failed to destroy network for sandbox %q" , id )
149- }
150-
151- if err := sandbox .NetNS .Remove (); err != nil {
152- log .G (ctx ).WithError (err ).Errorf ("Failed to remove network namespace %s for sandbox %q" , sandbox .NetNSPath , id )
153- }
154- sandbox .NetNSPath = ""
155- }
156- }()
157-
158- // Setup network for sandbox.
159- // Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
160- // rely on the assumption that CRI shim will not be querying the network namespace to check the
161- // network states such as IP.
162- // In future runtime implementation should avoid relying on CRI shim implementation details.
163- // In this case however caching the IP will add a subtle performance enhancement by avoiding
164- // calls to network namespace of the pod to query the IP of the veth interface on every
165- // SandboxStatus request.
166- if err := c .setupPodNetwork (ctx , & sandbox ); err != nil {
167- return nil , fmt .Errorf ("failed to setup network for sandbox %q: %w" , id , err )
168- }
169- sandboxCreateNetworkTimer .UpdateSince (netStart )
170- }
171-
172122 runtimeStart := time .Now ()
173123 // Create sandbox container.
174124 // NOTE: sandboxContainerSpec SHOULD NOT have side
175125 // effect, e.g. accessing/creating files, so that we can test
176126 // it safely.
177- spec , err := c .sandboxContainerSpec (id , config , & image .ImageSpec .Config , sandbox .NetNSPath , ociRuntime .PodAnnotations )
127+ // NOTE: the network namespace path will be created later and update through updateNetNamespacePath function
128+ spec , err := c .sandboxContainerSpec (id , config , & image .ImageSpec .Config , "" , ociRuntime .PodAnnotations )
178129 if err != nil {
179130 return nil , fmt .Errorf ("failed to generate sandbox container spec: %w" , err )
180131 }
@@ -222,12 +173,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
222173 if err != nil {
223174 return nil , fmt .Errorf ("failed to create containerd container: %w" , err )
224175 }
176+
177+ // Add container into sandbox store in INIT state.
178+ sandbox .Container = container
179+
225180 defer func () {
226- if retErr != nil {
181+ // Put the sandbox into sandbox store when the some resource fails to be cleaned.
182+ if retErr != nil && cleanupErr != nil {
183+ log .G (ctx ).WithError (cleanupErr ).Errorf ("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN" , id )
184+ if err := c .sandboxStore .Add (sandbox ); err != nil {
185+ log .G (ctx ).WithError (err ).Errorf ("failed to add sandbox %+v into store" , sandbox )
186+ }
187+ }
188+ }()
189+
190+ defer func () {
191+ // Delete container only if all the resource cleanup is done.
192+ if retErr != nil && cleanupErr == nil {
227193 deferCtx , deferCancel := ctrdutil .DeferContext ()
228194 defer deferCancel ()
229- if err : = container .Delete (deferCtx , containerd .WithSnapshotCleanup ); err != nil {
230- log .G (ctx ).WithError (err ).Errorf ("Failed to delete containerd container %q" , id )
195+ if cleanupErr = container .Delete (deferCtx , containerd .WithSnapshotCleanup ); cleanupErr != nil {
196+ log .G (ctx ).WithError (cleanupErr ).Errorf ("Failed to delete containerd container %q" , id )
231197 }
232198 }
233199 }()
@@ -281,6 +247,82 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
281247 return nil , fmt .Errorf ("failed to get sandbox container info: %w" , err )
282248 }
283249
250+ podNetwork := true
251+
252+ if goruntime .GOOS != "windows" &&
253+ config .GetLinux ().GetSecurityContext ().GetNamespaceOptions ().GetNetwork () == runtime .NamespaceMode_NODE {
254+ // Pod network is not needed on linux with host network.
255+ podNetwork = false
256+ }
257+ if goruntime .GOOS == "windows" &&
258+ config .GetWindows ().GetSecurityContext ().GetHostProcess () {
259+ // Windows HostProcess pods can only run on the host network
260+ podNetwork = false
261+ }
262+
263+ if podNetwork {
264+ netStart := time .Now ()
265+
266+ // If it is not in host network namespace then create a namespace and set the sandbox
267+ // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
268+ // namespaces. If the pod is in host network namespace then both are empty and should not
269+ // be used.
270+ var netnsMountDir = "/var/run/netns"
271+ if c .config .NetNSMountsUnderStateDir {
272+ netnsMountDir = filepath .Join (c .config .StateDir , "netns" )
273+ }
274+ sandbox .NetNS , err = netns .NewNetNS (netnsMountDir )
275+ if err != nil {
276+ return nil , fmt .Errorf ("failed to create network namespace for sandbox %q: %w" , id , err )
277+ }
278+ sandbox .NetNSPath = sandbox .NetNS .GetPath ()
279+
280+ defer func () {
281+ // Remove the network namespace only if all the resource cleanup is done.
282+ if retErr != nil && cleanupErr == nil {
283+ if cleanupErr = sandbox .NetNS .Remove (); cleanupErr != nil {
284+ log .G (ctx ).WithError (cleanupErr ).Errorf ("Failed to remove network namespace %s for sandbox %q" , sandbox .NetNSPath , id )
285+ return
286+ }
287+ sandbox .NetNSPath = ""
288+ }
289+ }()
290+
291+ // Update network namespace in the container's spec
292+ c .updateNetNamespacePath (spec , sandbox .NetNSPath )
293+ if err := updateContainerSpec (ctx , container , spec ); err != nil {
294+ return nil , fmt .Errorf ("failed to update the network namespace for the sandbox container %q: %w" , id , err )
295+ }
296+
297+ // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
298+ // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
299+ defer func () {
300+ // Teardown the network only if all the resource cleanup is done.
301+ if retErr != nil && cleanupErr == nil {
302+ deferCtx , deferCancel := ctrdutil .DeferContext ()
303+ defer deferCancel ()
304+ // Teardown network if an error is returned.
305+ if cleanupErr = c .teardownPodNetwork (deferCtx , sandbox ); cleanupErr != nil {
306+ log .G (ctx ).WithError (cleanupErr ).Errorf ("Failed to destroy network for sandbox %q" , id )
307+ }
308+ }
309+ }()
310+
311+ // Setup network for sandbox.
312+ // Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
313+ // rely on the assumption that CRI shim will not be querying the network namespace to check the
314+ // network states such as IP.
315+ // In future runtime implementation should avoid relying on CRI shim implementation details.
316+ // In this case however caching the IP will add a subtle performance enhancement by avoiding
317+ // calls to network namespace of the pod to query the IP of the veth interface on every
318+ // SandboxStatus request.
319+ if err := c .setupPodNetwork (ctx , & sandbox ); err != nil {
320+ return nil , fmt .Errorf ("failed to setup network for sandbox %q: %w" , id , err )
321+ }
322+
323+ sandboxCreateNetworkTimer .UpdateSince (netStart )
324+ }
325+
284326 // Create sandbox task in containerd.
285327 log .G (ctx ).Tracef ("Create sandbox container (id=%q, name=%q)." ,
286328 id , name )
@@ -301,6 +343,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
301343 // Cleanup the sandbox container if an error is returned.
302344 if _ , err := task .Delete (deferCtx , WithNRISandboxDelete (id ), containerd .WithProcessKill ); err != nil && ! errdefs .IsNotFound (err ) {
303345 log .G (ctx ).WithError (err ).Errorf ("Failed to delete sandbox container %q" , id )
346+ cleanupErr = err
304347 }
305348 }
306349 }()
@@ -339,9 +382,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
339382 return nil , fmt .Errorf ("failed to update sandbox status: %w" , err )
340383 }
341384
342- // Add sandbox into sandbox store in INIT state.
343- sandbox .Container = container
344-
345385 if err := c .sandboxStore .Add (sandbox ); err != nil {
346386 return nil , fmt .Errorf ("failed to add sandbox %+v into store: %w" , sandbox , err )
347387 }
0 commit comments