Skip to content

Commit 4f4aad0

Browse files
committed
Persist container and sandbox if resource cleanup fails, like teardownPodNetwork
Signed-off-by: Qiutong Song <[email protected]>
1 parent 7a66f70 commit 4f4aad0

6 files changed

Lines changed: 161 additions & 85 deletions

File tree

pkg/cri/server/container_update_resources_other.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ import (
2323
"context"
2424
"fmt"
2525

26+
"github.com/containerd/containerd"
27+
"github.com/containerd/containerd/containers"
28+
"github.com/containerd/typeurl"
29+
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
2630
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2731

2832
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
@@ -44,3 +48,19 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
4448
}
4549
return &runtime.UpdateContainerResourcesResponse{}, nil
4650
}
51+
52+
// TODO: Copied from container_update_resources.go because that file is not built for darwin.
53+
// updateContainerSpec updates container spec.
54+
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
55+
any, err := typeurl.MarshalAny(spec)
56+
if err != nil {
57+
return fmt.Errorf("failed to marshal spec %+v: %w", spec, err)
58+
}
59+
if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
60+
c.Spec = any
61+
return nil
62+
}); err != nil {
63+
return fmt.Errorf("failed to update container spec: %w", err)
64+
}
65+
return nil
66+
}

pkg/cri/server/sandbox_run.go

Lines changed: 107 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,22 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
7070
}
7171
name := makeSandboxName(metadata)
7272
log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)
73+
74+
// cleanupErr records the last error returned by the critical cleanup operations in deferred functions,
75+
// like CNI teardown and stopping the running sandbox task.
76+
// If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox
77+
// in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow.
78+
var cleanupErr error
79+
7380
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
7481
// same sandbox.
7582
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
7683
return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err)
7784
}
7885
defer func() {
79-
// Release the name if the function returns with an error.
80-
if retErr != nil {
86+
// Release the name if the function returns with an error and all the resource cleanup is done.
87+
// When cleanupErr != nil, the name will be cleaned in sandbox_remove.
88+
if retErr != nil && cleanupErr == nil {
8189
c.sandboxNameIndex.ReleaseByName(name)
8290
}
8391
}()
@@ -111,70 +119,13 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
111119
}
112120
log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)
113121

114-
podNetwork := true
115-
116-
if goruntime.GOOS != "windows" &&
117-
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
118-
// Pod network is not needed on linux with host network.
119-
podNetwork = false
120-
}
121-
if goruntime.GOOS == "windows" &&
122-
config.GetWindows().GetSecurityContext().GetHostProcess() {
123-
//Windows HostProcess pods can only run on the host network
124-
podNetwork = false
125-
}
126-
127-
if podNetwork {
128-
netStart := time.Now()
129-
// If it is not in host network namespace then create a namespace and set the sandbox
130-
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
131-
// namespaces. If the pod is in host network namespace then both are empty and should not
132-
// be used.
133-
var netnsMountDir = "/var/run/netns"
134-
if c.config.NetNSMountsUnderStateDir {
135-
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
136-
}
137-
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
138-
if err != nil {
139-
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
140-
}
141-
sandbox.NetNSPath = sandbox.NetNS.GetPath()
142-
defer func() {
143-
if retErr != nil {
144-
deferCtx, deferCancel := ctrdutil.DeferContext()
145-
defer deferCancel()
146-
// Teardown network if an error is returned.
147-
if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil {
148-
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
149-
}
150-
151-
if err := sandbox.NetNS.Remove(); err != nil {
152-
log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
153-
}
154-
sandbox.NetNSPath = ""
155-
}
156-
}()
157-
158-
// Setup network for sandbox.
159-
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
160-
// rely on the assumption that CRI shim will not be querying the network namespace to check the
161-
// network states such as IP.
162-
// In future runtime implementation should avoid relying on CRI shim implementation details.
163-
// In this case however caching the IP will add a subtle performance enhancement by avoiding
164-
// calls to network namespace of the pod to query the IP of the veth interface on every
165-
// SandboxStatus request.
166-
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
167-
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
168-
}
169-
sandboxCreateNetworkTimer.UpdateSince(netStart)
170-
}
171-
172122
runtimeStart := time.Now()
173123
// Create sandbox container.
174124
// NOTE: sandboxContainerSpec SHOULD NOT have side
175125
// effect, e.g. accessing/creating files, so that we can test
176126
// it safely.
177-
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
127+
// NOTE: the network namespace path will be created later and update through updateNetNamespacePath function
128+
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations)
178129
if err != nil {
179130
return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err)
180131
}
@@ -222,12 +173,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
222173
if err != nil {
223174
return nil, fmt.Errorf("failed to create containerd container: %w", err)
224175
}
176+
177+
// Add container into sandbox store in INIT state.
178+
sandbox.Container = container
179+
225180
defer func() {
226-
if retErr != nil {
181+
// Put the sandbox into sandbox store when the some resource fails to be cleaned.
182+
if retErr != nil && cleanupErr != nil {
183+
log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id)
184+
if err := c.sandboxStore.Add(sandbox); err != nil {
185+
log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox)
186+
}
187+
}
188+
}()
189+
190+
defer func() {
191+
// Delete container only if all the resource cleanup is done.
192+
if retErr != nil && cleanupErr == nil {
227193
deferCtx, deferCancel := ctrdutil.DeferContext()
228194
defer deferCancel()
229-
if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
230-
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
195+
if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil {
196+
log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id)
231197
}
232198
}
233199
}()
@@ -281,6 +247,82 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
281247
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
282248
}
283249

250+
podNetwork := true
251+
252+
if goruntime.GOOS != "windows" &&
253+
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
254+
// Pod network is not needed on linux with host network.
255+
podNetwork = false
256+
}
257+
if goruntime.GOOS == "windows" &&
258+
config.GetWindows().GetSecurityContext().GetHostProcess() {
259+
// Windows HostProcess pods can only run on the host network
260+
podNetwork = false
261+
}
262+
263+
if podNetwork {
264+
netStart := time.Now()
265+
266+
// If it is not in host network namespace then create a namespace and set the sandbox
267+
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
268+
// namespaces. If the pod is in host network namespace then both are empty and should not
269+
// be used.
270+
var netnsMountDir = "/var/run/netns"
271+
if c.config.NetNSMountsUnderStateDir {
272+
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
273+
}
274+
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
275+
if err != nil {
276+
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
277+
}
278+
sandbox.NetNSPath = sandbox.NetNS.GetPath()
279+
280+
defer func() {
281+
// Remove the network namespace only if all the resource cleanup is done.
282+
if retErr != nil && cleanupErr == nil {
283+
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
284+
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
285+
return
286+
}
287+
sandbox.NetNSPath = ""
288+
}
289+
}()
290+
291+
// Update network namespace in the container's spec
292+
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
293+
if err := updateContainerSpec(ctx, container, spec); err != nil {
294+
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
295+
}
296+
297+
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
298+
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
299+
defer func() {
300+
// Teardown the network only if all the resource cleanup is done.
301+
if retErr != nil && cleanupErr == nil {
302+
deferCtx, deferCancel := ctrdutil.DeferContext()
303+
defer deferCancel()
304+
// Teardown network if an error is returned.
305+
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
306+
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
307+
}
308+
}
309+
}()
310+
311+
// Setup network for sandbox.
312+
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
313+
// rely on the assumption that CRI shim will not be querying the network namespace to check the
314+
// network states such as IP.
315+
// In future runtime implementation should avoid relying on CRI shim implementation details.
316+
// In this case however caching the IP will add a subtle performance enhancement by avoiding
317+
// calls to network namespace of the pod to query the IP of the veth interface on every
318+
// SandboxStatus request.
319+
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
320+
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
321+
}
322+
323+
sandboxCreateNetworkTimer.UpdateSince(netStart)
324+
}
325+
284326
// Create sandbox task in containerd.
285327
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
286328
id, name)
@@ -301,6 +343,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
301343
// Cleanup the sandbox container if an error is returned.
302344
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
303345
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
346+
cleanupErr = err
304347
}
305348
}
306349
}()
@@ -339,9 +382,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
339382
return nil, fmt.Errorf("failed to update sandbox status: %w", err)
340383
}
341384

342-
// Add sandbox into sandbox store in INIT state.
343-
sandbox.Container = container
344-
345385
if err := c.sandboxStore.Add(sandbox); err != nil {
346386
return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err)
347387
}

pkg/cri/server/sandbox_run_linux.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,3 +348,12 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
348348

349349
return taskOpts
350350
}
351+
352+
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
353+
for i := range spec.Linux.Namespaces {
354+
if spec.Linux.Namespaces[i].Type == runtimespec.NetworkNamespace {
355+
spec.Linux.Namespaces[i].Path = nsPath
356+
break
357+
}
358+
}
359+
}

pkg/cri/server/sandbox_run_other.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,6 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
5454
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
5555
return []containerd.NewTaskOpts{}
5656
}
57+
58+
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
59+
}

pkg/cri/server/sandbox_run_windows.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,7 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
111111
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
112112
return nil
113113
}
114+
115+
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
116+
spec.Windows.Network.NetworkNamespace = nsPath
117+
}

pkg/cri/store/sandbox/status.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,24 @@ import (
2929
// | |
3030
// | Create(Run) | Load
3131
// | |
32-
// Start | |
33-
// (failed) | |
34-
// +------------------+ +-----------+
35-
// | | | |
36-
// | | | |
37-
// | | | |
38-
// | | Start(Run) | |
39-
// | | | |
40-
// | PortForward +----v----+ | |
41-
// | +------+ | | |
42-
// | | | READY <---------+ |
43-
// | +------> | | |
44-
// | +----+----+ | |
45-
// | | | |
46-
// | | Stop/Exit | |
47-
// | | | |
48-
// | +----v----+ | |
49-
// | | <---------+ +----v----+
32+
// | |
33+
// | | Start
34+
// | |(failed and not cleaned)
35+
// Start |--------------|--------------+
36+
//(failed but cleaned)| | |
37+
// +------------------+ |-----------+ |
38+
// | | Start(Run) | | |
39+
// | | | | |
40+
// | PortForward +----v----+ | | |
41+
// | +------+ | | | |
42+
// | | | READY <---------+ | |
43+
// | +------> | | | |
44+
// | +----+----+ | | |
45+
// | | | | |
46+
// | | Stop/Exit | | |
47+
// | | | | |
48+
// | +----v----+ | | |
49+
// | | <---------+ +----v--v-+
5050
// | | NOTREADY| | |
5151
// | | <----------------+ UNKNOWN |
5252
// | +----+----+ Stop | |

0 commit comments

Comments
 (0)