@@ -26,16 +26,24 @@ import (
2626 "path/filepath"
2727 "runtime"
2828 "strings"
29+ "sync"
30+ "syscall"
2931 "testing"
32+ "time"
3033
34+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
35+ "github.com/stretchr/testify/assert"
3136 "github.com/stretchr/testify/require"
3237 criapiv1 "k8s.io/cri-api/pkg/apis/runtime/v1"
3338
39+ "github.com/containerd/containerd/pkg/cri/store/sandbox"
3440 "github.com/containerd/containerd/pkg/failpoint"
41+ "github.com/containerd/typeurl"
3542)
3643
3744const (
3845 failpointRuntimeHandler = "runc-fp"
46+ failpointCNIBinary = "cni-bridge-fp"
3947
4048 failpointShimPrefixKey = "io.containerd.runtime.v2.shim.failpoint."
4149
@@ -138,7 +146,7 @@ func TestRunPodSandboxWithShimDeleteFailure(t *testing.T) {
138146
139147 if restart {
140148 t .Log ("Restart containerd" )
141- RestartContainerd (t )
149+ RestartContainerd (t , syscall . SIGTERM )
142150
143151 t .Log ("ListPodSandbox with the specific label" )
144152 l , err = runtimeService .ListPodSandbox (& criapiv1.PodSandboxFilter {Id : sb .Id })
@@ -211,7 +219,7 @@ func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) {
211219
212220 if restart {
213221 t .Log ("Restart containerd" )
214- RestartContainerd (t )
222+ RestartContainerd (t , syscall . SIGTERM )
215223
216224 t .Log ("ListPodSandbox with the specific label" )
217225 l , err = runtimeService .ListPodSandbox (& criapiv1.PodSandboxFilter {Id : sb .Id })
@@ -229,6 +237,132 @@ func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) {
229237 t .Run ("JustCleanup" , testCase (false ))
230238}
231239
240+ // TestRunPodSandboxWithShimStartAndTeardownCNISlow should keep the sandbox
241+ // record if failed to rollback CNI API.
242+ func TestRunPodSandboxAndTeardownCNISlow (t * testing.T ) {
243+ if runtime .GOOS != "linux" {
244+ t .Skip ()
245+ }
246+ if os .Getenv ("ENABLE_CRI_SANDBOXES" ) != "" {
247+ t .Skip ()
248+ }
249+
250+ t .Log ("Init PodSandboxConfig with specific key" )
251+ sbName := t .Name ()
252+ labels := map [string ]string {
253+ sbName : "true" ,
254+ }
255+ sbConfig := PodSandboxConfig (sbName , "failpoint" , WithPodLabels (labels ))
256+
257+ t .Log ("Inject CNI failpoint" )
258+ conf := & failpointConf {
259+ // Delay 1 day
260+ Add : "1*delay(86400000)" ,
261+ }
262+ injectCNIFailpoint (t , sbConfig , conf )
263+
264+ var wg sync.WaitGroup
265+ wg .Add (1 )
266+
267+ go func () {
268+ defer wg .Done ()
269+ t .Log ("Create a sandbox" )
270+ _ , err := runtimeService .RunPodSandbox (sbConfig , failpointRuntimeHandler )
271+ require .Error (t , err )
272+ require .ErrorContains (t , err , "error reading from server: EOF" )
273+ }()
274+
275+ assert .NoError (t , ensureCNIAddRunning (t , sbName ), "check that failpoint CNI.Add is running" )
276+
277+ // Use SIGKILL to prevent containerd server gracefulshutdown which may cause indeterministic invocation of defer functions
278+ t .Log ("Restart containerd" )
279+ RestartContainerd (t , syscall .SIGKILL )
280+
281+ wg .Wait ()
282+
283+ t .Log ("ListPodSandbox with the specific label" )
284+ l , err := runtimeService .ListPodSandbox (& criapiv1.PodSandboxFilter {LabelSelector : labels })
285+ require .NoError (t , err )
286+ require .Len (t , l , 1 )
287+
288+ sb := l [0 ]
289+
290+ defer func () {
291+ t .Log ("Cleanup leaky sandbox" )
292+ err := runtimeService .StopPodSandbox (sb .Id )
293+ assert .NoError (t , err )
294+ err = runtimeService .RemovePodSandbox (sb .Id )
295+ require .NoError (t , err )
296+ }()
297+
298+ assert .Equal (t , sb .State , criapiv1 .PodSandboxState_SANDBOX_NOTREADY )
299+ assert .Equal (t , sb .Metadata .Name , sbConfig .Metadata .Name )
300+ assert .Equal (t , sb .Metadata .Namespace , sbConfig .Metadata .Namespace )
301+ assert .Equal (t , sb .Metadata .Uid , sbConfig .Metadata .Uid )
302+ assert .Equal (t , sb .Metadata .Attempt , sbConfig .Metadata .Attempt )
303+
304+ t .Log ("Get sandbox info" )
305+ _ , info , err := SandboxInfo (sb .Id )
306+ require .NoError (t , err )
307+ require .False (t , info .NetNSClosed )
308+
309+ var netNS string
310+ for _ , n := range info .RuntimeSpec .Linux .Namespaces {
311+ if n .Type == runtimespec .NetworkNamespace {
312+ netNS = n .Path
313+ }
314+ }
315+ assert .NotEmpty (t , netNS , "network namespace should be set" )
316+
317+ t .Log ("Get sandbox container" )
318+ c , err := GetContainer (sb .Id )
319+ require .NoError (t , err )
320+ any , ok := c .Extensions ["io.cri-containerd.sandbox.metadata" ]
321+ require .True (t , ok , "sandbox metadata should exist in extension" )
322+ i , err := typeurl .UnmarshalAny (any )
323+ require .NoError (t , err )
324+ require .IsType (t , & sandbox.Metadata {}, i )
325+ metadata , ok := i .(* sandbox.Metadata )
326+ require .True (t , ok )
327+ assert .NotEmpty (t , metadata .NetNSPath )
328+ assert .Equal (t , netNS , metadata .NetNSPath , "network namespace path should be the same in runtime spec and sandbox metadata" )
329+ }
330+
331+ func ensureCNIAddRunning (t * testing.T , sbName string ) error {
332+ return Eventually (func () (bool , error ) {
333+ pids , err := PidsOf (failpointCNIBinary )
334+ if err != nil || len (pids ) == 0 {
335+ return false , err
336+ }
337+
338+ for _ , pid := range pids {
339+ envs , err := PidEnvs (pid )
340+ if err != nil {
341+ t .Logf ("failed to read environ of pid %v: %v: skip it" , pid , err )
342+ continue
343+ }
344+
345+ args , ok := envs ["CNI_ARGS" ]
346+ if ! ok {
347+ t .Logf ("expected CNI_ARGS env but got nothing, skip pid=%v" , pid )
348+ continue
349+ }
350+
351+ for _ , arg := range strings .Split (args , ";" ) {
352+ kv := strings .SplitN (arg , "=" , 2 )
353+ if len (kv ) != 2 {
354+ continue
355+ }
356+
357+ if kv [0 ] == "K8S_POD_NAME" && kv [1 ] == sbName {
358+ return true , nil
359+ }
360+ }
361+ }
362+ return false , nil
363+ }, time .Second , 30 * time .Second )
364+ }
365+
232366// failpointConf is used to describe cmdAdd/cmdDel/cmdCheck command's failpoint.
233367type failpointConf struct {
234368 Add string `json:"cmdAdd"`
0 commit comments