Skip to content

Commit 11a7751

Browse files
fuweid and cyyzero
authored and committed
*: add runc-fp as runc wrapper to inject failpoint
Signed-off-by: Wei Fu <[email protected]>
1 parent 68dd47e commit 11a7751

5 files changed

Lines changed: 265 additions & 0 deletions

File tree

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,11 @@ bin/cni-bridge-fp: integration/failpoint/cmd/cni-bridge-fp FORCE
236236
@echo "$(WHALE) $@"
237237
@$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/cni-bridge-fp
238238

239+
# Build runc-fp, a runc wrapper that supports failpoint injection; only used by the integration tests.
240+
bin/runc-fp: integration/failpoint/cmd/runc-fp FORCE
241+
@echo "$(WHALE) $@"
242+
@$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/runc-fp
243+
239244
benchmark: ## run benchmarks tests
240245
@echo "$(WHALE) $@"
241246
@$(GO) test ${TESTFLAGS} -bench . -run Benchmark -test.root

integration/client/container_linux_test.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ import (
4141
"github.com/containerd/containerd/plugin"
4242
"github.com/containerd/containerd/runtime/v2/runc/options"
4343
"github.com/containerd/containerd/sys"
44+
4445
"github.com/opencontainers/runtime-spec/specs-go"
46+
"github.com/stretchr/testify/require"
4547
exec "golang.org/x/sys/execabs"
4648
"golang.org/x/sys/unix"
4749
)
@@ -1417,3 +1419,80 @@ func TestShimOOMScore(t *testing.T) {
14171419
case <-statusC:
14181420
}
14191421
}
1422+
1423+
// TestIssue9103 is used as regression case for issue 9103.
1424+
//
1425+
// The runc-fp will kill the init process so that the shim should return stopped
1426+
// status after container.NewTask. It's used to simulate that the runc-init
1427+
// might be killed by oom-kill.
1428+
func TestIssue9103(t *testing.T) {
1429+
if os.Getenv("RUNC_FLAVOR") == "crun" {
1430+
t.Skip("skip it when using crun")
1431+
}
1432+
1433+
client, err := newClient(t, address)
1434+
require.NoError(t, err)
1435+
defer client.Close()
1436+
1437+
var (
1438+
image Image
1439+
ctx, cancel = testContext(t)
1440+
id = t.Name()
1441+
)
1442+
defer cancel()
1443+
1444+
image, err = client.GetImage(ctx, testImage)
1445+
require.NoError(t, err)
1446+
1447+
for idx, tc := range []struct {
1448+
desc string
1449+
cntrOpts []NewContainerOpts
1450+
expectedStatus ProcessStatus
1451+
}{
1452+
{
1453+
desc: "should be created status",
1454+
cntrOpts: []NewContainerOpts{
1455+
WithNewSpec(oci.WithImageConfig(image),
1456+
withProcessArgs("sleep", "30"),
1457+
),
1458+
},
1459+
expectedStatus: Created,
1460+
},
1461+
{
1462+
desc: "should be stopped status if init has been killed",
1463+
cntrOpts: []NewContainerOpts{
1464+
WithNewSpec(oci.WithImageConfig(image),
1465+
withProcessArgs("sleep", "30"),
1466+
oci.WithAnnotations(map[string]string{
1467+
"oci.runc.failpoint.profile": "issue9103",
1468+
}),
1469+
),
1470+
WithRuntime(client.Runtime(), &options.Options{
1471+
BinaryName: "runc-fp",
1472+
}),
1473+
},
1474+
expectedStatus: Stopped,
1475+
},
1476+
} {
1477+
tc := tc
1478+
tName := fmt.Sprintf("%s%d", id, idx)
1479+
t.Run(tc.desc, func(t *testing.T) {
1480+
container, err := client.NewContainer(ctx, tName,
1481+
append([]NewContainerOpts{WithNewSnapshot(tName, image)}, tc.cntrOpts...)...,
1482+
)
1483+
require.NoError(t, err)
1484+
defer container.Delete(ctx, WithSnapshotCleanup)
1485+
1486+
cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
1487+
task, err := container.NewTask(cctx, empty())
1488+
ccancel()
1489+
require.NoError(t, err)
1490+
1491+
defer task.Delete(ctx, WithProcessKill)
1492+
1493+
status, err := task.Status(ctx)
1494+
require.NoError(t, err)
1495+
require.Equal(t, status.Status, tc.expectedStatus)
1496+
})
1497+
}
1498+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package main
20+
21+
import (
22+
"context"
23+
"fmt"
24+
"os"
25+
"strconv"
26+
"strings"
27+
"syscall"
28+
"time"
29+
)
30+
31+
// issue9103KillInitAfterCreate kills the runc.Init process after creating
32+
// command returns successfully.
33+
//
34+
// REF: https://github.com/containerd/containerd/issues/9103
35+
func issue9103KillInitAfterCreate(ctx context.Context, method invoker) error {
36+
isCreated := strings.Contains(strings.Join(os.Args, ","), ",create,")
37+
38+
if err := method(ctx); err != nil {
39+
return err
40+
}
41+
42+
if !isCreated {
43+
return nil
44+
}
45+
46+
initPidPath := "init.pid"
47+
data, err := os.ReadFile(initPidPath)
48+
if err != nil {
49+
return fmt.Errorf("failed to read %s: %w", initPidPath, err)
50+
}
51+
52+
pid, err := strconv.Atoi(string(data))
53+
if err != nil {
54+
return fmt.Errorf("failed to get init pid from string %s: %w", string(data), err)
55+
}
56+
57+
if pid <= 0 {
58+
return fmt.Errorf("unexpected init pid %v", pid)
59+
}
60+
61+
if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
62+
return fmt.Errorf("failed to kill the init pid %v: %w", pid, err)
63+
}
64+
65+
// Ensure that the containerd-shim has received the SIGCHLD and start
66+
// to cleanup
67+
time.Sleep(3 * time.Second)
68+
return nil
69+
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package main
20+
21+
import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"runtime"
	"syscall"

	"github.com/containerd/containerd/oci"
	"github.com/sirupsen/logrus"
)
31+
32+
const (
33+
failpointProfileKey = "oci.runc.failpoint.profile"
34+
)
35+
36+
type invoker func(context.Context) error
37+
38+
type invokerInterceptor func(context.Context, invoker) error
39+
40+
var (
41+
failpointProfiles = map[string]invokerInterceptor{
42+
"issue9103": issue9103KillInitAfterCreate,
43+
}
44+
)
45+
46+
// setupLog setups messages into log file.
47+
func setupLog() {
48+
// containerd/go-runc always add --log option
49+
idx := 2
50+
for ; idx < len(os.Args); idx++ {
51+
if os.Args[idx] == "--log" {
52+
break
53+
}
54+
}
55+
56+
if idx >= len(os.Args)-1 || os.Args[idx] != "--log" {
57+
panic("option --log required")
58+
}
59+
60+
logFile := os.Args[idx+1]
61+
f, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644)
62+
if err != nil {
63+
panic(fmt.Errorf("failed to open %s: %w", logFile, err))
64+
}
65+
66+
logrus.SetOutput(f)
67+
logrus.SetFormatter(new(logrus.JSONFormatter))
68+
}
69+
70+
func main() {
71+
setupLog()
72+
73+
fpProfile, err := failpointProfileFromOCIAnnotation()
74+
if err != nil {
75+
logrus.WithError(err).Fatal("failed to get failpoint profile")
76+
}
77+
78+
ctx := context.Background()
79+
if err := fpProfile(ctx, defaultRuncInvoker); err != nil {
80+
logrus.WithError(err).Fatal("failed to exec failpoint profile")
81+
}
82+
}
83+
84+
// defaultRuncInvoker calls the runc command with the same arguments this
// wrapper received and waits for it to exit, returning the error from
// cmd.Run.
//
// The child is started with Pdeathsig set to SIGKILL. The kernel delivers
// that signal when the thread that created the child terminates, which may
// happen before the process itself exits, so the calling goroutine is locked
// to its OS thread for as long as the child runs.
func defaultRuncInvoker(ctx context.Context) error {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	cmd := exec.CommandContext(ctx, "runc", os.Args[1:]...)
	cmd.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGKILL}
	return cmd.Run()
}
90+
91+
// failpointProfileFromOCIAnnotation gets the profile from OCI annotations.
92+
func failpointProfileFromOCIAnnotation() (invokerInterceptor, error) {
93+
spec, err := oci.ReadSpec(oci.ConfigFilename)
94+
if err != nil {
95+
return nil, fmt.Errorf("failed to read %s: %w", oci.ConfigFilename, err)
96+
}
97+
98+
profileName, ok := spec.Annotations[failpointProfileKey]
99+
if !ok {
100+
return nil, fmt.Errorf("failpoint profile is required")
101+
}
102+
103+
fp, ok := failpointProfiles[profileName]
104+
if !ok {
105+
return nil, fmt.Errorf("no such failpoint profile %s", profileName)
106+
}
107+
return fp, nil
108+
}

script/setup/install-failpoint-binaries

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,7 @@ sudo install bin/cni-bridge-fp "${CNI_BIN_DIR}"
3333
SHIM_BIN_DIR=${SHIM_BIN_DIR:-"/usr/local/bin"}
3434
make bin/containerd-shim-runc-fp-v1
3535
sudo install bin/containerd-shim-runc-fp-v1 "${SHIM_BIN_DIR}"
36+
37+
RUNCFP_BIN_DIR=${RUNCFP_BIN_DIR:-"/usr/local/bin"}
38+
make bin/runc-fp
39+
sudo install bin/runc-fp "${RUNCFP_BIN_DIR}"

0 commit comments

Comments
 (0)