Skip to content

Commit d0a1fed

Browse files
fuweid authored and thaJeztah committed
*: add runc-fp as runc wrapper to inject failpoint
Signed-off-by: Wei Fu <[email protected]> (cherry picked from commit 11a7751) Signed-off-by: Sebastiaan van Stijn <[email protected]>
1 parent 0449124 commit d0a1fed

5 files changed

Lines changed: 268 additions & 0 deletions

File tree

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,11 @@ bin/cni-bridge-fp: integration/failpoint/cmd/cni-bridge-fp FORCE
234234
@echo "$(WHALE) $@"
235235
@$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/cni-bridge-fp
236236

237+
# build runc-fp as runc wrapper to support failpoint, only used by integration test
238+
bin/runc-fp: integration/failpoint/cmd/runc-fp FORCE
239+
@echo "$(WHALE) $@"
240+
@$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/runc-fp
241+
237242
benchmark: ## run benchmarks tests
238243
@echo "$(WHALE) $@"
239244
@$(GO) test ${TESTFLAGS} -bench . -run Benchmark -test.root

integration/client/container_linux_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ import (
4242
"github.com/containerd/containerd/runtime/linux/runctypes"
4343
"github.com/containerd/containerd/runtime/v2/runc/options"
4444
"github.com/containerd/containerd/sys"
45+
4546
"github.com/opencontainers/runtime-spec/specs-go"
47+
"github.com/stretchr/testify/require"
4648
exec "golang.org/x/sys/execabs"
4749
"golang.org/x/sys/unix"
4850
)
@@ -1498,3 +1500,83 @@ func TestShimOOMScore(t *testing.T) {
14981500
case <-statusC:
14991501
}
15001502
}
1503+
1504+
// TestIssue9103 is used as regression case for issue 9103.
1505+
//
1506+
// The runc-fp will kill the init process so that the shim should return stopped
1507+
// status after container.NewTask. It's used to simulate that the runc-init
1508+
// might be killed by oom-kill.
1509+
func TestIssue9103(t *testing.T) {
1510+
if os.Getenv("RUNC_FLAVOR") == "crun" {
1511+
t.Skip("skip it when using crun")
1512+
}
1513+
if getRuntimeVersion() == "v1" {
1514+
t.Skip("skip it when using shim v1")
1515+
}
1516+
1517+
client, err := newClient(t, address)
1518+
require.NoError(t, err)
1519+
defer client.Close()
1520+
1521+
var (
1522+
image Image
1523+
ctx, cancel = testContext(t)
1524+
id = t.Name()
1525+
)
1526+
defer cancel()
1527+
1528+
image, err = client.GetImage(ctx, testImage)
1529+
require.NoError(t, err)
1530+
1531+
for idx, tc := range []struct {
1532+
desc string
1533+
cntrOpts []NewContainerOpts
1534+
expectedStatus ProcessStatus
1535+
}{
1536+
{
1537+
desc: "should be created status",
1538+
cntrOpts: []NewContainerOpts{
1539+
WithNewSpec(oci.WithImageConfig(image),
1540+
withProcessArgs("sleep", "30"),
1541+
),
1542+
},
1543+
expectedStatus: Created,
1544+
},
1545+
{
1546+
desc: "should be stopped status if init has been killed",
1547+
cntrOpts: []NewContainerOpts{
1548+
WithNewSpec(oci.WithImageConfig(image),
1549+
withProcessArgs("sleep", "30"),
1550+
oci.WithAnnotations(map[string]string{
1551+
"oci.runc.failpoint.profile": "issue9103",
1552+
}),
1553+
),
1554+
WithRuntime(client.Runtime(), &options.Options{
1555+
BinaryName: "runc-fp",
1556+
}),
1557+
},
1558+
expectedStatus: Stopped,
1559+
},
1560+
} {
1561+
tc := tc
1562+
tName := fmt.Sprintf("%s%d", id, idx)
1563+
t.Run(tc.desc, func(t *testing.T) {
1564+
container, err := client.NewContainer(ctx, tName,
1565+
append([]NewContainerOpts{WithNewSnapshot(tName, image)}, tc.cntrOpts...)...,
1566+
)
1567+
require.NoError(t, err)
1568+
defer container.Delete(ctx, WithSnapshotCleanup)
1569+
1570+
cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
1571+
task, err := container.NewTask(cctx, empty())
1572+
ccancel()
1573+
require.NoError(t, err)
1574+
1575+
defer task.Delete(ctx, WithProcessKill)
1576+
1577+
status, err := task.Status(ctx)
1578+
require.NoError(t, err)
1579+
require.Equal(t, status.Status, tc.expectedStatus)
1580+
})
1581+
}
1582+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package main
20+
21+
import (
22+
"context"
23+
"fmt"
24+
"os"
25+
"strconv"
26+
"strings"
27+
"syscall"
28+
"time"
29+
)
30+
31+
// issue9103KillInitAfterCreate kills the runc.Init process after creating
32+
// command returns successfully.
33+
//
34+
// REF: https://github.com/containerd/containerd/issues/9103
35+
func issue9103KillInitAfterCreate(ctx context.Context, method invoker) error {
36+
isCreated := strings.Contains(strings.Join(os.Args, ","), ",create,")
37+
38+
if err := method(ctx); err != nil {
39+
return err
40+
}
41+
42+
if !isCreated {
43+
return nil
44+
}
45+
46+
initPidPath := "init.pid"
47+
data, err := os.ReadFile(initPidPath)
48+
if err != nil {
49+
return fmt.Errorf("failed to read %s: %w", initPidPath, err)
50+
}
51+
52+
pid, err := strconv.Atoi(string(data))
53+
if err != nil {
54+
return fmt.Errorf("failed to get init pid from string %s: %w", string(data), err)
55+
}
56+
57+
if pid <= 0 {
58+
return fmt.Errorf("unexpected init pid %v", pid)
59+
}
60+
61+
if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
62+
return fmt.Errorf("failed to kill the init pid %v: %w", pid, err)
63+
}
64+
65+
// Ensure that the containerd-shim has received the SIGCHLD and start
66+
// to cleanup
67+
time.Sleep(3 * time.Second)
68+
return nil
69+
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package main
20+
21+
import (
22+
"context"
23+
"fmt"
24+
"os"
25+
"os/exec"
26+
"syscall"
27+
28+
"github.com/containerd/containerd/oci"
29+
"github.com/sirupsen/logrus"
30+
)
31+
32+
const (
33+
failpointProfileKey = "oci.runc.failpoint.profile"
34+
)
35+
36+
type invoker func(context.Context) error
37+
38+
type invokerInterceptor func(context.Context, invoker) error
39+
40+
var (
41+
failpointProfiles = map[string]invokerInterceptor{
42+
"issue9103": issue9103KillInitAfterCreate,
43+
}
44+
)
45+
46+
// setupLog setups messages into log file.
47+
func setupLog() {
48+
// containerd/go-runc always add --log option
49+
idx := 2
50+
for ; idx < len(os.Args); idx++ {
51+
if os.Args[idx] == "--log" {
52+
break
53+
}
54+
}
55+
56+
if idx >= len(os.Args)-1 || os.Args[idx] != "--log" {
57+
panic("option --log required")
58+
}
59+
60+
logFile := os.Args[idx+1]
61+
f, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644)
62+
if err != nil {
63+
panic(fmt.Errorf("failed to open %s: %w", logFile, err))
64+
}
65+
66+
logrus.SetOutput(f)
67+
logrus.SetFormatter(new(logrus.JSONFormatter))
68+
}
69+
70+
func main() {
71+
setupLog()
72+
73+
fpProfile, err := failpointProfileFromOCIAnnotation()
74+
if err != nil {
75+
logrus.WithError(err).Fatal("failed to get failpoint profile")
76+
}
77+
78+
ctx := context.Background()
79+
if err := fpProfile(ctx, defaultRuncInvoker); err != nil {
80+
logrus.WithError(err).Fatal("failed to exec failpoint profile")
81+
}
82+
}
83+
84+
// defaultRuncInvoker is to call the runc command with same arguments.
//
// The child inherits this process's stdin, stdout and stderr so the wrapper is
// transparent to its caller: anything runc prints reaches whoever invoked the
// wrapper. The returned error is whatever running the command reports,
// including a non-zero exit status.
func defaultRuncInvoker(ctx context.Context) error {
	cmd := exec.CommandContext(ctx, "runc", os.Args[1:]...)
	// Left nil, os/exec connects these to the null device, which would
	// silently drop runc's output and any input meant for it.
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	// Make sure runc does not outlive the wrapper.
	cmd.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGKILL}
	return cmd.Run()
}
90+
91+
// failpointProfileFromOCIAnnotation gets the profile from OCI annotations.
92+
func failpointProfileFromOCIAnnotation() (invokerInterceptor, error) {
93+
spec, err := oci.ReadSpec(oci.ConfigFilename)
94+
if err != nil {
95+
return nil, fmt.Errorf("failed to read %s: %w", oci.ConfigFilename, err)
96+
}
97+
98+
profileName, ok := spec.Annotations[failpointProfileKey]
99+
if !ok {
100+
return nil, fmt.Errorf("failpoint profile is required")
101+
}
102+
103+
fp, ok := failpointProfiles[profileName]
104+
if !ok {
105+
return nil, fmt.Errorf("no such failpoint profile %s", profileName)
106+
}
107+
return fp, nil
108+
}

script/setup/install-failpoint-binaries

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,7 @@ sudo install bin/cni-bridge-fp "${CNI_BIN_DIR}"
3333
SHIM_BIN_DIR=${SHIM_BIN_DIR:-"/usr/local/bin"}
3434
make bin/containerd-shim-runc-fp-v1
3535
sudo install bin/containerd-shim-runc-fp-v1 "${SHIM_BIN_DIR}"
36+
37+
RUNCFP_BIN_DIR=${RUNCFP_BIN_DIR:-"/usr/local/bin"}
38+
make bin/runc-fp
39+
sudo install bin/runc-fp "${RUNCFP_BIN_DIR}"

0 commit comments

Comments
 (0)