Skip to content

Commit e48bbe8

Browse files
committed
add runc shim support for sched core
In Linux 5.14 and hopefully some backports, core scheduling allows processes to be co-scheduled within the same domain on SMT-enabled systems. The containerd implementation sets the core sched domain when launching a shim. This provides a clean way for each shim (container/pod) to be in its own domain, and for any additional containers (v2 pods) to be launched in the same domain, as well as any exec'd process added to the container. Kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html Signed-off-by: Michael Crosby <[email protected]>
1 parent 88e1cf5 commit e48bbe8

99 files changed

Lines changed: 4329 additions & 3611 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cmd/containerd/command/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ can be used and modified as necessary as a custom configuration.`
184184
s *server.Server
185185
err error
186186
}
187+
187188
// run server initialization in a goroutine so we don't end up blocking important things like SIGTERM handling
188189
// while the server is initializing.
189190
// As an example opening the bolt database will block forever if another containerd is already running and containerd

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ require (
6262
go.opentelemetry.io/otel/trace v1.0.1
6363
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023
6464
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
65-
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c
65+
golang.org/x/sys v0.0.0-20210915083310-ed5796bab164
6666
google.golang.org/grpc v1.41.0
6767
google.golang.org/protobuf v1.27.1
6868
gotest.tools/v3 v3.0.3

go.sum

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -762,8 +762,9 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w
762762
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
763763
golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
764764
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
765-
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
766765
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
766+
golang.org/x/sys v0.0.0-20210915083310-ed5796bab164 h1:7ZDGnxgHAMw7thfC5bEos0RDAccZKxioiWBhfIe+tvw=
767+
golang.org/x/sys v0.0.0-20210915083310-ed5796bab164/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
767768
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
768769
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
769770
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d h1:SZxvLBoTP5yHO3Frd4z4vrF+DBX9vMVanchswa69toE=

pkg/schedcore/prctl_linux.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package schedcore
18+
19+
import (
20+
"golang.org/x/sys/unix"
21+
)
22+
23+
// PidType is the type of provided pid value and how it should be treated
24+
type PidType int
25+
26+
const (
27+
// Pid affects the current pid
28+
Pid PidType = pidtypePid
29+
// ThreadGroup affects all threads in the group
30+
ThreadGroup PidType = pidtypeTgid
31+
// ProcessGroup affects all processes in the group
32+
ProcessGroup PidType = pidtypePgid
33+
)
34+
35+
const (
36+
pidtypePid = 0
37+
pidtypeTgid = 1
38+
pidtypePgid = 2
39+
)
40+
41+
// Create a new sched core domain
42+
func Create(t PidType) error {
43+
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0)
44+
}
45+
46+
// ShareFrom shares the sched core domain from the provided pid
47+
func ShareFrom(pid uint64, t PidType) error {
48+
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0)
49+
}

runtime/v2/binary.go

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,28 @@ import (
3535
"github.com/sirupsen/logrus"
3636
)
3737

38-
func shimBinary(bundle *Bundle, runtime, containerdAddress string, containerdTTRPCAddress string) *binary {
38+
type shimBinaryConfig struct {
39+
runtime string
40+
address string
41+
ttrpcAddress string
42+
schedCore bool
43+
}
44+
45+
func shimBinary(bundle *Bundle, config shimBinaryConfig) *binary {
3946
return &binary{
4047
bundle: bundle,
41-
runtime: runtime,
42-
containerdAddress: containerdAddress,
43-
containerdTTRPCAddress: containerdTTRPCAddress,
48+
runtime: config.runtime,
49+
containerdAddress: config.address,
50+
containerdTTRPCAddress: config.ttrpcAddress,
51+
schedCore: config.schedCore,
4452
}
4553
}
4654

4755
type binary struct {
4856
runtime string
4957
containerdAddress string
5058
containerdTTRPCAddress string
59+
schedCore bool
5160
bundle *Bundle
5261
}
5362

@@ -61,13 +70,15 @@ func (b *binary) Start(ctx context.Context, opts *types.Any, onClose func()) (_
6170

6271
cmd, err := client.Command(
6372
ctx,
64-
b.runtime,
65-
b.containerdAddress,
66-
b.containerdTTRPCAddress,
67-
b.bundle.Path,
68-
opts,
69-
args...,
70-
)
73+
&client.CommandConfig{
74+
Runtime: b.runtime,
75+
Address: b.containerdAddress,
76+
TTRPCAddress: b.containerdTTRPCAddress,
77+
Path: b.bundle.Path,
78+
Opts: opts,
79+
Args: args,
80+
SchedCore: b.schedCore,
81+
})
7182
if err != nil {
7283
return nil, err
7384
}
@@ -138,14 +149,19 @@ func (b *binary) Delete(ctx context.Context) (*runtime.Exit, error) {
138149
}
139150

140151
cmd, err := client.Command(ctx,
141-
b.runtime,
142-
b.containerdAddress,
143-
b.containerdTTRPCAddress,
144-
bundlePath,
145-
nil,
146-
"-id", b.bundle.ID,
147-
"-bundle", b.bundle.Path,
148-
"delete")
152+
&client.CommandConfig{
153+
Runtime: b.runtime,
154+
Address: b.containerdAddress,
155+
TTRPCAddress: b.containerdTTRPCAddress,
156+
Path: bundlePath,
157+
Opts: nil,
158+
Args: []string{
159+
"-id", b.bundle.ID,
160+
"-bundle", b.bundle.Path,
161+
"delete",
162+
},
163+
})
164+
149165
if err != nil {
150166
return nil, err
151167
}

runtime/v2/manager.go

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ import (
4141
type Config struct {
4242
// Supported platforms
4343
Platforms []string `toml:"platforms"`
44+
// SchedCore enables Linux core scheduling
45+
SchedCore bool `toml:"sched_core"`
4446
}
4547

4648
func init() {
@@ -55,7 +57,8 @@ func init() {
5557
Platforms: defaultPlatforms(),
5658
},
5759
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
58-
supportedPlatforms, err := parsePlatforms(ic.Config.(*Config).Platforms)
60+
config := ic.Config.(*Config)
61+
supportedPlatforms, err := parsePlatforms(config.Platforms)
5962
if err != nil {
6063
return nil, err
6164
}
@@ -78,26 +81,45 @@ func init() {
7881
cs := metadata.NewContainerStore(m.(*metadata.DB))
7982
events := ep.(*exchange.Exchange)
8083

81-
return New(ic.Context, ic.Root, ic.State, ic.Address, ic.TTRPCAddress, events, cs)
84+
return New(ic.Context, &ManagerConfig{
85+
Root: ic.Root,
86+
State: ic.State,
87+
Address: ic.Address,
88+
TTRPCAddress: ic.TTRPCAddress,
89+
Events: events,
90+
Store: cs,
91+
SchedCore: config.SchedCore,
92+
})
8293
},
8394
})
8495
}
8596

97+
type ManagerConfig struct {
98+
Root string
99+
State string
100+
Store containers.Store
101+
Events *exchange.Exchange
102+
Address string
103+
TTRPCAddress string
104+
SchedCore bool
105+
}
106+
86107
// New task manager for v2 shims
87-
func New(ctx context.Context, root, state, containerdAddress, containerdTTRPCAddress string, events *exchange.Exchange, cs containers.Store) (*TaskManager, error) {
88-
for _, d := range []string{root, state} {
108+
func New(ctx context.Context, config *ManagerConfig) (*TaskManager, error) {
109+
for _, d := range []string{config.Root, config.State} {
89110
if err := os.MkdirAll(d, 0711); err != nil {
90111
return nil, err
91112
}
92113
}
93114
m := &TaskManager{
94-
root: root,
95-
state: state,
96-
containerdAddress: containerdAddress,
97-
containerdTTRPCAddress: containerdTTRPCAddress,
115+
root: config.Root,
116+
state: config.State,
117+
containerdAddress: config.Address,
118+
containerdTTRPCAddress: config.TTRPCAddress,
119+
schedCore: config.SchedCore,
98120
tasks: runtime.NewTaskList(),
99-
events: events,
100-
containers: cs,
121+
events: config.Events,
122+
containers: config.Store,
101123
}
102124
if err := m.loadExistingTasks(ctx); err != nil {
103125
return nil, err
@@ -111,6 +133,7 @@ type TaskManager struct {
111133
state string
112134
containerdAddress string
113135
containerdTTRPCAddress string
136+
schedCore bool
114137

115138
tasks *runtime.TaskList
116139
events *exchange.Exchange
@@ -167,7 +190,12 @@ func (m *TaskManager) startShim(ctx context.Context, bundle *Bundle, id string,
167190
topts = opts.RuntimeOptions
168191
}
169192

170-
b := shimBinary(bundle, opts.Runtime, m.containerdAddress, m.containerdTTRPCAddress)
193+
b := shimBinary(bundle, shimBinaryConfig{
194+
runtime: opts.Runtime,
195+
address: m.containerdAddress,
196+
ttrpcAddress: m.containerdTTRPCAddress,
197+
schedCore: m.schedCore,
198+
})
171199
shim, err := b.Start(ctx, topts, func() {
172200
log.G(ctx).WithField("id", id).Info("shim disconnected")
173201

@@ -303,7 +331,13 @@ func (m *TaskManager) loadTasks(ctx context.Context) error {
303331
bundle.Delete()
304332
continue
305333
}
306-
binaryCall := shimBinary(bundle, container.Runtime.Name, m.containerdAddress, m.containerdTTRPCAddress)
334+
binaryCall := shimBinary(bundle,
335+
shimBinaryConfig{
336+
runtime: container.Runtime.Name,
337+
address: m.containerdAddress,
338+
ttrpcAddress: m.containerdTTRPCAddress,
339+
schedCore: m.schedCore,
340+
})
307341
shim, err := loadShim(ctx, bundle, func() {
308342
log.G(ctx).WithField("id", id).Info("shim disconnected")
309343

runtime/v2/runc/v1/service.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"io"
2525
"os"
2626
"path/filepath"
27+
goruntime "runtime"
2728
"sync"
2829
"syscall"
2930
"time"
@@ -37,6 +38,7 @@ import (
3738
"github.com/containerd/containerd/pkg/oom"
3839
oomv1 "github.com/containerd/containerd/pkg/oom/v1"
3940
"github.com/containerd/containerd/pkg/process"
41+
"github.com/containerd/containerd/pkg/schedcore"
4042
"github.com/containerd/containerd/pkg/stdio"
4143
"github.com/containerd/containerd/runtime/v2/runc"
4244
"github.com/containerd/containerd/runtime/v2/runc/options"
@@ -166,10 +168,19 @@ func (s *service) StartShim(ctx context.Context, opts shim.StartOpts) (_ string,
166168

167169
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
168170

171+
goruntime.LockOSThread()
172+
if os.Getenv("SCHED_CORE") != "" {
173+
if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
174+
return "", errors.Wrap(err, "enable sched core support")
175+
}
176+
}
177+
169178
if err := cmd.Start(); err != nil {
170179
f.Close()
171180
return "", err
172181
}
182+
goruntime.UnlockOSThread()
183+
173184
defer func() {
174185
if retErr != nil {
175186
cmd.Process.Kill()

runtime/v2/runc/v2/service.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"io"
2626
"os"
2727
"path/filepath"
28+
goruntime "runtime"
2829
"sync"
2930
"syscall"
3031
"time"
@@ -40,6 +41,7 @@ import (
4041
oomv1 "github.com/containerd/containerd/pkg/oom/v1"
4142
oomv2 "github.com/containerd/containerd/pkg/oom/v2"
4243
"github.com/containerd/containerd/pkg/process"
44+
"github.com/containerd/containerd/pkg/schedcore"
4345
"github.com/containerd/containerd/pkg/stdio"
4446
"github.com/containerd/containerd/pkg/userns"
4547
"github.com/containerd/containerd/runtime/v2/runc"
@@ -234,10 +236,20 @@ func (s *service) StartShim(ctx context.Context, opts shim.StartOpts) (_ string,
234236

235237
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
236238

239+
goruntime.LockOSThread()
240+
if os.Getenv("SCHED_CORE") != "" {
241+
if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
242+
return "", errors.Wrap(err, "enable sched core support")
243+
}
244+
}
245+
237246
if err := cmd.Start(); err != nil {
238247
f.Close()
239248
return "", err
240249
}
250+
251+
goruntime.UnlockOSThread()
252+
241253
defer func() {
242254
if retErr != nil {
243255
cmd.Process.Kill()

0 commit comments

Comments
 (0)