Skip to content

Commit bbf5589

Browse files
committed
Add containerd-shim plumbing for job containers
* Add the necessary plumbing in containerd shim to be able to create a job container if asked for via the annotation. * Rework jobcontainers package a bit to return a resources struct to avoid some hacks during cleanup. This way resource cleanup for wcow/lcow is the exact same as for job containers in the shim. * Change some of the layer code to handle taking in a volume mount point. Signed-off-by: Daniel Canter <[email protected]>
1 parent 62680e0 commit bbf5589

9 files changed

Lines changed: 194 additions & 137 deletions

File tree

cmd/containerd-shim-runhcs-v1/pod.go

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques
8383
owner := filepath.Base(os.Args[0])
8484
isWCOW := oci.IsWCOW(s)
8585

86+
p := pod{
87+
events: events,
88+
id: req.ID,
89+
}
90+
8691
var parent *uvm.UtilityVM
8792
if oci.IsIsolated(s) {
8893
// Create the UVM parent
@@ -125,22 +130,41 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques
125130
parent.Close()
126131
return nil, err
127132
}
133+
} else if oci.IsJobContainer(s) {
134+
// If we're making a job container fake a task (i.e reuse the wcowPodSandbox logic)
135+
p.sandboxTask = newWcowPodSandboxTask(ctx, events, req.ID, req.Bundle, parent, "")
136+
if err := events.publishEvent(
137+
ctx,
138+
runtime.TaskCreateEventTopic,
139+
&eventstypes.TaskCreate{
140+
ContainerID: req.ID,
141+
Bundle: req.Bundle,
142+
Rootfs: req.Rootfs,
143+
IO: &eventstypes.TaskIO{
144+
Stdin: req.Stdin,
145+
Stdout: req.Stdout,
146+
Stderr: req.Stderr,
147+
Terminal: req.Terminal,
148+
},
149+
Checkpoint: "",
150+
Pid: 0,
151+
}); err != nil {
152+
return nil, err
153+
}
154+
p.jobContainer = true
155+
return &p, nil
128156
} else if !isWCOW {
129157
return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "oci spec does not contain WCOW or LCOW spec")
130158
}
159+
131160
defer func() {
132161
// clean up the uvm if we fail any further operations
133162
if err != nil && parent != nil {
134163
parent.Close()
135164
}
136165
}()
137166

138-
p := pod{
139-
events: events,
140-
id: req.ID,
141-
host: parent,
142-
}
143-
167+
p.host = parent
144168
if parent != nil {
145169
cid := req.ID
146170
if id, ok := s.Annotations[oci.AnnotationNcproxyContainerID]; ok {
@@ -232,6 +256,11 @@ type pod struct {
232256
// It MUST be treated as read only in the lifetime of the pod.
233257
host *uvm.UtilityVM
234258

259+
// jobContainer specifies whether this pod is for WCOW job containers only.
260+
//
261+
// It MUST be treated as read only in the lifetime of the pod.
262+
jobContainer bool
263+
235264
workloadTasks sync.Map
236265
}
237266

@@ -263,6 +292,17 @@ func (p *pod) CreateTask(ctx context.Context, req *task.CreateTaskRequest, s *sp
263292
return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "task with id: '%s' already exists id pod: '%s'", req.ID, p.id)
264293
}
265294

295+
if p.jobContainer {
296+
// This is a short circuit to make sure that all containers in a pod will have
297+
// the same IP address/be added to the same compartment.
298+
//
299+
// There will need to be OS work needed to support this scenario, so for now we need to block on
300+
// this.
301+
if !oci.IsJobContainer(s) {
302+
return nil, errors.New("cannot create a normal process isolated container if the pod sandbox is a job container")
303+
}
304+
}
305+
266306
ct, sid, err := oci.GetSandboxTypeAndID(s.Annotations)
267307
if err != nil {
268308
return nil, err

cmd/containerd-shim-runhcs-v1/task_hcs.go

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/Microsoft/hcsshim/internal/hcs/schema1"
3030
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
3131
"github.com/Microsoft/hcsshim/internal/hcsoci"
32+
"github.com/Microsoft/hcsshim/internal/jobcontainers"
3233
"github.com/Microsoft/hcsshim/internal/log"
3334
"github.com/Microsoft/hcsshim/internal/oci"
3435
"github.com/Microsoft/hcsshim/internal/processorinfo"
@@ -113,6 +114,39 @@ func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.Creat
113114
return shim, nil
114115
}
115116

117+
// createContainer is a generic call to return either a process/hypervisor isolated container, or a job container
118+
// based on what is set in the OCI spec.
119+
func createContainer(ctx context.Context, id, owner, netNS string, s *specs.Spec, parent *uvm.UtilityVM, shimOpts *runhcsopts.Options) (cow.Container, *resources.Resources, error) {
120+
var (
121+
err error
122+
container cow.Container
123+
resources *resources.Resources
124+
)
125+
126+
if oci.IsJobContainer(s) {
127+
container, resources, err = jobcontainers.Create(ctx, id, s)
128+
if err != nil {
129+
return nil, nil, err
130+
}
131+
} else {
132+
opts := &hcsoci.CreateOptions{
133+
ID: id,
134+
Owner: owner,
135+
Spec: s,
136+
HostingSystem: parent,
137+
NetworkNamespace: netNS,
138+
}
139+
if shimOpts != nil {
140+
opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox
141+
}
142+
container, resources, err = hcsoci.CreateContainer(ctx, opts)
143+
if err != nil {
144+
return nil, nil, err
145+
}
146+
}
147+
return container, resources, nil
148+
}
149+
116150
// newHcsTask creates a container within `parent` and its init exec process in
117151
// the `shimExecCreated` state and returns the task that tracks its lifetime.
118152
//
@@ -152,19 +186,7 @@ func newHcsTask(
152186
shimOpts = v.(*runhcsopts.Options)
153187
}
154188

155-
opts := hcsoci.CreateOptions{
156-
ID: req.ID,
157-
Owner: owner,
158-
Spec: s,
159-
HostingSystem: parent,
160-
NetworkNamespace: netNS,
161-
}
162-
163-
if shimOpts != nil {
164-
opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox
165-
}
166-
167-
system, resources, err := hcsoci.CreateContainer(ctx, &opts)
189+
container, resources, err := createContainer(ctx, req.ID, owner, netNS, s, parent, shimOpts)
168190
if err != nil {
169191
return nil, err
170192
}
@@ -173,7 +195,7 @@ func newHcsTask(
173195
events: events,
174196
id: req.ID,
175197
isWCOW: oci.IsWCOW(s),
176-
c: system,
198+
c: container,
177199
cr: resources,
178200
ownsHost: ownsParent,
179201
host: parent,
@@ -186,7 +208,7 @@ func newHcsTask(
186208
events,
187209
req.ID,
188210
parent,
189-
system,
211+
container,
190212
req.ID,
191213
req.Bundle,
192214
ht.isWCOW,

cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ func (wpst *wcowPodSandboxTask) Share(ctx context.Context, req *shimdiag.ShareRe
284284

285285
func (wpst *wcowPodSandboxTask) Stats(ctx context.Context) (*stats.Statistics, error) {
286286
stats := &stats.Statistics{}
287+
if wpst.host == nil {
288+
return stats, nil
289+
}
287290
vmStats, err := wpst.host.Stats(ctx)
288291
if err != nil && !isStatsNotFound(err) {
289292
return nil, err

internal/hcsoci/resources_lcow.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r *
4242
containerRootInUVM := r.ContainerRootInUVM()
4343
if coi.Spec.Windows != nil && len(coi.Spec.Windows.LayerFolders) > 0 {
4444
log.G(ctx).Debug("hcsshim::allocateLinuxResources mounting storage")
45-
rootPath, err := layers.MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, containerRootInUVM, coi.HostingSystem)
45+
rootPath, err := layers.MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, containerRootInUVM, "", coi.HostingSystem)
4646
if err != nil {
4747
return errors.Wrap(err, "failed to mount container storage")
4848
}
4949
coi.Spec.Root.Path = rootPath
50-
layers := layers.NewImageLayers(coi.HostingSystem, containerRootInUVM, coi.Spec.Windows.LayerFolders, isSandbox)
50+
layers := layers.NewImageLayers(coi.HostingSystem, containerRootInUVM, coi.Spec.Windows.LayerFolders, "", isSandbox)
5151
r.SetLayers(layers)
5252
} else if coi.Spec.Root.Path != "" {
5353
// This is the "Plan 9" root filesystem.

internal/hcsoci/resources_wcow.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,12 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r
5252
if coi.Spec.Root.Path == "" && (coi.HostingSystem != nil || coi.Spec.Windows.HyperV == nil) {
5353
log.G(ctx).Debug("hcsshim::allocateWindowsResources mounting storage")
5454
containerRootInUVM := r.ContainerRootInUVM()
55-
containerRootPath, err := layers.MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, containerRootInUVM, coi.HostingSystem)
55+
containerRootPath, err := layers.MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, containerRootInUVM, "", coi.HostingSystem)
5656
if err != nil {
5757
return errors.Wrap(err, "failed to mount container storage")
5858
}
5959
coi.Spec.Root.Path = containerRootPath
60-
layers := layers.NewImageLayers(coi.HostingSystem, containerRootInUVM, coi.Spec.Windows.LayerFolders, isSandbox)
60+
layers := layers.NewImageLayers(coi.HostingSystem, containerRootInUVM, coi.Spec.Windows.LayerFolders, "", isSandbox)
6161
r.SetLayers(layers)
6262
}
6363

internal/jobcontainers/jobcontainer.go

Lines changed: 26 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ import (
2121
"github.com/Microsoft/hcsshim/internal/layers"
2222
"github.com/Microsoft/hcsshim/internal/log"
2323
"github.com/Microsoft/hcsshim/internal/queue"
24+
"github.com/Microsoft/hcsshim/internal/resources"
2425
"github.com/Microsoft/hcsshim/internal/winapi"
2526
specs "github.com/opencontainers/runtime-spec/specs-go"
2627
"github.com/pkg/errors"
27-
"github.com/sirupsen/logrus"
2828
"golang.org/x/sys/windows"
2929
)
3030

@@ -66,7 +66,6 @@ type JobContainer struct {
6666
spec *specs.Spec // OCI spec used to create the container
6767
job *jobobject.JobObject // Object representing the job object the container owns
6868
sandboxMount string // Path to where the sandbox is mounted on the host
69-
m sync.Mutex
7069
closedWaitOnce sync.Once
7170
init initProc
7271
startTimestamp time.Time
@@ -89,33 +88,21 @@ func newJobContainer(id string, s *specs.Spec) *JobContainer {
8988
}
9089

9190
// Create creates a new JobContainer from `s`.
92-
func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, err error) {
91+
func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, _ *resources.Resources, err error) {
9392
log.G(ctx).WithField("id", id).Debug("Creating job container")
9493

9594
if s == nil {
96-
return nil, errors.New("Spec must be supplied")
95+
return nil, nil, errors.New("Spec must be supplied")
9796
}
9897

9998
if id == "" {
10099
g, err := guid.NewV4()
101100
if err != nil {
102-
return nil, err
101+
return nil, nil, err
103102
}
104103
id = g.String()
105104
}
106105

107-
if err := mountLayers(ctx, s); err != nil {
108-
return nil, errors.Wrap(err, "failed to mount container layers")
109-
}
110-
111-
volumeGUIDRegex := `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}(|\\)$`
112-
if matched, err := regexp.MatchString(volumeGUIDRegex, s.Root.Path); !matched || err != nil {
113-
return nil, fmt.Errorf(`invalid container spec - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, s.Root.Path)
114-
}
115-
if s.Root.Path[len(s.Root.Path)-1] != '\\' {
116-
s.Root.Path += `\` // Be nice to clients and make sure well-formed for back-compat
117-
}
118-
119106
container := newJobContainer(id, s)
120107

121108
// Create the job object all processes will run in.
@@ -125,52 +112,50 @@ func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, err
125112
}
126113
job, err := jobobject.Create(ctx, options)
127114
if err != nil {
128-
return nil, errors.Wrap(err, "failed to create job object")
115+
return nil, nil, errors.Wrap(err, "failed to create job object")
129116
}
130117

131118
// Parity with how we handle process isolated containers. We set the same flag which
132119
// behaves the same way for a silo.
133120
if err := job.SetTerminateOnLastHandleClose(); err != nil {
134-
return nil, errors.Wrap(err, "failed to set terminate on last handle close on job container")
121+
return nil, nil, errors.Wrap(err, "failed to set terminate on last handle close on job container")
135122
}
136123
container.job = job
137124

138-
var path string
125+
r := resources.NewContainerResources(id)
139126
defer func() {
140127
if err != nil {
141128
container.Close()
142-
if path != "" {
143-
_ = removeSandboxMountPoint(ctx, path)
144-
}
129+
_ = resources.ReleaseResources(ctx, r, nil, true)
145130
}
146131
}()
147132

148-
limits, err := specToLimits(ctx, id, s)
149-
if err != nil {
150-
return nil, errors.Wrap(err, "failed to convert OCI spec to job object limits")
133+
sandboxPath := fmt.Sprintf(sandboxMountFormat, id)
134+
if err := mountLayers(ctx, s, sandboxPath); err != nil {
135+
return nil, nil, errors.Wrap(err, "failed to mount container layers")
151136
}
137+
container.sandboxMount = sandboxPath
152138

153-
// Set resource limits on the job object based off of oci spec.
154-
if err := job.SetResourceLimits(limits); err != nil {
155-
return nil, errors.Wrap(err, "failed to set resource limits")
139+
layers := layers.NewImageLayers(nil, "", s.Windows.LayerFolders, sandboxPath, false)
140+
r.SetLayers(layers)
141+
142+
volumeGUIDRegex := `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}(|\\)$`
143+
if matched, err := regexp.MatchString(volumeGUIDRegex, s.Root.Path); !matched || err != nil {
144+
return nil, nil, fmt.Errorf(`invalid container spec - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, s.Root.Path)
156145
}
157146

158-
// Setup directory sandbox volume will be mounted
159-
sandboxPath := fmt.Sprintf(sandboxMountFormat, id)
160-
if _, err := os.Stat(sandboxPath); os.IsNotExist(err) {
161-
if err := os.MkdirAll(sandboxPath, 0777); err != nil {
162-
return nil, errors.Wrap(err, "failed to create mounted folder")
163-
}
147+
limits, err := specToLimits(ctx, id, s)
148+
if err != nil {
149+
return nil, nil, errors.Wrap(err, "failed to convert OCI spec to job object limits")
164150
}
165-
path = sandboxPath
166151

167-
if err := mountSandboxVolume(ctx, path, s.Root.Path); err != nil {
168-
return nil, errors.Wrap(err, "failed to bind payload directory on host")
152+
// Set resource limits on the job object based off of oci spec.
153+
if err := job.SetResourceLimits(limits); err != nil {
154+
return nil, nil, errors.Wrap(err, "failed to set resource limits")
169155
}
170156

171-
container.sandboxMount = path
172157
go container.waitBackground(ctx)
173-
return container, nil
158+
return container, r, nil
174159
}
175160

176161
// CreateProcess creates a process on the host, starts it, adds it to the containers
@@ -283,29 +268,6 @@ func (c *JobContainer) Modify(ctx context.Context, config interface{}) (err erro
283268
return errors.New("modify not supported for job containers")
284269
}
285270

286-
// Release unmounts all of the container layers. Safe to call multiple times, if no storage
287-
// is mounted this call will just return nil.
288-
func (c *JobContainer) Release(ctx context.Context) error {
289-
c.m.Lock()
290-
defer c.m.Unlock()
291-
292-
log.G(ctx).WithFields(logrus.Fields{
293-
"id": c.id,
294-
"path": c.sandboxMount,
295-
}).Warn("removing sandbox volume mount")
296-
297-
if c.sandboxMount != "" {
298-
if err := removeSandboxMountPoint(ctx, c.sandboxMount); err != nil {
299-
return errors.Wrap(err, "failed to remove sandbox volume mount path")
300-
}
301-
if err := layers.UnmountContainerLayers(ctx, c.spec.Windows.LayerFolders, "", nil, layers.UnmountOperationAll); err != nil {
302-
return errors.Wrap(err, "failed to unmount container layers")
303-
}
304-
c.sandboxMount = ""
305-
}
306-
return nil
307-
}
308-
309271
// Start starts the container. There's nothing to "start" for job containers, so this just
310272
// sets the start timestamp.
311273
func (c *JobContainer) Start(ctx context.Context) error {
@@ -484,7 +446,7 @@ func (c *JobContainer) waitBackground(ctx context.Context) {
484446
// them to exit.
485447
<-c.init.proc.waitBlock
486448

487-
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
449+
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
488450
defer cancel()
489451
if err := c.Shutdown(ctx); err != nil {
490452
_ = c.Terminate(ctx)

0 commit comments

Comments
 (0)