Skip to content

Commit 9ed9300

Browse files
committed
add support for packing multiple LCOW layers onto single VPMem
Add VirtualPMemMapping schema and update GCS types; add a memory allocator interface and implementation. VPMem multi-mapping support was added in 19H1 and enables packing multiple VHDs onto a single VPMem device. This enables an optimization where multiple LCOW container layers can be packed onto a single VPMem device. This change uses the memory allocator introduced above to keep track of the VPMem surface allocation. Additionally, it introduces new structs to track the internal state of each mapped LCOW layer VHD and updates the HCS/GCS calls accordingly. The optimization is enabled by default on supported systems and falls back to the old behavior otherwise. Add CRI tests. Signed-off-by: Maksim An <[email protected]>
1 parent 62680e0 commit 9ed9300

36 files changed

Lines changed: 4783 additions & 245 deletions

File tree

internal/guestrequest/types.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,18 @@ type LCOWMappedDirectory struct {
4545
ReadOnly bool `json:"ReadOnly,omitempty"`
4646
}
4747

48+
// LCOWMappedLayer is one of potentially multiple read-only layers mapped on a VPMem device
type LCOWMappedLayer struct {
	// DeviceOffsetInBytes is the offset on the VPMem device at which this layer's VHD is mapped.
	DeviceOffsetInBytes uint64 `json:"DeviceOffsetInBytes,omitempty"`
	// DeviceSizeInBytes is the size of the mapped layer VHD, in bytes.
	DeviceSizeInBytes uint64 `json:"DeviceSizeInBytes,omitempty"`
}
53+
4854
// LCOWMappedVPMemDevice describes read-only layers over VPMem.
type LCOWMappedVPMemDevice struct {
	// DeviceNumber is the VPMem device number the layer(s) reside on.
	DeviceNumber uint32 `json:"DeviceNumber,omitempty"`
	// MountPath is the in-guest path where the device is mounted.
	MountPath string `json:"MountPath,omitempty"`
	// MappingInfo is ignored when MountPath is not empty; when set it
	// identifies a single layer packed onto the device (multi-mapping mode).
	MappingInfo *LCOWMappedLayer `json:"MappingInfo,omitempty"`
}
5361

5462
type LCOWMappedVPCIDevice struct {
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
 * HCS API
 *
 * No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen)
 *
 * API version: 2.4
 * Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git)
 */

package hcsschema

// VirtualPMemMapping describes a single image mapped onto a virtual PMem device.
type VirtualPMemMapping struct {
	// HostPath is the host path to the backing image file.
	HostPath string `json:"HostPath,omitempty"`
	// ImageFormat is the format of the backing image — presumably "Vhd1"/"Vhdx";
	// TODO confirm allowed values against the HCS schema.
	ImageFormat string `json:"ImageFormat,omitempty"`
}

internal/layers/layers.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,14 +212,14 @@ func addLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string)
212212
if !uvm.DevicesPhysicallyBacked() {
213213
// We first try vPMEM and if it is full or the file is too large we
214214
// fall back to SCSI.
215-
uvmPath, err = uvm.AddVPMEM(ctx, layerPath)
215+
uvmPath, err = uvm.AddVPMem(ctx, layerPath)
216216
if err == nil {
217217
log.G(ctx).WithFields(logrus.Fields{
218218
"layerPath": layerPath,
219219
"layerType": "vpmem",
220220
}).Debug("Added LCOW layer")
221221
return uvmPath, nil
222-
} else if err != uvmpkg.ErrNoAvailableLocation && err != uvmpkg.ErrMaxVPMEMLayerSize {
222+
} else if err != uvmpkg.ErrNoAvailableLocation && err != uvmpkg.ErrMaxVPMemLayerSize {
223223
return "", fmt.Errorf("failed to add VPMEM layer: %s", err)
224224
}
225225
}
@@ -239,7 +239,7 @@ func addLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string)
239239

240240
func removeLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string) error {
241241
// Assume it was added to vPMEM and fall back to SCSI
242-
err := uvm.RemoveVPMEM(ctx, layerPath)
242+
err := uvm.RemoveVPMem(ctx, layerPath)
243243
if err == nil {
244244
log.G(ctx).WithFields(logrus.Fields{
245245
"layerPath": layerPath,

internal/memory/pool.go

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
package memory
2+
3+
import (
4+
"github.com/pkg/errors"
5+
)
6+
7+
const (
	// minimumClassSize is the region size of the smallest class (class 0).
	minimumClassSize = MegaByte
	// maximumClassSize is the region size of the largest class; it matches
	// the default maximum size of a virtual PMem device (4GB).
	maximumClassSize = 4 * GigaByte
	// memoryClassNumber is the total number of size classes (1MB..4GB in 4x steps).
	memoryClassNumber = 7
)
12+
13+
var (
	// ErrInvalidMemoryClass is returned when a size class falls outside the supported range.
	ErrInvalidMemoryClass = errors.New("invalid memory class")
	// ErrEarlyMerge is returned when a merge is attempted while some child regions are still busy.
	ErrEarlyMerge = errors.New("not all children have been freed")
	// ErrEmptyPoolOperation is returned when an operation targets a class with no backing pool.
	ErrEmptyPoolOperation = errors.New("operation on empty pool")
)
18+
19+
// GetMemoryClassType returns the minimum memory class type that can hold a device of
20+
// a given size. The smallest class is 1MB and the largest one is 4GB with 2 bit offset
21+
// intervals in between, for a total of 7 different classes. This function does not
22+
// do a validity check
23+
func GetMemoryClassType(s uint64) classType {
24+
s = (s - 1) >> 20
25+
memCls := uint32(0)
26+
for s > 0 {
27+
s = s >> 2
28+
memCls++
29+
}
30+
return classType(memCls)
31+
}
32+
33+
// GetMemoryClassSize returns size in bytes for a given memory class
34+
func GetMemoryClassSize(memCls classType) (uint64, error) {
35+
if memCls >= memoryClassNumber {
36+
return 0, ErrInvalidMemoryClass
37+
}
38+
return minimumClassSize << (2 * memCls), nil
39+
}
40+
41+
// region represents a contiguous memory block
type region struct {
	// parent region that has been split into 4
	parent *region
	// class is this region's size class (see GetMemoryClassSize)
	class classType
	// offset represents offset in bytes
	offset uint64
}
49+
50+
// memoryPool tracks free and busy (used) memory regions
type memoryPool struct {
	// free maps a region's byte offset to the free region at that offset
	free map[uint64]*region
	// busy maps a region's byte offset to the allocated region at that offset
	busy map[uint64]*region
}
55+
56+
// PoolAllocator implements a memory allocation strategy similar to buddy-malloc https://github.com/evanw/buddy-malloc/blob/master/buddy-malloc.c
// We borrow the idea of spanning a tree of fixed size regions on top of a contiguous memory
// space.
//
// There are a total of 7 different region sizes that can be allocated, with the smallest
// being 1MB and the largest 4GB (the default maximum size of a Virtual PMem device).
//
// For efficiency and to reduce fragmentation an entire region is allocated when requested.
// When there's no available region of requested size, we try to allocate more memory for
// this particular size by splitting the next available larger region into smaller ones, e.g.
// if there's no region available for size class 0, we try splitting a region from class 1,
// then class 2 etc, until we are able to do so or hit the upper limit.
type PoolAllocator struct {
	// pools holds one (lazily created) memoryPool per size class;
	// a nil entry means no region of that class exists yet
	pools [memoryClassNumber]*memoryPool
}
71+
72+
// Compile-time checks that region and PoolAllocator satisfy the package's
// MappedRegion and Allocator interfaces.
var _ MappedRegion = &region{}
var _ Allocator = &PoolAllocator{}
74+
75+
// Offset returns the region's offset from the start of the device, in bytes.
func (r *region) Offset() uint64 {
	return r.offset
}
78+
79+
func (r *region) Size() uint64 {
80+
sz, err := GetMemoryClassSize(r.class)
81+
if err != nil {
82+
panic(err)
83+
}
84+
return sz
85+
}
86+
87+
// Type returns the region's memory (size) class.
func (r *region) Type() classType {
	return r.class
}
90+
91+
func newEmptyMemoryPool() *memoryPool {
92+
return &memoryPool{
93+
free: make(map[uint64]*region),
94+
busy: make(map[uint64]*region),
95+
}
96+
}
97+
98+
func NewPoolMemoryAllocator() PoolAllocator {
99+
pa := PoolAllocator{}
100+
p := newEmptyMemoryPool()
101+
// by default we allocate a single region with maximum possible size (class type)
102+
p.free[0] = &region{
103+
class: memoryClassNumber - 1,
104+
offset: 0,
105+
}
106+
pa.pools[memoryClassNumber-1] = p
107+
return pa
108+
}
109+
110+
// Allocate checks memory region pool for the given `size` and returns a free region with
// minimal offset, if none available tries expanding matched memory pool.
//
// Internally it's done via moving a region from free pool into a busy pool
func (pa *PoolAllocator) Allocate(size uint64) (MappedRegion, error) {
	memCls := GetMemoryClassType(size)
	if memCls >= memoryClassNumber {
		return nil, ErrInvalidMemoryClass
	}

	// find region with the smallest offset
	nextCls, nextOffset, err := pa.findNextOffset(memCls)
	if err != nil {
		return nil, err
	}

	// this means that there are no more regions for the current class, try expanding
	if nextCls != memCls {
		if err := pa.split(memCls); err != nil {
			if err == ErrInvalidMemoryClass {
				// exhausted the largest class too: the device is full
				return nil, ErrNotEnoughSpace
			}
			return nil, err
		}
	}

	// NOTE(review): nextOffset was found in class nextCls, but markBusy targets
	// memCls — this relies on split always splitting the minimum-offset region,
	// so that a child at exactly nextOffset cascades down into memCls's free map.
	if err := pa.markBusy(memCls, nextOffset); err != nil {
		return nil, err
	}

	// by this point memory pool for memCls should have been created,
	// either prior or during split call
	if r := pa.pools[memCls].busy[nextOffset]; r != nil {
		return r, nil
	}

	return nil, ErrNotEnoughSpace
}
148+
149+
// Release marks a memory region of class `memCls` and offset `offset` as free and tries to merge smaller regions into
// a bigger one
func (pa *PoolAllocator) Release(reg MappedRegion) error {
	mp := pa.pools[reg.Type()]
	if mp == nil {
		return ErrEmptyPoolOperation
	}

	// move the region back to the free map; fails with ErrNotAllocated if it
	// was never allocated (or was already released)
	err := pa.markFree(reg.Type(), reg.Offset())
	if err != nil {
		return err
	}

	// re-fetch the internal *region so we can walk its parent chain
	n := mp.free[reg.Offset()]
	if n == nil {
		return ErrNotAllocated
	}
	// try to coalesce siblings back into the parent region; ErrEarlyMerge just
	// means some siblings are still busy, which is not an error for Release
	if err := pa.merge(n.parent); err != nil {
		if err != ErrEarlyMerge {
			return err
		}
	}
	return nil
}
173+
174+
// findNextOffset finds next region location for a given memCls
175+
func (pa *PoolAllocator) findNextOffset(memCls classType) (classType, uint64, error) {
176+
for mc := memCls; mc < memoryClassNumber; mc++ {
177+
pi := pa.pools[mc]
178+
if pi == nil || len(pi.free) == 0 {
179+
continue
180+
}
181+
182+
target := maximumClassSize
183+
for offset := range pi.free {
184+
if offset < target {
185+
target = offset
186+
}
187+
}
188+
return mc, target, nil
189+
}
190+
return 0, 0, ErrNotEnoughSpace
191+
}
192+
193+
// split tries to recursively split a bigger memory region into smaller ones until it succeeds or hits the upper limit
func (pa *PoolAllocator) split(clsType classType) error {
	nextClsType := clsType + 1
	if nextClsType >= memoryClassNumber {
		// nothing larger left to split
		return ErrInvalidMemoryClass
	}

	// lazily create the pool for the parent class
	nextPool := pa.pools[nextClsType]
	if nextPool == nil {
		nextPool = newEmptyMemoryPool()
		pa.pools[nextClsType] = nextPool
	}

	cls, offset, err := pa.findNextOffset(nextClsType)
	if err != nil {
		return err
	}
	// not enough memory in the next class, try to recursively expand
	// NOTE(review): `offset` was found in class `cls` (> nextClsType); the
	// recursive split cascades children down so that a free region at exactly
	// `offset` appears in nextClsType's pool before markBusy below runs.
	if cls != nextClsType {
		if err := pa.split(nextClsType); err != nil {
			return err
		}
	}

	// reserve the parent region we are about to carve up
	if err := pa.markBusy(nextClsType, offset); err != nil {
		return err
	}

	// memCls validity has been checked already, we can ignore the error
	clsSize, _ := GetMemoryClassSize(clsType)

	nextReg := nextPool.busy[offset]
	if nextReg == nil {
		return ErrNotAllocated
	}

	// expand memCls
	cp := pa.pools[clsType]
	if cp == nil {
		cp = newEmptyMemoryPool()
		pa.pools[clsType] = cp
	}
	// create 4 smaller regions
	for i := uint64(0); i < 4; i++ {
		offset := nextReg.offset + i*clsSize
		reg := &region{
			parent: nextReg,
			class:  clsType,
			offset: offset,
		}
		cp.free[offset] = reg
	}
	return nil
}
247+
248+
// merge coalesces the 4 child regions of `parent` back into the parent when
// all of them are free, then recursively tries the same on the grandparent.
// Returns ErrEarlyMerge when at least one child is still busy.
func (pa *PoolAllocator) merge(parent *region) error {
	// nothing to merge
	if parent == nil {
		return nil
	}

	// NOTE(review): parent.class-1 would underflow for class 0, but callers
	// only pass parents of split regions, whose class is >= 1 — confirm.
	childCls := parent.class - 1
	childPool := pa.pools[childCls]
	// no child nodes to merge, try to merge parent
	if childPool == nil {
		return pa.merge(parent.parent)
	}

	childSize, err := GetMemoryClassSize(childCls)
	if err != nil {
		return err
	}

	// check if all the child nodes are free
	var children []*region
	for i := uint64(0); i < 4; i++ {
		child, free := childPool.free[parent.offset+i*childSize]
		if !free {
			return ErrEarlyMerge
		}
		children = append(children, child)
	}

	// at this point all the child nodes will be free and we can merge
	for _, child := range children {
		delete(childPool.free, child.offset)
	}

	// the parent was marked busy when it was split; free it again
	if err := pa.markFree(parent.class, parent.offset); err != nil {
		return err
	}

	return pa.merge(parent.parent)
}
287+
288+
// markFree internally moves a region with `offset` from busy to free map
289+
func (pa *PoolAllocator) markFree(memCls classType, offset uint64) error {
290+
clsPool := pa.pools[memCls]
291+
if clsPool == nil {
292+
return ErrEmptyPoolOperation
293+
}
294+
295+
if reg, exists := clsPool.busy[offset]; exists {
296+
clsPool.free[offset] = reg
297+
delete(clsPool.busy, offset)
298+
return nil
299+
}
300+
return ErrNotAllocated
301+
}
302+
303+
// markBusy internally moves a region with `offset` from free to busy map
304+
func (pa *PoolAllocator) markBusy(memCls classType, offset uint64) error {
305+
clsPool := pa.pools[memCls]
306+
if clsPool == nil {
307+
return ErrEmptyPoolOperation
308+
}
309+
310+
if reg, exists := clsPool.free[offset]; exists {
311+
clsPool.busy[offset] = reg
312+
delete(clsPool.free, offset)
313+
return nil
314+
}
315+
return ErrNotAllocated
316+
}

0 commit comments

Comments
 (0)