Skip to content

Commit 58b6b99

Browse files
committed
Add a check to skip stats for containers that are not running
When a container is just created, exited state the container will not have stats. A common case for this in k8s is the init containers for a pod. The will be present in the listed containers but will not have a running task and there for no stats. Signed-off-by: James Sturtevant <[email protected]>
1 parent 147aea0 commit 58b6b99

4 files changed

Lines changed: 276 additions & 47 deletions

File tree

pkg/cri/sbserver/sandbox_stats_windows.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
129129
for _, cntr := range containers {
130130
containerMetric := statsMap[cntr.ID]
131131

132+
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
133+
// containers that are just created, in a failed state or exited (init containers) will not have stats
134+
continue
135+
}
136+
132137
if containerMetric == nil {
133138
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
134139
}

pkg/cri/sbserver/sandbox_stats_windows_test.go

Lines changed: 133 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,99 @@ func Test_criService_podSandboxStats(t *testing.T) {
112112
},
113113
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
114114
containers: []containerstore.Container{
115-
{Metadata: containerstore.Metadata{ID: "c1"}},
115+
newContainer("c1", running, nil),
116+
},
117+
expectedPodStats: expectedStats{
118+
UsageCoreNanoSeconds: 400,
119+
UsageNanoCores: 0,
120+
WorkingSetBytes: 40,
121+
},
122+
expectedContainerStats: []expectedStats{
123+
{
124+
UsageCoreNanoSeconds: 200,
125+
UsageNanoCores: 0,
126+
WorkingSetBytes: 20,
127+
},
128+
},
129+
expectError: false,
130+
},
131+
"pod stats will include the init container stats": {
132+
metrics: map[string]*wstats.Statistics{
133+
"c1": {
134+
Container: windowsStat(currentStatsTimestamp, 200, 20),
135+
},
136+
"s1": {
137+
Container: windowsStat(currentStatsTimestamp, 200, 20),
138+
},
139+
"i1": {
140+
Container: windowsStat(currentStatsTimestamp, 200, 20),
141+
},
142+
},
143+
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
144+
containers: []containerstore.Container{
145+
newContainer("c1", running, nil),
146+
newContainer("i1", running, nil),
147+
},
148+
expectedPodStats: expectedStats{
149+
UsageCoreNanoSeconds: 600,
150+
UsageNanoCores: 0,
151+
WorkingSetBytes: 60,
152+
},
153+
expectedContainerStats: []expectedStats{
154+
{
155+
UsageCoreNanoSeconds: 200,
156+
UsageNanoCores: 0,
157+
WorkingSetBytes: 20,
158+
},
159+
{
160+
UsageCoreNanoSeconds: 200,
161+
UsageNanoCores: 0,
162+
WorkingSetBytes: 20,
163+
},
164+
},
165+
expectError: false,
166+
},
167+
"pod stats will not include the init container stats if it is stopped": {
168+
metrics: map[string]*wstats.Statistics{
169+
"c1": {
170+
Container: windowsStat(currentStatsTimestamp, 200, 20),
171+
},
172+
"s1": {
173+
Container: windowsStat(currentStatsTimestamp, 200, 20),
174+
},
175+
},
176+
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
177+
containers: []containerstore.Container{
178+
newContainer("c1", running, nil),
179+
newContainer("i1", exitedValid, nil),
180+
},
181+
expectedPodStats: expectedStats{
182+
UsageCoreNanoSeconds: 400,
183+
UsageNanoCores: 0,
184+
WorkingSetBytes: 40,
185+
},
186+
expectedContainerStats: []expectedStats{
187+
{
188+
UsageCoreNanoSeconds: 200,
189+
UsageNanoCores: 0,
190+
WorkingSetBytes: 20,
191+
},
192+
},
193+
expectError: false,
194+
},
195+
"pod stats will not include the init container stats if it is stopped in failed state": {
196+
metrics: map[string]*wstats.Statistics{
197+
"c1": {
198+
Container: windowsStat(currentStatsTimestamp, 200, 20),
199+
},
200+
"s1": {
201+
Container: windowsStat(currentStatsTimestamp, 200, 20),
202+
},
203+
},
204+
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
205+
containers: []containerstore.Container{
206+
newContainer("c1", running, nil),
207+
newContainer("i1", exitedInvalid, nil),
116208
},
117209
expectedPodStats: expectedStats{
118210
UsageCoreNanoSeconds: 400,
@@ -139,13 +231,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
139231
},
140232
sandbox: sandboxPod("s1", initialStatsTimestamp, 400),
141233
containers: []containerstore.Container{
142-
{
143-
Metadata: containerstore.Metadata{ID: "c1"},
144-
Stats: &stats.ContainerStats{
145-
Timestamp: initialStatsTimestamp,
146-
UsageCoreNanoSeconds: 200,
147-
},
148-
},
234+
newContainer("c1", running, &stats.ContainerStats{
235+
Timestamp: initialStatsTimestamp,
236+
UsageCoreNanoSeconds: 200,
237+
}),
149238
},
150239
expectedPodStats: expectedStats{
151240
UsageCoreNanoSeconds: 800,
@@ -170,13 +259,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
170259
},
171260
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
172261
containers: []containerstore.Container{
173-
{
174-
Metadata: containerstore.Metadata{ID: "c1"},
175-
Stats: &stats.ContainerStats{
176-
Timestamp: initialStatsTimestamp,
177-
UsageCoreNanoSeconds: 200,
178-
},
179-
},
262+
newContainer("c1", running, &stats.ContainerStats{
263+
Timestamp: initialStatsTimestamp,
264+
UsageCoreNanoSeconds: 200,
265+
}),
180266
},
181267
expectedPodStats: expectedStats{
182268
UsageCoreNanoSeconds: 400,
@@ -201,13 +287,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
201287
},
202288
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
203289
containers: []containerstore.Container{
204-
{
205-
Metadata: containerstore.Metadata{ID: "c1"},
206-
Stats: &stats.ContainerStats{
207-
Timestamp: initialStatsTimestamp,
208-
UsageCoreNanoSeconds: 200,
209-
},
210-
},
290+
newContainer("c1", running, &stats.ContainerStats{
291+
Timestamp: initialStatsTimestamp,
292+
UsageCoreNanoSeconds: 200,
293+
}),
211294
},
212295
expectedPodStats: expectedStats{
213296
UsageCoreNanoSeconds: 400,
@@ -230,7 +313,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
230313
assert.NotNil(t, err)
231314
return
232315
}
233-
316+
assert.Nil(t, err)
234317
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
235318
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
236319

@@ -266,6 +349,33 @@ func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statist
266349
}
267350
}
268351

352+
func newContainer(id string, status containerstore.Status, stats *stats.ContainerStats) containerstore.Container {
353+
cntr, err := containerstore.NewContainer(containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(status))
354+
if err != nil {
355+
panic(err)
356+
}
357+
if stats != nil {
358+
cntr.Stats = stats
359+
}
360+
return cntr
361+
}
362+
363+
var exitedValid = containerstore.Status{
364+
StartedAt: time.Now().UnixNano(),
365+
FinishedAt: time.Now().UnixNano(),
366+
ExitCode: 0,
367+
}
368+
369+
var exitedInvalid = containerstore.Status{
370+
StartedAt: time.Now().UnixNano(),
371+
FinishedAt: time.Now().UnixNano(),
372+
ExitCode: 1,
373+
}
374+
375+
var running = containerstore.Status{
376+
StartedAt: time.Now().UnixNano(),
377+
}
378+
269379
func Test_criService_saveSandBoxMetrics(t *testing.T) {
270380

271381
timestamp := time.Now()

pkg/cri/server/sandbox_stats_windows.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ func (c *criService) podSandboxStats(
7171
podSandboxStats.Windows.Cpu = podCPU.Cpu
7272
podSandboxStats.Windows.Memory = podCPU.Memory
7373
podSandboxStats.Windows.Containers = containerStats
74-
7574
podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp)
7675

7776
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
@@ -128,6 +127,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
128127
for _, cntr := range containers {
129128
containerMetric := statsMap[cntr.ID]
130129

130+
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
131+
// containers that are just created, in a failed state or exited (init containers) will not have stats
132+
continue
133+
}
134+
131135
if containerMetric == nil {
132136
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
133137
}

0 commit comments

Comments
 (0)