Skip to content

Commit 28a5199

Browse files
committed
Add a check to skip stats for containers that are not running
When a container is just created, exited state the container will not have stats. A common case for this in k8s is the init containers for a pod. The will be present in the listed containers but will not have a running task and there for no stats. Signed-off-by: James Sturtevant <[email protected]>
1 parent 8a6c8a9 commit 28a5199

4 files changed

Lines changed: 282 additions & 47 deletions

File tree

pkg/cri/sbserver/sandbox_stats_windows.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
129129
for _, cntr := range containers {
130130
containerMetric := statsMap[cntr.ID]
131131

132+
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
133+
// containers that are just created, in a failed state or exited (init containers) will not have stats
134+
continue
135+
}
136+
132137
if containerMetric == nil {
133138
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
134139
}

pkg/cri/sbserver/sandbox_stats_windows_test.go

Lines changed: 136 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,102 @@ func Test_criService_podSandboxStats(t *testing.T) {
118118
},
119119
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
120120
containers: []containerstore.Container{
121-
{Metadata: containerstore.Metadata{ID: "c1"}},
121+
newContainer("c1", running, nil),
122+
},
123+
expectedPodStats: expectedStats{
124+
UsageCoreNanoSeconds: 400,
125+
UsageNanoCores: 0,
126+
WorkingSetBytes: 40,
127+
},
128+
expectedContainerStats: []expectedStats{
129+
{
130+
UsageCoreNanoSeconds: 200,
131+
UsageNanoCores: 0,
132+
WorkingSetBytes: 20,
133+
},
134+
},
135+
expectError: false,
136+
},
137+
{
138+
desc: "pod stats will include the init container stats",
139+
metrics: map[string]*wstats.Statistics{
140+
"c1": {
141+
Container: windowsStat(currentStatsTimestamp, 200, 20),
142+
},
143+
"s1": {
144+
Container: windowsStat(currentStatsTimestamp, 200, 20),
145+
},
146+
"i1": {
147+
Container: windowsStat(currentStatsTimestamp, 200, 20),
148+
},
149+
},
150+
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
151+
containers: []containerstore.Container{
152+
newContainer("c1", running, nil),
153+
newContainer("i1", running, nil),
154+
},
155+
expectedPodStats: expectedStats{
156+
UsageCoreNanoSeconds: 600,
157+
UsageNanoCores: 0,
158+
WorkingSetBytes: 60,
159+
},
160+
expectedContainerStats: []expectedStats{
161+
{
162+
UsageCoreNanoSeconds: 200,
163+
UsageNanoCores: 0,
164+
WorkingSetBytes: 20,
165+
},
166+
{
167+
UsageCoreNanoSeconds: 200,
168+
UsageNanoCores: 0,
169+
WorkingSetBytes: 20,
170+
},
171+
},
172+
expectError: false,
173+
},
174+
{
175+
desc: "pod stats will not include the init container stats if it is stopped",
176+
metrics: map[string]*wstats.Statistics{
177+
"c1": {
178+
Container: windowsStat(currentStatsTimestamp, 200, 20),
179+
},
180+
"s1": {
181+
Container: windowsStat(currentStatsTimestamp, 200, 20),
182+
},
183+
},
184+
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
185+
containers: []containerstore.Container{
186+
newContainer("c1", running, nil),
187+
newContainer("i1", exitedValid, nil),
188+
},
189+
expectedPodStats: expectedStats{
190+
UsageCoreNanoSeconds: 400,
191+
UsageNanoCores: 0,
192+
WorkingSetBytes: 40,
193+
},
194+
expectedContainerStats: []expectedStats{
195+
{
196+
UsageCoreNanoSeconds: 200,
197+
UsageNanoCores: 0,
198+
WorkingSetBytes: 20,
199+
},
200+
},
201+
expectError: false,
202+
},
203+
{
204+
desc: "pod stats will not include the init container stats if it is stopped in failed state",
205+
metrics: map[string]*wstats.Statistics{
206+
"c1": {
207+
Container: windowsStat(currentStatsTimestamp, 200, 20),
208+
},
209+
"s1": {
210+
Container: windowsStat(currentStatsTimestamp, 200, 20),
211+
},
212+
},
213+
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
214+
containers: []containerstore.Container{
215+
newContainer("c1", running, nil),
216+
newContainer("i1", exitedInvalid, nil),
122217
},
123218
expectedPodStats: expectedStats{
124219
UsageCoreNanoSeconds: 400,
@@ -146,13 +241,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
146241
},
147242
sandbox: sandboxPod("s1", initialStatsTimestamp, 400),
148243
containers: []containerstore.Container{
149-
{
150-
Metadata: containerstore.Metadata{ID: "c1"},
151-
Stats: &stats.ContainerStats{
152-
Timestamp: initialStatsTimestamp,
153-
UsageCoreNanoSeconds: 200,
154-
},
155-
},
244+
newContainer("c1", running, &stats.ContainerStats{
245+
Timestamp: initialStatsTimestamp,
246+
UsageCoreNanoSeconds: 200,
247+
}),
156248
},
157249
expectedPodStats: expectedStats{
158250
UsageCoreNanoSeconds: 800,
@@ -178,13 +270,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
178270
},
179271
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
180272
containers: []containerstore.Container{
181-
{
182-
Metadata: containerstore.Metadata{ID: "c1"},
183-
Stats: &stats.ContainerStats{
184-
Timestamp: initialStatsTimestamp,
185-
UsageCoreNanoSeconds: 200,
186-
},
187-
},
273+
newContainer("c1", running, &stats.ContainerStats{
274+
Timestamp: initialStatsTimestamp,
275+
UsageCoreNanoSeconds: 200,
276+
}),
188277
},
189278
expectedPodStats: expectedStats{
190279
UsageCoreNanoSeconds: 400,
@@ -210,13 +299,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
210299
},
211300
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
212301
containers: []containerstore.Container{
213-
{
214-
Metadata: containerstore.Metadata{ID: "c1"},
215-
Stats: &stats.ContainerStats{
216-
Timestamp: initialStatsTimestamp,
217-
UsageCoreNanoSeconds: 200,
218-
},
219-
},
302+
newContainer("c1", running, &stats.ContainerStats{
303+
Timestamp: initialStatsTimestamp,
304+
UsageCoreNanoSeconds: 200,
305+
}),
220306
},
221307
expectedPodStats: expectedStats{
222308
UsageCoreNanoSeconds: 400,
@@ -240,7 +326,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
240326
assert.NotNil(t, err)
241327
return
242328
}
243-
329+
assert.Nil(t, err)
244330
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
245331
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
246332

@@ -276,6 +362,33 @@ func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statist
276362
}
277363
}
278364

365+
func newContainer(id string, status containerstore.Status, stats *stats.ContainerStats) containerstore.Container {
366+
cntr, err := containerstore.NewContainer(containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(status))
367+
if err != nil {
368+
panic(err)
369+
}
370+
if stats != nil {
371+
cntr.Stats = stats
372+
}
373+
return cntr
374+
}
375+
376+
var exitedValid = containerstore.Status{
377+
StartedAt: time.Now().UnixNano(),
378+
FinishedAt: time.Now().UnixNano(),
379+
ExitCode: 0,
380+
}
381+
382+
var exitedInvalid = containerstore.Status{
383+
StartedAt: time.Now().UnixNano(),
384+
FinishedAt: time.Now().UnixNano(),
385+
ExitCode: 1,
386+
}
387+
388+
var running = containerstore.Status{
389+
StartedAt: time.Now().UnixNano(),
390+
}
391+
279392
func Test_criService_saveSandBoxMetrics(t *testing.T) {
280393

281394
timestamp := time.Now()

pkg/cri/server/sandbox_stats_windows.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ func (c *criService) podSandboxStats(
7171
podSandboxStats.Windows.Cpu = podCPU.Cpu
7272
podSandboxStats.Windows.Memory = podCPU.Memory
7373
podSandboxStats.Windows.Containers = containerStats
74-
7574
podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp)
7675

7776
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
@@ -128,6 +127,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
128127
for _, cntr := range containers {
129128
containerMetric := statsMap[cntr.ID]
130129

130+
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
131+
// containers that are just created, in a failed state or exited (init containers) will not have stats
132+
continue
133+
}
134+
131135
if containerMetric == nil {
132136
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
133137
}

0 commit comments

Comments
 (0)