Skip to content

Commit 5949a8c

Browse files
feat: [SVLS-8759] cloud run jobs usage metric (#48454)
### What does this PR do? ### Motivation ### Describe how you validated your changes ### Additional Notes Co-authored-by: aleksandr.pasechnik <[email protected]>
1 parent 40f6310 commit 5949a8c

File tree

4 files changed

+70
-7
lines changed

4 files changed

+70
-7
lines changed

cmd/serverless-init/cloudservice/cloudrun_jobs.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package cloudservice
77

88
import (
99
"fmt"
10+
"maps"
1011
"os"
1112
"strconv"
1213
"time"
@@ -100,7 +101,8 @@ func (c *CloudRunJobs) GetEnhancedMetricTags(tags map[string]string) EnhancedMet
100101
"project_id": tagValueOrUnknown(tags["project_id"]),
101102
}
102103

103-
usageTags := map[string]string{}
104+
usageTags := maps.Clone(baseTags)
105+
usageTags["instance"] = tagValueOrUnknown(tags["container_id"])
104106

105107
return EnhancedMetricTags{Base: baseTags, Usage: usageTags}
106108
}
@@ -117,7 +119,7 @@ func (c *CloudRunJobs) GetMetricPrefix() string {
117119
}
118120

119121
func (c *CloudRunJobs) GetUsageMetricSuffix() string {
120-
return ""
122+
return cloudRunUsageMetricSuffix
121123
}
122124

123125
// GetOrigin returns the `origin` attribute type for the given cloud service.

cmd/serverless-init/cloudservice/cloudrun_jobs_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,40 @@ func TestGetCloudRunJobsTagsWithEnvironmentVariables(t *testing.T) {
9090
}, tags)
9191
}
9292

93+
func TestCloudRunJobsGetEnhancedMetricTags(t *testing.T) {
94+
skipOnWindows(t)
95+
service := &CloudRunJobs{}
96+
tags := map[string]string{
97+
"job_name": "test-job",
98+
"location": "us-central1",
99+
"origin": "cloudrunjobs",
100+
"project_id": "test-project",
101+
"container_id": "abc123",
102+
}
103+
result := service.GetEnhancedMetricTags(tags)
104+
105+
assert.Equal(t, map[string]string{
106+
"job_name": "test-job",
107+
"location": "us-central1",
108+
"origin": "cloudrunjobs",
109+
"project_id": "test-project",
110+
}, result.Base)
111+
112+
assert.Equal(t, map[string]string{
113+
"job_name": "test-job",
114+
"location": "us-central1",
115+
"origin": "cloudrunjobs",
116+
"project_id": "test-project",
117+
"instance": "abc123",
118+
}, result.Usage)
119+
}
120+
121+
func TestCloudRunJobsGetUsageMetricSuffix(t *testing.T) {
122+
skipOnWindows(t)
123+
service := &CloudRunJobs{}
124+
assert.Equal(t, "instance", service.GetUsageMetricSuffix())
125+
}
126+
93127
func TestCloudRunJobsGetOrigin(t *testing.T) {
94128
skipOnWindows(t)
95129
service := &CloudRunJobs{}

cmd/serverless-init/enhanced-metrics/collector.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,14 @@ func (c *Collector) collectLoop(ctx context.Context) {
146146

147147
// collect collects the enhanced metrics from the cgroup and sends them to the metric agent
148148
func (c *Collector) collect() {
149+
collectionTime := time.Now()
150+
timestamp := float64(collectionTime.UnixNano()) / float64(time.Second)
151+
152+
// Always send the usage metric, regardless of cgroup collection success.
153+
if c.usageMetricSuffix != "" {
154+
c.metricAgent.AddEnhancedUsageMetric(c.metricPrefix+c.usageMetricSuffix, 1, c.metricSource, timestamp)
155+
}
156+
149157
if err := c.cgroupReader.RefreshCgroups(0); err != nil {
150158
log.Warnf("Failed to refresh cgroups: %v", err)
151159
return
@@ -158,7 +166,6 @@ func (c *Collector) collect() {
158166
}
159167

160168
stats := &cgroups.Stats{}
161-
collectionTime := time.Now()
162169
allFailed, errs := cgroups.GetStats(cgroup, stats)
163170
if allFailed {
164171
log.Warnf("Failed to get cgroup stats: %v", errs)
@@ -301,10 +308,6 @@ func calculateCPUUsage(currentTotal *uint64, previousTotal *uint64, currentTime
301308

302309
// sendMetrics sends the enhanced metrics to the metric agent
303310
func (c *Collector) sendMetrics(enhancedMetrics ServerlessEnhancedMetrics) {
304-
if c.usageMetricSuffix != "" {
305-
c.metricAgent.AddEnhancedUsageMetric(c.metricPrefix+c.usageMetricSuffix, 1, c.metricSource, enhancedMetrics.Timestamp)
306-
}
307-
308311
// CPU usage in nanocores
309312
// Skip when value is NaN since this value is used on the first collect before the rate can be computed
310313
if !math.IsNaN(enhancedMetrics.CPUUsage) {

cmd/serverless-init/enhanced-metrics/collector_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
package enhancedmetrics
1010

1111
import (
12+
"errors"
1213
"math"
1314
"testing"
1415
"time"
@@ -283,3 +284,26 @@ func TestCalculateCPUUsageValueDiffPositive(t *testing.T) {
283284

284285
assert.Equal(t, float64(1e8), CPUUsage)
285286
}
287+
288+
func TestCollectorSendsUsageMetricOnCgroupFailure(t *testing.T) {
289+
mockAgent := new(mockEnhancedMetricSender)
290+
mockAgent.On("AddEnhancedUsageMetric", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return()
291+
292+
mockReader := &mockCgroupReader{refreshErr: errors.New("cgroup failure"), version: 1}
293+
294+
c := &Collector{
295+
metricAgent: mockAgent,
296+
metricSource: metrics.MetricSourceGoogleCloudRunEnhanced,
297+
cgroupReader: mockReader,
298+
metricPrefix: "gcp.run.container.enhanced.",
299+
usageMetricSuffix: "instance",
300+
previousRateStats: NullServerlessRateStats,
301+
}
302+
303+
c.collect()
304+
305+
mockAgent.AssertCalled(t, "AddEnhancedUsageMetric",
306+
"gcp.run.container.enhanced.instance", float64(1),
307+
metrics.MetricSourceGoogleCloudRunEnhanced, mock.Anything, mock.Anything)
308+
mockAgent.AssertNotCalled(t, "AddEnhancedMetric", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything)
309+
}

0 commit comments

Comments
 (0)