Skip to content

Commit 3b87d46

Browse files
committed
Add tracing spans in CRI image service and pull.go
Signed-off-by: Swagat Bora <[email protected]> Add spans around image unpack operations Use image.ref to denote image name and image.id for the image config digest Add top-level spand and record errors in the CRI instrumentation service
1 parent bb0c380 commit 3b87d46

18 files changed

Lines changed: 226 additions & 10 deletions

File tree

docs/tracing.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ func clientWithTrace() error {
8282
defer cancel()
8383

8484
ctx, span := tracing.StartSpan(ctx, "OPERATION NAME")
85-
defer span.End()
85+
defer tracing.StopSpan(span)
8686
...
8787
}
8888
```

integration/client/go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ require (
3333
github.com/davecgh/go-spew v1.1.1 // indirect
3434
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
3535
github.com/docker/go-units v0.4.0 // indirect
36+
github.com/go-logr/logr v1.2.3 // indirect
37+
github.com/go-logr/stdr v1.2.2 // indirect
3638
github.com/godbus/dbus/v5 v5.0.6 // indirect
3739
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
3840
github.com/golang/protobuf v1.5.2 // indirect
@@ -50,6 +52,8 @@ require (
5052
github.com/pkg/errors v0.9.1 // indirect
5153
github.com/pmezard/go-difflib v1.0.0 // indirect
5254
go.opencensus.io v0.23.0 // indirect
55+
go.opentelemetry.io/otel v1.7.0 // indirect
56+
go.opentelemetry.io/otel/trace v1.7.0 // indirect
5357
golang.org/x/mod v0.6.0 // indirect
5458
golang.org/x/net v0.1.0 // indirect
5559
golang.org/x/sync v0.1.0 // indirect

integration/client/go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,9 @@ github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTg
258258
github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
259259
github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
260260
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
261+
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
261262
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
263+
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
262264
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
263265
github.com/go-logr/zapr v1.2.0/go.mod h1:Qa4Bsj2Vb+FAVeAKsLD8RLQ+YRJB8YDmOAKxaBQf7Ro=
264266
github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0=
@@ -704,6 +706,7 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.2
704706
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0/go.mod h1:J0dBVrt7dPS/lKJyQoW0xzQiUr4r2Ik1VwPjAUWnofI=
705707
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4=
706708
go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo=
709+
go.opentelemetry.io/otel v1.7.0 h1:Z2lA3Tdch0iDcrhJXDIlC94XE+bxok1F9B+4Lz/lGsM=
707710
go.opentelemetry.io/otel v1.7.0/go.mod h1:5BdUoMIz5WEs0vt0CUEMtSSaTSHBBVwrhnz7+nrD5xk=
708711
go.opentelemetry.io/otel/exporters/otlp v0.20.0/go.mod h1:YIieizyaN77rtLJra0buKiNBOm9XQfkPEKBeuhoMwAM=
709712
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0/go.mod h1:M1hVZHNxcbkAlcvrOMlpQ4YOO3Awf+4N2dxkZL3xm04=
@@ -717,6 +720,7 @@ go.opentelemetry.io/otel/sdk v1.7.0/go.mod h1:uTEOTwaqIVuTGiJN7ii13Ibp75wJmYUDe3
717720
go.opentelemetry.io/otel/sdk/export/metric v0.20.0/go.mod h1:h7RBNMsDJ5pmI1zExLi+bJK+Dr8NQCh0qGhm1KDnNlE=
718721
go.opentelemetry.io/otel/sdk/metric v0.20.0/go.mod h1:knxiS8Xd4E/N+ZqKmUPf3gTTZ4/0TjTXukfxjzSTpHE=
719722
go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw=
723+
go.opentelemetry.io/otel/trace v1.7.0 h1:O37Iogk1lEkMRXewVtZ1BBTVn5JEp8GrJvP92bJqC6o=
720724
go.opentelemetry.io/otel/trace v1.7.0/go.mod h1:fzLSB9nqR2eXzxPXb2JW9IKE+ScyXA48yyE4TNvoHqU=
721725
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
722726
go.opentelemetry.io/proto/otlp v0.16.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U=

pkg/cri/sbserver/helpers.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ const (
9595

9696
// runtimeRunhcsV1 is the runtime type for runhcs.
9797
runtimeRunhcsV1 = "io.containerd.runhcs.v1"
98+
99+
// name prefix for CRI sbserver specific spans
100+
criSbServerSpanPrefix = "pkg.cri.sbserver"
101+
102+
spanDelimiter = "."
98103
)
99104

100105
// makeSandboxName generates sandbox name from sandbox metadata. The name
@@ -501,3 +506,10 @@ func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status)
501506
}
502507
return status
503508
}
509+
510+
func makeSpanName(funcName string) string {
511+
return strings.Join([]string{
512+
criSbServerSpanPrefix,
513+
funcName,
514+
}, spanDelimiter)
515+
}

pkg/cri/sbserver/image_pull.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"github.com/containerd/imgcrypt"
3737
"github.com/containerd/imgcrypt/images/encryption"
3838
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
39+
"go.opentelemetry.io/otel/attribute"
3940
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
4041

4142
"github.com/containerd/containerd"
@@ -48,6 +49,7 @@ import (
4849
distribution "github.com/containerd/containerd/reference/docker"
4950
"github.com/containerd/containerd/remotes/docker"
5051
"github.com/containerd/containerd/remotes/docker/config"
52+
"github.com/containerd/containerd/tracing"
5153
)
5254

5355
// For image management:
@@ -93,6 +95,7 @@ import (
9395

9496
// PullImage pulls an image with authentication config.
9597
func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) {
98+
span := tracing.CurrentSpan(ctx)
9699
imageRef := r.GetImage().GetImage()
97100
namedRef, err := distribution.ParseDockerRef(imageRef)
98101
if err != nil {
@@ -133,6 +136,10 @@ func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest)
133136
return nil, err
134137
}
135138
log.G(ctx).Debugf("PullImage %q with snapshotter %s", ref, snapshotter)
139+
span.SetAttributes(
140+
attribute.String("image.ref", ref),
141+
attribute.String("snapshotter.name", snapshotter),
142+
)
136143

137144
pullOpts := []containerd.RemoteOpt{
138145
containerd.WithSchema1Conversion, //nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility.
@@ -165,6 +172,7 @@ func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest)
165172
if err != nil {
166173
return nil, fmt.Errorf("failed to pull and unpack image %q: %w", ref, err)
167174
}
175+
span.AddEvent("Pull and unpack image complete")
168176

169177
configDesc, err := image.Config(ctx)
170178
if err != nil {

pkg/cri/sbserver/image_remove.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import (
2222

2323
"github.com/containerd/containerd/errdefs"
2424
"github.com/containerd/containerd/images"
25+
"github.com/containerd/containerd/tracing"
26+
"go.opentelemetry.io/otel/attribute"
2527

2628
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2729
)
@@ -33,15 +35,17 @@ import (
3335
// Remove the whole image no matter the it's image id or reference. This is the
3436
// semantic defined in CRI now.
3537
func (c *criService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (*runtime.RemoveImageResponse, error) {
38+
span := tracing.CurrentSpan(ctx)
3639
image, err := c.localResolve(r.GetImage().GetImage())
3740
if err != nil {
3841
if errdefs.IsNotFound(err) {
42+
span.AddEvent(err.Error())
3943
// return empty without error when image not found.
4044
return &runtime.RemoveImageResponse{}, nil
4145
}
4246
return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err)
4347
}
44-
48+
span.SetAttributes(attribute.String("image.id", image.ID))
4549
// Remove all image references.
4650
for i, ref := range image.References {
4751
var opts []images.DeleteOpt

pkg/cri/sbserver/image_status.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424
"github.com/containerd/containerd/errdefs"
2525
"github.com/containerd/containerd/log"
2626
imagestore "github.com/containerd/containerd/pkg/cri/store/image"
27+
"github.com/containerd/containerd/tracing"
28+
"go.opentelemetry.io/otel/attribute"
2729

2830
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
2931
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@@ -33,14 +35,17 @@ import (
3335
// TODO(random-liu): We should change CRI to distinguish image id and image spec. (See
3436
// kubernetes/kubernetes#46255)
3537
func (c *criService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (*runtime.ImageStatusResponse, error) {
38+
span := tracing.CurrentSpan(ctx)
3639
image, err := c.localResolve(r.GetImage().GetImage())
3740
if err != nil {
3841
if errdefs.IsNotFound(err) {
42+
span.AddEvent(err.Error())
3943
// return empty without error when image not found.
4044
return &runtime.ImageStatusResponse{}, nil
4145
}
4246
return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err)
4347
}
48+
span.SetAttributes(attribute.String("image.id", image.ID))
4449
// TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
4550
// doesn't exist?
4651

pkg/cri/sbserver/instrumented_service.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/containerd/containerd/errdefs"
2424
"github.com/containerd/containerd/log"
25+
"github.com/containerd/containerd/tracing"
2526
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2627
runtime_alpha "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
2728

@@ -931,6 +932,8 @@ func (in *instrumentedService) PullImage(ctx context.Context, r *runtime.PullIma
931932
if err := in.checkInitialized(); err != nil {
932933
return nil, err
933934
}
935+
ctx, span := tracing.StartSpan(ctx, makeSpanName("PullImage"))
936+
defer tracing.StopSpan(span)
934937
log.G(ctx).Infof("PullImage %q", r.GetImage().GetImage())
935938
defer func() {
936939
if err != nil {
@@ -939,6 +942,7 @@ func (in *instrumentedService) PullImage(ctx context.Context, r *runtime.PullIma
939942
log.G(ctx).Infof("PullImage %q returns image reference %q",
940943
r.GetImage().GetImage(), res.GetImageRef())
941944
}
945+
tracing.SetSpanStatus(span, err)
942946
}()
943947
res, err = in.c.PullImage(ctrdutil.WithNamespace(ctx), r)
944948
return res, errdefs.ToGRPC(err)
@@ -948,6 +952,8 @@ func (in *instrumentedAlphaService) PullImage(ctx context.Context, r *runtime_al
948952
if err := in.checkInitialized(); err != nil {
949953
return nil, err
950954
}
955+
ctx, span := tracing.StartSpan(ctx, makeSpanName("PullImage"))
956+
defer tracing.StopSpan(span)
951957
log.G(ctx).Infof("PullImage %q", r.GetImage().GetImage())
952958
defer func() {
953959
if err != nil {
@@ -956,6 +962,7 @@ func (in *instrumentedAlphaService) PullImage(ctx context.Context, r *runtime_al
956962
log.G(ctx).Infof("PullImage %q returns image reference %q",
957963
r.GetImage().GetImage(), res.GetImageRef())
958964
}
965+
tracing.SetSpanStatus(span, err)
959966
}()
960967
// converts request and response for earlier CRI version to call and get response from the current version
961968
var v1r runtime.PullImageRequest
@@ -986,6 +993,8 @@ func (in *instrumentedService) ListImages(ctx context.Context, r *runtime.ListIm
986993
if err := in.checkInitialized(); err != nil {
987994
return nil, err
988995
}
996+
ctx, span := tracing.StartSpan(ctx, makeSpanName("ListImages"))
997+
defer tracing.StopSpan(span)
989998
log.G(ctx).Tracef("ListImages with filter %+v", r.GetFilter())
990999
defer func() {
9911000
if err != nil {
@@ -994,6 +1003,7 @@ func (in *instrumentedService) ListImages(ctx context.Context, r *runtime.ListIm
9941003
log.G(ctx).Tracef("ListImages with filter %+v returns image list %+v",
9951004
r.GetFilter(), res.GetImages())
9961005
}
1006+
tracing.SetSpanStatus(span, err)
9971007
}()
9981008
res, err = in.c.ListImages(ctrdutil.WithNamespace(ctx), r)
9991009
return res, errdefs.ToGRPC(err)
@@ -1003,6 +1013,8 @@ func (in *instrumentedAlphaService) ListImages(ctx context.Context, r *runtime_a
10031013
if err := in.checkInitialized(); err != nil {
10041014
return nil, err
10051015
}
1016+
ctx, span := tracing.StartSpan(ctx, makeSpanName("ListImages"))
1017+
defer tracing.StopSpan(span)
10061018
log.G(ctx).Tracef("ListImages with filter %+v", r.GetFilter())
10071019
defer func() {
10081020
if err != nil {
@@ -1011,6 +1023,7 @@ func (in *instrumentedAlphaService) ListImages(ctx context.Context, r *runtime_a
10111023
log.G(ctx).Tracef("ListImages with filter %+v returns image list %+v",
10121024
r.GetFilter(), res.GetImages())
10131025
}
1026+
tracing.SetSpanStatus(span, err)
10141027
}()
10151028
// converts request and response for earlier CRI version to call and get response from the current version
10161029
var v1r runtime.ListImagesRequest
@@ -1041,6 +1054,8 @@ func (in *instrumentedService) ImageStatus(ctx context.Context, r *runtime.Image
10411054
if err := in.checkInitialized(); err != nil {
10421055
return nil, err
10431056
}
1057+
ctx, span := tracing.StartSpan(ctx, makeSpanName("ImageStatus"))
1058+
defer tracing.StopSpan(span)
10441059
log.G(ctx).Tracef("ImageStatus for %q", r.GetImage().GetImage())
10451060
defer func() {
10461061
if err != nil {
@@ -1049,6 +1064,7 @@ func (in *instrumentedService) ImageStatus(ctx context.Context, r *runtime.Image
10491064
log.G(ctx).Tracef("ImageStatus for %q returns image status %+v",
10501065
r.GetImage().GetImage(), res.GetImage())
10511066
}
1067+
tracing.SetSpanStatus(span, err)
10521068
}()
10531069
res, err = in.c.ImageStatus(ctrdutil.WithNamespace(ctx), r)
10541070
return res, errdefs.ToGRPC(err)
@@ -1058,6 +1074,8 @@ func (in *instrumentedAlphaService) ImageStatus(ctx context.Context, r *runtime_
10581074
if err := in.checkInitialized(); err != nil {
10591075
return nil, err
10601076
}
1077+
ctx, span := tracing.StartSpan(ctx, makeSpanName("ImageStatus"))
1078+
defer tracing.StopSpan(span)
10611079
log.G(ctx).Tracef("ImageStatus for %q", r.GetImage().GetImage())
10621080
defer func() {
10631081
if err != nil {
@@ -1066,6 +1084,7 @@ func (in *instrumentedAlphaService) ImageStatus(ctx context.Context, r *runtime_
10661084
log.G(ctx).Tracef("ImageStatus for %q returns image status %+v",
10671085
r.GetImage().GetImage(), res.GetImage())
10681086
}
1087+
tracing.SetSpanStatus(span, err)
10691088
}()
10701089
// converts request and response for earlier CRI version to call and get response from the current version
10711090
var v1r runtime.ImageStatusRequest
@@ -1096,13 +1115,16 @@ func (in *instrumentedService) RemoveImage(ctx context.Context, r *runtime.Remov
10961115
if err := in.checkInitialized(); err != nil {
10971116
return nil, err
10981117
}
1118+
ctx, span := tracing.StartSpan(ctx, makeSpanName("RemoveImage"))
1119+
defer tracing.StopSpan(span)
10991120
log.G(ctx).Infof("RemoveImage %q", r.GetImage().GetImage())
11001121
defer func() {
11011122
if err != nil {
11021123
log.G(ctx).WithError(err).Errorf("RemoveImage %q failed", r.GetImage().GetImage())
11031124
} else {
11041125
log.G(ctx).Infof("RemoveImage %q returns successfully", r.GetImage().GetImage())
11051126
}
1127+
tracing.SetSpanStatus(span, err)
11061128
}()
11071129
res, err := in.c.RemoveImage(ctrdutil.WithNamespace(ctx), r)
11081130
return res, errdefs.ToGRPC(err)
@@ -1112,13 +1134,16 @@ func (in *instrumentedAlphaService) RemoveImage(ctx context.Context, r *runtime_
11121134
if err := in.checkInitialized(); err != nil {
11131135
return nil, err
11141136
}
1137+
ctx, span := tracing.StartSpan(ctx, makeSpanName("RemoveImage"))
1138+
defer tracing.StopSpan(span)
11151139
log.G(ctx).Infof("RemoveImage %q", r.GetImage().GetImage())
11161140
defer func() {
11171141
if err != nil {
11181142
log.G(ctx).WithError(err).Errorf("RemoveImage %q failed", r.GetImage().GetImage())
11191143
} else {
11201144
log.G(ctx).Infof("RemoveImage %q returns successfully", r.GetImage().GetImage())
11211145
}
1146+
tracing.SetSpanStatus(span, err)
11221147
}()
11231148
// converts request and response for earlier CRI version to call and get response from the current version
11241149
var v1r runtime.RemoveImageRequest
@@ -1149,13 +1174,16 @@ func (in *instrumentedService) ImageFsInfo(ctx context.Context, r *runtime.Image
11491174
if err := in.checkInitialized(); err != nil {
11501175
return nil, err
11511176
}
1177+
ctx, span := tracing.StartSpan(ctx, makeSpanName("ImageFsInfo"))
1178+
defer tracing.StopSpan(span)
11521179
log.G(ctx).Debugf("ImageFsInfo")
11531180
defer func() {
11541181
if err != nil {
11551182
log.G(ctx).WithError(err).Error("ImageFsInfo failed")
11561183
} else {
11571184
log.G(ctx).Debugf("ImageFsInfo returns filesystem info %+v", res.ImageFilesystems)
11581185
}
1186+
tracing.SetSpanStatus(span, err)
11591187
}()
11601188
res, err = in.c.ImageFsInfo(ctrdutil.WithNamespace(ctx), r)
11611189
return res, errdefs.ToGRPC(err)
@@ -1165,13 +1193,16 @@ func (in *instrumentedAlphaService) ImageFsInfo(ctx context.Context, r *runtime_
11651193
if err := in.checkInitialized(); err != nil {
11661194
return nil, err
11671195
}
1196+
ctx, span := tracing.StartSpan(ctx, makeSpanName("ImageFsInfo"))
1197+
defer tracing.StopSpan(span)
11681198
log.G(ctx).Debugf("ImageFsInfo")
11691199
defer func() {
11701200
if err != nil {
11711201
log.G(ctx).WithError(err).Error("ImageFsInfo failed")
11721202
} else {
11731203
log.G(ctx).Debugf("ImageFsInfo returns filesystem info %+v", res.ImageFilesystems)
11741204
}
1205+
tracing.SetSpanStatus(span, err)
11751206
}()
11761207
// converts request and response for earlier CRI version to call and get response from the current version
11771208
var v1r runtime.ImageFsInfoRequest

pkg/cri/server/helpers.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ const (
9292

9393
// runtimeRunhcsV1 is the runtime type for runhcs.
9494
runtimeRunhcsV1 = "io.containerd.runhcs.v1"
95+
96+
// name prefix for CRI server specific spans
97+
criServerSpanPrefix = "pkg.cri.server"
98+
99+
spanDelimiter = "."
95100
)
96101

97102
// makeSandboxName generates sandbox name from sandbox metadata. The name
@@ -510,3 +515,10 @@ func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status)
510515
}
511516
return status
512517
}
518+
519+
func makeSpanName(funcName string) string {
520+
return strings.Join([]string{
521+
criServerSpanPrefix,
522+
funcName,
523+
}, spanDelimiter)
524+
}

0 commit comments

Comments
 (0)