Skip to content

Commit 7dd4b4e

Browse files
committed
Merge branch 'main' into deprecate-mount-apis
* main: fix(reaper): fix race condition when reusing reapers (#1904)
2 parents 01add1d + fc966d5 commit 7dd4b4e

File tree

2 files changed

+121
-13
lines changed

2 files changed

+121
-13
lines changed

reaper.go

Lines changed: 75 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"math/rand"
88
"net"
9+
"regexp"
910
"strings"
1011
"sync"
1112
"time"
@@ -50,6 +51,14 @@ func NewReaper(ctx context.Context, sessionID string, provider ReaperProvider, r
5051
return reuseOrCreateReaper(ctx, sessionID, provider, WithImageName(reaperImageName))
5152
}
5253

54+
// reaperContainerNameFromSessionID returns the container name that uniquely
55+
// identifies the container based on the session id.
56+
func reaperContainerNameFromSessionID(sessionID string) string {
57+
// The session id is 64 characters, so we will not hit the limit of 128
58+
// characters for container names.
59+
return fmt.Sprintf("reaper_%s", sessionID)
60+
}
61+
5362
// lookUpReaperContainer returns a DockerContainer type with the reaper container in the case
5463
// it's found in the running state, and including the labels for sessionID, reaper, and ryuk.
5564
// It will perform a retry with exponential backoff to allow for the container to be started and
@@ -67,7 +76,7 @@ func lookUpReaperContainer(ctx context.Context, sessionID string) (*DockerContai
6776

6877
// we want random intervals between 100ms and 500ms for concurrent executions
6978
// to not be synchronized: it could be the case that multiple executions of this
70-
// function happen at the same time (specially when called from a different test
79+
// function happen at the same time (specifically when called from a different test
7180
// process execution), and we want to avoid that they all try to find the reaper
7281
// container at the same time.
7382
exp.InitialInterval = time.Duration(rand.Intn(5)*100) * time.Millisecond
@@ -82,6 +91,7 @@ func lookUpReaperContainer(ctx context.Context, sessionID string) (*DockerContai
8291
filters.Arg("label", fmt.Sprintf("%s=%s", testcontainersdocker.LabelSessionID, sessionID)),
8392
filters.Arg("label", fmt.Sprintf("%s=%t", testcontainersdocker.LabelReaper, true)),
8493
filters.Arg("label", fmt.Sprintf("%s=%t", testcontainersdocker.LabelRyuk, true)),
94+
filters.Arg("name", reaperContainerNameFromSessionID(sessionID)),
8595
}
8696

8797
resp, err := dockerClient.ContainerList(ctx, types.ContainerListOptions{
@@ -146,19 +156,11 @@ func reuseOrCreateReaper(ctx context.Context, sessionID string, provider ReaperP
146156
reaperContainer, err := lookUpReaperContainer(context.Background(), sessionID)
147157
if err == nil && reaperContainer != nil {
148158
// The reaper container exists as a Docker container: re-use it
149-
endpoint, err := reaperContainer.PortEndpoint(ctx, "8080", "")
159+
Logger.Printf("🔥 Reaper obtained from Docker for this test session %s", reaperContainer.ID)
160+
reaperInstance, err = reuseReaperContainer(ctx, sessionID, provider, reaperContainer)
150161
if err != nil {
151162
return nil, err
152163
}
153-
154-
Logger.Printf("🔥 Reaper obtained from Docker for this test session %s", reaperContainer.ID)
155-
reaperInstance = &Reaper{
156-
Provider: provider,
157-
SessionID: sessionID,
158-
Endpoint: endpoint,
159-
container: reaperContainer,
160-
}
161-
162164
return reaperInstance, nil
163165
}
164166

@@ -182,8 +184,25 @@ func reuseOrCreateReaper(ctx context.Context, sessionID string, provider ReaperP
182184
return reaperInstance, nil
183185
}
184186

185-
// newReaper creates a Reaper with a sessionID to identify containers and a provider to use
186-
// Do not call this directly, use reuseOrCreateReaper instead
187+
var createContainerFailDueToNameConflictRegex = regexp.MustCompile("Conflict. The container name .* is already in use by container .*")
188+
189+
// reuseReaperContainer constructs a Reaper from an already running reaper
190+
// DockerContainer.
191+
func reuseReaperContainer(ctx context.Context, sessionID string, provider ReaperProvider, reaperContainer *DockerContainer) (*Reaper, error) {
192+
endpoint, err := reaperContainer.PortEndpoint(ctx, "8080", "")
193+
if err != nil {
194+
return nil, err
195+
}
196+
return &Reaper{
197+
Provider: provider,
198+
SessionID: sessionID,
199+
Endpoint: endpoint,
200+
container: reaperContainer,
201+
}, nil
202+
}
203+
204+
// newReaper creates a Reaper with a sessionID to identify containers and a
205+
// provider to use. Do not call this directly, use reuseOrCreateReaper instead.
187206
func newReaper(ctx context.Context, sessionID string, provider ReaperProvider, opts ...ContainerOption) (*Reaper, error) {
188207
dockerHostMount := testcontainersdocker.ExtractDockerSocket(ctx)
189208

@@ -208,6 +227,7 @@ func newReaper(ctx context.Context, sessionID string, provider ReaperProvider, o
208227
Labels: testcontainersdocker.DefaultLabels(sessionID),
209228
Privileged: tcConfig.RyukPrivileged,
210229
WaitingFor: wait.ForListeningPort(listeningPort),
230+
Name: reaperContainerNameFromSessionID(sessionID),
211231
ReaperOptions: opts,
212232
HostConfigModifier: func(hc *container.HostConfig) {
213233
hc.AutoRemove = true
@@ -237,6 +257,48 @@ func newReaper(ctx context.Context, sessionID string, provider ReaperProvider, o
237257

238258
c, err := provider.RunContainer(ctx, req)
239259
if err != nil {
260+
// We need to check whether the error is caused by a container with the same name
261+
// already existing due to race conditions. We manually match the error message
262+
// as we do not have any error types to check against.
263+
if createContainerFailDueToNameConflictRegex.MatchString(err.Error()) {
264+
// Manually retrieve the already running reaper container. However, we need to
265+
// use retries here as there are two possible race conditions that might lead to
266+
// errors: In most cases, there is a small delay between container creation and
267+
// actually being visible in list-requests. This means that creation might fail
268+
// due to name conflicts, but when we list containers with this name, we do not
269+
// get any results. In another case, the container might have simply died in the
270+
// meantime and therefore cannot be found.
271+
const timeout = 5 * time.Second
272+
const cooldown = 100 * time.Millisecond
273+
start := time.Now()
274+
var reaperContainer *DockerContainer
275+
for time.Since(start) < timeout {
276+
reaperContainer, err = lookUpReaperContainer(ctx, sessionID)
277+
if err == nil && reaperContainer != nil {
278+
break
279+
}
280+
select {
281+
case <-ctx.Done():
282+
case <-time.After(cooldown):
283+
}
284+
}
285+
if err != nil {
286+
return nil, fmt.Errorf("look up reaper container due to name conflict failed: %w", err)
287+
}
288+
// If the reaper container was not found, it is most likely to have died in
289+
// between as we can exclude any client errors because of the previous error
290+
// check. Because the reaper should only die if it performed clean-ups, we can
291+
// fail here as the reaper timeout needs to be increased, anyway.
292+
if reaperContainer == nil {
293+
return nil, fmt.Errorf("look up reaper container returned nil although creation failed due to name conflict")
294+
}
295+
Logger.Printf("🔥 Reaper obtained from Docker for this test session %s", reaperContainer.ID)
296+
reaper, err := reuseReaperContainer(ctx, sessionID, provider, reaperContainer)
297+
if err != nil {
298+
return nil, err
299+
}
300+
return reaper, nil
301+
}
240302
return nil, err
241303
}
242304
reaper.container = c

reaper_test.go

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -515,3 +515,49 @@ func TestReaper_reuseItFromOtherTestProgramUsingDocker(t *testing.T) {
515515
terminateContainerOnEnd(t, ctx, reaper.container)
516516
}
517517
}
518+
519+
// TestReaper_ReuseRunning tests whether reusing the reaper works as expected
520+
// when using testcontainers concurrently from multiple packages. In this
521+
// case, global locks have no effect, as Go runs tests of different packages
522+
// in isolation. Therefore, this test deliberately does not use the same
523+
// locking logic. We expect reaper creation to still succeed when a reaper
524+
// is already running for the same session id, by returning that reaper's
525+
// container instance instead.
526+
func TestReaper_ReuseRunning(t *testing.T) {
527+
const concurrency = 64
528+
529+
timeout, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
530+
defer cancel()
531+
532+
sessionID := SessionID()
533+
534+
dockerProvider, err := NewDockerProvider()
535+
require.NoError(t, err, "new docker provider should not fail")
536+
537+
obtainedReaperContainerIDs := make([]string, concurrency)
538+
var wg sync.WaitGroup
539+
for i := 0; i < concurrency; i++ {
540+
i := i
541+
wg.Add(1)
542+
go func() {
543+
defer wg.Done()
544+
reaperContainer, err := lookUpReaperContainer(timeout, sessionID)
545+
if err == nil && reaperContainer != nil {
546+
// Found.
547+
obtainedReaperContainerIDs[i] = reaperContainer.GetContainerID()
548+
return
549+
}
550+
// Not found -> create.
551+
createdReaper, err := newReaper(timeout, sessionID, dockerProvider)
552+
require.NoError(t, err, "new reaper should not fail")
553+
obtainedReaperContainerIDs[i] = createdReaper.container.GetContainerID()
554+
}()
555+
}
556+
wg.Wait()
557+
558+
// Assure that all calls returned the same container.
559+
firstContainerID := obtainedReaperContainerIDs[0]
560+
for i, containerID := range obtainedReaperContainerIDs {
561+
assert.Equal(t, firstContainerID, containerID, "call %d should have returned same container id", i)
562+
}
563+
}

0 commit comments

Comments
 (0)