Skip to content

Commit efbf279

Browse files
nicklaslclaudetoddbaert
authored
fix: allow for providers to safely shutdown (#1744)
* fix: allow for providers to safely shutdown Signed-off-by: Nicklas Lundin <[email protected]> * fix: prevent race conditions during repository shutdown Signed-off-by: Nicklas Lundin <[email protected]> * test: comment out ProviderRepositoryCT due to VMLens limitation VMLens crashes with NPE when ThreadPoolExecutor.shutdown() is called inside AllInterleavings block. Co-Authored-By: Claude Opus 4.5 <[email protected]> Signed-off-by: Nicklas Lundin <[email protected]> * refactor: use throws declaration instead of try-catch in test helper Co-Authored-By: Claude Opus 4.5 <[email protected]> Signed-off-by: Nicklas Lundin <[email protected]> * fix: prevent deadlock during shutdown with pending init tasks Split ProviderRepository.shutdown() into prepareShutdown() and completeShutdown() phases. OpenFeatureAPI.shutdown() now releases the write lock before waiting for executor termination, allowing pending initializeProvider tasks to acquire read lock for event emission. Co-Authored-By: Claude Opus 4.5 <[email protected]> Signed-off-by: Nicklas Lundin <[email protected]> * test: simplify VMLens concurrency test for multiple providers Reduce from 3 to 2 providers to work around VMLens graph building bug. Co-Authored-By: Claude Opus 4.5 <[email protected]> Signed-off-by: Nicklas Lundin <[email protected]> * additional test for setProvider and shutdown Signed-off-by: Nicklas Lundin <[email protected]> * test: add coverage for shutdown edge cases Co-Authored-By: Claude Opus 4.5 <[email protected]> Signed-off-by: Nicklas Lundin <[email protected]> * fixup: use timeout const Signed-off-by: Todd Baert <[email protected]> --------- Signed-off-by: Nicklas Lundin <[email protected]> Signed-off-by: Todd Baert <[email protected]> Co-authored-by: Claude Opus 4.5 <[email protected]> Co-authored-by: Todd Baert <[email protected]>
1 parent 8cc5d40 commit efbf279

5 files changed

Lines changed: 453 additions & 14 deletions

File tree

src/main/java/dev/openfeature/sdk/OpenFeatureAPI.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,12 +339,23 @@ public void clearHooks() {
339339
* Once shut down is complete, API is reset and ready to use again.
340340
*/
341341
public void shutdown() {
342+
List<FeatureProviderStateManager> managersToShutdown;
342343
try (AutoCloseableLock ignored = lock.writeLockAutoCloseable()) {
343-
providerRepository.shutdown();
344+
// Mark repository as shutting down while holding lock.
345+
// This ensures setProvider calls will throw IllegalStateException.
346+
managersToShutdown = providerRepository.prepareShutdown();
347+
}
348+
349+
if (managersToShutdown != null) {
350+
// Complete shutdown without holding lock to avoid deadlock.
351+
// Pending tasks (e.g., initializeProvider) may need the read lock to emit events.
352+
providerRepository.completeShutdown(managersToShutdown);
344353
eventSupport.shutdown();
345354

346-
providerRepository = new ProviderRepository(this);
347-
eventSupport = new EventSupport();
355+
try (AutoCloseableLock ignored = lock.writeLockAutoCloseable()) {
356+
providerRepository = new ProviderRepository(this);
357+
eventSupport = new EventSupport();
358+
}
348359
}
349360
}
350361

src/main/java/dev/openfeature/sdk/ProviderRepository.java

Lines changed: 72 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import java.util.concurrent.ConcurrentHashMap;
1111
import java.util.concurrent.ExecutorService;
1212
import java.util.concurrent.Executors;
13+
import java.util.concurrent.TimeUnit;
14+
import java.util.concurrent.atomic.AtomicBoolean;
1315
import java.util.concurrent.atomic.AtomicReference;
1416
import java.util.function.BiConsumer;
1517
import java.util.function.Consumer;
@@ -23,6 +25,7 @@ class ProviderRepository {
2325
private final Map<String, FeatureProviderStateManager> stateManagers = new ConcurrentHashMap<>();
2426
private final AtomicReference<FeatureProviderStateManager> defaultStateManger =
2527
new AtomicReference<>(new FeatureProviderStateManager(new NoOpProvider()));
28+
private final AtomicBoolean isShuttingDown = new AtomicBoolean(false);
2629
private final ExecutorService taskExecutor =
2730
Executors.newCachedThreadPool(new ConfigurableThreadFactory("openfeature-provider-thread", true));
2831
private final Object registerStateManagerLock = new Object();
@@ -162,6 +165,9 @@ private void prepareAndInitializeProvider(
162165
final FeatureProviderStateManager oldStateManager;
163166

164167
synchronized (registerStateManagerLock) {
168+
if (isShuttingDown.get()) {
169+
throw new IllegalStateException("Provider cannot be set while repository is shutting down");
170+
}
165171
FeatureProviderStateManager existing = getExistingStateManagerForProvider(newProvider);
166172
if (existing == null) {
167173
newStateManager = new FeatureProviderStateManager(newProvider);
@@ -228,9 +234,11 @@ private void initializeProvider(
228234
}
229235

230236
private void shutDownOld(FeatureProviderStateManager oldManager, Consumer<FeatureProvider> afterShutdown) {
231-
if (oldManager != null && !isStateManagerRegistered(oldManager)) {
232-
shutdownProvider(oldManager);
233-
afterShutdown.accept(oldManager.getProvider());
237+
synchronized (registerStateManagerLock) {
238+
if (oldManager != null && !isStateManagerRegistered(oldManager)) {
239+
shutdownProvider(oldManager);
240+
afterShutdown.accept(oldManager.getProvider());
241+
}
234242
}
235243
}
236244

@@ -254,16 +262,27 @@ private void shutdownProvider(FeatureProviderStateManager manager) {
254262
}
255263

256264
private void shutdownProvider(FeatureProvider provider) {
257-
taskExecutor.submit(() -> {
265+
try {
266+
taskExecutor.submit(() -> {
267+
try {
268+
provider.shutdown();
269+
} catch (Exception e) {
270+
log.error(
271+
"Exception when shutting down feature provider {}",
272+
provider.getClass().getName(),
273+
e);
274+
}
275+
});
276+
} catch (java.util.concurrent.RejectedExecutionException e) {
258277
try {
259278
provider.shutdown();
260-
} catch (Exception e) {
279+
} catch (Exception ex) {
261280
log.error(
262281
"Exception when shutting down feature provider {}",
263282
provider.getClass().getName(),
264-
e);
283+
ex);
265284
}
266-
});
285+
}
267286
}
268287

269288
/**
@@ -272,10 +291,52 @@ private void shutdownProvider(FeatureProvider provider) {
272291
* including the default feature provider.
273292
*/
274293
public void shutdown() {
275-
Stream.concat(Stream.of(this.defaultStateManger.get()), this.stateManagers.values().stream())
276-
.distinct()
277-
.forEach(this::shutdownProvider);
278-
this.stateManagers.clear();
294+
List<FeatureProviderStateManager> managersToShutdown = prepareShutdown();
295+
if (managersToShutdown != null) {
296+
completeShutdown(managersToShutdown);
297+
}
298+
}
299+
300+
/**
301+
* Prepares the repository for shutdown by marking it as shutting down and
302+
* collecting all managers that need to be shut down.
303+
*
304+
* <p>After this call, any attempt to set a provider will throw IllegalStateException.
305+
*
306+
* @return list of managers to shut down, or null if shutdown was already initiated
307+
*/
308+
List<FeatureProviderStateManager> prepareShutdown() {
309+
synchronized (registerStateManagerLock) {
310+
if (isShuttingDown.getAndSet(true)) {
311+
return null;
312+
}
313+
314+
List<FeatureProviderStateManager> managersToShutdown = Stream.concat(
315+
Stream.of(this.defaultStateManger.get()), this.stateManagers.values().stream())
316+
.distinct()
317+
.collect(Collectors.toList());
318+
this.stateManagers.clear();
319+
return managersToShutdown;
320+
}
321+
}
322+
323+
/**
324+
* Completes the shutdown by shutting down all providers and waiting for
325+
* pending tasks to complete.
326+
*
327+
* @param managersToShutdown the managers to shut down (from prepareShutdown)
328+
*/
329+
void completeShutdown(List<FeatureProviderStateManager> managersToShutdown) {
330+
managersToShutdown.forEach(this::shutdownProvider);
279331
taskExecutor.shutdown();
332+
try {
333+
if (!taskExecutor.awaitTermination(EventSupport.SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
334+
log.warn("Task executor did not terminate before the timeout period had elapsed");
335+
taskExecutor.shutdownNow();
336+
}
337+
} catch (InterruptedException e) {
338+
taskExecutor.shutdownNow();
339+
Thread.currentThread().interrupt();
340+
}
280341
}
281342
}

src/test/java/dev/openfeature/sdk/ProviderRepositoryTest.java

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import static dev.openfeature.sdk.testutils.stubbing.ConditionStubber.doDelayResponse;
55
import static org.assertj.core.api.Assertions.assertThat;
66
import static org.assertj.core.api.Assertions.assertThatCode;
7+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
78
import static org.awaitility.Awaitility.await;
89
import static org.mockito.ArgumentMatchers.any;
910
import static org.mockito.ArgumentMatchers.eq;
@@ -15,6 +16,7 @@
1516
import java.util.concurrent.ExecutorService;
1617
import java.util.concurrent.Executors;
1718
import java.util.concurrent.Future;
19+
import java.util.concurrent.atomic.AtomicBoolean;
1820
import java.util.function.BiConsumer;
1921
import java.util.function.Consumer;
2022
import java.util.function.Function;
@@ -289,6 +291,199 @@ void shouldRunLambdasOnError() throws Exception {
289291
verify(afterError, timeout(TIMEOUT)).accept(eq(errorFeatureProvider), any());
290292
}
291293
}
294+
295+
@Nested
296+
class GracefulShutdownBehavior {
297+
298+
@Test
299+
@DisplayName("should complete shutdown successfully when executor terminates within timeout")
300+
void shouldCompleteShutdownSuccessfullyWhenExecutorTerminatesWithinTimeout() {
301+
FeatureProvider provider = createMockedProvider();
302+
setFeatureProvider(provider);
303+
304+
assertThatCode(() -> providerRepository.shutdown()).doesNotThrowAnyException();
305+
306+
verify(provider, timeout(TIMEOUT)).shutdown();
307+
}
308+
309+
@Test
310+
@DisplayName("should force shutdown when executor does not terminate within timeout")
311+
void shouldForceShutdownWhenExecutorDoesNotTerminateWithinTimeout() throws Exception {
312+
FeatureProvider provider = createMockedProvider();
313+
AtomicBoolean wasInterrupted = new AtomicBoolean(false);
314+
doAnswer(invocation -> {
315+
try {
316+
Thread.sleep(TIMEOUT);
317+
} catch (InterruptedException e) {
318+
wasInterrupted.set(true);
319+
throw e;
320+
}
321+
return null;
322+
})
323+
.when(provider)
324+
.shutdown();
325+
326+
setFeatureProvider(provider);
327+
328+
assertThatCode(() -> providerRepository.shutdown()).doesNotThrowAnyException();
329+
330+
verify(provider, timeout(TIMEOUT)).shutdown();
331+
// Verify that shutdownNow() interrupted the running shutdown task
332+
await().atMost(Duration.ofSeconds(1))
333+
.untilAsserted(() -> assertThat(wasInterrupted.get()).isTrue());
334+
}
335+
336+
// Note: shouldHandleInterruptionDuringShutdownGracefully was removed because the
337+
// interrupt timing is not guaranteed. Proper concurrency testing is done in
338+
// ProviderRepositoryCT using VMLens.
339+
340+
@Test
341+
@DisplayName("should not hang indefinitely on shutdown")
342+
void shouldNotHangIndefinitelyOnShutdown() {
343+
FeatureProvider provider = createMockedProvider();
344+
setFeatureProvider(provider);
345+
346+
await().alias("shutdown should complete within reasonable time")
347+
.atMost(Duration.ofSeconds(5))
348+
.until(() -> {
349+
providerRepository.shutdown();
350+
return true;
351+
});
352+
}
353+
354+
@Test
355+
@DisplayName("should handle shutdown during provider initialization")
356+
void shouldHandleShutdownDuringProviderInitialization() throws Exception {
357+
FeatureProvider slowInitProvider = createMockedProvider();
358+
AtomicBoolean shutdownCalled = new AtomicBoolean(false);
359+
360+
doDelayResponse(Duration.ofMillis(500)).when(slowInitProvider).initialize(any());
361+
362+
doAnswer(invocation -> {
363+
shutdownCalled.set(true);
364+
return null;
365+
})
366+
.when(slowInitProvider)
367+
.shutdown();
368+
369+
providerRepository.setProvider(
370+
slowInitProvider,
371+
mockAfterSet(),
372+
mockAfterInit(),
373+
mockAfterShutdown(),
374+
mockAfterError(),
375+
false);
376+
377+
// Call shutdown while initialization is in progress
378+
assertThatCode(() -> providerRepository.shutdown()).doesNotThrowAnyException();
379+
380+
await().atMost(Duration.ofSeconds(1)).untilTrue(shutdownCalled);
381+
verify(slowInitProvider, times(1)).shutdown();
382+
}
383+
384+
@Test
385+
@DisplayName("should handle provider replacement during shutdown")
386+
void shouldHandleProviderReplacementDuringShutdown() throws Exception {
387+
FeatureProvider oldProvider = createMockedProvider();
388+
FeatureProvider newProvider = createMockedProvider();
389+
AtomicBoolean oldProviderShutdownCalled = new AtomicBoolean(false);
390+
391+
doAnswer(invocation -> {
392+
oldProviderShutdownCalled.set(true);
393+
return null;
394+
})
395+
.when(oldProvider)
396+
.shutdown();
397+
398+
providerRepository.setProvider(
399+
oldProvider, mockAfterSet(), mockAfterInit(), mockAfterShutdown(), mockAfterError(), true);
400+
401+
// Replace provider (this will trigger old provider shutdown in background)
402+
providerRepository.setProvider(
403+
newProvider, mockAfterSet(), mockAfterInit(), mockAfterShutdown(), mockAfterError(), false);
404+
405+
assertThatCode(() -> providerRepository.shutdown()).doesNotThrowAnyException();
406+
407+
await().atMost(Duration.ofSeconds(1)).untilTrue(oldProviderShutdownCalled);
408+
verify(oldProvider, times(1)).shutdown();
409+
verify(newProvider, times(1)).shutdown();
410+
}
411+
412+
@Test
413+
@DisplayName("should prevent adding providers after shutdown has started")
414+
void shouldPreventAddingProvidersAfterShutdownHasStarted() {
415+
FeatureProvider provider = createMockedProvider();
416+
setFeatureProvider(provider);
417+
418+
providerRepository.shutdown();
419+
420+
FeatureProvider newProvider = createMockedProvider();
421+
assertThatThrownBy(() -> setFeatureProvider(newProvider))
422+
.isInstanceOf(IllegalStateException.class)
423+
.hasMessageContaining("shutting down");
424+
}
425+
426+
@Test
427+
@DisplayName("prepareShutdown should return null on second call")
428+
void prepareShutdownShouldReturnNullOnSecondCall() {
429+
FeatureProvider provider = createMockedProvider();
430+
setFeatureProvider(provider);
431+
432+
// First call should return managers list
433+
var managers = providerRepository.prepareShutdown();
434+
assertThat(managers).isNotNull();
435+
assertThat(managers).isNotEmpty();
436+
437+
// Second call should be a no-op and return null (already shutting down)
438+
var secondResult = providerRepository.prepareShutdown();
439+
assertThat(secondResult).isNull();
440+
}
441+
442+
@Test
443+
@DisplayName("should fall back to direct shutdown when executor rejects tasks")
444+
void shouldFallBackToDirectShutdownWhenExecutorRejectsTasks() throws Exception {
445+
FeatureProvider oldProvider = createMockedProvider();
446+
FeatureProvider newProvider = createMockedProvider();
447+
AtomicBoolean initializationStarted = new AtomicBoolean(false);
448+
AtomicBoolean proceedWithInit = new AtomicBoolean(false);
449+
450+
// Make oldProvider's initialization block until we signal
451+
doAnswer(invocation -> {
452+
initializationStarted.set(true);
453+
while (!proceedWithInit.get()) {
454+
Thread.sleep(10);
455+
}
456+
return null;
457+
})
458+
.when(oldProvider)
459+
.initialize(any());
460+
461+
// Start async initialization (will block)
462+
providerRepository.setProvider(
463+
oldProvider, mockAfterSet(), mockAfterInit(), mockAfterShutdown(), mockAfterError(), false);
464+
465+
// Wait for initialization to start
466+
await().atMost(Duration.ofSeconds(1)).untilTrue(initializationStarted);
467+
468+
// Now set a new provider - this will trigger shutDownOld for oldProvider
469+
// after initialization completes, but we haven't completed init yet
470+
providerRepository.setProvider(
471+
newProvider, mockAfterSet(), mockAfterInit(), mockAfterShutdown(), mockAfterError(), false);
472+
473+
// Call shutdown on repository - this will shutdown the executor
474+
var managers = providerRepository.prepareShutdown();
475+
providerRepository.completeShutdown(managers);
476+
477+
// Now let the initialization complete - shutDownOld will be called but executor is shutdown
478+
// This triggers the RejectedExecutionException path which falls back to direct shutdown
479+
proceedWithInit.set(true);
480+
481+
// Both providers should eventually be shut down (oldProvider via direct call due to
482+
// RejectedExecutionException)
483+
verify(oldProvider, timeout(TIMEOUT)).shutdown();
484+
verify(newProvider, timeout(TIMEOUT)).shutdown();
485+
}
486+
}
292487
}
293488

294489
@Test

src/test/java/dev/openfeature/sdk/ShutdownBehaviorSpecTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,5 +142,19 @@ void apiIsReadyToUseAfterShutdown() {
142142
NoOpProvider p2 = new NoOpProvider();
143143
api.setProvider(p2);
144144
}
145+
146+
@Test
147+
@DisplayName("calling shutdown twice should be safe and idempotent")
148+
void callingShutdownTwiceShouldBeSafe() {
149+
FeatureProvider provider = ProviderFixture.createMockedProvider();
150+
setFeatureProvider(provider);
151+
152+
api.shutdown();
153+
154+
// Second shutdown should be a no-op (no exception, provider not called twice)
155+
api.shutdown();
156+
157+
verify(provider, times(1)).shutdown();
158+
}
145159
}
146160
}

0 commit comments

Comments
 (0)