Skip to content

Commit 5d181bb

Browse files
authored
feat(spanner): enable long running transaction clean up (#8969)
* feat(spanner): enable long running txn clean up with default as WARN * feat(spanner): remove mutex that does not exist in main branch * feat(spanner): update tests * feat(spanner): comment update
1 parent 9f9b01e commit 5d181bb

File tree

4 files changed

+107
-25
lines changed

4 files changed

+107
-25
lines changed

spanner/README.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,86 @@ row, err := client.Single().ReadRow(ctx, "Users",
3131
if err != nil {
3232
log.Fatal(err)
3333
}
34+
```
35+
36+
### Session Leak
37+
A `Client` object of the Client Library has a limit on the number of maximum sessions. For example the
38+
default value of `MaxOpened`, which is the maximum number of sessions allowed by the session pool in the
39+
Golang Client Library, is 400. You can configure these values at the time of
40+
creating a `Client` by passing custom `SessionPoolConfig` as part of `ClientConfig`. When all the sessions are checked
41+
out of the session pool, every new transaction has to wait until a session is returned to the pool.
42+
If a session is never returned to the pool (hence causing a session leak), the transactions will have to wait
43+
indefinitely and your application will be blocked.
44+
45+
#### Common Root Causes
46+
The most common reason for session leaks in the Golang client library are:
47+
1. Not stopping a `RowIterator` that is returned by `Query`, `Read` and other methods. Always use `RowIterator.Stop()` to ensure that the `RowIterator` is always closed.
48+
2. Not closing a `ReadOnlyTransaction` when you no longer need it. Always call `ReadOnlyTransaction.Close()` after use, to ensure that the `ReadOnlyTransaction` is always closed.
49+
50+
As shown in the example below, the `txn.Close()` statement releases the session after it is complete.
51+
If you fail to call `txn.Close()`, the session is not released back to the pool. The recommended way is to use `defer` as shown below.
52+
```go
53+
client, err := spanner.NewClient(ctx, "projects/P/instances/I/databases/D")
54+
if err != nil {
55+
log.Fatal(err)
56+
}
57+
txn := client.ReadOnlyTransaction()
58+
defer txn.Close()
59+
```
60+
61+
#### Debugging and Resolving Session Leaks
62+
63+
##### Logging inactive transactions
64+
This option logs warnings when you have exhausted >95% of your session pool. It is enabled by default.
65+
This could mean two things; either you need to increase the max sessions in your session pool (as the number
66+
of queries run using the client side database object is greater than your session pool can serve), or you may
67+
have a session leak. To help debug which transactions may be causing this session leak, the logs will also contain stack traces of
68+
transactions which have been running longer than expected if `TrackSessionHandles` under `SessionPoolConfig` is enabled.
69+
70+
```go
71+
sessionPoolConfig := spanner.SessionPoolConfig{
72+
TrackSessionHandles: true,
73+
InactiveTransactionRemovalOptions: spanner.InactiveTransactionRemovalOptions{
74+
ActionOnInactiveTransaction: spanner.Warn,
75+
},
76+
}
77+
client, err := spanner.NewClientWithConfig(
78+
ctx, database, spanner.ClientConfig{SessionPoolConfig: sessionPoolConfig},
79+
)
80+
if err != nil {
81+
log.Fatal(err)
82+
}
83+
defer client.Close()
84+
85+
// Example Log message to warn presence of long running transactions
86+
// session <session-info> checked out of pool at <session-checkout-time> is long running due to possible session leak for goroutine
87+
// <Stack Trace of transaction>
88+
89+
```
90+
91+
##### Automatically clean inactive transactions
92+
When the option to automatically clean inactive transactions is enabled, the client library will automatically detect
93+
problematic transactions that are running for a very long time (thus causing session leaks) and close them.
94+
The session will be removed from the pool and be replaced by a new session. To dig deeper into which transactions are being
95+
closed, you can check the logs to see the stack trace of the transactions which might be causing these leaks and further
96+
debug them.
97+
98+
```go
99+
sessionPoolConfig := spanner.SessionPoolConfig{
100+
TrackSessionHandles: true,
101+
InactiveTransactionRemovalOptions: spanner.InactiveTransactionRemovalOptions{
102+
ActionOnInactiveTransaction: spanner.WarnAndClose,
103+
},
104+
}
105+
client, err := spanner.NewClientWithConfig(
106+
ctx, database, spanner.ClientConfig{SessionPoolConfig: sessionPoolConfig},
107+
)
108+
if err != nil {
109+
log.Fatal(err)
110+
}
111+
defer client.Close()
112+
113+
// Example Log message for when transaction is recycled
114+
// session <session-info> checked out of pool at <session-checkout-time> is long running and will be removed due to possible session leak for goroutine
115+
// <Stack Trace of transaction>
34116
```

spanner/client_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ func TestClient_Single_WhenInactiveTransactionsAndSessionIsNotFoundOnBackend_Rem
216216
MinOpened: 1,
217217
MaxOpened: 1,
218218
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
219-
actionOnInactiveTransaction: WarnAndClose,
219+
ActionOnInactiveTransaction: WarnAndClose,
220220
},
221221
},
222222
})
@@ -1044,7 +1044,7 @@ func TestClient_ReadOnlyTransaction_WhenMultipleOperations_SessionLastUseTimeSho
10441044
MinOpened: 1,
10451045
MaxOpened: 1,
10461046
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
1047-
actionOnInactiveTransaction: WarnAndClose,
1047+
ActionOnInactiveTransaction: WarnAndClose,
10481048
idleTimeThreshold: 300 * time.Millisecond,
10491049
},
10501050
},
@@ -1507,7 +1507,7 @@ func TestClient_ReadWriteTransaction_WhenLongRunningSessionCleaned_TransactionSh
15071507
MinOpened: 1,
15081508
MaxOpened: 1,
15091509
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
1510-
actionOnInactiveTransaction: WarnAndClose,
1510+
ActionOnInactiveTransaction: WarnAndClose,
15111511
},
15121512
},
15131513
})
@@ -1560,7 +1560,7 @@ func TestClient_ReadWriteTransaction_WhenMultipleOperations_SessionLastUseTimeSh
15601560
MinOpened: 1,
15611561
MaxOpened: 1,
15621562
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
1563-
actionOnInactiveTransaction: WarnAndClose,
1563+
ActionOnInactiveTransaction: WarnAndClose,
15641564
idleTimeThreshold: 300 * time.Millisecond,
15651565
},
15661566
},
@@ -1660,7 +1660,7 @@ func TestClient_ReadWriteTransaction_WhenLongRunningExecuteBatchUpdate_TakeNoAct
16601660
MinOpened: 1,
16611661
MaxOpened: 1,
16621662
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
1663-
actionOnInactiveTransaction: WarnAndClose,
1663+
ActionOnInactiveTransaction: WarnAndClose,
16641664
},
16651665
},
16661666
})
@@ -4232,7 +4232,7 @@ func TestClient_WhenLongRunningPartitionedUpdateRequest_TakeNoAction(t *testing.
42324232
MaxOpened: 1,
42334233
healthCheckSampleInterval: 10 * time.Millisecond, // maintainer runs every 10ms
42344234
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
4235-
actionOnInactiveTransaction: WarnAndClose,
4235+
ActionOnInactiveTransaction: WarnAndClose,
42364236
executionFrequency: 15 * time.Millisecond, // check long-running sessions every 15ms
42374237
},
42384238
},

spanner/session.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ const (
5959

6060
// InactiveTransactionRemovalOptions has configurations for action on long-running transactions.
6161
type InactiveTransactionRemovalOptions struct {
62-
// actionOnInactiveTransaction is the configuration to choose action for inactive transactions.
62+
// ActionOnInactiveTransaction is the configuration to choose action for inactive transactions.
6363
// It can be one of Warn, Close, WarnAndClose.
64-
actionOnInactiveTransaction ActionOnInactiveTransactionKind
64+
ActionOnInactiveTransaction ActionOnInactiveTransactionKind
6565
// long-running transactions will be cleaned up if utilisation is
6666
// greater than the below value.
6767
usedSessionsRatioThreshold float64
@@ -505,7 +505,7 @@ var DefaultSessionPoolConfig = SessionPoolConfig{
505505
HealthCheckWorkers: 10,
506506
HealthCheckInterval: healthCheckIntervalMins * time.Minute,
507507
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
508-
actionOnInactiveTransaction: NoAction, // Make default to Warn from NoAction later
508+
ActionOnInactiveTransaction: Warn,
509509
executionFrequency: 2 * time.Minute,
510510
idleTimeThreshold: 60 * time.Minute,
511511
usedSessionsRatioThreshold: 0.95,
@@ -635,8 +635,8 @@ func newSessionPool(sc *sessionClient, config SessionPoolConfig) (*sessionPool,
635635
if config.healthCheckSampleInterval == 0 {
636636
config.healthCheckSampleInterval = time.Minute
637637
}
638-
if config.actionOnInactiveTransaction == actionUnspecified {
639-
config.actionOnInactiveTransaction = DefaultSessionPoolConfig.actionOnInactiveTransaction
638+
if config.ActionOnInactiveTransaction == actionUnspecified {
639+
config.ActionOnInactiveTransaction = DefaultSessionPoolConfig.ActionOnInactiveTransaction
640640
}
641641
if config.idleTimeThreshold == 0 {
642642
config.idleTimeThreshold = DefaultSessionPoolConfig.idleTimeThreshold
@@ -726,23 +726,23 @@ func (p *sessionPool) getLongRunningSessionsLocked() []*sessionHandle {
726726
sh.mu.Lock()
727727
diff := time.Now().Sub(sh.lastUseTime)
728728
if !sh.eligibleForLongRunning && diff.Seconds() >= p.idleTimeThreshold.Seconds() {
729-
if (p.actionOnInactiveTransaction == Warn || p.actionOnInactiveTransaction == WarnAndClose) && !sh.isSessionLeakLogged {
730-
if p.actionOnInactiveTransaction == Warn {
729+
if (p.ActionOnInactiveTransaction == Warn || p.ActionOnInactiveTransaction == WarnAndClose) && !sh.isSessionLeakLogged {
730+
if p.ActionOnInactiveTransaction == Warn {
731731
if sh.stack != nil {
732732
logf(p.sc.logger, "session %s checked out of pool at %s is long running due to possible session leak for goroutine: \n%s", sh.session.getID(), sh.checkoutTime.Format(time.RFC3339), sh.stack)
733733
} else {
734734
logf(p.sc.logger, "session %s checked out of pool at %s is long running due to possible session leak for goroutine: \nEnable SessionPoolConfig.TrackSessionHandles to get stack trace associated with the session", sh.session.getID(), sh.checkoutTime.Format(time.RFC3339))
735735
}
736736
sh.isSessionLeakLogged = true
737-
} else if p.actionOnInactiveTransaction == WarnAndClose {
737+
} else if p.ActionOnInactiveTransaction == WarnAndClose {
738738
if sh.stack != nil {
739739
logf(p.sc.logger, "session %s checked out of pool at %s is long running and will be removed due to possible session leak for goroutine: \n%s", sh.session.getID(), sh.checkoutTime.Format(time.RFC3339), sh.stack)
740740
} else {
741741
logf(p.sc.logger, "session %s checked out of pool at %s is long running and will be removed due to possible session leak for goroutine: \nEnable SessionPoolConfig.TrackSessionHandles to get stack trace associated with the session", sh.session.getID(), sh.checkoutTime.Format(time.RFC3339))
742742
}
743743
}
744744
}
745-
if p.actionOnInactiveTransaction == WarnAndClose || p.actionOnInactiveTransaction == Close {
745+
if p.ActionOnInactiveTransaction == WarnAndClose || p.ActionOnInactiveTransaction == Close {
746746
longRunningSessions = append(longRunningSessions, sh)
747747
}
748748
}
@@ -760,7 +760,7 @@ func (p *sessionPool) removeLongRunningSessions() {
760760
p.mu.Unlock()
761761

762762
// destroy long-running sessions
763-
if p.actionOnInactiveTransaction == WarnAndClose || p.actionOnInactiveTransaction == Close {
763+
if p.ActionOnInactiveTransaction == WarnAndClose || p.ActionOnInactiveTransaction == Close {
764764
var leakedSessionsRemovedCount uint64
765765
for _, sh := range longRunningSessions {
766766
// removes inner session out of the pool to reduce the probability of two processes trying
@@ -895,7 +895,7 @@ var errGetSessionTimeout = spannerErrorf(codes.Canceled, "timeout / context canc
895895
// sessions being checked out of the pool.
896896
func (p *sessionPool) newSessionHandle(s *session) (sh *sessionHandle) {
897897
sh = &sessionHandle{session: s, checkoutTime: time.Now(), lastUseTime: time.Now()}
898-
if p.TrackSessionHandles || p.actionOnInactiveTransaction == Warn || p.actionOnInactiveTransaction == WarnAndClose || p.actionOnInactiveTransaction == Close {
898+
if p.TrackSessionHandles || p.ActionOnInactiveTransaction == Warn || p.ActionOnInactiveTransaction == WarnAndClose || p.ActionOnInactiveTransaction == Close {
899899
p.mu.Lock()
900900
sh.trackedSessionHandle = p.trackedSessionHandles.PushBack(sh)
901901
p.mu.Unlock()
@@ -1513,7 +1513,7 @@ func (hc *healthChecker) maintainer() {
15131513

15141514
// task to remove or log sessions which are unexpectedly long-running
15151515
if now.After(hc.pool.InactiveTransactionRemovalOptions.lastExecutionTime.Add(hc.pool.executionFrequency)) {
1516-
if hc.pool.actionOnInactiveTransaction == Warn || hc.pool.actionOnInactiveTransaction == WarnAndClose || hc.pool.actionOnInactiveTransaction == Close {
1516+
if hc.pool.ActionOnInactiveTransaction == Warn || hc.pool.ActionOnInactiveTransaction == WarnAndClose || hc.pool.ActionOnInactiveTransaction == Close {
15171517
hc.pool.removeLongRunningSessions()
15181518
}
15191519
hc.pool.InactiveTransactionRemovalOptions.lastExecutionTime = now

spanner/session_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ func TestSessionLeak_WhenInactiveTransactions_RemoveSessionsFromPool(t *testing.
416416
MinOpened: 0,
417417
MaxOpened: 1,
418418
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
419-
actionOnInactiveTransaction: WarnAndClose,
419+
ActionOnInactiveTransaction: WarnAndClose,
420420
},
421421
TrackSessionHandles: true,
422422
},
@@ -489,7 +489,7 @@ func TestMaintainer_LongRunningTransactionsCleanup_IfClose_VerifyInactiveSession
489489
MaxOpened: 3,
490490
healthCheckSampleInterval: 10 * time.Millisecond, // maintainer runs every 10ms
491491
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
492-
actionOnInactiveTransaction: WarnAndClose,
492+
ActionOnInactiveTransaction: WarnAndClose,
493493
executionFrequency: 15 * time.Millisecond, // check long-running sessions every 20ms
494494
},
495495
},
@@ -560,7 +560,7 @@ func TestLongRunningTransactionsCleanup_IfClose_VerifyInactiveSessionsClosed(t *
560560
MinOpened: 1,
561561
MaxOpened: 3,
562562
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
563-
actionOnInactiveTransaction: WarnAndClose,
563+
ActionOnInactiveTransaction: WarnAndClose,
564564
},
565565
},
566566
})
@@ -628,7 +628,7 @@ func TestLongRunningTransactionsCleanup_IfLog_VerifyInactiveSessionsOpen(t *test
628628
MinOpened: 1,
629629
MaxOpened: 3,
630630
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
631-
actionOnInactiveTransaction: Warn,
631+
ActionOnInactiveTransaction: Warn,
632632
},
633633
},
634634
})
@@ -715,7 +715,7 @@ func TestLongRunningTransactionsCleanup_UtilisationBelowThreshold_VerifyInactive
715715
MaxOpened: 3,
716716
incStep: 1,
717717
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
718-
actionOnInactiveTransaction: WarnAndClose,
718+
ActionOnInactiveTransaction: WarnAndClose,
719719
},
720720
},
721721
})
@@ -774,7 +774,7 @@ func TestLongRunningTransactions_WhenAllExpectedlyLongRunning_VerifyInactiveSess
774774
MinOpened: 1,
775775
MaxOpened: 3,
776776
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
777-
actionOnInactiveTransaction: Warn,
777+
ActionOnInactiveTransaction: Warn,
778778
},
779779
},
780780
})
@@ -842,7 +842,7 @@ func TestLongRunningTransactions_WhenDurationBelowThreshold_VerifyInactiveSessio
842842
MinOpened: 1,
843843
MaxOpened: 3,
844844
InactiveTransactionRemovalOptions: InactiveTransactionRemovalOptions{
845-
actionOnInactiveTransaction: Warn,
845+
ActionOnInactiveTransaction: Warn,
846846
},
847847
},
848848
})

0 commit comments

Comments
 (0)