
Commit a1155d9

Optimize AddCaller+AddStacktrace by sharing stacks (#1052)
(Supersedes #808.)

If we use the `AddCaller(true)` option for a logger with `AddStacktrace(level)`, which captures a stack trace for the specified level or higher, Zap currently inspects the stack twice:

- `getCallerFrame` retrieves information about the immediate caller of the log entry
- `StackSkip` calls `takeStacktrace` to capture the entire stack trace and build a string from it

For the case where both caller and stack trace are requested, we can avoid redundant work by sharing information about the call stack between the logic for the two.

To accomplish this, the following high-level pieces were added:

```go
type stacktrace
func captureStacktrace(skip int, depth stacktraceDepth) *stacktrace
func (*stacktrace) Next() (runtime.Frame, bool)

type stackFormatter
func newStackFormatter(*buffer.Buffer) stackFormatter
func (*stackFormatter) FormatStack(*stacktrace)
func (*stackFormatter) FormatFrame(runtime.Frame)
```

`captureStacktrace` captures stack traces (either just one frame or the entire stack) that can then be inspected manually (to extract caller information) and formatted into a string representation either one-frame-at-a-time or wholesale with `stackFormatter`.

This allows us to reorganize the logic that applies the AddCaller and AddStacktrace options so that it can:

- capture as much of the stack as it needs
- extract caller information from the first frame if we need it
- format that frame and the remaining frames into a stack trace

---

Benchmarking: I ran the new benchmark included in this PR, as well as the existing stack trace/caller based benchmarks, 10 times for 10 seconds each.

```
% go test -run '^$' -bench 'Caller|Stack' -benchmem -benchtime 10s -count 10
[...]
name                       old time/op    new time/op    delta
StackField-96                 834ns ± 8%     819ns ±14%     ~     (p=0.971 n=10+10)
AddCallerHook-96              466ns ± 7%     480ns ± 1%     ~     (p=0.118 n=10+6)
AddCallerAndStacktrace-96    1.16µs ± 4%    0.78µs ± 8%  -32.60%  (p=0.000 n=9+10)
TakeStacktrace-96            2.66µs ± 2%    2.74µs ± 2%   +3.01%  (p=0.000 n=9+10)

name                       old alloc/op   new alloc/op   delta
StackField-96                  542B ± 0%      544B ± 0%   +0.33%  (p=0.000 n=10+10)
AddCallerHook-96               254B ± 0%      248B ± 0%   -2.36%  (p=0.000 n=10+10)
AddCallerAndStacktrace-96      652B ± 0%      429B ± 0%  -34.20%  (p=0.000 n=9+6)
TakeStacktrace-96              420B ± 0%      421B ± 0%   +0.24%  (p=0.000 n=10+10)

name                       old allocs/op  new allocs/op  delta
StackField-96                  3.00 ± 0%      3.00 ± 0%     ~     (all equal)
AddCallerHook-96               3.00 ± 0%      2.00 ± 0%  -33.33%  (p=0.000 n=10+10)
AddCallerAndStacktrace-96      5.00 ± 0%      3.00 ± 0%  -40.00%  (p=0.000 n=10+10)
TakeStacktrace-96              2.00 ± 0%      2.00 ± 0%     ~     (all equal)
```

Summary:

- AddCaller+AddStacktrace takes 2 fewer allocations and is ~30% faster
- AddCaller costs one allocation less
- takeStacktrace costs 3% more CPU

I think that the overall improvement is worth the slightly more expensive stack traces.

Refs GO-1181
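As a standalone illustration of the shared-capture idea, here is a minimal sketch using only the standard library. `callerAndStack` is a hypothetical helper for illustration, not zap's API: it captures the program counters once, peels off the first frame for the caller annotation, and walks the same frames for the stack trace.

```go
package main

import (
	"fmt"
	"runtime"
)

// callerAndStack captures the call stack once and serves both needs:
// the first frame becomes the "caller" annotation, and the remaining
// frames become the stack trace.
func callerAndStack(skip int) (caller string, stack []string) {
	pcs := make([]uintptr, 64)
	// skip+2 skips runtime.Callers and callerAndStack itself.
	n := runtime.Callers(skip+2, pcs)
	frames := runtime.CallersFrames(pcs[:n])

	frame, more := frames.Next()
	caller = fmt.Sprintf("%s:%d", frame.File, frame.Line)
	stack = append(stack, frame.Function)

	// Walk the remaining frames, dropping the final one (runtime.main
	// or runtime.goexit), as zap's FormatStack does.
	for more {
		var f runtime.Frame
		f, more = frames.Next()
		if more {
			stack = append(stack, f.Function)
		}
	}
	return caller, stack
}

func main() {
	caller, stack := callerAndStack(0)
	fmt.Println("caller:", caller)
	for _, fn := range stack {
		fmt.Println("\t" + fn)
	}
}
```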
1 parent 3f20106 commit a1155d9

File tree

4 files changed: +224 -68 lines changed

logger.go

+41-26
```diff
@@ -24,9 +24,9 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
-	"runtime"
 	"strings"
 
+	"go.uber.org/zap/internal/bufferpool"
 	"go.uber.org/zap/zapcore"
 )
 
@@ -259,8 +259,10 @@ func (log *Logger) clone() *Logger {
 }
 
 func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
-	// check must always be called directly by a method in the Logger interface
-	// (e.g., Check, Info, Fatal).
+	// Logger.check must always be called directly by a method in the
+	// Logger interface (e.g., Check, Info, Fatal).
+	// This skips Logger.check and the Info/Fatal/Check/etc. method that
+	// called it.
 	const callerSkipOffset = 2
 
 	// Check the level first to reduce the cost of disabled log calls.
@@ -307,42 +309,55 @@ func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
 
 	// Thread the error output through to the CheckedEntry.
 	ce.ErrorOutput = log.errorOutput
-	if log.addCaller {
-		frame, defined := getCallerFrame(log.callerSkip + callerSkipOffset)
-		if !defined {
+
+	addStack := log.addStack.Enabled(ce.Level)
+	if !log.addCaller && !addStack {
+		return ce
+	}
+
+	// Adding the caller or stack trace requires capturing the callers of
+	// this function. We'll share information between these two.
+	stackDepth := stacktraceFirst
+	if addStack {
+		stackDepth = stacktraceFull
+	}
+	stack := captureStacktrace(log.callerSkip+callerSkipOffset, stackDepth)
+	defer stack.Free()
+
+	if stack.Count() == 0 {
+		if log.addCaller {
 			fmt.Fprintf(log.errorOutput, "%v Logger.check error: failed to get caller\n", ent.Time.UTC())
 			log.errorOutput.Sync()
 		}
+		return ce
+	}
 
+	frame, more := stack.Next()
+
+	if log.addCaller {
 		ce.Caller = zapcore.EntryCaller{
-			Defined:  defined,
+			Defined:  frame.PC != 0,
 			PC:       frame.PC,
 			File:     frame.File,
 			Line:     frame.Line,
 			Function: frame.Function,
 		}
 	}
-	if log.addStack.Enabled(ce.Level) {
-		ce.Stack = StackSkip("", log.callerSkip+callerSkipOffset).String
-	}
 
-	return ce
-}
+	if addStack {
+		buffer := bufferpool.Get()
+		defer buffer.Free()
 
-// getCallerFrame gets caller frame. The argument skip is the number of stack
-// frames to ascend, with 0 identifying the caller of getCallerFrame. The
-// boolean ok is false if it was not possible to recover the information.
-//
-// Note: This implementation is similar to runtime.Caller, but it returns the whole frame.
-func getCallerFrame(skip int) (frame runtime.Frame, ok bool) {
-	const skipOffset = 2 // skip getCallerFrame and Callers
-
-	pc := make([]uintptr, 1)
-	numFrames := runtime.Callers(skip+skipOffset, pc)
-	if numFrames < 1 {
-		return
+		stackfmt := newStackFormatter(buffer)
+
+		// We've already extracted the first frame, so format that
+		// separately and defer to stackfmt for the rest.
+		stackfmt.FormatFrame(frame)
+		if more {
+			stackfmt.FormatStack(stack)
+		}
+		ce.Stack = buffer.String()
 	}
 
-	frame, _ = runtime.CallersFrames(pc).Next()
-	return frame, frame.PC != 0
+	return ce
 }
```
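For orientation, the option combination this commit optimizes looks like this from the user's side; a minimal sketch (the exact field names depend on your encoder config). `NewProduction` enables caller annotation by default, and `WithOptions` lowers the stack trace threshold from Error to Warn:

```go
package main

import "go.uber.org/zap"

func main() {
	logger, err := zap.NewProduction() // caller annotation is on by default
	if err != nil {
		panic(err)
	}
	defer logger.Sync()

	// Capture stack traces for Warn and above, not just Error.
	logger = logger.WithOptions(zap.AddStacktrace(zap.WarnLevel))

	// This entry carries both a "caller" and a "stacktrace" field; the
	// rewritten check() above captures the call stack for them only once.
	logger.Warn("something looks off")
}
```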

logger_bench_test.go

+18
```diff
@@ -179,6 +179,24 @@ func BenchmarkAddCallerHook(b *testing.B) {
 	})
 }
 
+func BenchmarkAddCallerAndStacktrace(b *testing.B) {
+	logger := New(
+		zapcore.NewCore(
+			zapcore.NewJSONEncoder(NewProductionConfig().EncoderConfig),
+			&ztest.Discarder{},
+			InfoLevel,
+		),
+		AddCaller(),
+		AddStacktrace(WarnLevel),
+	)
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			logger.Warn("Caller and stacktrace.")
+		}
+	})
+}
+
 func Benchmark10Fields(b *testing.B) {
 	withBenchedLogger(b, func(log *Logger) {
 		log.Info("Ten fields, passed at the log site.",
```
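Because this benchmark logs at Warn with both `AddCaller()` and `AddStacktrace(WarnLevel)` enabled, every iteration takes the combined caller-plus-stack path, which is exactly the code this commit optimizes; it is the source of the ~30% improvement quoted in the commit message.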
stacktrace.go

+133-42
```diff
@@ -24,62 +24,153 @@ import (
 	"runtime"
 	"sync"
 
+	"go.uber.org/zap/buffer"
 	"go.uber.org/zap/internal/bufferpool"
 )
 
-var (
-	_stacktracePool = sync.Pool{
-		New: func() interface{} {
-			return newProgramCounters(64)
-		},
-	}
+var _stacktracePool = sync.Pool{
+	New: func() interface{} {
+		return &stacktrace{
+			storage: make([]uintptr, 64),
+		}
+	},
+}
+
+type stacktrace struct {
+	pcs    []uintptr // program counters; always a subslice of storage
+	frames *runtime.Frames
+
+	// The size of pcs varies depending on requirements:
+	// it will be one if only the first frame was requested,
+	// and otherwise it will reflect the depth of the call stack.
+	//
+	// storage decouples the slice we need (pcs) from the slice we pool.
+	// We will always allocate a reasonably large storage, but we'll use
+	// only as much of it as we need.
+	storage []uintptr
+}
+
+// stacktraceDepth specifies how deep of a stack trace should be captured.
+type stacktraceDepth int
+
+const (
+	// stacktraceFirst captures only the first frame.
+	stacktraceFirst stacktraceDepth = iota
+
+	// stacktraceFull captures the entire call stack, allocating more
+	// storage for it if needed.
+	stacktraceFull
 )
 
-func takeStacktrace(skip int) string {
-	buffer := bufferpool.Get()
-	defer buffer.Free()
-	programCounters := _stacktracePool.Get().(*programCounters)
-	defer _stacktracePool.Put(programCounters)
-
-	var numFrames int
-	for {
-		// Skip the call to runtime.Callers and takeStacktrace so that the
-		// program counters start at the caller of takeStacktrace.
-		numFrames = runtime.Callers(skip+2, programCounters.pcs)
-		if numFrames < len(programCounters.pcs) {
-			break
-		}
-		// Don't put the too-short counter slice back into the pool; this lets
-		// the pool adjust if we consistently take deep stacktraces.
-		programCounters = newProgramCounters(len(programCounters.pcs) * 2)
+// captureStacktrace captures a stack trace of the specified depth, skipping
+// the provided number of frames. skip=0 identifies the caller of
+// captureStacktrace.
+//
+// The caller must call Free on the returned stacktrace after using it.
+func captureStacktrace(skip int, depth stacktraceDepth) *stacktrace {
+	stack := _stacktracePool.Get().(*stacktrace)
+
+	switch depth {
+	case stacktraceFirst:
+		stack.pcs = stack.storage[:1]
+	case stacktraceFull:
+		stack.pcs = stack.storage
 	}
 
-	i := 0
-	frames := runtime.CallersFrames(programCounters.pcs[:numFrames])
+	// Unlike other "skip"-based APIs, skip=0 identifies runtime.Callers
+	// itself. +2 to skip captureStacktrace and runtime.Callers.
+	numFrames := runtime.Callers(
+		skip+2,
+		stack.pcs,
+	)
 
-	// Note: On the last iteration, frames.Next() returns false, with a valid
-	// frame, but we ignore this frame. The last frame is a runtime frame which
-	// adds noise, since it's only either runtime.main or runtime.goexit.
-	for frame, more := frames.Next(); more; frame, more = frames.Next() {
-		if i != 0 {
-			buffer.AppendByte('\n')
+	// runtime.Callers truncates the recorded stacktrace if there is no
+	// room in the provided slice. For the full stack trace, keep expanding
+	// storage until there are fewer frames than there is room.
+	if depth == stacktraceFull {
+		pcs := stack.pcs
+		for numFrames == len(pcs) {
+			pcs = make([]uintptr, len(pcs)*2)
+			numFrames = runtime.Callers(skip+2, pcs)
 		}
-		i++
-		buffer.AppendString(frame.Function)
-		buffer.AppendByte('\n')
-		buffer.AppendByte('\t')
-		buffer.AppendString(frame.File)
-		buffer.AppendByte(':')
-		buffer.AppendInt(int64(frame.Line))
+
+		// Discard old storage instead of returning it to the pool.
+		// This will adjust the pool size over time if stack traces are
+		// consistently very deep.
+		stack.storage = pcs
+		stack.pcs = pcs[:numFrames]
+	} else {
+		stack.pcs = stack.pcs[:numFrames]
 	}
 
+	stack.frames = runtime.CallersFrames(stack.pcs)
+	return stack
+}
+
+// Free releases resources associated with this stacktrace
+// and returns it back to the pool.
+func (st *stacktrace) Free() {
+	st.frames = nil
+	st.pcs = nil
+	_stacktracePool.Put(st)
+}
+
+// Count reports the total number of frames in this stacktrace.
+// Count DOES NOT change as Next is called.
+func (st *stacktrace) Count() int {
+	return len(st.pcs)
+}
+
+// Next returns the next frame in the stack trace,
+// and a boolean indicating whether there are more after it.
+func (st *stacktrace) Next() (_ runtime.Frame, more bool) {
+	return st.frames.Next()
+}
+
+func takeStacktrace(skip int) string {
+	stack := captureStacktrace(skip+1, stacktraceFull)
+	defer stack.Free()
+
+	buffer := bufferpool.Get()
+	defer buffer.Free()
+
+	stackfmt := newStackFormatter(buffer)
+	stackfmt.FormatStack(stack)
 	return buffer.String()
 }
 
-type programCounters struct {
-	pcs []uintptr
+// stackFormatter formats a stack trace into a readable string representation.
+type stackFormatter struct {
+	b        *buffer.Buffer
+	nonEmpty bool // whether we've written at least one frame already
+}
+
+// newStackFormatter builds a new stackFormatter.
+func newStackFormatter(b *buffer.Buffer) stackFormatter {
+	return stackFormatter{b: b}
+}
+
+// FormatStack formats all remaining frames in the provided stacktrace -- minus
+// the final runtime.main/runtime.goexit frame.
+func (sf *stackFormatter) FormatStack(stack *stacktrace) {
+	// Note: On the last iteration, frames.Next() returns false, with a valid
+	// frame, but we ignore this frame. The last frame is a runtime frame which
+	// adds noise, since it's only either runtime.main or runtime.goexit.
+	for frame, more := stack.Next(); more; frame, more = stack.Next() {
+		sf.FormatFrame(frame)
+	}
 }
 
-func newProgramCounters(size int) *programCounters {
-	return &programCounters{make([]uintptr, size)}
+// FormatFrame formats the given frame.
+func (sf *stackFormatter) FormatFrame(frame runtime.Frame) {
+	if sf.nonEmpty {
+		sf.b.AppendByte('\n')
+	}
+	sf.nonEmpty = true
+	sf.b.AppendString(frame.Function)
+	sf.b.AppendByte('\n')
+	sf.b.AppendByte('\t')
+	sf.b.AppendString(frame.File)
+	sf.b.AppendByte(':')
+	sf.b.AppendInt(int64(frame.Line))
 }
```
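The buffer-doubling loop in `captureStacktrace` can also be demonstrated in isolation. A minimal sketch using only the standard library; `fullPCs` is a hypothetical name, and the pooling is omitted:

```go
package main

import (
	"fmt"
	"runtime"
)

// fullPCs captures the entire call stack. runtime.Callers truncates its
// result when the slice is full, so a completely filled slice signals
// that we must retry with a larger one.
func fullPCs(skip int) []uintptr {
	pcs := make([]uintptr, 64)
	// skip+2 skips runtime.Callers and fullPCs itself.
	n := runtime.Callers(skip+2, pcs)
	for n == len(pcs) {
		pcs = make([]uintptr, len(pcs)*2)
		n = runtime.Callers(skip+2, pcs)
	}
	return pcs[:n]
}

func main() {
	fmt.Println("captured", len(fullPCs(0)), "frames")
}
```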

stacktrace_test.go

+32
```diff
@@ -21,6 +21,7 @@
 package zap
 
 import (
+	"bytes"
 	"strings"
 	"testing"
 
@@ -67,8 +68,39 @@ func TestTakeStacktraceWithSkipInnerFunc(t *testing.T) {
 	)
 }
 
+func TestTakeStacktraceDeepStack(t *testing.T) {
+	const (
+		N                  = 500
+		withStackDepthName = "go.uber.org/zap.withStackDepth"
+	)
+	withStackDepth(N, func() {
+		trace := takeStacktrace(0)
+		for found := 0; found < N; found++ {
+			i := strings.Index(trace, withStackDepthName)
+			if i < 0 {
+				t.Fatalf(`expected %v occurrences of %q, found %d`,
+					N, withStackDepthName, found)
+			}
+			trace = trace[i+len(withStackDepthName):]
+		}
+	})
+}
+
 func BenchmarkTakeStacktrace(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		takeStacktrace(0)
 	}
 }
+
+func withStackDepth(depth int, f func()) {
+	var recurse func(rune) rune
+	recurse = func(r rune) rune {
+		if r > 0 {
+			bytes.Map(recurse, []byte(string([]rune{r - 1})))
+		} else {
+			f()
+		}
+		return 0
+	}
+	recurse(rune(depth))
+}
```
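`withStackDepth` recurses through `bytes.Map` rather than calling itself directly, so each level contributes a real stack frame that the compiler cannot collapse. With a depth of 500, the resulting trace far exceeds the 64-entry buffer that `_stacktracePool` hands out, forcing `captureStacktrace` through its buffer-doubling path.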
