Skip to content

Commit 17c4bcf

Browse files
martinusfurszy
authored and committed
Improved microbenchmarking with multiple features.
* inline performance critical code * Average runtime is specified and used to calculate iterations. * Console: show median of multiple runs * plot: show box plot * filter benchmarks * specify scaling factor * ignore src/test and src/bench in command line check script * number of iterations instead of time * Replaced runtime in BENCHMARK macro with number of iterations. * Added -? to bench_bitcoin * Benchmark plotly.js URL, width, height can be customized * Fixed incorrect precision warning
1 parent 7f8f030 commit 17c4bcf

File tree

15 files changed

+348
-206
lines changed

15 files changed

+348
-206
lines changed

src/bench/Examples.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ static void Sleep100ms(benchmark::State& state)
1414
}
1515
}
1616

17-
BENCHMARK(Sleep100ms);
17+
BENCHMARK(Sleep100ms, 10);
1818

1919
// Extremely fast-running benchmark:
2020
#include <math.h>
@@ -30,4 +30,4 @@ static void Trig(benchmark::State& state)
3030
}
3131
}
3232

33-
BENCHMARK(Trig);
33+
BENCHMARK(Trig, 12 * 1000 * 1000);

src/bench/base58.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,6 @@ static void Base58Decode(benchmark::State& state)
5252
}
5353

5454

55-
BENCHMARK(Base58Encode);
56-
BENCHMARK(Base58CheckEncode);
57-
BENCHMARK(Base58Decode);
55+
BENCHMARK(Base58Encode, 470 * 1000);
56+
BENCHMARK(Base58CheckEncode, 320 * 1000);
57+
BENCHMARK(Base58Decode, 800 * 1000);

src/bench/bench.cpp

Lines changed: 112 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -8,98 +8,138 @@
88

99
#include <assert.h>
1010
#include <iomanip>
11+
#include <algorithm>
12+
#include <regex>
13+
#include <numeric>
14+
1115
#include <iostream>
1216

13-
benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() {
14-
static std::map<std::string, benchmark::BenchFunction> benchmarks_map;
15-
return benchmarks_map;
17+
/// Write the column-title line for the console report to stdout and flush it.
/// The titles match the comma-separated fields emitted by result().
void benchmark::ConsolePrinter::header()
{
    static const char* const column_titles = "# Benchmark, evals, iterations, total, min, max, median";
    std::cout << column_titles << std::endl;
}
1721

18-
benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func)
22+
void benchmark::ConsolePrinter::result(const State& state)
1923
{
20-
benchmarks().emplace(name, func);
24+
auto results = state.m_elapsed_results;
25+
std::sort(results.begin(), results.end());
26+
27+
double total = state.m_num_iters * std::accumulate(results.begin(), results.end(), 0.0);
28+
29+
double front = 0;
30+
double back = 0;
31+
double median = 0;
32+
33+
if (!results.empty()) {
34+
front = results.front();
35+
back = results.back();
36+
37+
size_t mid = results.size() / 2;
38+
median = results[mid];
39+
if (0 == results.size() % 2) {
40+
median = (results[mid] + results[mid + 1]) / 2;
41+
}
42+
}
43+
44+
std::cout << std::setprecision(6);
45+
std::cout << state.m_name << ", " << state.m_num_evals << ", " << state.m_num_iters << ", " << total << ", " << front << ", " << back << ", " << median << std::endl;
2146
}
2247

23-
void
24-
benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne)
48+
// Intentionally empty: this printer writes nothing after the last result.
void benchmark::ConsolePrinter::footer() {}
49+
benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height)
50+
: m_plotly_url(plotly_url), m_width(width), m_height(height)
2551
{
26-
perf_init();
27-
if (std::ratio_less_equal<benchmark::clock::period, std::micro>::value) {
28-
std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n";
29-
}
30-
std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << ","
31-
<< "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";
52+
}
3253

33-
for (const auto &p: benchmarks()) {
34-
State state(p.first, elapsedTimeForOne);
35-
p.second(state);
36-
}
37-
perf_fini();
54+
void benchmark::PlotlyPrinter::header()
55+
{
56+
std::cout << "<html><head>"
57+
<< "<script src=\"" << m_plotly_url << "\"></script>"
58+
<< "</head><body><div id=\"myDiv\" style=\"width:" << m_width << "px; height:" << m_height << "px\"></div>"
59+
<< "<script> var data = ["
60+
<< std::endl;
3861
}
3962

40-
bool benchmark::State::KeepRunning()
63+
void benchmark::PlotlyPrinter::result(const State& state)
4164
{
42-
if (count & countMask) {
43-
++count;
44-
return true;
65+
std::cout << "{ " << std::endl
66+
<< " name: '" << state.m_name << "', " << std::endl
67+
<< " y: [";
68+
69+
const char* prefix = "";
70+
for (const auto& e : state.m_elapsed_results) {
71+
std::cout << prefix << std::setprecision(6) << e;
72+
prefix = ", ";
4573
}
46-
time_point now;
47-
uint64_t nowCycles;
48-
if (count == 0) {
49-
beginTime = now = clock::now();
50-
lastCycles = beginCycles = nowCycles = perf_cpucycles();
74+
std::cout << "]," << std::endl
75+
<< " boxpoints: 'all', jitter: 0.3, pointpos: 0, type: 'box',"
76+
<< std::endl
77+
<< "}," << std::endl;
78+
}
79+
80+
/// Write the closing part of the HTML report to stdout: the end of the `data`
/// array, the layout object, the Plotly.newPlot call targeting 'myDiv', and
/// the closing script/body/html tags. No newline or flush is emitted.
void benchmark::PlotlyPrinter::footer()
{
    static const char* const closing_parts[] = {
        "]; var layout = { showlegend: false, yaxis: { rangemode: 'tozero', autorange: true } };",
        "Plotly.newPlot('myDiv', data, layout);",
        "</script></body></html>",
    };
    for (const char* part : closing_parts) {
        std::cout << part;
    }
}
86+
87+
88+
/// Return the shared registry that maps a benchmark name to its Bench entry.
/// The map is a function-local static, so it is constructed on the first call
/// and the same instance is returned by every later call.
benchmark::BenchRunner::BenchmarkMap& benchmark::BenchRunner::benchmarks()
{
    static std::map<std::string, Bench> registry;
    return registry;
}
93+
94+
/// Register a benchmark in the shared registry returned by benchmarks().
///
/// @param name                      key under which the benchmark is stored.
/// @param func                      function to run for this benchmark.
/// @param num_iters_for_one_second  iteration count stored alongside `func`.
///
/// std::map::insert is used, so an entry already registered under `name`
/// is left unchanged.
benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func, uint64_t num_iters_for_one_second)
{
    Bench entry{func, num_iters_for_one_second};
    benchmarks().insert(std::make_pair(name, entry));
}
98+
99+
void benchmark::BenchRunner::RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only)
100+
{
101+
perf_init();
102+
if (!std::ratio_less_equal<benchmark::clock::period, std::micro>::value) {
103+
std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n";
51104
}
52-
else {
53-
now = clock::now();
54-
auto elapsed = now - lastTime;
55-
auto elapsedOne = elapsed / (countMask + 1);
56-
if (elapsedOne < minTime) minTime = elapsedOne;
57-
if (elapsedOne > maxTime) maxTime = elapsedOne;
58-
59-
// We only use relative values, so don't have to handle 64-bit wrap-around specially
60-
nowCycles = perf_cpucycles();
61-
uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1);
62-
if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles;
63-
if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles;
64-
65-
if (elapsed*128 < maxElapsed) {
66-
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
67-
// The restart avoids including the overhead of this code in the measurement.
68-
countMask = ((countMask<<3)|7) & ((1LL<<60)-1);
69-
count = 0;
70-
minTime = duration::max();
71-
maxTime = duration::zero();
72-
minCycles = std::numeric_limits<uint64_t>::max();
73-
maxCycles = std::numeric_limits<uint64_t>::min();
74-
return true;
105+
106+
std::regex reFilter(filter);
107+
std::smatch baseMatch;
108+
109+
printer.header();
110+
111+
for (const auto& p : benchmarks()) {
112+
if (!std::regex_match(p.first, baseMatch, reFilter)) {
113+
continue;
114+
}
115+
116+
uint64_t num_iters = static_cast<uint64_t>(p.second.num_iters_for_one_second * scaling);
117+
if (0 == num_iters) {
118+
num_iters = 1;
75119
}
76-
if (elapsed*16 < maxElapsed) {
77-
uint64_t newCountMask = ((countMask<<1)|1) & ((1LL<<60)-1);
78-
if ((count & newCountMask)==0) {
79-
countMask = newCountMask;
80-
}
120+
State state(p.first, num_evals, num_iters, printer);
121+
if (!is_list_only) {
122+
p.second.func(state);
81123
}
124+
printer.result(state);
82125
}
83-
lastTime = now;
84-
lastCycles = nowCycles;
85-
++count;
86126

87-
if (now - beginTime < maxElapsed) return true; // Keep going
127+
printer.footer();
88128

89-
--count;
129+
perf_fini();
130+
}
90131

91-
assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above");
132+
bool benchmark::State::UpdateTimer(const benchmark::time_point current_time)
133+
{
134+
if (m_start_time != time_point()) {
135+
std::chrono::duration<double> diff = current_time - m_start_time;
136+
m_elapsed_results.push_back(diff.count() / m_num_iters);
92137

93-
// Output results
94-
// Duration casts are only necessary here because hardware with sub-nanosecond clocks
95-
// will lose precision.
96-
int64_t min_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(minTime).count();
97-
int64_t max_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime).count();
98-
int64_t avg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>((now-beginTime)/count).count();
99-
int64_t averageCycles = (nowCycles-beginCycles)/count;
100-
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << ","
101-
<< minCycles << "," << maxCycles << "," << averageCycles << "\n";
102-
std::cout.copyfmt(std::ios(nullptr));
138+
if (m_elapsed_results.size() == m_num_evals) {
139+
return false;
140+
}
141+
}
103142

104-
return false;
143+
m_num_iters_left = m_num_iters - 1;
144+
return true;
105145
}

0 commit comments

Comments
 (0)