|
8 | 8 |
|
9 | 9 | #include <assert.h> |
10 | 10 | #include <iomanip> |
| 11 | +#include <algorithm> |
| 12 | +#include <regex> |
| 13 | +#include <numeric> |
| 14 | + |
11 | 15 | #include <iostream> |
12 | 16 |
|
13 | | -benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { |
14 | | - static std::map<std::string, benchmark::BenchFunction> benchmarks_map; |
15 | | - return benchmarks_map; |
| 17 | +void benchmark::ConsolePrinter::header() |
| 18 | +{ |
| 19 | + std::cout << "# Benchmark, evals, iterations, total, min, max, median" << std::endl; |
16 | 20 | } |
17 | 21 |
|
18 | | -benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func) |
| 22 | +void benchmark::ConsolePrinter::result(const State& state) |
19 | 23 | { |
20 | | - benchmarks().emplace(name, func); |
| 24 | + auto results = state.m_elapsed_results; |
| 25 | + std::sort(results.begin(), results.end()); |
| 26 | + |
| 27 | + double total = state.m_num_iters * std::accumulate(results.begin(), results.end(), 0.0); |
| 28 | + |
| 29 | + double front = 0; |
| 30 | + double back = 0; |
| 31 | + double median = 0; |
| 32 | + |
| 33 | + if (!results.empty()) { |
| 34 | + front = results.front(); |
| 35 | + back = results.back(); |
| 36 | + |
| 37 | + size_t mid = results.size() / 2; |
| 38 | + median = results[mid]; |
| 39 | + if (0 == results.size() % 2) { |
| 40 | + median = (results[mid] + results[mid + 1]) / 2; |
| 41 | + } |
| 42 | + } |
| 43 | + |
| 44 | + std::cout << std::setprecision(6); |
| 45 | + std::cout << state.m_name << ", " << state.m_num_evals << ", " << state.m_num_iters << ", " << total << ", " << front << ", " << back << ", " << median << std::endl; |
21 | 46 | } |
22 | 47 |
|
23 | | -void |
24 | | -benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) |
| 48 | +void benchmark::ConsolePrinter::footer() {} |
| 49 | +benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height) |
| 50 | + : m_plotly_url(plotly_url), m_width(width), m_height(height) |
25 | 51 | { |
26 | | - perf_init(); |
27 | | - if (std::ratio_less_equal<benchmark::clock::period, std::micro>::value) { |
28 | | - std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; |
29 | | - } |
30 | | - std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << "," |
31 | | - << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; |
| 52 | +} |
32 | 53 |
|
33 | | - for (const auto &p: benchmarks()) { |
34 | | - State state(p.first, elapsedTimeForOne); |
35 | | - p.second(state); |
36 | | - } |
37 | | - perf_fini(); |
| 54 | +void benchmark::PlotlyPrinter::header() |
| 55 | +{ |
| 56 | + std::cout << "<html><head>" |
| 57 | + << "<script src=\"" << m_plotly_url << "\"></script>" |
| 58 | + << "</head><body><div id=\"myDiv\" style=\"width:" << m_width << "px; height:" << m_height << "px\"></div>" |
| 59 | + << "<script> var data = [" |
| 60 | + << std::endl; |
38 | 61 | } |
39 | 62 |
|
40 | | -bool benchmark::State::KeepRunning() |
| 63 | +void benchmark::PlotlyPrinter::result(const State& state) |
41 | 64 | { |
42 | | - if (count & countMask) { |
43 | | - ++count; |
44 | | - return true; |
| 65 | + std::cout << "{ " << std::endl |
| 66 | + << " name: '" << state.m_name << "', " << std::endl |
| 67 | + << " y: ["; |
| 68 | + |
| 69 | + const char* prefix = ""; |
| 70 | + for (const auto& e : state.m_elapsed_results) { |
| 71 | + std::cout << prefix << std::setprecision(6) << e; |
| 72 | + prefix = ", "; |
45 | 73 | } |
46 | | - time_point now; |
47 | | - uint64_t nowCycles; |
48 | | - if (count == 0) { |
49 | | - beginTime = now = clock::now(); |
50 | | - lastCycles = beginCycles = nowCycles = perf_cpucycles(); |
| 74 | + std::cout << "]," << std::endl |
| 75 | + << " boxpoints: 'all', jitter: 0.3, pointpos: 0, type: 'box'," |
| 76 | + << std::endl |
| 77 | + << "}," << std::endl; |
| 78 | +} |
| 79 | + |
| 80 | +void benchmark::PlotlyPrinter::footer() |
| 81 | +{ |
| 82 | + std::cout << "]; var layout = { showlegend: false, yaxis: { rangemode: 'tozero', autorange: true } };" |
| 83 | + << "Plotly.newPlot('myDiv', data, layout);" |
| 84 | + << "</script></body></html>"; |
| 85 | +} |
| 86 | + |
| 87 | + |
| 88 | +benchmark::BenchRunner::BenchmarkMap& benchmark::BenchRunner::benchmarks() |
| 89 | +{ |
| 90 | + static std::map<std::string, Bench> benchmarks_map; |
| 91 | + return benchmarks_map; |
| 92 | +} |
| 93 | + |
| 94 | +benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func, uint64_t num_iters_for_one_second) |
| 95 | +{ |
| 96 | + benchmarks().insert(std::make_pair(name, Bench{func, num_iters_for_one_second})); |
| 97 | +} |
| 98 | + |
| 99 | +void benchmark::BenchRunner::RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only) |
| 100 | +{ |
| 101 | + perf_init(); |
| 102 | + if (!std::ratio_less_equal<benchmark::clock::period, std::micro>::value) { |
| 103 | + std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; |
51 | 104 | } |
52 | | - else { |
53 | | - now = clock::now(); |
54 | | - auto elapsed = now - lastTime; |
55 | | - auto elapsedOne = elapsed / (countMask + 1); |
56 | | - if (elapsedOne < minTime) minTime = elapsedOne; |
57 | | - if (elapsedOne > maxTime) maxTime = elapsedOne; |
58 | | - |
59 | | - // We only use relative values, so don't have to handle 64-bit wrap-around specially |
60 | | - nowCycles = perf_cpucycles(); |
61 | | - uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1); |
62 | | - if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; |
63 | | - if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; |
64 | | - |
65 | | - if (elapsed*128 < maxElapsed) { |
66 | | - // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. |
67 | | - // The restart avoids including the overhead of this code in the measurement. |
68 | | - countMask = ((countMask<<3)|7) & ((1LL<<60)-1); |
69 | | - count = 0; |
70 | | - minTime = duration::max(); |
71 | | - maxTime = duration::zero(); |
72 | | - minCycles = std::numeric_limits<uint64_t>::max(); |
73 | | - maxCycles = std::numeric_limits<uint64_t>::min(); |
74 | | - return true; |
| 105 | + |
| 106 | + std::regex reFilter(filter); |
| 107 | + std::smatch baseMatch; |
| 108 | + |
| 109 | + printer.header(); |
| 110 | + |
| 111 | + for (const auto& p : benchmarks()) { |
| 112 | + if (!std::regex_match(p.first, baseMatch, reFilter)) { |
| 113 | + continue; |
| 114 | + } |
| 115 | + |
| 116 | + uint64_t num_iters = static_cast<uint64_t>(p.second.num_iters_for_one_second * scaling); |
| 117 | + if (0 == num_iters) { |
| 118 | + num_iters = 1; |
75 | 119 | } |
76 | | - if (elapsed*16 < maxElapsed) { |
77 | | - uint64_t newCountMask = ((countMask<<1)|1) & ((1LL<<60)-1); |
78 | | - if ((count & newCountMask)==0) { |
79 | | - countMask = newCountMask; |
80 | | - } |
| 120 | + State state(p.first, num_evals, num_iters, printer); |
| 121 | + if (!is_list_only) { |
| 122 | + p.second.func(state); |
81 | 123 | } |
| 124 | + printer.result(state); |
82 | 125 | } |
83 | | - lastTime = now; |
84 | | - lastCycles = nowCycles; |
85 | | - ++count; |
86 | 126 |
|
87 | | - if (now - beginTime < maxElapsed) return true; // Keep going |
| 127 | + printer.footer(); |
88 | 128 |
|
89 | | - --count; |
| 129 | + perf_fini(); |
| 130 | +} |
90 | 131 |
|
91 | | - assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above"); |
| 132 | +bool benchmark::State::UpdateTimer(const benchmark::time_point current_time) |
| 133 | +{ |
| 134 | + if (m_start_time != time_point()) { |
| 135 | + std::chrono::duration<double> diff = current_time - m_start_time; |
| 136 | + m_elapsed_results.push_back(diff.count() / m_num_iters); |
92 | 137 |
|
93 | | - // Output results |
94 | | - // Duration casts are only necessary here because hardware with sub-nanosecond clocks |
95 | | - // will lose precision. |
96 | | - int64_t min_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(minTime).count(); |
97 | | - int64_t max_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime).count(); |
98 | | - int64_t avg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>((now-beginTime)/count).count(); |
99 | | - int64_t averageCycles = (nowCycles-beginCycles)/count; |
100 | | - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << "," |
101 | | - << minCycles << "," << maxCycles << "," << averageCycles << "\n"; |
102 | | - std::cout.copyfmt(std::ios(nullptr)); |
| 138 | + if (m_elapsed_results.size() == m_num_evals) { |
| 139 | + return false; |
| 140 | + } |
| 141 | + } |
103 | 142 |
|
104 | | - return false; |
| 143 | + m_num_iters_left = m_num_iters - 1; |
| 144 | + return true; |
105 | 145 | } |
0 commit comments