Skip to content

Commit ba46967

Browse files
committed
partial bitcoin#23025: update nanobench add -min_time
includes: - eed99cf - 153e686
1 parent c28b05c commit ba46967

File tree

2 files changed

+39
-41
lines changed

2 files changed

+39
-41
lines changed

src/bench/addrman.cpp

Lines changed: 15 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -101,40 +101,33 @@ static void AddrManGetAddr(benchmark::Bench& bench)
101101
});
102102
}
103103

104-
static void AddrManGood(benchmark::Bench& bench)
104+
static void AddrManAddThenGood(benchmark::Bench& bench)
105105
{
106-
/* Create many AddrMan objects - one to be modified at each loop iteration.
107-
* This is necessary because the AddrMan::Good() method modifies the
108-
* object, affecting the timing of subsequent calls to the same method and
109-
* we want to do the same amount of work in every loop iteration. */
110-
111-
bench.epochs(5).epochIterations(1);
112-
const size_t addrman_count{bench.epochs() * bench.epochIterations()};
113-
114-
std::vector<std::unique_ptr<AddrMan>> addrmans(addrman_count);
115-
for (size_t i{0}; i < addrman_count; ++i) {
116-
addrmans[i] = std::make_unique<AddrMan>(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
117-
FillAddrMan(*addrmans[i]);
118-
}
119-
120106
auto markSomeAsGood = [](AddrMan& addrman) {
121107
for (size_t source_i = 0; source_i < NUM_SOURCES; ++source_i) {
122108
for (size_t addr_i = 0; addr_i < NUM_ADDRESSES_PER_SOURCE; ++addr_i) {
123-
if (addr_i % 32 == 0) {
124-
addrman.Good(g_addresses[source_i][addr_i]);
125-
}
109+
addrman.Good(g_addresses[source_i][addr_i]);
126110
}
127111
}
128112
};
129113

130-
uint64_t i = 0;
114+
CreateAddresses();
115+
131116
bench.run([&] {
132-
markSomeAsGood(*addrmans.at(i));
133-
++i;
117+
// To make the benchmark independent of the number of evaluations, we always prepare a new addrman.
118+
// This is necessary because AddrMan::Good() method modifies the object, affecting the timing of subsequent calls
119+
// to the same method and we want to do the same amount of work in every loop iteration.
120+
//
121+
// This has some overhead (exactly the result of AddrManAdd benchmark), but that overhead is constant so improvements in
122+
// AddrMan::Good() will still be noticeable.
123+
AddrMan addrman(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
124+
AddAddressesToAddrMan(addrman);
125+
126+
markSomeAsGood(addrman);
134127
});
135128
}
136129

137130
BENCHMARK(AddrManAdd);
138131
BENCHMARK(AddrManSelect);
139132
BENCHMARK(AddrManGetAddr);
140-
BENCHMARK(AddrManGood);
133+
BENCHMARK(AddrManAddThenGood);

src/bench/nanobench.h

Lines changed: 24 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
// see https://semver.org/
3434
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
3535
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36-
#define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes
36+
#define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes
3737

3838
///////////////////////////////////////////////////////////////////////////////////////////////////
3939
// public facing api - as minimal as possible
@@ -88,13 +88,15 @@
8888
} while (0)
8989
#endif
9090

91-
#if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \
92-
!defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93-
// only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in
94-
// kernel 2.6.32 (all others are).
95-
# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
96-
#else
97-
# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
91+
#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
92+
#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93+
# include <linux/version.h>
94+
# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
95+
// PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
96+
// PERF_FLAG_FD_CLOEXEC since kernel 3.14
97+
# undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
98+
# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
99+
# endif
98100
#endif
99101

100102
#if defined(__clang__)
@@ -2210,20 +2212,20 @@ struct IterationLogic::Impl {
22102212
columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);
22112213

22122214
double rInsMedian = -1.0;
2213-
if (mResult.has(Result::Measure::instructions)) {
2215+
if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
22142216
rInsMedian = mResult.median(Result::Measure::instructions);
22152217
columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch());
22162218
}
22172219

22182220
double rCycMedian = -1.0;
2219-
if (mResult.has(Result::Measure::cpucycles)) {
2221+
if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
22202222
rCycMedian = mResult.median(Result::Measure::cpucycles);
22212223
columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch());
22222224
}
22232225
if (rInsMedian > 0.0 && rCycMedian > 0.0) {
22242226
columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
22252227
}
2226-
if (mResult.has(Result::Measure::branchinstructions)) {
2228+
if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
22272229
double rBraMedian = mResult.median(Result::Measure::branchinstructions);
22282230
columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch());
22292231
if (mResult.has(Result::Measure::branchmisses)) {
@@ -2402,6 +2404,14 @@ class LinuxPerformanceCounters {
24022404
return (a + divisor / 2) / divisor;
24032405
}
24042406

2407+
ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2408+
static inline uint32_t mix(uint32_t x) noexcept {
2409+
x ^= x << 13;
2410+
x ^= x >> 17;
2411+
x ^= x << 5;
2412+
return x;
2413+
}
2414+
24052415
template <typename Op>
24062416
ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
24072417
void calibrate(Op&& op) {
@@ -2441,15 +2451,10 @@ class LinuxPerformanceCounters {
24412451
uint64_t const numIters = 100000U + (std::random_device{}() & 3);
24422452
uint64_t n = numIters;
24432453
uint32_t x = 1234567;
2444-
auto fn = [&]() {
2445-
x ^= x << 13;
2446-
x ^= x >> 17;
2447-
x ^= x << 5;
2448-
};
24492454

24502455
beginMeasure();
24512456
while (n-- > 0) {
2452-
fn();
2457+
x = mix(x);
24532458
}
24542459
endMeasure();
24552460
detail::doNotOptimizeAway(x);
@@ -2459,8 +2464,8 @@ class LinuxPerformanceCounters {
24592464
beginMeasure();
24602465
while (n-- > 0) {
24612466
// we now run *twice* so we can easily calculate the overhead
2462-
fn();
2463-
fn();
2467+
x = mix(x);
2468+
x = mix(x);
24642469
}
24652470
endMeasure();
24662471
detail::doNotOptimizeAway(x);

0 commit comments

Comments
 (0)