partial bitcoin#23025: update nanobench add -min_time

kwvg · kwvg · commit ba4696718eb3 · 2024-09-03T14:53:20.000Z
includes: - eed99cf - 153e686
diff --git a/src/bench/addrman.cpp b/src/bench/addrman.cpp
@@ -101,40 +101,33 @@ static void AddrManGetAddr(benchmark::Bench& bench)
     });
 }
 
-static void AddrManGood(benchmark::Bench& bench)
+static void AddrManAddThenGood(benchmark::Bench& bench)
 {
-    /* Create many AddrMan objects - one to be modified at each loop iteration.
-     * This is necessary because the AddrMan::Good() method modifies the
-     * object, affecting the timing of subsequent calls to the same method and
-     * we want to do the same amount of work in every loop iteration. */
-
-    bench.epochs(5).epochIterations(1);
-    const size_t addrman_count{bench.epochs() * bench.epochIterations()};
-
-    std::vector<std::unique_ptr<AddrMan>> addrmans(addrman_count);
-    for (size_t i{0}; i < addrman_count; ++i) {
-        addrmans[i] = std::make_unique<AddrMan>(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
-        FillAddrMan(*addrmans[i]);
-    }
-
     auto markSomeAsGood = [](AddrMan& addrman) {
         for (size_t source_i = 0; source_i < NUM_SOURCES; ++source_i) {
             for (size_t addr_i = 0; addr_i < NUM_ADDRESSES_PER_SOURCE; ++addr_i) {
-                if (addr_i % 32 == 0) {
-                    addrman.Good(g_addresses[source_i][addr_i]);
-                }
+                addrman.Good(g_addresses[source_i][addr_i]);
             }
         }
     };
 
-    uint64_t i = 0;
+    CreateAddresses();
+
     bench.run([&] {
-        markSomeAsGood(*addrmans.at(i));
-        ++i;
+        // To make the benchmark independent of the number of evaluations, we always prepare a new addrman.
+        // This is necessary because the AddrMan::Good() method modifies the object, affecting the timing of subsequent calls
+        // to the same method and we want to do the same amount of work in every loop iteration.
+        //
+        // This has some overhead (exactly the result of the AddrManAdd benchmark), but that overhead is constant so improvements in
+        // AddrMan::Good() will still be noticeable.
+        AddrMan addrman(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
+        AddAddressesToAddrMan(addrman);
+
+        markSomeAsGood(addrman);
     });
 }
 
 BENCHMARK(AddrManAdd);
 BENCHMARK(AddrManSelect);
 BENCHMARK(AddrManGetAddr);
-BENCHMARK(AddrManGood);
+BENCHMARK(AddrManAddThenGood);
diff --git a/src/bench/nanobench.h b/src/bench/nanobench.h
@@ -33,7 +33,7 @@
 // see https://semver.org/
 #define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
 #define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
-#define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes
+#define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // public facing api - as minimal as possible
@@ -88,13 +88,15 @@
         } while (0)
 #endif
 
-#if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \
-    !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
-// only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in
-// kernel 2.6.32 (all others are).
-#    define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
-#else
-#    define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
+#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
+#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
+#    include <linux/version.h>
+#    if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+// PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
+// PERF_FLAG_FD_CLOEXEC since kernel 3.14
+#        undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
+#        define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
+#    endif
 #endif
 
 #if defined(__clang__)
@@ -2210,20 +2212,20 @@ struct IterationLogic::Impl {
             columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);
 
             double rInsMedian = -1.0;
-            if (mResult.has(Result::Measure::instructions)) {
+            if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
                 rInsMedian = mResult.median(Result::Measure::instructions);
                 columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch());
             }
 
             double rCycMedian = -1.0;
-            if (mResult.has(Result::Measure::cpucycles)) {
+            if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
                 rCycMedian = mResult.median(Result::Measure::cpucycles);
                 columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch());
             }
             if (rInsMedian > 0.0 && rCycMedian > 0.0) {
                 columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
             }
-            if (mResult.has(Result::Measure::branchinstructions)) {
+            if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
                 double rBraMedian = mResult.median(Result::Measure::branchinstructions);
                 columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch());
                 if (mResult.has(Result::Measure::branchmisses)) {
@@ -2402,6 +2404,14 @@ class LinuxPerformanceCounters {
         return (a + divisor / 2) / divisor;
     }
 
+    ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
+    static inline uint32_t mix(uint32_t x) noexcept {
+        x ^= x << 13;
+        x ^= x >> 17;
+        x ^= x << 5;
+        return x;
+    }
+
     template <typename Op>
     ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
     void calibrate(Op&& op) {
@@ -2441,15 +2451,10 @@ class LinuxPerformanceCounters {
             uint64_t const numIters = 100000U + (std::random_device{}() & 3);
             uint64_t n = numIters;
             uint32_t x = 1234567;
-            auto fn = [&]() {
-                x ^= x << 13;
-                x ^= x >> 17;
-                x ^= x << 5;
-            };
 
             beginMeasure();
             while (n-- > 0) {
-                fn();
+                x = mix(x);
             }
             endMeasure();
             detail::doNotOptimizeAway(x);
@@ -2459,8 +2464,8 @@ class LinuxPerformanceCounters {
             beginMeasure();
             while (n-- > 0) {
                 // we now run *twice* so we can easily calculate the overhead
-                fn();
-                fn();
+                x = mix(x);
+                x = mix(x);
             }
             endMeasure();
             detail::doNotOptimizeAway(x);