Add a workflow for hyperfine benchmarks (#1304)

kushudai · j178 · web-flow · commit e6344fcbb325 · 2026-03-09T15:50:24.000+08:00
Link to uploaded benchmark: https://github.com/j178/prek/actions/runs/22839343954/artifacts/5823487446 Cannot test the comment workflow until it is merged. Fixes #992 --------- Co-authored-by: Jo <10510431+j178@users.noreply.github.com>
diff --git a/.github/scripts/hyperfine-run-benchmarks.sh b/.github/scripts/hyperfine-run-benchmarks.sh
@@ -0,0 +1,227 @@
+#!/usr/bin/env bash
+#
+# Benchmarks a base prek binary against the head binary with hyperfine and
+# appends a Markdown report to the results file.
+#
+# Required environment:
+#   HYPERFINE_BENCHMARK_WORKSPACE  directory the hook benchmarks cd into
+#   HYPERFINE_RESULTS_FILE         Markdown report (appended to, never truncated)
+#   HYPERFINE_HEAD_BINARY          binary under test
+#   HYPERFINE_BASE_BINARY          reference binary
+set -euo pipefail
+
+# Each expansion aborts with the given message when the variable is unset or empty.
+TARGET_WORKSPACE="${HYPERFINE_BENCHMARK_WORKSPACE:?HYPERFINE_BENCHMARK_WORKSPACE is required}"
+COMMENT="${HYPERFINE_RESULTS_FILE:?HYPERFINE_RESULTS_FILE is required}"
+HEAD_BINARY="${HYPERFINE_HEAD_BINARY:?HYPERFINE_HEAD_BINARY is required}"
+BASE_BINARY="${HYPERFINE_BASE_BINARY:?HYPERFINE_BASE_BINARY is required}"
+REPO_WORKSPACE="$(pwd)"
+META_WORKSPACE="${TARGET_WORKSPACE}-meta"
+
+# hyperfine exports its per-benchmark Markdown/JSON next to the report file.
+OUT_DIR="$(dirname "$COMMENT")"
+OUT_MD="${OUT_DIR}/out.md"
+OUT_JSON="${OUT_DIR}/out.json"
+mkdir -p "$OUT_DIR"
+
+# Set to true by check_variance on a regression; decides the final exit status.
+failed=false
+
+# First line of the head binary's --version output, shown in the report header.
+CURRENT_PREK_VERSION="$("$HEAD_BINARY" --version | sed -n '1p')"
+
+# Append one line of text ($1) to the report file named by $COMMENT.
+write_line() {
+  local text="$1"
+  printf '%s\n' "$text" >>"$COMMENT"
+}
+
+# Append a single empty line to the report file named by $COMMENT.
+write_blank_line() {
+  echo >>"$COMMENT"
+}
+
+# Start a new "## <title>" section in the report, preceded by a blank line.
+# Arguments:
+#   $1 - section title
+#   $2 - optional description line written under the heading
+write_section() {
+  local heading="$1"
+  local blurb="${2:-}"
+
+  write_blank_line
+  write_line "## $heading"
+
+  # Explicit guard (not `[[ ]] && ...`) so an empty description still returns 0
+  # under `set -e`.
+  if [[ -z "$blurb" ]]; then
+    return 0
+  fi
+  write_line "$blurb"
+}
+
+# Report a large change in mean runtime between the two results in $OUT_JSON.
+# results[0] is read as the reference and results[1] as the current binary, so
+# a ratio above 1 means the current binary is slower (regression) and below 1
+# means it is faster (improvement).
+# Arguments: $1 - command text quoted in the message
+# Globals:   OUT_JSON (read), failed (set to true on a regression)
+# Writes nothing when there are fewer than two results or the change is
+# within 10% in either direction.
+check_variance() {
+  local cmd="$1"
+  local result_count
+  result_count=$(jq '.results | length' "$OUT_JSON")
+
+  if [ "$result_count" -lt 2 ]; then
+    return 0
+  fi
+
+  local base_mean head_mean ratio pct
+  base_mean=$(jq '.results[0].mean' "$OUT_JSON")
+  head_mean=$(jq '.results[1].mean' "$OUT_JSON")
+  ratio=$(bc <<< "scale=4; $head_mean / $base_mean")
+  pct=$(bc <<< "scale=2; ($ratio - 1) * 100")
+
+  # Absolute value of the percentage: strip a leading minus sign if present.
+  local magnitude="${pct#-}"
+
+  # bc prints 1 for a true comparison and 0 for a false one.
+  if [[ "$(bc -l <<< "$magnitude > 10")" != 1 ]]; then
+    return 0
+  fi
+
+  if [[ "$(bc -l <<< "$ratio < 1")" == 1 ]]; then
+    write_line "✅  Performance improvement for \`$cmd\`: ${magnitude}% faster"
+    return 0
+  fi
+
+  write_line "⚠️  Warning: Performance regression for \`$cmd\`: ${pct}% slower"
+  failed=true
+}
+
+# Append the Markdown that hyperfine exported to $OUT_MD to the report,
+# wrapped in a collapsible <details> element.
+write_benchmark_details() {
+  {
+    printf '%s\n' "<details>" "<summary>Benchmark details</summary>" ""
+    cat "$OUT_MD"
+    printf '\n%s\n' "</details>"
+  } >> "$COMMENT"
+}
+
+# Run one hyperfine comparison of "$BASE_BINARY <cmd>" (passed as --reference)
+# against "$HEAD_BINARY <cmd>" and append the outcome to the report.
+# Arguments:
+#   $1 - heading label for the report
+#   $2 - prek arguments appended to both binaries
+#   $3 - warmup runs (default 3)
+#   $4 - timed runs (default 30)
+#   $5 - optional hyperfine --setup command
+#   $6 - optional hyperfine --prepare command
+#   $7 - "true" to call check_variance afterwards (default false)
+# Returns: 1 after writing a failure note when hyperfine exits non-zero.
+benchmark() {
+  local label="$1" cmd="$2"
+  local warmup="${3:-3}" runs="${4:-30}"
+  local setup="${5:-}" prepare="${6:-}"
+  local check_change="${7:-false}"
+
+  local -a hyperfine_args=(
+    -i -N
+    -w "$warmup"
+    -r "$runs"
+    --export-markdown "$OUT_MD"
+    --export-json "$OUT_JSON"
+    --show-output
+  )
+  # Only pass --setup / --prepare when the caller supplied a command.
+  [[ -z "$setup" ]] || hyperfine_args+=(--setup "$setup")
+  [[ -z "$prepare" ]] || hyperfine_args+=(--prepare "$prepare")
+
+  write_line "### \`$label\`"
+  hyperfine "${hyperfine_args[@]}" --reference "$BASE_BINARY $cmd" "$HEAD_BINARY $cmd" || {
+    write_line "⚠️ Benchmark failed for: $cmd"
+    return 1
+  }
+  write_benchmark_details
+
+  if [[ "$check_change" == "true" ]]; then
+    check_variance "$cmd"
+  fi
+}
+
+# Create a fresh git repository at $META_WORKSPACE for the meta-hook
+# benchmarks and leave the shell's working directory inside it.
+# Copies any *.txt / *.json fixtures from $TARGET_WORKSPACE (best effort),
+# writes a config with the meta hooks plus two builtin hooks, commits it and
+# installs the hooks.
+# Globals: META_WORKSPACE, TARGET_WORKSPACE, HEAD_BINARY (all read)
+# Exits the script with status 1 when git init or git commit fails.
+create_meta_workspace() {
+  rm -rf "$META_WORKSPACE"
+  mkdir -p "$META_WORKSPACE"
+  cd "$META_WORKSPACE"
+  git init || { echo "Failed to init git for meta hooks"; exit 1; }
+  git config user.name "Benchmark"
+  git config user.email "bench@prek.dev"
+
+  # Best effort: an unmatched glob makes cp fail, which is deliberately ignored.
+  cp "$TARGET_WORKSPACE"/*.txt "$TARGET_WORKSPACE"/*.json . 2>/dev/null || true
+
+  cat > .pre-commit-config.yaml << 'EOF'
+repos:
+  - repo: meta
+    hooks:
+      - id: check-hooks-apply
+      - id: check-useless-excludes
+      - id: identity
+  - repo: builtin
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+EOF
+
+  git add -A
+  git commit -m "Meta hooks test" || { echo "Failed to commit meta hooks test"; exit 1; }
+  # Use the binary under test rather than whatever `prek` happens to be on
+  # PATH: every other invocation in this script goes through an explicit binary.
+  "$HEAD_BINARY" install-hooks
+}
+
+# Report header: title plus a snapshot of the environment the numbers came from.
+# The here-doc delimiter is unquoted so the command substitutions are expanded.
+cat >> "$COMMENT" << EOF
+## Hyperfine Performance
+
+**Environment:**
+- OS: $(uname -s) $(uname -r)
+- CPU: $(nproc) cores
+- prek version: $CURRENT_PREK_VERSION
+- Rust version: $(rustc --version)
+- Hyperfine version: $(hyperfine --version)
+EOF
+
+# Quick commands benchmarked from the directory the script was started in.
+CMDS=(
+  "--version"
+  "list"
+  "validate-config .pre-commit-config.yaml"
+  "sample-config"
+)
+for cmd in "${CMDS[@]}"; do
+  # validate-config needs a config file in the current directory; note the
+  # skip in the report instead of benchmarking a guaranteed failure.
+  case "$cmd" in
+    validate-config*)
+      if [[ ! -f ".pre-commit-config.yaml" ]]; then
+        write_line "### \`prek $cmd\`"
+        write_line "⏭️  Skipped: .pre-commit-config.yaml not found"
+        continue
+      fi
+      ;;
+  esac
+
+  # --version and list get more warmup and timed runs than the other commands.
+  case "$cmd" in
+    --version | list) benchmark "prek $cmd" "$cmd" 5 100 ;;
+    *) benchmark "prek $cmd" "$cmd" 3 50 ;;
+  esac
+  check_variance "$cmd"
+done
+
+# Benchmark builtin hooks in test directory
+cd "$TARGET_WORKSPACE"
+
+# Cold vs warm benchmarks before polluting cache
+write_section "Cold vs Warm Runs" "Comparing first run (cold) vs subsequent runs (warm cache):"
+# The cache wipe runs in --prepare through `sh -c` for two reasons:
+#   * benchmark() passes -N, so hyperfine executes commands without a shell and
+#     a bare `~` would reach rm unexpanded, deleting nothing;
+#   * --setup runs once per benchmarked command, which would leave every timed
+#     run after the first one warm. --prepare runs before each timed run.
+benchmark "prek run --all-files (cold - no cache)" "run --all-files" 0 10 "" "sh -c 'rm -rf ~/.cache/prek && git checkout -- .'"
+benchmark "prek run --all-files (warm - with cache)" "run --all-files" 3 20 "" "git checkout -- ."
+
+# Full benchmark suite with cache warmed up; the trailing `true` makes
+# benchmark() call check_variance, so a regression here fails the script.
+write_section "Full Hook Suite" "Running the builtin hook suite on the benchmark workspace:"
+benchmark "prek run --all-files (full builtin hook suite)" "run --all-files" 3 50 "" "git checkout -- ." true
+
+# Per-hook timings: each builtin hook below is benchmarked on its own.
+write_section "Individual Hook Performance" "Benchmarking each hook individually on the test repo:"
+
+INDIVIDUAL_HOOKS=(
+  "trailing-whitespace"
+  "end-of-file-fixer"
+  "check-json"
+  "check-yaml"
+  "check-toml"
+  "check-xml"
+)
+
+for hook in "${INDIVIDUAL_HOOKS[@]}"; do
+  # The same argument string serves as the report label and the prek command;
+  # `git checkout -- .` is handed to benchmark() as the --prepare command.
+  hook_cmd="run $hook --all-files"
+  benchmark "prek $hook_cmd" "$hook_cmd" 3 30 "" "git checkout -- ."
+done
+
+# Installation performance
+write_section "Installation Performance" "Benchmarking hook installation (fast path hooks skip Python setup):"
+# The wipe runs in --prepare through `sh -c`: benchmark() passes -N, so without
+# a shell `~` is never expanded, and a --setup command runs only once, before
+# the warmup run, which would repopulate the cache ahead of every timed run.
+benchmark "prek install-hooks (cold - no cache)" "install-hooks" 1 5 "" "sh -c 'rm -rf ~/.cache/prek/hooks ~/.cache/prek/repos'"
+benchmark "prek install-hooks (warm - with cache)" "install-hooks" 1 5
+
+# File filtering/scoping performance
+write_section "File Filtering/Scoping Performance" "Testing different file selection modes:"
+
+# Stage the whole workspace first; presumably so the plain `prek run` below
+# has staged files to operate on.
+git add -A
+# The prepare command is wrapped in `sh -c` because it chains two commands with `&&`.
+benchmark "prek run (staged files only)" "run" 3 20 "" "sh -c 'git checkout -- . && git add -A'"
+# NOTE(review): benchmark() passes -N to hyperfine. If that means no shell
+# expands the pattern, prek receives the literal string *.json; confirm that
+# --files treats it as intended.
+benchmark "prek run --files '*.json' (specific file type)" "run --files '*.json'" 3 20
+
+# Workspace discovery & initialization
+write_section "Workspace Discovery & Initialization" "Benchmarking hook discovery and initialization overhead:"
+benchmark "prek run --dry-run --all-files (measures init overhead)" "run --dry-run --all-files" 3 20
+
+# Meta hooks are benchmarked in their own repository: create_meta_workspace
+# builds it and changes the working directory into it.
+write_section "Meta Hooks Performance" "Benchmarking meta hooks separately:"
+create_meta_workspace
+
+META_HOOKS=(
+  "check-hooks-apply"
+  "check-useless-excludes"
+  "identity"
+)
+
+for hook in "${META_HOOKS[@]}"; do
+  # Label and prek command share the same argument string.
+  meta_cmd="run $hook --all-files"
+  benchmark "prek $meta_cmd" "$meta_cmd" 3 15 "" "git checkout -- ."
+done
+
+# Exit non-zero when any check_variance call flagged a regression.
+[[ "$failed" != true ]] || exit 1
diff --git a/.github/scripts/hyperfine-setup-test-env.sh b/.github/scripts/hyperfine-setup-test-env.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# Builds the throwaway git repository that the hyperfine benchmarks run the
+# builtin hooks against.
+#
+# Required environment:
+#   HYPERFINE_BENCHMARK_WORKSPACE  directory to (re)create; any existing
+#                                  content is deleted
+set -euo pipefail
+
+TARGET_WORKSPACE="${HYPERFINE_BENCHMARK_WORKSPACE:?HYPERFINE_BENCHMARK_WORKSPACE is required}"
+
+# Start from an empty directory and turn it into a git repository with a
+# local committer identity.
+rm -rf "$TARGET_WORKSPACE"
+mkdir -p "$TARGET_WORKSPACE"
+cd "$TARGET_WORKSPACE"
+if ! git init; then
+  echo "Failed to init git"
+  exit 1
+fi
+git config user.name "Benchmark"
+git config user.email "bench@prek.dev"
+
+# 50 text files with trailing whitespace and no final newline
+for n in {1..50}; do
+  printf '%s\n%s' 'line with trailing whitespace   ' 'another line  ' > "file$n.txt"
+done
+
+# 30 files each of JSON, YAML, TOML and XML, all sharing the same numbering
+for n in {1..30}; do
+  printf '{"key": "value", "number": %s}\n' "$n" > "file$n.json"
+  printf 'key: value\nnumber: %s\n' "$n" > "file$n.yaml"
+  printf '[section]\nkey = "value%s"\n' "$n" > "file$n.toml"
+  printf '<?xml version="1.0"?><root><item id="%s">value</item></root>\n' "$n" > "file$n.xml"
+done
+
+# 20 files with mixed CRLF/LF line endings and 20 files starting with a UTF-8 BOM
+for n in {1..20}; do
+  printf 'line1\r\nline2\nline3\r\n' > "mixed$n.txt"
+  printf '\xef\xbb\xbfContent with BOM' > "bom$n.txt"
+done
+
+for n in {1..10}; do
+  # Executable script with a shebang (for the shebang check)
+  printf '#!/bin/bash\necho hello\n' > "script$n.sh"
+  chmod +x "script$n.sh"
+
+  # File that might look like it contains a private key (but doesn't)
+  printf '# This is not a private key\napi_key = fake_key_%s\n' "$n" > "config$n.txt"
+
+  # Symlink to one of the text files above (for check-symlinks)
+  ln -s "file$n.txt" "link$n.txt"
+done
+
+# Write a config that enables the builtin hooks listed below; printf repeats
+# the "- id:" template once per hook name.
+{
+  printf 'repos:\n  - repo: builtin\n    hooks:\n'
+  printf '      - id: %s\n' \
+    trailing-whitespace \
+    end-of-file-fixer \
+    check-json \
+    check-yaml \
+    check-toml \
+    check-xml \
+    mixed-line-ending \
+    fix-byte-order-marker \
+    check-executables-have-shebangs \
+    detect-private-key \
+    check-case-conflict \
+    check-merge-conflict \
+    check-symlinks
+} > .pre-commit-config.yaml
+
+# Commit everything as the initial commit of the workspace.
+git add -A
+if ! git commit -m "Initial commit"; then
+  echo "Failed to commit"
+  exit 1
+fi
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -17,6 +17,10 @@ env:
   CARGO_TERM_COLOR: always
   RUSTUP_MAX_RETRIES: 10
 
+# Runs are grouped per workflow and ref; a newer run in the same group cancels
+# the one still in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   bloat-check:
     runs-on: ubuntu-latest
@@ -98,7 +102,65 @@ jobs:
       - name: Upload bloat results
         uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
         with:
-          # NOTE: prek-ci-bot uses this artifact name to post comments on PRs.
+          # NOTE: https://github.com/j178/prek-ci-bot uses this artifact name to post comments on PRs.
           # Make sure to update the bot if you rename the artifact.
           name: bloat-check-results
           path: bloat-comparison.txt
+
+  # Builds prek at the pull request's base commit and at head, then compares
+  # the two binaries with hyperfine.
+  hyperfine-benchmark:
+    runs-on: ubuntu-latest
+    name: "hyperfine benchmark"
+    timeout-minutes: 30
+    # Inputs read by the scripts under .github/scripts/ (both abort when a
+    # variable they need is unset).
+    env:
+      HYPERFINE_BENCHMARK_WORKSPACE: /tmp/prek-bench
+      HYPERFINE_RESULTS_FILE: ${{ github.workspace }}/hyperfine-benchmark.md
+      HYPERFINE_HEAD_BINARY: ${{ github.workspace }}/target/profiling/prek
+      # Kept outside the checkout and named after the base commit.
+      HYPERFINE_BASE_BINARY: ${{ github.workspace }}/../bin/prek-${{ github.event.pull_request.base.sha }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          # Full history; presumably so the base commit can be checked out below.
+          fetch-depth: 0
+          persist-credentials: false
+
+      - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2
+        with:
+          save-if: ${{ inputs.save-rust-cache == 'true' }}
+
+      # Reuse a previously built base binary; the key covers the base commit,
+      # Cargo.lock, runner OS and architecture.
+      - id: base-binary-cache
+        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+        with:
+          path: ${{ env.HYPERFINE_BASE_BINARY }}
+          key: prek-hyperfine-base-${{ github.event.pull_request.base.sha }}-${{ hashFiles('Cargo.lock') }}-${{ runner.os }}-${{ runner.arch }}
+
+      # Only runs on a cache miss: builds prek at the base commit, moves the
+      # binary to $HYPERFINE_BASE_BINARY, then returns to the triggering commit.
+      - name: Build base version
+        if: ${{ steps.base-binary-cache.outputs.cache-hit != 'true' }}
+        env:
+          BASE_VERSION: ${{ github.event.pull_request.base.sha }}
+        run: |
+          mkdir -p "$(dirname "$HYPERFINE_BASE_BINARY")"
+          git checkout "$BASE_VERSION"
+          # Build and move are separate commands on purpose: with errexit, a
+          # failure on the left side of `&&` does not abort the script, so a
+          # broken base build would otherwise go unnoticed in this step.
+          cargo build --profile profiling
+          mv target/profiling/prek "$HYPERFINE_BASE_BINARY"
+          git checkout "$GITHUB_SHA"
+
+      # Produces target/profiling/prek, the path HYPERFINE_HEAD_BINARY points at.
+      - name: Build head version
+        run: |
+          cargo build --profile profiling
+
+      - name: Install hyperfine
+        uses: taiki-e/install-action@f92912fad184299a31e22ad070a5059fd07d4f59 # v2.68.7
+        with:
+          tool: hyperfine
+
+      # Creates the fixture repository at HYPERFINE_BENCHMARK_WORKSPACE.
+      - name: Setup test environment for builtin hooks
+        run: .github/scripts/hyperfine-setup-test-env.sh
+
+      # Appends the report to HYPERFINE_RESULTS_FILE; the script exits 1 when it
+      # flags a regression.
+      - name: Run benchmarks
+        run: .github/scripts/hyperfine-run-benchmarks.sh
+
+      - name: Upload benchmark results
+        # Upload even when an earlier step failed: the benchmark script exits 1
+        # after writing a regression warning, and with the default condition
+        # that report would never be uploaded.
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          # NOTE: https://github.com/j178/prek-ci-bot uses this artifact name to post comments on PRs.
+          # Make sure to update the bot if you rename the artifact.
+          name: hyperfine-benchmark-results
+          path: ${{ env.HYPERFINE_RESULTS_FILE }}