Skip to content

Commit cb59445

Browse files
authored
Merge pull request #8680 from sylvestre/wc-perf
evaluate codspeed for perfs
2 parents c229ff8 + 4ffe09b commit cb59445

File tree

5 files changed

+347
-0
lines changed

5 files changed

+347
-0
lines changed

.github/workflows/codspeed.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
name: CodSpeed Benchmarks

# spell-checker: disable

# Run on pushes to main (baseline measurements) and on every pull request
# (so results can be compared against that baseline).
on:
  push:
    branches:
      - "main"
  pull_request:

permissions:
  contents: read

jobs:
  benchmarks:
    name: Run benchmarks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          persist-credentials: false

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get -y update
          sudo apt-get -y install libselinux1-dev

      - uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2

      # NOTE(review): the version tag was mangled by e-mail obfuscation in the
      # scraped page; v0.0.9 is the presumed original - confirm before merging.
      - name: Run sccache-cache
        uses: mozilla-actions/sccache-action@v0.0.9

      - name: Install cargo-codspeed
        shell: bash
        run: cargo install cargo-codspeed --locked

      # The wc benchmarks spawn `../../../target/release/coreutils` relative to
      # the crate directory, so the release multicall binary must exist before
      # the benchmarks run.
      - name: Build release binary used by the benchmarks
        shell: bash
        run: cargo build --release

      - name: Run benchmarks
        uses: CodSpeedHQ/action@v4
        with:
          mode: instrumentation
          run: |
            # Abort on the first failing build/run instead of silently continuing
            set -e
            # Find all utilities with benchmarks and run them
            find src/uu/*/benches/ -name "*.rs" 2>/dev/null | while IFS= read -r bench_file; do
              crate_dir="$(dirname "$(dirname "$bench_file")")"
              echo "Building benchmarks in $crate_dir"
              (cd "$crate_dir" && cargo codspeed build)
              echo "Running benchmarks in $crate_dir"
              (cd "$crate_dir" && cargo codspeed run)
            done
          token: ${{ secrets.CODSPEED_TOKEN }}

Cargo.lock

Lines changed: 39 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ compare = "0.1.0"
316316
crossterm = "0.29.0"
317317
ctor = "0.5.0"
318318
ctrlc = { version = "3.4.7", features = ["termination"] }
319+
divan = "0.1"
319320
dns-lookup = { version = "3.0.0" }
320321
exacl = "0.12.0"
321322
file_diff = "1.0.0"

src/uu/wc/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ fluent = { workspace = true }
2929
nix = { workspace = true }
3030
libc = { workspace = true }
3131

32+
[dev-dependencies]
33+
divan = { workspace = true }
34+
tempfile = { workspace = true }
35+
3236
[[bin]]
3337
name = "wc"
3438
path = "src/main.rs"
39+
40+
[[bench]]
41+
name = "wc_bench"
42+
harness = false

src/uu/wc/benches/wc_bench.rs

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use std::fs::File;
8+
use std::io::{BufWriter, Write};
9+
use tempfile::TempDir;
10+
11+
/// Generate `size_mb` MiB of synthetic ASCII text for benchmarking.
///
/// Bytes cycle through `a..=z` based on their absolute offset, and a `\n` is
/// emitted after every `avg_line_length` letters (so every full line has exactly
/// that many characters). The result always ends with a newline: if the last
/// generated byte is not `\n`, one extra byte is appended, making the returned
/// length either `size_mb * 1024 * 1024` or one more than that.
fn generate_test_data(size_mb: usize, avg_line_length: usize) -> Vec<u8> {
    let total_size = size_mb * 1024 * 1024;
    // Reserve one extra byte for the trailing newline below; without it the
    // final push would reallocate (and copy) the whole buffer.
    let mut data = Vec::with_capacity(total_size + 1);

    let mut line_chars = 0;

    for offset in 0..total_size {
        if line_chars >= avg_line_length {
            data.push(b'\n');
            line_chars = 0;
        } else {
            // `offset % 26` is always < 26, so the cast cannot truncate.
            data.push(b'a' + (offset % 26) as u8);
            line_chars += 1;
        }
    }

    // Ensure we end with a newline
    if data.last() != Some(&b'\n') {
        data.push(b'\n');
    }

    data
}
38+
39+
/// Generate `num_lines` lines of synthetic text, each terminated by `\n`.
///
/// Line lengths cycle with a period of 40 lines through
/// `avg_line_length - 20 ..= avg_line_length + 19` (clamped at zero), so the
/// mean length stays close to `avg_line_length`. Within a line, every 8th
/// position (except position 0) is a space; all other positions hold a
/// lowercase letter chosen from the line number and column.
fn generate_test_data_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> {
    // Rough size estimate (nominal line length + newline) to avoid repeated regrowth.
    let mut data = Vec::with_capacity(num_lines.saturating_mul(avg_line_length.saturating_add(1)));

    for line_num in 0..num_lines {
        // Vary line length by -20..=+19 around the average. The subtraction is
        // applied to the sum: applying it to `line_num % 40` alone would clamp
        // the negative half to zero and bias every line upward.
        let line_length = (avg_line_length + line_num % 40).saturating_sub(20);

        for char_pos in 0..line_length {
            if char_pos > 0 && char_pos % 8 == 0 {
                // Add a space every 8 characters so the text contains words
                data.push(b' ');
            } else {
                // Cycle through letters; the offset is < 26 so the cast cannot truncate.
                let char_offset = (line_num + char_pos) % 26;
                data.push(b'a' + char_offset as u8);
            }
        }
        data.push(b'\n');
    }

    data
}
62+
63+
/// Create a temporary file with test data
64+
fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
65+
let file_path = temp_dir.path().join("test_data.txt");
66+
let file = File::create(&file_path).unwrap();
67+
let mut writer = BufWriter::new(file);
68+
writer.write_all(data).unwrap();
69+
writer.flush().unwrap();
70+
file_path
71+
}
72+
73+
/// Invoke the multicall `coreutils` binary as `wc` with `args`, discarding
/// its stdout and stderr.
///
/// Returns `0` if the process exits successfully and `1` otherwise.
///
/// # Panics
///
/// Panics if the process cannot be started.
fn run_uutils_wc(args: &[&str]) -> i32 {
    use std::process::{Command, Stdio};

    // Spawn the binary instead of calling uumain directly to avoid stdout issues.
    // NOTE(review): the path is relative, so it depends on the working directory
    // the benchmark is launched from - presumably the crate directory; confirm.
    let mut wc = Command::new("../../../target/release/coreutils");
    wc.arg("wc")
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    let status = wc.status().expect("Failed to execute wc command");

    if status.success() { 0 } else { 1 }
}
87+
88+
/// Benchmark different file sizes for line counting
89+
#[divan::bench(args = [1, 5, 10, 25, 50])]
90+
fn wc_lines_synthetic(bencher: Bencher, size_mb: usize) {
91+
let temp_dir = tempfile::tempdir().unwrap();
92+
let data = generate_test_data(size_mb, 80);
93+
let file_path = create_test_file(&data, &temp_dir);
94+
let file_path_str = file_path.to_str().unwrap();
95+
96+
bencher.bench(|| {
97+
black_box(run_uutils_wc(&["-l", file_path_str]));
98+
});
99+
}
100+
101+
/// Benchmark different file sizes for character counting
102+
#[divan::bench(args = [1, 5, 10, 25])]
103+
fn wc_chars_synthetic(bencher: Bencher, size_mb: usize) {
104+
let temp_dir = tempfile::tempdir().unwrap();
105+
let data = generate_test_data(size_mb, 80);
106+
let file_path = create_test_file(&data, &temp_dir);
107+
let file_path_str = file_path.to_str().unwrap();
108+
109+
bencher.bench(|| {
110+
black_box(run_uutils_wc(&["-m", file_path_str]));
111+
});
112+
}
113+
114+
/// Benchmark different file sizes for byte counting
115+
#[divan::bench(args = [1, 5, 10, 50, 100])]
116+
fn wc_bytes_synthetic(bencher: Bencher, size_mb: usize) {
117+
let temp_dir = tempfile::tempdir().unwrap();
118+
let data = generate_test_data(size_mb, 80);
119+
let file_path = create_test_file(&data, &temp_dir);
120+
let file_path_str = file_path.to_str().unwrap();
121+
122+
bencher.bench(|| {
123+
black_box(run_uutils_wc(&["-c", file_path_str]));
124+
});
125+
}
126+
127+
/// Benchmark word counting (should use traditional read path)
128+
#[divan::bench(args = [1, 5, 10, 25])]
129+
fn wc_words_synthetic(bencher: Bencher, size_mb: usize) {
130+
let temp_dir = tempfile::tempdir().unwrap();
131+
let data = generate_test_data(size_mb, 80);
132+
let file_path = create_test_file(&data, &temp_dir);
133+
let file_path_str = file_path.to_str().unwrap();
134+
135+
bencher.bench(|| {
136+
black_box(run_uutils_wc(&["-w", file_path_str]));
137+
});
138+
}
139+
140+
/// Benchmark combined byte+line counting
141+
#[divan::bench(args = [1, 5, 10, 50])]
142+
fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) {
143+
let temp_dir = tempfile::tempdir().unwrap();
144+
let data = generate_test_data(size_mb, 80);
145+
let file_path = create_test_file(&data, &temp_dir);
146+
let file_path_str = file_path.to_str().unwrap();
147+
148+
bencher.bench(|| {
149+
black_box(run_uutils_wc(&["-cl", file_path_str]));
150+
});
151+
}
152+
153+
/// Benchmark default wc behavior (bytes, lines, words)
154+
#[divan::bench(args = [1, 5, 10])]
155+
fn wc_default_synthetic(bencher: Bencher, size_mb: usize) {
156+
let temp_dir = tempfile::tempdir().unwrap();
157+
let data = generate_test_data(size_mb, 80);
158+
let file_path = create_test_file(&data, &temp_dir);
159+
let file_path_str = file_path.to_str().unwrap();
160+
161+
bencher.bench(|| {
162+
black_box(run_uutils_wc(&[file_path_str]));
163+
});
164+
}
165+
166+
/// Test different line lengths impact on performance
167+
#[divan::bench(args = [(5, 50), (5, 100), (5, 200), (5, 500)])]
168+
fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, usize)) {
169+
let temp_dir = tempfile::tempdir().unwrap();
170+
let data = generate_test_data(size_mb, avg_line_len);
171+
let file_path = create_test_file(&data, &temp_dir);
172+
let file_path_str = file_path.to_str().unwrap();
173+
174+
bencher.bench(|| {
175+
black_box(run_uutils_wc(&["-l", file_path_str]));
176+
});
177+
}
178+
179+
/// Benchmark large files by line count - up to 500K lines!
180+
#[divan::bench(args = [10_000, 50_000, 100_000, 500_000])]
181+
fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) {
182+
let temp_dir = tempfile::tempdir().unwrap();
183+
let data = generate_test_data_by_lines(num_lines, 80);
184+
let file_path = create_test_file(&data, &temp_dir);
185+
let file_path_str = file_path.to_str().unwrap();
186+
187+
bencher.bench(|| {
188+
black_box(run_uutils_wc(&["-l", file_path_str]));
189+
});
190+
}
191+
192+
/// Benchmark character counting on large line counts
193+
#[divan::bench(args = [10_000, 50_000, 100_000])]
194+
fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) {
195+
let temp_dir = tempfile::tempdir().unwrap();
196+
let data = generate_test_data_by_lines(num_lines, 80);
197+
let file_path = create_test_file(&data, &temp_dir);
198+
let file_path_str = file_path.to_str().unwrap();
199+
200+
bencher.bench(|| {
201+
black_box(run_uutils_wc(&["-m", file_path_str]));
202+
});
203+
}
204+
205+
/// Benchmark word counting on large line counts
206+
#[divan::bench(args = [10_000, 50_000, 100_000])]
207+
fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) {
208+
let temp_dir = tempfile::tempdir().unwrap();
209+
let data = generate_test_data_by_lines(num_lines, 80);
210+
let file_path = create_test_file(&data, &temp_dir);
211+
let file_path_str = file_path.to_str().unwrap();
212+
213+
bencher.bench(|| {
214+
black_box(run_uutils_wc(&["-w", file_path_str]));
215+
});
216+
}
217+
218+
/// Benchmark default wc (lines, words, bytes) on large line counts
219+
#[divan::bench(args = [10_000, 50_000, 100_000])]
220+
fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) {
221+
let temp_dir = tempfile::tempdir().unwrap();
222+
let data = generate_test_data_by_lines(num_lines, 80);
223+
let file_path = create_test_file(&data, &temp_dir);
224+
let file_path_str = file_path.to_str().unwrap();
225+
226+
bencher.bench(|| {
227+
black_box(run_uutils_wc(&[file_path_str]));
228+
});
229+
}
230+
231+
/// Benchmark very short vs very long lines with 100K lines
232+
#[divan::bench(args = [(100_000, 10), (100_000, 200), (100_000, 1000)])]
233+
fn wc_lines_extreme_line_lengths(bencher: Bencher, (num_lines, line_len): (usize, usize)) {
234+
let temp_dir = tempfile::tempdir().unwrap();
235+
let data = generate_test_data_by_lines(num_lines, line_len);
236+
let file_path = create_test_file(&data, &temp_dir);
237+
let file_path_str = file_path.to_str().unwrap();
238+
239+
bencher.bench(|| {
240+
black_box(run_uutils_wc(&["-l", file_path_str]));
241+
});
242+
}
243+
244+
fn main() {
245+
divan::main();
246+
}

0 commit comments

Comments
 (0)