-
-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Closed
Labels
Description
Zig Version
0.15.1
Steps to Reproduce and Observed Behavior
I was testing a toy example with Zig (wasm, release builds, and the debug build) and my code would not finish in the expected ~1 second of execution time. It turned out the default debug build was ~500x slower than `zig build-exe -O Debug -fllvm benchmark.zig`.
here is the benchmark file:
const std = @import("std");
const print = std.debug.print;
// Constants matching Go/C++ versions
/// Side length of the square u64 matrices being multiplied (64x64).
const MatrixSize = 64;
/// Default number of chained matmul+hash iterations when no CLI argument is given.
const Iterations = 10000;
// FNV-1a hash for 64-bit values (matching Go/C++ implementation)
/// FNV-1a hash of every element of a MatrixSize x MatrixSize u64 matrix,
/// with `seed` XORed into the standard FNV-1a offset basis before hashing.
/// Multiplication wraps (`*%`) so overflow is well-defined in all build modes.
fn fnv1aHash64(data: [MatrixSize][MatrixSize]u64, seed: u64) u64 {
    const prime: u64 = 1099511628211; // 64-bit FNV prime
    var acc: u64 = seed ^ 14695981039346656037; // FNV-1a 64-bit offset basis XOR seed
    for (data) |row| {
        for (row) |cell| {
            acc ^= cell;
            acc *%= prime;
        }
    }
    return acc;
}
// Matrix multiply and hash (matching Go/C++ implementation)
/// Build two seed-derived MatrixSize x MatrixSize u64 matrices, multiply them
/// with wrapping integer arithmetic, and return the FNV-1a hash of the product
/// (chained with the same seed). Matches the Go/C++ reference implementations.
fn matmulAndHash(seed: u64) u64 {
    // Derive both operands from the seed so the optimizer cannot
    // constant-fold the whole computation away.
    var lhs: [MatrixSize][MatrixSize]u64 = undefined;
    var rhs: [MatrixSize][MatrixSize]u64 = undefined;
    for (0..MatrixSize) |row| {
        for (0..MatrixSize) |col| {
            lhs[row][col] = seed ^ @as(u64, @intCast(row * MatrixSize + col));
            rhs[row][col] = seed ^ @as(u64, @intCast(row + col + 1));
        }
    }
    // Plain triple-loop integer matrix multiply; `+%`/`*%` keep overflow defined.
    var product: [MatrixSize][MatrixSize]u64 = std.mem.zeroes([MatrixSize][MatrixSize]u64);
    for (0..MatrixSize) |row| {
        for (0..MatrixSize) |col| {
            for (0..MatrixSize) |k| {
                product[row][col] +%= lhs[row][k] *% rhs[k][col];
            }
        }
    }
    // Fold the entire product matrix into a single u64 for the next iteration.
    return fnv1aHash64(product, seed);
}
/// Benchmark driver: chains `matmulAndHash` results so no iteration can be
/// elided, then reports wall-clock timing and throughput to stderr.
/// Accepts an optional first CLI argument overriding the iteration count.
pub fn main() void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const args = std.process.argsAlloc(allocator) catch |err| {
        print("Error parsing args: {}\n", .{err});
        return;
    };
    defer std.process.argsFree(allocator, args);

    // Use args[1] as the iteration count when present; fall back to the default.
    const iterations: usize = if (args.len > 1)
        std.fmt.parseInt(usize, args[1], 10) catch |err| {
            print("Invalid iterations number '{s}': {}\n", .{ args[1], err });
            return;
        }
    else
        Iterations;

    const t0 = std.time.nanoTimestamp();
    var acc: u64 = 5281;
    // Each result seeds the next call, preventing the optimizer from
    // collapsing the loop.
    for (0..iterations) |_| {
        acc = matmulAndHash(acc);
    }
    const t1 = std.time.nanoTimestamp();

    const elapsed_s = @as(f64, @floatFromInt(t1 - t0)) / 1e9;
    print("\n", .{});
    print("Matrix size: {}x{}\n", .{ MatrixSize, MatrixSize });
    print("Result: {}\n", .{acc});
    print("Time: {d:.6}s\n", .{elapsed_s});
    print("Ops/sec: {d:.2}\n", .{@as(f64, @floatFromInt(iterations)) / elapsed_s});
    print("Matrix ops/sec: {d:.0}\n", .{@as(f64, @floatFromInt(iterations * MatrixSize * MatrixSize * MatrixSize)) / elapsed_s});
    print("\n", .{});
}
Example outputs:
miguel@fw13 wasmplay (git)[main] % zig build-exe -O Debug -fllvm benchmark.zig -femit-bin=bench-zig-llvm
./bench-zig-llvm 100
Matrix size: 64x64
Result: 17211066552811558049
Time: 0.089756s
Ops/sec: 1114.13
Matrix ops/sec: 292062763
miguel@fw13 wasmplay (git)[main] % zig build-exe -O Debug benchmark.zig -femit-bin=bench-zig-self
./bench-zig-self 100
Matrix size: 64x64
Result: 17211066552811558049
Time: 17.845303s
Ops/sec: 5.60
Matrix ops/sec: 1468980
Expected Behavior
I was expecting performance on the order of ~10x slower; maybe even 50x slower would be tolerable.
Reactions are currently unavailable