-
-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Closed
Labels
Description
Zig Version
0.15.1
Steps to Reproduce and Observed Behavior
I was testing a toy example with Zig (wasm, release builds, and the debug build) and my code would not finish in the expected ~1 second of execution time. It turned out the default debug build was ~500x slower than `zig build-exe -O Debug -fllvm benchmark.zig`.
here is the benchmark file:
const std = @import("std");
const print = std.debug.print;
// Constants matching Go/C++ versions
/// Side length of the square u64 matrices being multiplied (64x64).
const MatrixSize = 64;
/// Default number of chained matmul+hash iterations when no CLI argument is given.
const Iterations = 10000;
// FNV-1a hash for 64-bit values (matching Go/C++ implementation)
/// FNV-1a hash of every element of a MatrixSize x MatrixSize u64 matrix,
/// with `seed` XORed into the standard FNV-1a offset basis before hashing.
/// Multiplication wraps (`*%`) so overflow is well-defined in all build modes.
fn fnv1aHash64(data: [MatrixSize][MatrixSize]u64, seed: u64) u64 {
    const prime: u64 = 1099511628211; // 64-bit FNV prime
    var acc: u64 = seed ^ 14695981039346656037; // FNV-1a 64-bit offset basis XOR seed
    for (data) |row| {
        for (row) |cell| {
            acc ^= cell;
            acc *%= prime;
        }
    }
    return acc;
}
// Matrix multiply and hash (matching Go/C++ implementation)
/// Build two seed-derived MatrixSize x MatrixSize u64 matrices, multiply them
/// with wrapping integer arithmetic, and return the FNV-1a hash of the product
/// (chained with the same seed). Matches the Go/C++ reference implementations.
fn matmulAndHash(seed: u64) u64 {
    // Derive both operands from the seed so the optimizer cannot
    // constant-fold the whole computation away.
    var lhs: [MatrixSize][MatrixSize]u64 = undefined;
    var rhs: [MatrixSize][MatrixSize]u64 = undefined;
    for (0..MatrixSize) |row| {
        for (0..MatrixSize) |col| {
            lhs[row][col] = seed ^ @as(u64, @intCast(row * MatrixSize + col));
            rhs[row][col] = seed ^ @as(u64, @intCast(row + col + 1));
        }
    }
    // Plain triple-loop integer matrix multiply; `+%`/`*%` keep overflow defined.
    var product: [MatrixSize][MatrixSize]u64 = std.mem.zeroes([MatrixSize][MatrixSize]u64);
    for (0..MatrixSize) |row| {
        for (0..MatrixSize) |col| {
            for (0..MatrixSize) |k| {
                product[row][col] +%= lhs[row][k] *% rhs[k][col];
            }
        }
    }
    // Fold the entire product matrix into a single u64 for the next iteration.
    return fnv1aHash64(product, seed);
}
/// Benchmark driver: chains `matmulAndHash` results so no iteration can be
/// elided, then reports wall-clock timing and throughput to stderr.
/// Accepts an optional first CLI argument overriding the iteration count.
pub fn main() void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const args = std.process.argsAlloc(allocator) catch |err| {
        print("Error parsing args: {}\n", .{err});
        return;
    };
    defer std.process.argsFree(allocator, args);

    // Use args[1] as the iteration count when present; fall back to the default.
    const iterations: usize = if (args.len > 1)
        std.fmt.parseInt(usize, args[1], 10) catch |err| {
            print("Invalid iterations number '{s}': {}\n", .{ args[1], err });
            return;
        }
    else
        Iterations;

    const t0 = std.time.nanoTimestamp();
    var acc: u64 = 5281;
    // Each result seeds the next call, preventing the optimizer from
    // collapsing the loop.
    for (0..iterations) |_| {
        acc = matmulAndHash(acc);
    }
    const t1 = std.time.nanoTimestamp();

    const elapsed_s = @as(f64, @floatFromInt(t1 - t0)) / 1e9;
    print("\n", .{});
    print("Matrix size: {}x{}\n", .{ MatrixSize, MatrixSize });
    print("Result: {}\n", .{acc});
    print("Time: {d:.6}s\n", .{elapsed_s});
    print("Ops/sec: {d:.2}\n", .{@as(f64, @floatFromInt(iterations)) / elapsed_s});
    print("Matrix ops/sec: {d:.0}\n", .{@as(f64, @floatFromInt(iterations * MatrixSize * MatrixSize * MatrixSize)) / elapsed_s});
    print("\n", .{});
}
Example outputs:
miguel@fw13 wasmplay (git)[main] % zig build-exe -O Debug -fllvm benchmark.zig -femit-bin=bench-zig-llvm
./bench-zig-llvm 100
Matrix size: 64x64
Result: 17211066552811558049
Time: 0.089756s
Ops/sec: 1114.13
Matrix ops/sec: 292062763
miguel@fw13 wasmplay (git)[main] % zig build-exe -O Debug benchmark.zig -femit-bin=bench-zig-self
./bench-zig-self 100
Matrix size: 64x64
Result: 17211066552811558049
Time: 17.845303s
Ops/sec: 5.60
Matrix ops/sec: 1468980
Expected Behavior
I was expecting performance on the order of ~10x slower; maybe even 50x slower would be tolerable.
Reactions are currently unavailable