-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Closed
Copy link
Description
Checked with Halide 16.0.0. Building the generator with target=x86-64-linux-avx-avx2-f16c-fma-sse41 raises the following warning for the float16 -> float32 conversion:
Warning: In function my_gen, (b)float16 type operation is emulated, which is likely to slow down the performance. If your target supports native (b)float16 operations, it could be improved by adding Target feature to enable it.
Generator code:
#include "Halide.h"
using namespace Halide;
/// Halide generator that multiplies two float16 inputs and accumulates in float.
///
/// For every (x, y) the accumulator is updated with
///     float(A(r, y)) * float(B(r, x))
/// over the reduction domain r, whose min is 0 and whose extent is
/// A.dim(0).extent(). The accumulated value is then copied to the output.
class HalfFloatMatMul : public Halide::Generator<HalfFloatMatMul> {
public:
    // Two-dimensional float16 operands.
    Input<Buffer<float16_t, 2>> A{"A"};
    Input<Buffer<float16_t, 2>> B{"B"};
    // Two-dimensional float result; its Halide-visible name is "C".
    Output<Buffer<float, 2>> output{"C"};

    /// Defines the algorithm: widen both operands to float, multiply, accumulate.
    void generate() {
        // Reduction variable running over dimension 0 of A.
        RDom k(0, A.dim(0).extent(), "r");

        // Both operands are cast to float before multiplying, so the
        // multiply-accumulate itself is carried out on float values.
        Expr lhs = cast<float>(A(k, y));
        Expr rhs = cast<float>(B(k, x));

        product(x, y) += lhs * rhs;
        output(x, y) = product(x, y);
    }

    /// Applies the manual schedule, or supplies estimates to the autoscheduler.
    void schedule() {
        if (!using_autoscheduler()) {
            // Manual schedule: materialize the accumulator at root level.
            product.compute_root();
            return;
        }

        // Autoscheduler path: every buffer is estimated as min 0, extent 1024
        // in both dimensions.
        A.set_estimates({{0, 1024}, {0, 1024}});
        B.set_estimates({{0, 1024}, {0, 1024}});
        output.set_estimates({{0, 1024}, {0, 1024}});
    }

private:
    // Intermediate Func holding the float accumulation.
    Func product;
    // Pure variables indexing the accumulator and the output.
    Var x{"x"}, y{"y"};
};
// Registers the HalfFloatMatMul generator class under the name "my_gen".
HALIDE_REGISTER_GENERATOR(HalfFloatMatMul, my_gen)
Since the target includes f16c, I expected to see vcvtph2ps (_mm256_cvtph_ps) in the generated assembly. Is there anything I can change so that it is emitted?
Metadata
Metadata
Assignees
Labels
No labels