Commit df9eb79
authored
Fix the IR lowering for `__amdgpu_texture_t` to generate a single
256-bit load instead of a double indirection through a flat pointer.
Previously, `__amdgpu_texture_t` was lowered to `ptr addrspace(0)`
(64-bit flat pointer), which caused the double load and indirection.
Using the same reproducer as in #187697:
```c
#define TSHARP __constant uint *
// Old tsharp handling:
// #define LOAD_TSHARP(I) *(__constant uint8 *)I
#define LOAD_TSHARP(I) *(__constant __amdgpu_texture_t *)I
float4 test_image_load_1D(TSHARP i, int c) {
return __builtin_amdgcn_image_load_1d_v4f32_i32(15, c, LOAD_TSHARP(i), 0, 0);
}
```
old output:
```llvm
define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %i, i32 noundef %c) local_unnamed_addr #0 {
entry:
%0 = load ptr, ptr addrspace(4) %i, align 32, !tbaa !9
%1 = addrspacecast ptr %0 to ptr addrspace(1)
%tex.rsrc.val = load <8 x i32>, ptr addrspace(1) %1, align 32
%2 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %c, <8 x i32> %tex.rsrc.val, i32 0, i32 0)
ret <4 x float> %2
}
```
With this fix, the lowering matches the old `__constant uint8 *` behavior. The new
output is:
```llvm
define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 {
%3 = load <8 x i32>, ptr addrspace(4) %0, align 32, !tbaa !10
%4 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %1, <8 x i32> %3, i32 0, i32 0)
ret <4 x float> %4
}
```
Fixes #187697
1 parent d818fa4 commit df9eb79
File tree
5 files changed
+582
-691
lines changed
- clang
- lib/CodeGen
- test/CodeGen
5 files changed
+582
-691
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
599 | 599 | | |
600 | 600 | | |
601 | 601 | | |
602 | | - | |
603 | | - | |
| 602 | + | |
| 603 | + | |
| 604 | + | |
| 605 | + | |
| 606 | + | |
| 607 | + | |
| 608 | + | |
604 | 609 | | |
605 | 610 | | |
606 | 611 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
3 | 3 | | |
4 | 4 | | |
5 | 5 | | |
6 | | - | |
| 6 | + | |
7 | 7 | | |
8 | | - | |
| 8 | + | |
9 | 9 | | |
10 | | - | |
11 | | - | |
12 | | - | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
13 | 13 | | |
14 | 14 | | |
15 | 15 | | |
| |||
0 commit comments