Skip to content

Commit df9eb79

Browse files
authored
[Clang][AMDGPU] Lower __amdgpu_texture_t to <8 x i32> instead of ptr addrspace(0) (#187774)
Fix the IR lowering for `__amdgpu_texture_t` to generate a single 256-bit load instead of a double indirection through a flat pointer. Previously, `__amdgpu_texture_t` was lowered to `ptr addrspace(0)` (a 64-bit flat pointer), which caused the double load and indirection.

Using the same reproducer as #187697:

```c
#define TSHARP __constant uint *
// Old tsharp handling:
// #define LOAD_TSHARP(I) *(__constant uint8 *)I
#define LOAD_TSHARP(I) *(__constant __amdgpu_texture_t *)I

float4 test_image_load_1D(TSHARP i, int c) {
  return __builtin_amdgcn_image_load_1d_v4f32_i32(15, c, LOAD_TSHARP(i), 0, 0);
}
```

Old output:

```llvm
define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %i, i32 noundef %c) local_unnamed_addr #0 {
entry:
  %0 = load ptr, ptr addrspace(4) %i, align 32, !tbaa !9
  %1 = addrspacecast ptr %0 to ptr addrspace(1)
  %tex.rsrc.val = load <8 x i32>, ptr addrspace(1) %1, align 32
  %2 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %c, <8 x i32> %tex.rsrc.val, i32 0, i32 0)
  ret <4 x float> %2
}
```

With this fix, the new output (which matches the old `__constant uint8 *` behavior) is:

```llvm
define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 {
  %3 = load <8 x i32>, ptr addrspace(4) %0, align 32, !tbaa !10
  %4 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %1, <8 x i32> %3, i32 0, i32 0)
  ret <4 x float> %4
}
```

Fixes #187697
1 parent d818fa4 commit df9eb79

File tree

5 files changed

+582
-691
lines changed

5 files changed

+582
-691
lines changed

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -599,8 +599,13 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
599599
} break;
600600
#include "clang/Basic/WebAssemblyReferenceTypes.def"
601601
#define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS) \
602-
case BuiltinType::Id: \
603-
return llvm::PointerType::get(getLLVMContext(), AS);
602+
case BuiltinType::Id: { \
603+
if (BuiltinType::Id == BuiltinType::AMDGPUTexture) { \
604+
return llvm::FixedVectorType::get( \
605+
llvm::Type::getInt32Ty(getLLVMContext()), 8); \
606+
} \
607+
return llvm::PointerType::get(getLLVMContext(), AS); \
608+
}
604609
#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \
605610
case BuiltinType::Id: \
606611
return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier", \

clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s
44

55
// CHECK-LABEL: define dso_local void @test_locals(
6-
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
6+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
77
// CHECK-NEXT: [[ENTRY:.*:]]
8-
// CHECK-NEXT: [[IMG:%.*]] = alloca ptr, align 32, addrspace(5)
8+
// CHECK-NEXT: [[IMG:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
99
// CHECK-NEXT: [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr
10-
// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]])
11-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]]
12-
// CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]]
10+
// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META10:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META13:![0-9]+]])
11+
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG14:![0-9]+]]
12+
// CHECK-NEXT: ret void, !dbg [[DBG15:![0-9]+]]
1313
//
1414
void test_locals(void) {
1515
__amdgpu_texture_t img;

0 commit comments

Comments
 (0)