Skip to content

Commit ffd6a13

Browse files
authored
[compiler-rt] Rework profile data handling for GPU targets (#187136)
Summary: Currently, the GPU iterates through all of the present symbols and copies them by prefix. This is inefficient as it requires a lot of small high-latency data transfers rather than a few large ones. Additionally, we force every single profiling symbol to have protected visibility. This means potentially hundreds of unnecessary symbols in the symbol table. This PR changes the interface to move towards the start / stop section handling. AMDGPU supports this natively as an ELF target, so it needs few changes. Instead of overriding visibility, we use a single table to define the bounds that we can obtain with one contiguous load. Using a table interface should also work for the in-progress HIP implementation for this, as it wraps the start / stop sections into standard void pointers which will be inside of an already mapped region of memory, so they should be accessible from the HIP API. NVPTX is more difficult as it is an ELF platform without this support. I have hooked up the 'Other' handling to work around this, but even then it's a bit of a stretch. I could remove this support here, but I wanted to demonstrate that we can share the ABI. However, NVPTX will only work if we force LTO and change the backend to emit the variables of the same section next to each other. TL;DR, we now do this:
```c
struct { start1, stop1, start2, stop2, start3, stop3, version; } device;
struct host = DtoH(lookup("device"));
counters = DtoH(host.stop - host.start);
version = DtoH(host.version);
```
1 parent 76f8806 commit ffd6a13

File tree

11 files changed

+281
-146
lines changed

11 files changed

+281
-146
lines changed

compiler-rt/include/profile/InstrProfData.inc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,38 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
142142
#undef INSTR_PROF_VALUE_NODE
143143
/* INSTR_PROF_VALUE_NODE end. */
144144

145+
/* INSTR_PROF_GPU_SECT start. */
146+
/* Fields of the GPU profile section bounds structure, populated by the
147+
* compiler runtime and read by the host to extract profiling data. */
148+
#ifndef INSTR_PROF_GPU_SECT
149+
#define INSTR_PROF_GPU_SECT(Type, LLVMType, Name, Initializer)
150+
#else
151+
#define INSTR_PROF_DATA_DEFINED
152+
#endif
153+
INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx), \
154+
NamesStart, \
155+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
156+
INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx), \
157+
NamesStop, \
158+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
159+
INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx), \
160+
CountersStart, \
161+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
162+
INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx), \
163+
CountersStop, \
164+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
165+
INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
166+
Ctx), DataStart, \
167+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
168+
INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
169+
Ctx), DataStop, \
170+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
171+
INSTR_PROF_GPU_SECT(uint64_t *, llvm::PointerType::getUnqual(Ctx), \
172+
VersionVar, \
173+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
174+
#undef INSTR_PROF_GPU_SECT
175+
/* INSTR_PROF_GPU_SECT end. */
176+
145177
/* INSTR_PROF_RAW_HEADER start */
146178
/* Definition of member fields of the raw profile header data structure. */
147179
/* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating
@@ -761,6 +793,10 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
761793
* specified via command line. */
762794
#define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
763795

796+
/* GPU profiling section bounds structure, populated by the compiler runtime
797+
* and read by the host to extract profiling data. */
798+
#define INSTR_PROF_SECT_BOUNDS_TABLE __llvm_profile_sections
799+
764800
/* section name strings common to all targets other
765801
than WIN32 */
766802
#define INSTR_PROF_DATA_COMMON __llvm_prf_data

compiler-rt/lib/profile/InstrProfiling.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT) VTableProfData {
5757
#include "profile/InstrProfData.inc"
5858
} VTableProfData;
5959

60+
typedef struct __llvm_profile_gpu_sections {
61+
#define INSTR_PROF_GPU_SECT(Type, LLVMType, Name, Initializer) Type Name;
62+
#include "profile/InstrProfData.inc"
63+
} __llvm_profile_gpu_sections;
64+
6065
typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT)
6166
__llvm_gcov_init_func_struct {
6267
#define COVINIT_FUNC(Type, LLVMType, Name, Initializer) Type Name;

compiler-rt/lib/profile/InstrProfilingPlatformGPU.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include "InstrProfiling.h"
1818
#include <gpuintrin.h>
1919

20+
// Symbols exported to the GPU runtime need to be visible in the .dynsym table.
21+
#define COMPILER_RT_GPU_VISIBILITY __attribute__((visibility("protected")))
22+
2023
// Indicates that the current wave is fully occupied.
2124
static int is_uniform(uint64_t mask) {
2225
const uint64_t uniform_mask = ~0ull >> (64 - __gpu_num_lanes());
@@ -39,4 +42,45 @@ COMPILER_RT_VISIBILITY void __llvm_profile_instrument_gpu(uint64_t *counter,
3942
}
4043
}
4144

45+
#if defined(__AMDGPU__)
46+
47+
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
48+
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
49+
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
50+
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
51+
#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
52+
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
53+
54+
extern char PROF_NAME_START[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
55+
extern char PROF_NAME_STOP[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
56+
extern char PROF_CNTS_START[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
57+
extern char PROF_CNTS_STOP[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
58+
extern __llvm_profile_data PROF_DATA_START[] COMPILER_RT_VISIBILITY
59+
COMPILER_RT_WEAK;
60+
extern __llvm_profile_data PROF_DATA_STOP[] COMPILER_RT_VISIBILITY
61+
COMPILER_RT_WEAK;
62+
63+
// AMDGPU is a proper ELF target and exports the linker-defined section bounds.
64+
COMPILER_RT_GPU_VISIBILITY
65+
__llvm_profile_gpu_sections INSTR_PROF_SECT_BOUNDS_TABLE = {
66+
PROF_NAME_START,
67+
PROF_NAME_STOP,
68+
PROF_CNTS_START,
69+
PROF_CNTS_STOP,
70+
PROF_DATA_START,
71+
PROF_DATA_STOP,
72+
&INSTR_PROF_RAW_VERSION_VAR};
73+
74+
#elif defined(__NVPTX__)
75+
76+
// NVPTX supports neither sections nor ELF symbols; we rely on the handling in
77+
// the 'InstrProfilingPlatformOther.c' file to fill this at initialization time.
78+
// FIXME: This will not work until we make the NVPTX backend emit section
79+
// globals next to each other.
80+
COMPILER_RT_GPU_VISIBILITY
81+
__llvm_profile_gpu_sections INSTR_PROF_SECT_BOUNDS_TABLE = {
82+
NULL, NULL, NULL, NULL, NULL, NULL, &INSTR_PROF_RAW_VERSION_VAR};
83+
84+
#endif
85+
4286
#endif

compiler-rt/lib/profile/InstrProfilingPlatformLinux.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
2424
(defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) || \
2525
defined(_AIX) || defined(__wasm__) || defined(__HAIKU__) || \
26-
defined(COMPILER_RT_PROFILE_BAREMETAL)
26+
(defined(COMPILER_RT_PROFILE_BAREMETAL) && !defined(__NVPTX__))
2727

2828
#if !defined(_AIX) && !defined(__wasm__) && \
2929
!defined(COMPILER_RT_PROFILE_BAREMETAL)

compiler-rt/lib/profile/InstrProfilingPlatformOther.c

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,38 @@
1313
// This implementation expects the compiler instrumentation pass to define a
1414
// constructor in each file which calls into this file.
1515

16-
#if !defined(__APPLE__) && !defined(__linux__) && !defined(__FreeBSD__) && \
17-
!defined(__Fuchsia__) && !(defined(__sun__) && defined(__svr4__)) && \
18-
!defined(__NetBSD__) && !defined(_WIN32) && !defined(_AIX) && \
19-
!defined(__wasm__) && !defined(__HAIKU__) && \
20-
!defined(COMPILER_RT_PROFILE_BAREMETAL)
21-
22-
#include <stdlib.h>
23-
#include <stdio.h>
16+
#if (!defined(__APPLE__) && !defined(__linux__) && !defined(__FreeBSD__) && \
17+
!defined(__Fuchsia__) && !(defined(__sun__) && defined(__svr4__)) && \
18+
!defined(__NetBSD__) && !defined(_WIN32) && !defined(_AIX) && \
19+
!defined(__wasm__) && !defined(__HAIKU__) && \
20+
!defined(COMPILER_RT_PROFILE_BAREMETAL)) || \
21+
defined(__NVPTX__)
2422

2523
#include "InstrProfiling.h"
2624
#include "InstrProfilingInternal.h"
2725

26+
#if defined(__NVPTX__)
27+
extern __llvm_profile_gpu_sections INSTR_PROF_SECT_BOUNDS_TABLE;
28+
#define DataFirst INSTR_PROF_SECT_BOUNDS_TABLE.DataStart
29+
#define DataLast INSTR_PROF_SECT_BOUNDS_TABLE.DataStop
30+
#define NamesFirst INSTR_PROF_SECT_BOUNDS_TABLE.NamesStart
31+
#define NamesLast INSTR_PROF_SECT_BOUNDS_TABLE.NamesStop
32+
#define CountersFirst INSTR_PROF_SECT_BOUNDS_TABLE.CountersStart
33+
#define CountersLast INSTR_PROF_SECT_BOUNDS_TABLE.CountersStop
34+
#else
2835
static const __llvm_profile_data *DataFirst = NULL;
2936
static const __llvm_profile_data *DataLast = NULL;
30-
static const VTableProfData *VTableProfDataFirst = NULL;
31-
static const VTableProfData *VTableProfDataLast = NULL;
3237
static const char *NamesFirst = NULL;
3338
static const char *NamesLast = NULL;
34-
static const char *VNamesFirst = NULL;
35-
static const char *VNamesLast = NULL;
3639
static char *CountersFirst = NULL;
3740
static char *CountersLast = NULL;
41+
#endif
42+
static const VTableProfData *VTableProfDataFirst = NULL;
43+
static const VTableProfData *VTableProfDataLast = NULL;
44+
static const char *VNamesFirst = NULL;
45+
static const char *VNamesLast = NULL;
46+
static char *BitmapFirst = NULL;
47+
static char *BitmapLast = NULL;
3848

3949
static const void *getMinAddr(const void *A1, const void *A2) {
4050
return A1 < A2 ? A1 : A2;
@@ -55,6 +65,19 @@ COMPILER_RT_VISIBILITY
5565
void __llvm_profile_register_function(void *Data_) {
5666
/* TODO: Only emit this function if we can't use linker magic. */
5767
const __llvm_profile_data *Data = (__llvm_profile_data *)Data_;
68+
69+
#if defined(__NVPTX__)
70+
// NVPTX stores absolute counter addresses to avoid circular dependencies in
71+
// PTX global variable initializers. Convert to a relative offset so the
72+
// host-side profile reader sees the standard format.
73+
{
74+
uintptr_t Rel = (uintptr_t)Data->CounterPtr - (uintptr_t)Data_;
75+
__builtin_memcpy((char *)Data_ +
76+
__builtin_offsetof(__llvm_profile_data, CounterPtr),
77+
&Rel, sizeof(Rel));
78+
}
79+
#endif
80+
5881
if (!DataFirst) {
5982
DataFirst = Data;
6083
DataLast = Data + 1;

llvm/include/llvm/ProfileData/InstrProfData.inc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,38 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
142142
#undef INSTR_PROF_VALUE_NODE
143143
/* INSTR_PROF_VALUE_NODE end. */
144144

145+
/* INSTR_PROF_GPU_SECT start. */
146+
/* Fields of the GPU profile section bounds structure, populated by the
147+
* compiler runtime and read by the host to extract profiling data. */
148+
#ifndef INSTR_PROF_GPU_SECT
149+
#define INSTR_PROF_GPU_SECT(Type, LLVMType, Name, Initializer)
150+
#else
151+
#define INSTR_PROF_DATA_DEFINED
152+
#endif
153+
INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx), \
154+
NamesStart, \
155+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
156+
INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx), \
157+
NamesStop, \
158+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
159+
INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx), \
160+
CountersStart, \
161+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
162+
INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx), \
163+
CountersStop, \
164+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
165+
INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
166+
Ctx), DataStart, \
167+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
168+
INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
169+
Ctx), DataStop, \
170+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
171+
INSTR_PROF_GPU_SECT(uint64_t *, llvm::PointerType::getUnqual(Ctx), \
172+
VersionVar, \
173+
ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
174+
#undef INSTR_PROF_GPU_SECT
175+
/* INSTR_PROF_GPU_SECT end. */
176+
145177
/* INSTR_PROF_RAW_HEADER start */
146178
/* Definition of member fields of the raw profile header data structure. */
147179
/* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating
@@ -761,6 +793,10 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
761793
* specified via command line. */
762794
#define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
763795

796+
/* GPU profiling section bounds structure, populated by the compiler runtime
797+
* and read by the host to extract profiling data. */
798+
#define INSTR_PROF_SECT_BOUNDS_TABLE __llvm_profile_sections
799+
764800
/* section name strings common to all targets other
765801
than WIN32 */
766802
#define INSTR_PROF_DATA_COMMON __llvm_prf_data

llvm/lib/ProfileData/InstrProf.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -486,25 +486,18 @@ bool isGPUProfTarget(const Module &M) {
486486
}
487487

488488
void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
489-
// If the target is a GPU, make the symbol protected so it can
490-
// be read from the host device
491-
if (isGPUProfTarget(M))
492-
FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
493489
// Hide the symbol so that we correctly get a copy for each executable.
494-
else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
490+
if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
495491
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
496492
}
497493

498494
GlobalVariable *createPGOFuncNameVar(Module &M,
499495
GlobalValue::LinkageTypes Linkage,
500496
StringRef PGOFuncName) {
501-
// Ensure profiling variables on GPU are visible to be read from host
502-
if (isGPUProfTarget(M))
503-
Linkage = GlobalValue::ExternalLinkage;
504497
// We generally want to match the function's linkage, but available_externally
505498
// and extern_weak both have the wrong semantics, and anything that doesn't
506499
// need to link across compilation units doesn't need to be visible at all.
507-
else if (Linkage == GlobalValue::ExternalWeakLinkage)
500+
if (Linkage == GlobalValue::ExternalWeakLinkage)
508501
Linkage = GlobalValue::LinkOnceAnyLinkage;
509502
else if (Linkage == GlobalValue::AvailableExternallyLinkage)
510503
Linkage = GlobalValue::LinkOnceODRLinkage;

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,10 @@ static inline Constant *getFuncAddrForProfData(Function *Fn) {
14251425
}
14261426

14271427
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1428+
// NVPTX is an ELF target but PTX does not expose sections or linker symbols.
1429+
if (TT.isNVPTX())
1430+
return true;
1431+
14281432
// compiler-rt uses linker support to get data/counters/name start/end for
14291433
// ELF, COFF, Mach-O, XCOFF, and Wasm.
14301434
if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
@@ -1815,10 +1819,6 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
18151819
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
18161820
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
18171821

1818-
if (isGPUProfTarget(M)) {
1819-
Linkage = GlobalValue::ExternalLinkage;
1820-
Visibility = GlobalValue::ProtectedVisibility;
1821-
}
18221822
// If the data variable is not referenced by code (if we don't emit
18231823
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
18241824
// data variable live under linker GC, the data variable can be private. This
@@ -1830,12 +1830,17 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
18301830
// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
18311831
// that other copies must have the same CFG and cannot have value profiling.
18321832
// If no hash suffix, other profd copies may be referenced by code.
1833-
else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1834-
(TT.isOSBinFormatELF() ||
1835-
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
1833+
if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1834+
(TT.isOSBinFormatELF() ||
1835+
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
18361836
Linkage = GlobalValue::PrivateLinkage;
18371837
Visibility = GlobalValue::DefaultVisibility;
18381838
}
1839+
// AMDGPU objects are always ET_DYN, so non-local symbols with default
1840+
// visibility are preemptible. The CounterPtr label difference emits a REL32
1841+
// relocation that lld rejects against preemptible targets.
1842+
if (TT.isAMDGPU() && !GlobalValue::isLocalLinkage(Linkage))
1843+
Visibility = GlobalValue::ProtectedVisibility;
18391844
auto *Data =
18401845
new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
18411846
Constant *RelativeCounterPtr;
@@ -1849,6 +1854,12 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
18491854
RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
18501855
if (BitmapPtr != nullptr)
18511856
RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
1857+
} else if (TT.isNVPTX()) {
1858+
// The NVPTX target cannot handle self-referencing constant expressions in
1859+
// global initializers at all. Use absolute pointers and have the runtime
1860+
// registration convert them to relative offsets.
1861+
DataSectionKind = IPSK_data;
1862+
RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
18521863
} else {
18531864
// Reference the counter variable with a label difference (link-time
18541865
// constant).
@@ -1953,10 +1964,6 @@ void InstrLowerer::emitNameData() {
19531964
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
19541965
GlobalValue::PrivateLinkage, NamesVal,
19551966
getInstrProfNamesVarName());
1956-
if (isGPUProfTarget(M)) {
1957-
NamesVar->setLinkage(GlobalValue::ExternalLinkage);
1958-
NamesVar->setVisibility(GlobalValue::ProtectedVisibility);
1959-
}
19601967

19611968
NamesSize = CompressedNameStr.size();
19621969
setGlobalVariableLargeSection(TT, *NamesVar);
@@ -2048,6 +2055,11 @@ void InstrLowerer::emitRegistration() {
20482055
}
20492056

20502057
bool InstrLowerer::emitRuntimeHook() {
2058+
// GPU profiling data is read directly by the host offload runtime. We do not
2059+
// need the standard runtime hook.
2060+
if (TT.isGPU())
2061+
return false;
2062+
20512063
// We expect the linker to be invoked with -u<hook_var> flag for Linux
20522064
// in which case there is no need to emit the external variable.
20532065
if (TT.isOSLinux() || TT.isOSAIX())
@@ -2062,10 +2074,7 @@ bool InstrLowerer::emitRuntimeHook() {
20622074
auto *Var =
20632075
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
20642076
nullptr, getInstrProfRuntimeHookVarName());
2065-
if (isGPUProfTarget(M))
2066-
Var->setVisibility(GlobalValue::ProtectedVisibility);
2067-
else
2068-
Var->setVisibility(GlobalValue::HiddenVisibility);
2077+
Var->setVisibility(GlobalValue::HiddenVisibility);
20692078

20702079
if (TT.isOSBinFormatELF() && !TT.isPS()) {
20712080
// Mark the user variable as used so that it isn't stripped out.

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -469,9 +469,6 @@ createIRLevelProfileFlagVar(Module &M,
469469
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
470470
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
471471
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
472-
if (isGPUProfTarget(M))
473-
IRLevelVersionVariable->setVisibility(
474-
llvm::GlobalValue::ProtectedVisibility);
475472

476473
Triple TT(M.getTargetTriple());
477474
if (TT.supportsCOMDAT()) {

0 commit comments

Comments
 (0)