[compiler-rt] Rework profile data handling for GPU targets (#187136)

jhuber6 · web-flow · commit ffd6a13b5ffe · 2026-03-26T10:17:43.000-05:00
Summary:
Currently, the GPU iterates through all of the present symbols and
copies them by prefix. This is inefficient as it requires a lot of small
high-latency data transfers rather than a few large ones. Additionally,
we force every single profiling symbol to have protected visibility.
This means potentially hundreds of unnecessary symbols in the symbol
table.

This PR changes the interface to move towards the start / stop section
handling. AMDGPU supports this natively as an ELF target, so few changes
are needed there. Instead of overriding visibility, we use a single table
to define the bounds that we can obtain with one contiguous load.

Using a table interface should also work for the in-progress HIP
implementation for this, as it wraps the start / stop sections into
standard void pointers which will be inside of an already mapped region
of memory, so they should be accessible from the HIP API.

NVPTX is more difficult as it is an ELF platform without this support. I
have hooked up the 'Other' handling to work around this, but even then
it's a bit of a stretch. I could remove this support here, but I wanted
to demonstrate that we can share the ABI. However, NVPTX will only work
if we force LTO and change the backend to emit variables in the same
section next to each other.

TL;DR, we now do this:
```c
struct { start1, stop1, start2, stop2, start3, stop3, version; } device;
struct host = DtoH(lookup("device"));
counters = DtoH(host.stop - host.start)
version = DtoH(host.version);
```
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
@@ -142,6 +142,38 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
 #undef INSTR_PROF_VALUE_NODE
 /* INSTR_PROF_VALUE_NODE end. */
 
+/* INSTR_PROF_GPU_SECT start. */
+/* Fields of the GPU profile section bounds structure, populated by the
+ * compiler runtime and read by the host to extract profiling data. */
+#ifndef INSTR_PROF_GPU_SECT
+#define INSTR_PROF_GPU_SECT(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx),           \
+                    NamesStart,                                                \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx),           \
+                    NamesStop,                                                 \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx),                 \
+                    CountersStart,                                             \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx),                 \
+                    CountersStop,                                              \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
+                    Ctx), DataStart,                                           \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
+                    Ctx), DataStop,                                            \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(uint64_t *, llvm::PointerType::getUnqual(Ctx),             \
+                    VersionVar,                                                \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+#undef INSTR_PROF_GPU_SECT
+/* INSTR_PROF_GPU_SECT end. */
+
 /* INSTR_PROF_RAW_HEADER  start */
 /* Definition of member fields of the raw profile header data structure. */
 /* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating
@@ -761,6 +793,10 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  * specified via command line. */
 #define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
 
+/* GPU profiling section bounds structure, populated by the compiler runtime
+ * and read by the host to extract profiling data. */
+#define INSTR_PROF_SECT_BOUNDS_TABLE __llvm_profile_sections
+
 /* section name strings common to all targets other
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
@@ -57,6 +57,11 @@ typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT) VTableProfData {
 #include "profile/InstrProfData.inc"
 } VTableProfData;
 
+typedef struct __llvm_profile_gpu_sections {
+#define INSTR_PROF_GPU_SECT(Type, LLVMType, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} __llvm_profile_gpu_sections;
+
 typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT)
     __llvm_gcov_init_func_struct {
 #define COVINIT_FUNC(Type, LLVMType, Name, Initializer) Type Name;
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformGPU.c b/compiler-rt/lib/profile/InstrProfilingPlatformGPU.c
@@ -17,6 +17,9 @@
 #include "InstrProfiling.h"
 #include <gpuintrin.h>
 
+// Symbols exported to the GPU runtime need to be visible in the .dynsym table.
+#define COMPILER_RT_GPU_VISIBILITY __attribute__((visibility("protected")))
+
 // Indicates that the current wave is fully occupied.
 static int is_uniform(uint64_t mask) {
   const uint64_t uniform_mask = ~0ull >> (64 - __gpu_num_lanes());
@@ -39,4 +42,45 @@ COMPILER_RT_VISIBILITY void __llvm_profile_instrument_gpu(uint64_t *counter,
   }
 }
 
+#if defined(__AMDGPU__)
+
+#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
+#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
+#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
+#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
+#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
+#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
+
+extern char PROF_NAME_START[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_NAME_STOP[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_CNTS_START[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_CNTS_STOP[] COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern __llvm_profile_data PROF_DATA_START[] COMPILER_RT_VISIBILITY
+    COMPILER_RT_WEAK;
+extern __llvm_profile_data PROF_DATA_STOP[] COMPILER_RT_VISIBILITY
+    COMPILER_RT_WEAK;
+
+// AMDGPU is a proper ELF target and exports the linker-defined section bounds.
+COMPILER_RT_GPU_VISIBILITY
+__llvm_profile_gpu_sections INSTR_PROF_SECT_BOUNDS_TABLE = {
+    PROF_NAME_START,
+    PROF_NAME_STOP,
+    PROF_CNTS_START,
+    PROF_CNTS_STOP,
+    PROF_DATA_START,
+    PROF_DATA_STOP,
+    &INSTR_PROF_RAW_VERSION_VAR};
+
+#elif defined(__NVPTX__)
+
+// NVPTX supports neither sections nor ELF symbols, so we rely on the handling
+// in 'InstrProfilingPlatformOther.c' to fill this at initialization time.
+// FIXME: This will not work until we make the NVPTX backend emit section
+//        globals next to each other.
+COMPILER_RT_GPU_VISIBILITY
+__llvm_profile_gpu_sections INSTR_PROF_SECT_BOUNDS_TABLE = {
+    NULL, NULL, NULL, NULL, NULL, NULL, &INSTR_PROF_RAW_VERSION_VAR};
+
+#endif
+
 #endif
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -23,7 +23,7 @@
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) ||      \
     (defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) ||          \
     defined(_AIX) || defined(__wasm__) || defined(__HAIKU__) ||                \
-    defined(COMPILER_RT_PROFILE_BAREMETAL)
+    (defined(COMPILER_RT_PROFILE_BAREMETAL) && !defined(__NVPTX__))
 
 #if !defined(_AIX) && !defined(__wasm__) &&                                    \
     !defined(COMPILER_RT_PROFILE_BAREMETAL)
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c
@@ -13,28 +13,38 @@
 // This implementation expects the compiler instrumentation pass to define a
 // constructor in each file which calls into this file.
 
-#if !defined(__APPLE__) && !defined(__linux__) && !defined(__FreeBSD__) &&     \
-    !defined(__Fuchsia__) && !(defined(__sun__) && defined(__svr4__)) &&       \
-    !defined(__NetBSD__) && !defined(_WIN32) && !defined(_AIX) &&              \
-    !defined(__wasm__) && !defined(__HAIKU__) &&                               \
-    !defined(COMPILER_RT_PROFILE_BAREMETAL)
-
-#include <stdlib.h>
-#include <stdio.h>
+#if (!defined(__APPLE__) && !defined(__linux__) && !defined(__FreeBSD__) &&    \
+     !defined(__Fuchsia__) && !(defined(__sun__) && defined(__svr4__)) &&      \
+     !defined(__NetBSD__) && !defined(_WIN32) && !defined(_AIX) &&             \
+     !defined(__wasm__) && !defined(__HAIKU__) &&                              \
+     !defined(COMPILER_RT_PROFILE_BAREMETAL)) ||                               \
+    defined(__NVPTX__)
 
 #include "InstrProfiling.h"
 #include "InstrProfilingInternal.h"
 
+#if defined(__NVPTX__)
+extern __llvm_profile_gpu_sections INSTR_PROF_SECT_BOUNDS_TABLE;
+#define DataFirst INSTR_PROF_SECT_BOUNDS_TABLE.DataStart
+#define DataLast INSTR_PROF_SECT_BOUNDS_TABLE.DataStop
+#define NamesFirst INSTR_PROF_SECT_BOUNDS_TABLE.NamesStart
+#define NamesLast INSTR_PROF_SECT_BOUNDS_TABLE.NamesStop
+#define CountersFirst INSTR_PROF_SECT_BOUNDS_TABLE.CountersStart
+#define CountersLast INSTR_PROF_SECT_BOUNDS_TABLE.CountersStop
+#else
 static const __llvm_profile_data *DataFirst = NULL;
 static const __llvm_profile_data *DataLast = NULL;
-static const VTableProfData *VTableProfDataFirst = NULL;
-static const VTableProfData *VTableProfDataLast = NULL;
 static const char *NamesFirst = NULL;
 static const char *NamesLast = NULL;
-static const char *VNamesFirst = NULL;
-static const char *VNamesLast = NULL;
 static char *CountersFirst = NULL;
 static char *CountersLast = NULL;
+#endif
+static const VTableProfData *VTableProfDataFirst = NULL;
+static const VTableProfData *VTableProfDataLast = NULL;
+static const char *VNamesFirst = NULL;
+static const char *VNamesLast = NULL;
+static char *BitmapFirst = NULL;
+static char *BitmapLast = NULL;
 
 static const void *getMinAddr(const void *A1, const void *A2) {
   return A1 < A2 ? A1 : A2;
@@ -55,6 +65,19 @@ COMPILER_RT_VISIBILITY
 void __llvm_profile_register_function(void *Data_) {
   /* TODO: Only emit this function if we can't use linker magic. */
   const __llvm_profile_data *Data = (__llvm_profile_data *)Data_;
+
+#if defined(__NVPTX__)
+  // NVPTX stores absolute counter addresses to avoid circular dependencies in
+  // PTX global variable initializers. Convert to a relative offset so the
+  // host-side profile reader sees the standard format.
+  {
+    uintptr_t Rel = (uintptr_t)Data->CounterPtr - (uintptr_t)Data_;
+    __builtin_memcpy((char *)Data_ +
+                         __builtin_offsetof(__llvm_profile_data, CounterPtr),
+                     &Rel, sizeof(Rel));
+  }
+#endif
+
   if (!DataFirst) {
     DataFirst = Data;
     DataLast = Data + 1;
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -142,6 +142,38 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
 #undef INSTR_PROF_VALUE_NODE
 /* INSTR_PROF_VALUE_NODE end. */
 
+/* INSTR_PROF_GPU_SECT start. */
+/* Fields of the GPU profile section bounds structure, populated by the
+ * compiler runtime and read by the host to extract profiling data. */
+#ifndef INSTR_PROF_GPU_SECT
+#define INSTR_PROF_GPU_SECT(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx),           \
+                    NamesStart,                                                \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(const char *, llvm::PointerType::getUnqual(Ctx),           \
+                    NamesStop,                                                 \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx),                 \
+                    CountersStart,                                             \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(char *, llvm::PointerType::getUnqual(Ctx),                 \
+                    CountersStop,                                              \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
+                    Ctx), DataStart,                                           \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(const __llvm_profile_data *, llvm::PointerType::getUnqual( \
+                    Ctx), DataStop,                                            \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+INSTR_PROF_GPU_SECT(uint64_t *, llvm::PointerType::getUnqual(Ctx),             \
+                    VersionVar,                                                \
+                    ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)))
+#undef INSTR_PROF_GPU_SECT
+/* INSTR_PROF_GPU_SECT end. */
+
 /* INSTR_PROF_RAW_HEADER  start */
 /* Definition of member fields of the raw profile header data structure. */
 /* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating
@@ -761,6 +793,10 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  * specified via command line. */
 #define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
 
+/* GPU profiling section bounds structure, populated by the compiler runtime
+ * and read by the host to extract profiling data. */
+#define INSTR_PROF_SECT_BOUNDS_TABLE __llvm_profile_sections
+
 /* section name strings common to all targets other
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
@@ -486,25 +486,18 @@ bool isGPUProfTarget(const Module &M) {
 }
 
 void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
-  // If the target is a GPU, make the symbol protected so it can
-  // be read from the host device
-  if (isGPUProfTarget(M))
-    FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
   // Hide the symbol so that we correctly get a copy for each executable.
-  else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+  if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
     FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
 }
 
 GlobalVariable *createPGOFuncNameVar(Module &M,
                                      GlobalValue::LinkageTypes Linkage,
                                      StringRef PGOFuncName) {
-  // Ensure profiling variables on GPU are visible to be read from host
-  if (isGPUProfTarget(M))
-    Linkage = GlobalValue::ExternalLinkage;
   // We generally want to match the function's linkage, but available_externally
   // and extern_weak both have the wrong semantics, and anything that doesn't
   // need to link across compilation units doesn't need to be visible at all.
-  else if (Linkage == GlobalValue::ExternalWeakLinkage)
+  if (Linkage == GlobalValue::ExternalWeakLinkage)
     Linkage = GlobalValue::LinkOnceAnyLinkage;
   else if (Linkage == GlobalValue::AvailableExternallyLinkage)
     Linkage = GlobalValue::LinkOnceODRLinkage;
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1425,6 +1425,10 @@ static inline Constant *getFuncAddrForProfData(Function *Fn) {
 }
 
 static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
+  // NVPTX is an ELF target but PTX does not expose sections or linker symbols.
+  if (TT.isNVPTX())
+    return true;
+
   // compiler-rt uses linker support to get data/counters/name start/end for
   // ELF, COFF, Mach-O, XCOFF, and Wasm.
   if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
@@ -1815,10 +1819,6 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
     Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
 
-  if (isGPUProfTarget(M)) {
-    Linkage = GlobalValue::ExternalLinkage;
-    Visibility = GlobalValue::ProtectedVisibility;
-  }
   // If the data variable is not referenced by code (if we don't emit
   // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
   // data variable live under linker GC, the data variable can be private. This
@@ -1830,12 +1830,17 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
   // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
   // that other copies must have the same CFG and cannot have value profiling.
   // If no hash suffix, other profd copies may be referenced by code.
-  else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
-           (TT.isOSBinFormatELF() ||
-            (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
+  if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
+      (TT.isOSBinFormatELF() ||
+       (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
     Linkage = GlobalValue::PrivateLinkage;
     Visibility = GlobalValue::DefaultVisibility;
   }
+  // AMDGPU objects are always ET_DYN, so non-local symbols with default
+  // visibility are preemptible. The CounterPtr label difference emits a REL32
+  // relocation that lld rejects against preemptible targets.
+  if (TT.isAMDGPU() && !GlobalValue::isLocalLinkage(Linkage))
+    Visibility = GlobalValue::ProtectedVisibility;
   auto *Data =
       new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
   Constant *RelativeCounterPtr;
@@ -1849,6 +1854,12 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
     RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
     if (BitmapPtr != nullptr)
       RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
+  } else if (TT.isNVPTX()) {
+    // The NVPTX target cannot handle self-referencing constant expressions in
+    // global initializers at all. Use absolute pointers and have the runtime
+    // registration convert them to relative offsets.
+    DataSectionKind = IPSK_data;
+    RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
   } else {
     // Reference the counter variable with a label difference (link-time
     // constant).
@@ -1953,10 +1964,6 @@ void InstrLowerer::emitNameData() {
   NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
                                 GlobalValue::PrivateLinkage, NamesVal,
                                 getInstrProfNamesVarName());
-  if (isGPUProfTarget(M)) {
-    NamesVar->setLinkage(GlobalValue::ExternalLinkage);
-    NamesVar->setVisibility(GlobalValue::ProtectedVisibility);
-  }
 
   NamesSize = CompressedNameStr.size();
   setGlobalVariableLargeSection(TT, *NamesVar);
@@ -2048,6 +2055,11 @@ void InstrLowerer::emitRegistration() {
 }
 
 bool InstrLowerer::emitRuntimeHook() {
+  // GPU profiling data is read directly by the host offload runtime. We do not
+  // need the standard runtime hook.
+  if (TT.isGPU())
+    return false;
+
   // We expect the linker to be invoked with -u<hook_var> flag for Linux
   // in which case there is no need to emit the external variable.
   if (TT.isOSLinux() || TT.isOSAIX())
@@ -2062,10 +2074,7 @@ bool InstrLowerer::emitRuntimeHook() {
   auto *Var =
       new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
                          nullptr, getInstrProfRuntimeHookVarName());
-  if (isGPUProfTarget(M))
-    Var->setVisibility(GlobalValue::ProtectedVisibility);
-  else
-    Var->setVisibility(GlobalValue::HiddenVisibility);
+  Var->setVisibility(GlobalValue::HiddenVisibility);
 
   if (TT.isOSBinFormatELF() && !TT.isPS()) {
     // Mark the user variable as used so that it isn't stripped out.
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -469,9 +469,6 @@ createIRLevelProfileFlagVar(Module &M,
       M, IntTy64, true, GlobalValue::WeakAnyLinkage,
       Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
   IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
-  if (isGPUProfTarget(M))
-    IRLevelVersionVariable->setVisibility(
-        llvm::GlobalValue::ProtectedVisibility);
 
   Triple TT(M.getTargetTriple());
   if (TT.supportsCOMDAT()) {
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp