Skip to content

Commit 6d7962d

Browse files
[clang][CUDA] Avoid ambiguity in host/device template specializations (#201049)
This commit changes SemaOverload to resolve an otherwise diagnosed ambiguity between addresses of template specializations of functions that are overloaded for both device and host. Similar to how it works for non-templated function overloads, these changes prioritize the specializations that correspond to the target of the owning function, i.e. if compiling for host, the address of the host specialization takes precedence over the device specialization and vice versa. Fixes #199299 --------- Signed-off-by: Steffen Holst Larsen <[email protected]>
1 parent ee20b10 commit 6d7962d

4 files changed

Lines changed: 37 additions & 3 deletions

File tree

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,10 @@ CUDA/HIP Language Changes
822822
CUDA Support
823823
^^^^^^^^^^^^
824824

825+
- Fixed a bug where host-device ambiguities were diagnosed in CUDA/HIP when retrieving the
826+
address of specializations of templated functions that have overloads for both
827+
host and device. (#GH199299)
828+
825829
AIX Support
826830
^^^^^^^^^^^
827831

clang/lib/Sema/SemaOverload.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13742,6 +13742,9 @@ class AddressOfFunctionResolver {
1374213742
OvlExpr->copyTemplateArgumentsInto(OvlExplicitTemplateArgs);
1374313743

1374413744
if (FindAllFunctionsThatMatchTargetTypeExactly()) {
13745+
if (Matches.size() > 1 && S.getLangOpts().CUDA)
13746+
EliminateSuboptimalCudaMatches();
13747+
1374513748
// C++ [over.over]p4:
1374613749
// If more than one function is selected, [...]
1374713750
if (Matches.size() > 1 && !eliminiateSuboptimalOverloadCandidates()) {
@@ -13752,9 +13755,6 @@ class AddressOfFunctionResolver {
1375213755
EliminateAllExceptMostSpecializedTemplate();
1375313756
}
1375413757
}
13755-
13756-
if (S.getLangOpts().CUDA && Matches.size() > 1)
13757-
EliminateSuboptimalCudaMatches();
1375813758
}
1375913759

1376013760
bool hasComplained() const { return HasComplained; }

clang/test/SemaCUDA/addr-of-overloaded-fn.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
44
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
5+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
6+
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
57

68
#include "Inputs/cuda.h"
79

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// expected-no-diagnostics
2+
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
4+
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
5+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
6+
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
7+
8+
// Tests that no ambiguities are diagnosed when resolving addresses of
9+
// specialized template functions with the same overloads on host and device.
10+
11+
#include "Inputs/cuda.h"
12+
13+
template <typename T> __host__ void overload(T) {}
14+
template <typename T> __device__ void overload(T) {}
15+
16+
__host__ __device__ void test_hd() {
17+
void (*x)(int) = overload<int>;
18+
void (*y)(float) = overload<float>;
19+
}
20+
21+
__host__ void test_host() {
22+
void (*x)(int) = overload<int>;
23+
void (*y)(float) = overload<float>;
24+
}
25+
__device__ void test_device() {
26+
void (*x)(int) = overload<int>;
27+
void (*y)(float) = overload<float>;
28+
}

0 commit comments

Comments
 (0)