-
Notifications
You must be signed in to change notification settings - Fork 272
Closed
Labels
Description
Related to #1371
Problem Description
The develop branch (commit 6ef8d3c) fails to build when configured with -DGPU_TARGETS="gfx908;gfx90a" -DCMAKE_CXX_FLAGS_RELEASE=' '.
The failure occurs while compiling device_gemm_dpp_f16_f16_f16_km_nk_mn_instance:
composable_kernel> [152/4044] Building CXX object library/src/tensor_operation_instance/gpu/gemm/CMakeFiles/device_gemm_instance.dir/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.o
composable_kernel> FAILED: library/src/tensor_operation_instance/gpu/gemm/CMakeFiles/device_gemm_instance.dir/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.o
composable_kernel> /nix/store/wcfqfaalprfjgp0w9mx98fwdzwc82xq4-clr-6.3.0/bin/clang++ -DCK_ENABLE_BF16 -DCK_ENABLE_BF8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_FP8 -DCK_ENABLE_INT8 -DCK_USE_FNUZ_FP8 -DCK_USE_XDL -DUSE_PROF_API=1 -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1 -I/build/source/library/include -I/build/source/include -I/build/source/build/include -isystem /nix/store/wcfqfaalprfjgp0w9mx98fwdzwc82xq4-clr-6.3.0/include -parallel-jobs=1 -fgpu-inline-threshold=32768 -std=c++17 -fPIC -Wall -Wextra -Wcomment -Wendif-labels -Wformat -Winit-self -Wreturn-type -Wsequence-point -Wswitch -Wtrigraphs -Wundef -Wuninitialized -Wunreachable-code -Wunused -Wno-reserved-identifier -Werror -Wno-option-ignored -Wsign-compare -Wno-extra-semi-stmt -Wno-unused-template -Wno-missing-field-initializers -Wno-deprecated-declarations -Wall -Wextra -Wcomment -Wendif-labels -Wformat -Winit-self -Wreturn-type -Wsequence-point -Wswitch -Wtrigraphs -Wundef -Wuninitialized -Wunreachable-code -Wunused -Wno-reserved-identifier -Werror -Wno-option-ignored -Wsign-compare -Wno-extra-semi-stmt -Wno-unused-template -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-conversion -Wno-double-promotion -Wno-exit-time-destructors -Wno-extra-semi -Wno-float-conversion -Wno-gnu-anonymous-struct -Wno-gnu-zero-variadic-macro-arguments -Wno-missing-prototypes -Wno-nested-anon-types -Wno-padded -Wno-return-std-move-in-c++11 -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-unknown-warning-option -Wno-unused-command-line-argument -Wno-weak-vtables -Wno-covered-switch-default -Wno-unsafe-buffer-usage -Wno-unused-lambda-capture -Wno-nvcc-compat -Wno-bit-int-extension -Wno-pass-failed -Wno-switch-default -fno-offload-uniform-block -mllvm --lsr-drop-solution=1 -mllvm -enable-post-misched=0 -mllvm -amdgpu-coerce-illegal-types=1 -fcolor-diagnostics --offload-compress -x hip --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx908 --offload-arch=gfx90a -MD -MT 
library/src/tensor_operation_instance/gpu/gemm/CMakeFiles/device_gemm_instance.dir/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.o -MF library/src/tensor_operation_instance/gpu/gemm/CMakeFiles/device_gemm_instance.dir/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.o.d -o library/src/tensor_operation_instance/gpu/gemm/CMakeFiles/device_gemm_instance.dir/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.o -c /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:23:21: error: not a valid operand.
composable_kernel> 23 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[0, 0, 0, 0, 0, 0, 0, 0]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[0, 0, 0, 0, 0, 0, 0, 0]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:27:21: error: not a valid operand.
composable_kernel> 27 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[1, 1, 1, 1, 1, 1, 1, 1]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[1, 1, 1, 1, 1, 1, 1, 1]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:31:21: error: not a valid operand.
composable_kernel> 31 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[2, 2, 2, 2, 2, 2, 2, 2]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[2, 2, 2, 2, 2, 2, 2, 2]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:35:21: error: not a valid operand.
composable_kernel> 35 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[3, 3, 3, 3, 3, 3, 3, 3]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[3, 3, 3, 3, 3, 3, 3, 3]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:39:21: error: not a valid operand.
composable_kernel> 39 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[4, 4, 4, 4, 4, 4, 4, 4]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[4, 4, 4, 4, 4, 4, 4, 4]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:43:21: error: not a valid operand.
composable_kernel> 43 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[5, 5, 5, 5, 5, 5, 5, 5]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[5, 5, 5, 5, 5, 5, 5, 5]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:47:21: error: not a valid operand.
composable_kernel> 47 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[6, 6, 6, 6, 6, 6, 6, 6]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[6, 6, 6, 6, 6, 6, 6, 6]
composable_kernel> | ^
composable_kernel> In file included from /build/source/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp:14:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp:13:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp:9:
composable_kernel> In file included from /build/source/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp:6:
composable_kernel> In file included from /build/source/include/ck/utility/amd_gemm_dpp.hpp:8:
composable_kernel> /build/source/include/ck/utility/inner_product_dpp8.hpp:51:21: error: not a valid operand.
composable_kernel> 51 | asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[7, 7, 7, 7, 7, 7, 7, 7]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
composable_kernel> | ^
composable_kernel> <inline asm>:2:33: note: instantiated into assembly here
composable_kernel> 2 | v_dot2c_f32_f16_dpp v2, v3, v4 dpp8:[7, 7, 7, 7, 7, 7, 7, 7]
composable_kernel> | ^
composable_kernel> 8 errors generated when compiling for gfx908.
CMake flags: -DCMAKE_C_FLAGS_RELEASE=-fgpu-inline-threshold=32768 -DCMAKE_CXX_FLAGS_RELEASE=-fgpu-inline-threshold=32768 -DCK_PARALLEL_LINK_JOBS=5 -DCK_PARALLEL_COMPILE_JOBS=62
This is on top of a ROCm 6.3 stack from the rocm-6.3.0 tags.
Operating System
NixOS
CPU
EPYC 7773
GPU
AMD Instinct MI100
Reactions are currently unavailable