This repository was archived by the owner on Aug 11, 2020. It is now read-only.
Showing 4 changed files with 74 additions and 3 deletions.
@@ -48,6 +48,31 @@ else()
4848 add_definitions (-DMSHADOW_USE_SSE=0)
4949endif ()
5050
# ---------------------------------------------------------------------------
# F16C (hardware half <-> float conversion) support.
#
# SUPPORT_MF16C may be preset by the caller (e.g. -DSUPPORT_MF16C=ON) to skip
# auto-detection entirely. Otherwise it is switched on only when BOTH
#   * the C++ compiler accepts -mf16c, and
#   * the CPU of the machine running CMake advertises the F16C feature.
#
# Auto-detection is skipped for MSVC and when cross-compiling: the probes
# below inspect the build host's CPU, which says nothing about the machine
# the binaries will run on. In those cases F16C stays disabled unless
# SUPPORT_MF16C is set explicitly.
# ---------------------------------------------------------------------------
if(NOT DEFINED SUPPORT_MF16C AND NOT MSVC AND NOT CMAKE_CROSSCOMPILING)
  # Harmless if the module was already included earlier in the project.
  include(CheckCXXCompilerFlag)
  check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C)

  # The probe commands run on the host, so select them by host system.
  # Each pipeline leaves CPU_SUPPORT_F16C empty when the feature is absent
  # (the final grep prints nothing); ERROR_QUIET keeps tool noise out of the
  # configure log.
  if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux")
    execute_process(COMMAND cat /proc/cpuinfo
                    COMMAND grep flags
                    COMMAND grep f16c
                    OUTPUT_VARIABLE CPU_SUPPORT_F16C
                    ERROR_QUIET
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
    execute_process(COMMAND sysctl -a
                    COMMAND grep machdep.cpu.features
                    COMMAND grep F16C
                    OUTPUT_VARIABLE CPU_SUPPORT_F16C
                    ERROR_QUIET
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()

  if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
    set(SUPPORT_MF16C TRUE)
  endif()
endif()

# Always define MSHADOW_USE_F16C explicitly so the headers never fall back
# to their built-in default.
if(SUPPORT_MF16C)
  add_definitions(-DMSHADOW_USE_F16C=1)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
else()
  add_definitions(-DMSHADOW_USE_F16C=0)
endif()
75+
5176if (USE_CUDA)
5277 find_package (CUDA 5.5 QUIET )
5378 find_cuda_helper_libs(curand)
Original file line number Diff line number Diff line change 3131 MSHADOW_CFLAGS += -DMSHADOW_USE_SSE=0
3232endif
3333
# F16C (hardware half <-> float conversion) auto-detection.
# Set USE_F16C explicitly (0 or 1) to bypass the probe below.
ifndef USE_F16C
    ifneq ($(OS),Windows_NT)
        detected_OS := $(shell uname -s)
        # Start from "feature not found" so that platforms without a probe
        # below end up with USE_F16C = 0.
        F16C_SUPP :=
        ifeq ($(detected_OS),Darwin)
            F16C_SUPP := $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
        endif
        ifeq ($(detected_OS),Linux)
            F16C_SUPP := $(shell cat /proc/cpuinfo | grep flags | grep f16c)
        endif
        # The final grep prints nothing when the CPU lacks F16C, so the
        # feature is present exactly when the captured output is non-empty.
        ifneq ($(strip $(F16C_SUPP)),)
            USE_F16C = 1
        else
            USE_F16C = 0
        endif
    endif
    # If the OS is Windows, check whether your processor supports the F16C architecture.
    # One way to do that is to download the tool https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
    # If coreinfo -c shows F16C then you can set USE_F16C=1 explicitly to leverage that capability.
endif
53+
# Translate USE_F16C into compiler flags. MSHADOW_USE_F16C is defined
# explicitly in both branches so the result never depends on the default
# chosen inside the headers.
ifeq ($(USE_F16C), 1)
    MSHADOW_CFLAGS += -mf16c -DMSHADOW_USE_F16C=1
else
    MSHADOW_CFLAGS += -DMSHADOW_USE_F16C=0
endif
59+
3460ifeq ($(USE_CUDA ) , 0)
3561 MSHADOW_CFLAGS += -DMSHADOW_USE_CUDA=0
3662else
Original file line number Diff line number Diff line change @@ -134,6 +134,12 @@ typedef unsigned __int64 uint64_t;
134134#ifndef MSHADOW_USE_SSE
135135 #define MSHADOW_USE_SSE 1
136136#endif
137+
/*!
 * \brief whether use F16C instruction set architecture extension
 *
 *  When the build system does not define MSHADOW_USE_F16C, it defaults to 1
 *  only if the compiler reports that F16C code generation is enabled for
 *  this translation unit (__F16C__, e.g. set by -mf16c), and to 0 otherwise.
 *  Define MSHADOW_USE_F16C explicitly (0 or 1) to override the default.
 */
#ifndef MSHADOW_USE_F16C
  #if defined(__F16C__)
    #define MSHADOW_USE_F16C 1
  #else
    #define MSHADOW_USE_F16C 0
  #endif
#endif
142+
137143/* ! \brief whether use NVML to get dynamic info */
138144#ifndef MSHADOW_USE_NVML
139145 #define MSHADOW_USE_NVML 0
Original file line number Diff line number Diff line change 99#define MSHADOW_HALF_H_
1010#include " ./base.h"
1111
12+ #if MSHADOW_USE_F16C
13+ #include < x86intrin.h>
14+ #endif // MSHADOW_USE_F16C
15+
1216#if (MSHADOW_USE_CUDA && CUDA_VERSION >= 7050)
1317 #define MSHADOW_CUDA_HALF 1
1418 #include < cuda_fp16.h>
@@ -61,7 +65,15 @@ namespace half {
6165 return T (__half2float (cuhalf_)); /* NOLINT(*)*/ \
6266 } \
6367 MSHADOW_XINLINE operator T () const volatile { \
64- return T (__half2float_warp (cuhalf_)); /* NOLINT(*)*/ \
68+ return T (__half2float_warp (cuhalf_)); /* NOLINT(*)*/ \
69+ }
70+ #elif (MSHADOW_USE_F16C)
/* F16C variant: declares the conversion operators to T (plain and volatile
 * overloads). Each one passes half_ to the _cvtsh_ss intrinsic and converts
 * the resulting float to T. */
#define MSHADOW_HALF_CONVERSIONOP(T)                                      \
  MSHADOW_XINLINE operator T() const {                                    \
    return T(_cvtsh_ss(half_));  /* NOLINT(*)*/                           \
  }                                                                       \
  MSHADOW_XINLINE operator T() const volatile {                           \
    return T(_cvtsh_ss(half_));  /* NOLINT(*)*/                           \
  }
6678#else
6779#define MSHADOW_HALF_CONVERSIONOP (T ) \
@@ -244,9 +256,11 @@ class MSHADOW_ALIGNED(2) half_t {
244256 MSHADOW_XINLINE void constructor (const T& value) {
245257#if (MSHADOW_CUDA_HALF && defined(__CUDA_ARCH__))
246258 cuhalf_ = __float2half (float (value)); // NOLINT(*)
247- #else
259+ #elif (MSHADOW_USE_F16C)
260+ half_ = _cvtss_sh (static_cast <float >(value), 0 );
261+ #else /* !MSHADOW_CUDA_HALF && !MSHADOW_USE_F16C */
248262 half_ = float2half (float (value)); // NOLINT(*)
249- #endif // ( MSHADOW_CUDA_HALF && defined(__CUDA_ARCH__))
263+ #endif /* ! MSHADOW_CUDA_HALF && !MSHADOW_USE_F16C */
250264 }
251265};
252266
You can’t perform that action at this time.
0 commit comments