Skip to content

Commit da38871

Browse files
committed
crypto: implement SSSE3 backend for Echo512's MixColumns()
1 parent 31a6732 commit da38871

File tree

7 files changed

+163
-52
lines changed

7 files changed

+163
-52
lines changed

configure.ac

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ AX_CHECK_COMPILE_FLAG([-fstack-reuse=none], [CORE_CXXFLAGS="$CORE_CXXFLAGS -fsta
537537

538538
enable_arm_crc=no
539539
enable_arm_shani=no
540+
enable_ssse3=no
540541
enable_sse42=no
541542
enable_sse41=no
542543
enable_avx2=no
@@ -548,6 +549,7 @@ dnl be compiled with them, rather that specific objects/libs may use them after
548549
dnl compatibility.
549550

550551
dnl x86
552+
AX_CHECK_COMPILE_FLAG([-mssse3], [SSSE3_CXXFLAGS="-mssse3"], [], [$CXXFLAG_WERROR])
551553
AX_CHECK_COMPILE_FLAG([-msse4.2], [SSE42_CXXFLAGS="-msse4.2"], [], [$CXXFLAG_WERROR])
552554
AX_CHECK_COMPILE_FLAG([-msse4.1], [SSE41_CXXFLAGS="-msse4.1"], [], [$CXXFLAG_WERROR])
553555
AX_CHECK_COMPILE_FLAG([-mavx -mavx2], [AVX2_CXXFLAGS="-mavx -mavx2"], [], [$CXXFLAG_WERROR])
@@ -572,6 +574,20 @@ if test "$enable_clmul" = "yes"; then
572574
AC_DEFINE([HAVE_CLMUL], [1], [Define this symbol if clmul instructions can be used])
573575
fi
574576

577+
TEMP_CXXFLAGS="$CXXFLAGS"
578+
CXXFLAGS="$SSSE3_CXXFLAGS $CXXFLAGS"
579+
AC_MSG_CHECKING([for SSSE3 intrinsics])
580+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
581+
#include <tmmintrin.h>
582+
]],[[
583+
__m64 x = _mm_abs_pi32(_m_from_int(0));
584+
return 0;
585+
]])],
586+
[ AC_MSG_RESULT([yes]); enable_ssse3=yes; AC_DEFINE([ENABLE_SSSE3], [1], [Define this symbol to build code that uses SSSE3 intrinsics]) ],
587+
[ AC_MSG_RESULT([no])]
588+
)
589+
CXXFLAGS="$TEMP_CXXFLAGS"
590+
575591
TEMP_CXXFLAGS="$CXXFLAGS"
576592
CXXFLAGS="$SSE42_CXXFLAGS $CXXFLAGS"
577593
AC_MSG_CHECKING([for SSE4.2 intrinsics])
@@ -1839,6 +1855,7 @@ AM_CONDITIONAL([USE_QRCODE], [test "$use_qr" = "yes"])
18391855
AM_CONDITIONAL([USE_LCOV], [test "$use_lcov" = "yes"])
18401856
AM_CONDITIONAL([USE_LIBEVENT], [test "$use_libevent" = "yes"])
18411857
AM_CONDITIONAL([HARDEN], [test "$use_hardening" = "yes"])
1858+
AM_CONDITIONAL([ENABLE_SSSE3], [test "$enable_ssse3" = "yes"])
18421859
AM_CONDITIONAL([ENABLE_SSE42], [test "$enable_sse42" = "yes"])
18431860
AM_CONDITIONAL([ENABLE_SSE41], [test "$enable_sse41" = "yes"])
18441861
AM_CONDITIONAL([ENABLE_AVX2], [test "$enable_avx2" = "yes"])
@@ -1898,6 +1915,7 @@ AC_SUBST(PIE_FLAGS)
18981915
AC_SUBST(SANITIZER_CXXFLAGS)
18991916
AC_SUBST(SANITIZER_LDFLAGS)
19001917
AC_SUBST(SPHLIB_FLAGS)
1918+
AC_SUBST(SSSE3_CXXFLAGS)
19011919
AC_SUBST(SSE42_CXXFLAGS)
19021920
AC_SUBST(SSE41_CXXFLAGS)
19031921
AC_SUBST(CLMUL_CXXFLAGS)

src/Makefile.am

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ endif
7575
LIBBITCOIN_CRYPTO = $(LIBBITCOIN_CRYPTO_BASE)
7676
LIBBITCOIN_CRYPTO_SPH = crypto/libbitcoin_crypto_sph.la
7777
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SPH)
78+
if ENABLE_SSSE3
79+
LIBBITCOIN_CRYPTO_SSSE3 = crypto/libbitcoin_crypto_ssse3.la
80+
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SSSE3)
81+
endif
7882
if ENABLE_SSE41
7983
LIBBITCOIN_CRYPTO_SSE41 = crypto/libbitcoin_crypto_sse41.la
8084
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SSE41)
@@ -769,6 +773,16 @@ crypto_libbitcoin_crypto_sph_la_SOURCES = \
769773
crypto/x11/util/consts_aes.hpp \
770774
crypto/x11/util/util.hpp
771775

776+
# See explanation for -static in crypto_libbitcoin_crypto_base_la's LDFLAGS and
777+
# CXXFLAGS above
778+
crypto_libbitcoin_crypto_ssse3_la_LDFLAGS = $(AM_LDFLAGS) -static
779+
crypto_libbitcoin_crypto_ssse3_la_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) -static
780+
crypto_libbitcoin_crypto_ssse3_la_CPPFLAGS = $(AM_CPPFLAGS)
781+
crypto_libbitcoin_crypto_ssse3_la_CXXFLAGS += $(SSSE3_CXXFLAGS)
782+
crypto_libbitcoin_crypto_ssse3_la_CPPFLAGS += -DENABLE_SSSE3
783+
crypto_libbitcoin_crypto_ssse3_la_SOURCES = \
784+
crypto/x11/ssse3/echo.cpp
785+
772786
# See explanation for -static in crypto_libbitcoin_crypto_base_la's LDFLAGS and
773787
# CXXFLAGS above
774788
crypto_libbitcoin_crypto_x86_aesni_la_LDFLAGS = $(AM_LDFLAGS) -static

src/crypto/x11/dispatch.cpp

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616

1717
namespace sapphire {
1818
#if !defined(DISABLE_OPTIMIZED_SHA256)
19+
#if defined(ENABLE_SSSE3)
20+
namespace ssse3_echo {
21+
void MixColumns(uint64_t W[16][2]);
22+
} // namespace ssse3_echo
23+
#endif // ENABLE_SSSE3
24+
1925
#if defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI)
2026
namespace x86_aesni_aes {
2127
void Round(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
@@ -39,39 +45,41 @@ void RoundKeyless(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
3945
} // namespace soft_aes
4046
namespace soft_echo {
4147
void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3);
48+
void MixColumns(uint64_t W[16][2]);
4249
} // namespace soft_echo
4350
} // namespace sapphire
4451

45-
namespace {
46-
[[maybe_unused]] bool use_aes_ni = []() {
47-
#if !defined(DISABLE_OPTIMIZED_SHA256) && defined(HAVE_GETCPUID)
48-
uint32_t eax, ebx, ecx, edx;
49-
GetCPUID(1, 0, eax, ebx, ecx, edx);
50-
return (/*has_sse4_1=*/((ecx >> 19) & 1) &&
51-
/*has_aes_ni=*/((ecx >> 25) & 1));
52-
#else
53-
return false;
54-
#endif // !DISABLE_OPTIMIZED_SHA256 && HAVE_GETCPUID
55-
}();
56-
} // anonymous namespace
57-
5852
extern sapphire::dispatch::AESRoundFn aes_round;
5953
extern sapphire::dispatch::AESRoundFnNk aes_round_nk;
54+
extern sapphire::dispatch::EchoMixCols echo_mix_columns;
6055
extern sapphire::dispatch::EchoRoundFn echo_round;
6156

6257
void SapphireAutoDetect()
6358
{
6459
aes_round = sapphire::soft_aes::Round;
6560
aes_round_nk = sapphire::soft_aes::RoundKeyless;
6661
echo_round = sapphire::soft_echo::FullStateRound;
62+
echo_mix_columns = sapphire::soft_echo::MixColumns;
6763

6864
#if !defined(DISABLE_OPTIMIZED_SHA256)
65+
#if defined(HAVE_GETCPUID)
66+
uint32_t eax, ebx, ecx, edx;
67+
GetCPUID(1, 0, eax, ebx, ecx, edx);
6968
#if defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI)
70-
if (use_aes_ni) {
69+
const bool use_sse_4_1 = ((ecx >> 19) & 1);
70+
const bool use_aes_ni = ((ecx >> 25) & 1);
71+
if (use_sse_4_1 && use_aes_ni) {
7172
aes_round = sapphire::x86_aesni_aes::Round;
7273
aes_round_nk = sapphire::x86_aesni_aes::RoundKeyless;
7374
echo_round = sapphire::x86_aesni_echo::FullStateRound;
7475
}
7576
#endif // ENABLE_SSE41 && ENABLE_X86_AESNI
77+
#if defined(ENABLE_SSSE3)
78+
const bool use_ssse3 = ((ecx >> 9) & 1);
79+
if (use_ssse3) {
80+
echo_mix_columns = sapphire::ssse3_echo::MixColumns;
81+
}
82+
#endif // ENABLE_SSSE3
83+
#endif // HAVE_GETCPUID
7684
#endif // !DISABLE_OPTIMIZED_SHA256
7785
}

src/crypto/x11/dispatch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ typedef void (*AESRoundFn)(uint32_t, uint32_t, uint32_t, uint32_t,
1515
typedef void (*AESRoundFnNk)(uint32_t, uint32_t, uint32_t, uint32_t,
1616
uint32_t&, uint32_t&, uint32_t&, uint32_t&);
1717

18+
typedef void (*EchoMixCols)(uint64_t[16][2]);
1819
typedef void (*EchoRoundFn)(uint64_t[16][2], uint32_t&, uint32_t&, uint32_t&, uint32_t&);
1920
} // namespace dispatch
2021
} // namespace sapphire

src/crypto/x11/echo.cpp

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include <crypto/x11/aes.h>
3434
#include <crypto/x11/dispatch.h>
3535

36+
#include <attributes.h>
37+
3638
#include <cstddef>
3739
#include <cstring>
3840

@@ -50,6 +52,31 @@
5052

5153
namespace sapphire {
5254
namespace soft_echo {
55+
namespace {
56+
void ALWAYS_INLINE MixColumn(sph_u64 W[16][2], int ia, int ib, int ic, int id)
57+
{
58+
for (int n = 0; n < 2; n ++) {
59+
sph_u64 a = W[ia][n];
60+
sph_u64 b = W[ib][n];
61+
sph_u64 c = W[ic][n];
62+
sph_u64 d = W[id][n];
63+
sph_u64 ab = a ^ b;
64+
sph_u64 bc = b ^ c;
65+
sph_u64 cd = c ^ d;
66+
sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U
67+
^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1);
68+
sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U
69+
^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1);
70+
sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U
71+
^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1);
72+
W[ia][n] = abx ^ bc ^ d;
73+
W[ib][n] = bcx ^ a ^ cd;
74+
W[ic][n] = cdx ^ ab ^ d;
75+
W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c;
76+
}
77+
}
78+
} // anonymous namespace
79+
5380
void FullStateRound(sph_u64 W[16][2], sph_u32& K0, sph_u32& K1, sph_u32& K2, sph_u32& K3)
5481
{
5582
for (int n = 0; n < 16; n ++) {
@@ -73,9 +100,18 @@ void FullStateRound(sph_u64 W[16][2], sph_u32& K0, sph_u32& K1, sph_u32& K2, sph
73100
}
74101
}
75102
}
103+
104+
void MixColumns(uint64_t W[16][2])
105+
{
106+
MixColumn(W, 0, 1, 2, 3);
107+
MixColumn(W, 4, 5, 6, 7);
108+
MixColumn(W, 8, 9, 10, 11);
109+
MixColumn(W, 12, 13, 14, 15);
110+
}
76111
} // namespace soft_echo
77112
} // namespace sapphire
78113

114+
sapphire::dispatch::EchoMixCols echo_mix_columns = sapphire::soft_echo::MixColumns;
79115
sapphire::dispatch::EchoRoundFn echo_round = sapphire::soft_echo::FullStateRound;
80116

81117
#define DECL_STATE_BIG \
@@ -130,45 +166,10 @@ sapphire::dispatch::EchoRoundFn echo_round = sapphire::soft_echo::FullStateRound
130166
SHIFT_ROW3(3, 7, 11, 15); \
131167
} while (0)
132168

133-
static void
134-
mix_column(sph_u64 W[16][2], int ia, int ib, int ic, int id)
135-
{
136-
int n;
137-
138-
for (n = 0; n < 2; n ++) {
139-
sph_u64 a = W[ia][n];
140-
sph_u64 b = W[ib][n];
141-
sph_u64 c = W[ic][n];
142-
sph_u64 d = W[id][n];
143-
sph_u64 ab = a ^ b;
144-
sph_u64 bc = b ^ c;
145-
sph_u64 cd = c ^ d;
146-
sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U
147-
^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1);
148-
sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U
149-
^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1);
150-
sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U
151-
^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1);
152-
W[ia][n] = abx ^ bc ^ d;
153-
W[ib][n] = bcx ^ a ^ cd;
154-
W[ic][n] = cdx ^ ab ^ d;
155-
W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c;
156-
}
157-
}
158-
159-
#define MIX_COLUMN(a, b, c, d) mix_column(W, a, b, c, d)
160-
161-
#define BIG_MIX_COLUMNS do { \
162-
MIX_COLUMN(0, 1, 2, 3); \
163-
MIX_COLUMN(4, 5, 6, 7); \
164-
MIX_COLUMN(8, 9, 10, 11); \
165-
MIX_COLUMN(12, 13, 14, 15); \
166-
} while (0)
167-
168169
#define BIG_ROUND do { \
169170
echo_round(W, K0, K1, K2, K3); \
170171
BIG_SHIFT_ROWS; \
171-
BIG_MIX_COLUMNS; \
172+
echo_mix_columns(W); \
172173
} while (0)
173174

174175
#define FINAL_BIG do { \

src/crypto/x11/ssse3/echo.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright (c) 2025 The Dash Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#if defined(ENABLE_SSSE3)
6+
#include <attributes.h>
7+
#include <crypto/x11/util/util.hpp>
8+
9+
#include <cstdint>
10+
11+
#include <tmmintrin.h>
12+
13+
namespace sapphire {
14+
namespace {
15+
__m128i ALWAYS_INLINE gf8_mul2(const __m128i& x)
16+
{
17+
// (x << 1)
18+
const __m128i lhs = _mm_add_epi8(x, x);
19+
// (x & 0x80) ? 0xff : 0x00
20+
const __m128i msb_set = _mm_and_si128(x, _mm_set1_epi8(0x80));
21+
const __m128i mask = _mm_cmpeq_epi8(msb_set, _mm_set1_epi8(0x80));
22+
// Replace 0xff with 0x1b
23+
const __m128i rhs = _mm_and_si128(mask, _mm_set1_epi8(0x1b));
24+
// (x << 1) ^ ((x & 0x80) ? 0x1b : 0x00)
25+
return util::Xor(lhs, rhs);
26+
}
27+
28+
void ALWAYS_INLINE MixColumn(uint64_t W[16][2], int ia, int ib, int ic, int id)
29+
{
30+
const __m128i a = _mm_load_si128((const __m128i*)&W[ia][0]);
31+
const __m128i b = _mm_load_si128((const __m128i*)&W[ib][0]);
32+
const __m128i c = _mm_load_si128((const __m128i*)&W[ic][0]);
33+
const __m128i d = _mm_load_si128((const __m128i*)&W[id][0]);
34+
35+
const __m128i ab = util::Xor(a, b);
36+
const __m128i bc = util::Xor(b, c);
37+
const __m128i cd = util::Xor(c, d);
38+
39+
const __m128i abx = gf8_mul2(ab);
40+
const __m128i bcx = gf8_mul2(bc);
41+
const __m128i cdx = gf8_mul2(cd);
42+
43+
// W[ia] = abx ^ bc ^ d
44+
_mm_store_si128((__m128i*)&W[ia][0], util::Xor(util::Xor(abx, bc), d));
45+
// W[ib] = bcx ^ a ^ cd
46+
_mm_store_si128((__m128i*)&W[ib][0], util::Xor(util::Xor(bcx, a), cd));
47+
// W[ic] = cdx ^ ab ^ d
48+
_mm_store_si128((__m128i*)&W[ic][0], util::Xor(util::Xor(cdx, ab), d));
49+
// W[id] = abx ^ bcx ^ cdx ^ ab ^ c
50+
_mm_store_si128((__m128i*)&W[id][0], util::Xor(util::Xor(util::Xor(util::Xor(abx, bcx), cdx), ab), c));
51+
}
52+
} // anonymous namespace
53+
54+
namespace ssse3_echo {
55+
void MixColumns(uint64_t W[16][2])
56+
{
57+
MixColumn(W, 0, 1, 2, 3);
58+
MixColumn(W, 4, 5, 6, 7);
59+
MixColumn(W, 8, 9, 10, 11);
60+
MixColumn(W, 12, 13, 14, 15);
61+
}
62+
} // namespace ssse3_echo
63+
} // namespace sapphire
64+
65+
#endif // ENABLE_SSSE3

src/crypto/x11/util/util.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
#if !defined(DISABLE_OPTIMIZED_SHA256)
1111
#include <attributes.h>
1212

13-
#if defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI)
13+
#if defined(ENABLE_SSSE3) || (defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI))
1414
#include <immintrin.h>
15-
#endif // ENABLE_SSE41 && ENABLE_X86_AESNI
15+
#endif // ENABLE_SSSE3 || (ENABLE_SSE41 && ENABLE_X86_AESNI)
1616
#endif // !DISABLE_OPTIMIZED_SHA256
1717

1818
namespace sapphire {
@@ -26,6 +26,9 @@ constexpr inline uint32_t pack_le(uint8_t b3, uint8_t b2, uint8_t b1, uint8_t b0
2626
}
2727

2828
#if !defined(DISABLE_OPTIMIZED_SHA256)
29+
#if defined(ENABLE_SSSE3) || (defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI))
30+
__m128i ALWAYS_INLINE Xor(const __m128i& x, const __m128i& y) { return _mm_xor_si128(x, y); }
31+
2932
#if defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI)
3033
__m128i ALWAYS_INLINE aes_round(const __m128i& input, const __m128i& key) { return _mm_aesenc_si128(input, key); }
3134

@@ -42,6 +45,7 @@ void ALWAYS_INLINE unpack_le(const __m128i& i, uint32_t& w0, uint32_t& w1, uint3
4245
w3 = _mm_extract_epi32(i, 3);
4346
}
4447
#endif // ENABLE_SSE41 && ENABLE_X86_AESNI
48+
#endif // ENABLE_SSSE3 || (ENABLE_SSE41 && ENABLE_X86_AESNI)
4549
#endif // !DISABLE_OPTIMIZED_SHA256
4650
} // namespace util
4751
} // namespace sapphire

0 commit comments

Comments
 (0)