Skip to content

Commit f06f46c

Browse files
prusnak and sipa committed
Fix memory alignment
Co-Authored-By: Pieter Wuille <[email protected]>
1 parent d47c5fb commit f06f46c

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

src/crypto/sha256_arm_shani.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,18 @@ void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
4747
STATE0 = vld1q_u32(&s[0]);
4848
STATE1 = vld1q_u32(&s[4]);
4949

50-
const uint8x16_t* input32 = reinterpret_cast<const uint8x16_t*>(chunk);
51-
5250
while (blocks--)
5351
{
5452
// Save state
5553
ABEF_SAVE = STATE0;
5654
CDGH_SAVE = STATE1;
5755

5856
// Load and convert input chunk to Big Endian
59-
MSG0 = vreinterpretq_u32_u8(vrev32q_u8(*input32++));
60-
MSG1 = vreinterpretq_u32_u8(vrev32q_u8(*input32++));
61-
MSG2 = vreinterpretq_u32_u8(vrev32q_u8(*input32++));
62-
MSG3 = vreinterpretq_u32_u8(vrev32q_u8(*input32++));
57+
MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 0)));
58+
MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 16)));
59+
MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 32)));
60+
MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 48)));
61+
chunk += 64;
6362

6463
// Original implementation preloaded message and constant addition which was 1-3% slower.
6564
// Now included as first step in quad round code saving one Q Neon register

0 commit comments

Comments
 (0)