Skip to content

Commit 9037707

Browse files
committed
Rewrite 5x52 normalize method to be faster
1 parent 5355746 commit 9037707

File tree

1 file changed

+33
-35
lines changed

1 file changed

+33
-35
lines changed

src/field_5x52_impl.h

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -34,41 +34,39 @@ void static secp256k1_fe_inner_start(void) {}
3434
void static secp256k1_fe_inner_stop(void) {}
3535

3636
void static secp256k1_fe_normalize(secp256k1_fe_t *r) {
37-
uint64_t c;
38-
c = r->n[0];
39-
uint64_t t0 = c & 0xFFFFFFFFFFFFFULL;
40-
c = (c >> 52) + r->n[1];
41-
uint64_t t1 = c & 0xFFFFFFFFFFFFFULL;
42-
c = (c >> 52) + r->n[2];
43-
uint64_t t2 = c & 0xFFFFFFFFFFFFFULL;
44-
c = (c >> 52) + r->n[3];
45-
uint64_t t3 = c & 0xFFFFFFFFFFFFFULL;
46-
c = (c >> 52) + r->n[4];
47-
uint64_t t4 = c & 0x0FFFFFFFFFFFFULL;
48-
c >>= 48;
49-
50-
// The following code will not modify the t's if c is initially 0.
51-
c = c * 0x1000003D1ULL + t0;
52-
t0 = c & 0xFFFFFFFFFFFFFULL;
53-
c = (c >> 52) + t1;
54-
t1 = c & 0xFFFFFFFFFFFFFULL;
55-
c = (c >> 52) + t2;
56-
t2 = c & 0xFFFFFFFFFFFFFULL;
57-
c = (c >> 52) + t3;
58-
t3 = c & 0xFFFFFFFFFFFFFULL;
59-
c = (c >> 52) + t4;
60-
t4 = c & 0x0FFFFFFFFFFFFULL;
61-
assert((c >> 48) == 0);
62-
63-
// Subtract p if result >= p
64-
uint64_t mask = -(int64_t)((t4 < 0xFFFFFFFFFFFFULL) | (t3 < 0xFFFFFFFFFFFFFULL) | (t2 < 0xFFFFFFFFFFFFFULL) | (t1 < 0xFFFFFFFFFFFFFULL) | (t0 < 0xFFFFEFFFFFC2FULL));
65-
t4 &= mask;
66-
t3 &= mask;
67-
t2 &= mask;
68-
t1 &= mask;
69-
t0 -= (~mask & 0xFFFFEFFFFFC2FULL);
70-
71-
// push internal variables back
37+
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
38+
39+
// Reduce t4 at the start so there will be at most a single carry from the first pass
40+
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
41+
42+
// The first pass ensures the magnitude is 1, ...
43+
t0 += x * 0x1000003D1ULL;
44+
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
45+
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
46+
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
47+
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
48+
49+
// ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element)
50+
assert(t4 >> 49 == 0);
51+
52+
// At most a single final reduction is needed; check if the value is >= the field characteristic
53+
x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL)
54+
& ((t3 & t2 & t1) == 0xFFFFFFFFFFFFFULL)
55+
& (t0 >= 0xFFFFEFFFFFC2FULL));
56+
57+
// Apply the final reduction (for constant-time behaviour, we do it always)
58+
t0 += x * 0x1000003D1ULL;
59+
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
60+
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
61+
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
62+
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
63+
64+
// If t4 didn't carry to bit 48 already, then it should have after any final reduction
65+
assert(t4 >> 48 == x);
66+
67+
// Mask off the possible multiple of 2^256 from the final reduction
68+
t4 &= 0x0FFFFFFFFFFFFULL;
69+
7270
r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
7371

7472
#ifdef VERIFY

0 commit comments

Comments
 (0)