@@ -34,41 +34,39 @@ void static secp256k1_fe_inner_start(void) {}
3434void static secp256k1_fe_inner_stop (void ) {}
3535
3636void static secp256k1_fe_normalize (secp256k1_fe_t * r ) {
37- uint64_t c ;
38- c = r -> n [0 ];
39- uint64_t t0 = c & 0xFFFFFFFFFFFFFULL ;
40- c = (c >> 52 ) + r -> n [1 ];
41- uint64_t t1 = c & 0xFFFFFFFFFFFFFULL ;
42- c = (c >> 52 ) + r -> n [2 ];
43- uint64_t t2 = c & 0xFFFFFFFFFFFFFULL ;
44- c = (c >> 52 ) + r -> n [3 ];
45- uint64_t t3 = c & 0xFFFFFFFFFFFFFULL ;
46- c = (c >> 52 ) + r -> n [4 ];
47- uint64_t t4 = c & 0x0FFFFFFFFFFFFULL ;
48- c >>= 48 ;
49-
50- // The following code will not modify the t's if c is initially 0.
51- c = c * 0x1000003D1ULL + t0 ;
52- t0 = c & 0xFFFFFFFFFFFFFULL ;
53- c = (c >> 52 ) + t1 ;
54- t1 = c & 0xFFFFFFFFFFFFFULL ;
55- c = (c >> 52 ) + t2 ;
56- t2 = c & 0xFFFFFFFFFFFFFULL ;
57- c = (c >> 52 ) + t3 ;
58- t3 = c & 0xFFFFFFFFFFFFFULL ;
59- c = (c >> 52 ) + t4 ;
60- t4 = c & 0x0FFFFFFFFFFFFULL ;
61- assert ((c >> 48 ) == 0 );
62-
63- // Subtract p if result >= p
64- uint64_t mask = - (int64_t )((t4 < 0xFFFFFFFFFFFFULL ) | (t3 < 0xFFFFFFFFFFFFFULL ) | (t2 < 0xFFFFFFFFFFFFFULL ) | (t1 < 0xFFFFFFFFFFFFFULL ) | (t0 < 0xFFFFEFFFFFC2FULL ));
65- t4 &= mask ;
66- t3 &= mask ;
67- t2 &= mask ;
68- t1 &= mask ;
69- t0 -= (~mask & 0xFFFFEFFFFFC2FULL );
70-
71- // push internal variables back
37+ uint64_t t0 = r -> n [0 ], t1 = r -> n [1 ], t2 = r -> n [2 ], t3 = r -> n [3 ], t4 = r -> n [4 ];
38+
39+ // Reduce t4 at the start so there will be at most a single carry from the first pass
40+ uint64_t x = t4 >> 48 ; t4 &= 0x0FFFFFFFFFFFFULL ;
41+
42+ // The first pass ensures the magnitude is 1, ...
43+ t0 += x * 0x1000003D1ULL ;
44+ t1 += (t0 >> 52 ); t0 &= 0xFFFFFFFFFFFFFULL ;
45+ t2 += (t1 >> 52 ); t1 &= 0xFFFFFFFFFFFFFULL ;
46+ t3 += (t2 >> 52 ); t2 &= 0xFFFFFFFFFFFFFULL ;
47+ t4 += (t3 >> 52 ); t3 &= 0xFFFFFFFFFFFFFULL ;
48+
49+ // ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element)
50+ assert (t4 >> 49 == 0 );
51+
52+ // At most a single final reduction is needed; check if the value is >= the field characteristic
53+ x = (t4 >> 48 ) | ((t4 == 0x0FFFFFFFFFFFFULL )
54+ & ((t3 & t2 & t1 ) == 0xFFFFFFFFFFFFFULL )
55+ & (t0 >= 0xFFFFEFFFFFC2FULL ));
56+
57+ // Apply the final reduction (for constant-time behaviour, we do it always)
58+ t0 += x * 0x1000003D1ULL ;
59+ t1 += (t0 >> 52 ); t0 &= 0xFFFFFFFFFFFFFULL ;
60+ t2 += (t1 >> 52 ); t1 &= 0xFFFFFFFFFFFFFULL ;
61+ t3 += (t2 >> 52 ); t2 &= 0xFFFFFFFFFFFFFULL ;
62+ t4 += (t3 >> 52 ); t3 &= 0xFFFFFFFFFFFFFULL ;
63+
64+ // If t4 didn't carry to bit 48 already, then it should have after any final reduction
65+ assert (t4 >> 48 == x );
66+
67+ // Mask off the possible multiple of 2^256 from the final reduction
68+ t4 &= 0x0FFFFFFFFFFFFULL ;
69+
7270 r -> n [0 ] = t0 ; r -> n [1 ] = t1 ; r -> n [2 ] = t2 ; r -> n [3 ] = t3 ; r -> n [4 ] = t4 ;
7371
7472#ifdef VERIFY
0 commit comments