Skip to content

Commit 729badf

Browse files
committed
Merge pull request bitcoin#210
2d5a186 Apply effective-affine trick to precomp (Peter Dettman) 4f9791a Effective affine addition in EC multiplication (Peter Dettman)
2 parents 22f60a6 + 2d5a186 commit 729badf

File tree

6 files changed

+374
-97
lines changed

6 files changed

+374
-97
lines changed

src/bench_internal.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void bench_group_double_var(void* arg) {
193193
bench_inv_t *data = (bench_inv_t*)arg;
194194

195195
for (i = 0; i < 200000; i++) {
196-
secp256k1_gej_double_var(&data->gej_x, &data->gej_x);
196+
secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
197197
}
198198
}
199199

@@ -202,7 +202,7 @@ void bench_group_add_var(void* arg) {
202202
bench_inv_t *data = (bench_inv_t*)arg;
203203

204204
for (i = 0; i < 200000; i++) {
205-
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y);
205+
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
206206
}
207207
}
208208

@@ -220,7 +220,7 @@ void bench_group_add_affine_var(void* arg) {
220220
bench_inv_t *data = (bench_inv_t*)arg;
221221

222222
for (i = 0; i < 200000; i++) {
223-
secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y);
223+
secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
224224
}
225225
}
226226

src/ecmult_gen_impl.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t *c
4040
VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0));
4141
secp256k1_gej_set_ge(&nums_gej, &nums_ge);
4242
/* Add G to make the bits in x uniformly distributed. */
43-
secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g);
43+
secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g, NULL);
4444
}
4545

4646
/* compute prec. */
@@ -54,18 +54,18 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t *c
5454
/* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */
5555
precj[j*16] = numsbase;
5656
for (i = 1; i < 16; i++) {
57-
secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase);
57+
secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase, NULL);
5858
}
5959
/* Multiply gbase by 16. */
6060
for (i = 0; i < 4; i++) {
61-
secp256k1_gej_double_var(&gbase, &gbase);
61+
secp256k1_gej_double_var(&gbase, &gbase, NULL);
6262
}
6363
/* Multiply numbase by 2. */
64-
secp256k1_gej_double_var(&numsbase, &numsbase);
64+
secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
6565
if (j == 62) {
6666
/* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
6767
secp256k1_gej_neg(&numsbase, &numsbase);
68-
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej);
68+
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
6969
}
7070
}
7171
secp256k1_ge_set_all_gej_var(1024, prec, precj);

src/ecmult_impl.h

Lines changed: 112 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -24,62 +24,107 @@
2424
#define WINDOW_G 16
2525
#endif
2626

27-
/** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table.
28-
* pre will contains the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs place for
29-
* 2^(w-2) entries.
30-
*
31-
* There are two versions of this function:
32-
* - secp256k1_ecmult_precomp_wnaf_gej, which operates on group elements in jacobian notation,
33-
* fast to precompute, but slower to use in later additions.
34-
* - secp256k1_ecmult_precomp_wnaf_ge, which operates on group elements in affine notations,
35-
* (much) slower to precompute, but a bit faster to use in later additions.
36-
* To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as
37-
* G is constant, so it only needs to be done once in advance.
27+
/** The number of entries a table with precomputed multiples needs to have. */
28+
#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
29+
30+
/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
31+
* the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
32+
* contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
33+
* Prej's Z values are undefined, except for the last value.
3834
*/
39-
static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
35+
static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) {
4036
secp256k1_gej_t d;
37+
secp256k1_ge_t a_ge, d_ge;
4138
int i;
42-
pre[0] = *a;
43-
secp256k1_gej_double_var(&d, &pre[0]);
44-
for (i = 1; i < (1 << (w-2)); i++) {
45-
secp256k1_gej_add_var(&pre[i], &d, &pre[i-1]);
39+
40+
VERIFY_CHECK(!a->infinity);
41+
42+
secp256k1_gej_double_var(&d, a, NULL);
43+
44+
/*
45+
* Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate
46+
* of 'd', and scale the 1P starting value's x/y coordinates without changing its z.
47+
*/
48+
d_ge.x = d.x;
49+
d_ge.y = d.y;
50+
d_ge.infinity = 0;
51+
52+
secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
53+
prej[0].x = a_ge.x;
54+
prej[0].y = a_ge.y;
55+
prej[0].z = a->z;
56+
prej[0].infinity = 0;
57+
58+
zr[0] = d.z;
59+
for (i = 1; i < n; i++) {
60+
secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
4661
}
62+
63+
/*
64+
* Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
65+
* the final point's z coordinate is actually used though, so just update that.
66+
*/
67+
secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
4768
}
4869

49-
static void secp256k1_ecmult_table_precomp_ge_storage_var(secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a, int w) {
50-
secp256k1_gej_t d;
70+
/** Fill a table 'pre' with precomputed odd multiples of a.
71+
*
72+
* There are two versions of this function:
73+
* - secp256k1_ecmult_odd_multiples_table_globalz_windowa which brings its
74+
* resulting point set to a single constant Z denominator, stores the X and Y
75+
* coordinates as ge_storage points in pre, and stores the global Z in rz.
76+
* It only operates on tables sized for WINDOW_A wnaf multiples.
77+
* - secp256k1_ecmult_odd_multiples_table_storage_var, which converts its
78+
* resulting point set to actually affine points, and stores those in pre.
79+
* It operates on tables of any size, but uses heap-allocated temporaries.
80+
*
81+
* To compute a*P + b*G, we compute a table for P using the first function,
82+
* and for G using the second (which requires an inverse, but it only needs to
83+
* happen once).
84+
*/
85+
static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge_t *pre, secp256k1_fe_t *globalz, const secp256k1_gej_t *a) {
86+
secp256k1_gej_t prej[ECMULT_TABLE_SIZE(WINDOW_A)];
87+
secp256k1_fe_t zr[ECMULT_TABLE_SIZE(WINDOW_A)];
88+
89+
/* Compute the odd multiples in Jacobian form. */
90+
secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
91+
/* Bring them to the same Z denominator. */
92+
secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
93+
}
94+
95+
static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a) {
96+
secp256k1_gej_t *prej = checked_malloc(sizeof(secp256k1_gej_t) * n);
97+
secp256k1_ge_t *prea = checked_malloc(sizeof(secp256k1_ge_t) * n);
98+
secp256k1_fe_t *zr = checked_malloc(sizeof(secp256k1_fe_t) * n);
5199
int i;
52-
const int table_size = 1 << (w-2);
53-
secp256k1_gej_t *prej = (secp256k1_gej_t *)checked_malloc(sizeof(secp256k1_gej_t) * table_size);
54-
secp256k1_ge_t *prea = (secp256k1_ge_t *)checked_malloc(sizeof(secp256k1_ge_t) * table_size);
55-
prej[0] = *a;
56-
secp256k1_gej_double_var(&d, a);
57-
for (i = 1; i < table_size; i++) {
58-
secp256k1_gej_add_var(&prej[i], &d, &prej[i-1]);
59-
}
60-
secp256k1_ge_set_all_gej_var(table_size, prea, prej);
61-
for (i = 0; i < table_size; i++) {
100+
101+
/* Compute the odd multiples in Jacobian form. */
102+
secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
103+
/* Convert them in batch to affine coordinates. */
104+
secp256k1_ge_set_table_gej_var(n, prea, prej, zr);
105+
/* Convert them to compact storage form. */
106+
for (i = 0; i < n; i++) {
62107
secp256k1_ge_to_storage(&pre[i], &prea[i]);
63108
}
64-
free(prej);
109+
65110
free(prea);
111+
free(prej);
112+
free(zr);
66113
}
67114

68-
/** The number of entries a table with precomputed multiples needs to have. */
69-
#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
70-
71115
/** The following two macro retrieves a particular odd multiple from a table
72116
* of precomputed multiples. */
73-
#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) do { \
117+
#define ECMULT_TABLE_GET_GE(r,pre,n,w) do { \
74118
VERIFY_CHECK(((n) & 1) == 1); \
75119
VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
76120
VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \
77121
if ((n) > 0) { \
78122
*(r) = (pre)[((n)-1)/2]; \
79123
} else { \
80-
secp256k1_gej_neg((r), &(pre)[(-(n)-1)/2]); \
124+
secp256k1_ge_neg((r), &(pre)[(-(n)-1)/2]); \
81125
} \
82126
} while(0)
127+
83128
#define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
84129
VERIFY_CHECK(((n) & 1) == 1); \
85130
VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
@@ -112,7 +157,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx) {
112157
ctx->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
113158

114159
/* precompute the tables with odd multiples */
115-
secp256k1_ecmult_table_precomp_ge_storage_var(*ctx->pre_g, &gj, WINDOW_G);
160+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
116161

117162
#ifdef USE_ENDOMORPHISM
118163
{
@@ -124,9 +169,9 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx) {
124169
/* calculate 2^128*generator */
125170
g_128j = gj;
126171
for (i = 0; i < 128; i++) {
127-
secp256k1_gej_double_var(&g_128j, &g_128j);
172+
secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
128173
}
129-
secp256k1_ecmult_table_precomp_ge_storage_var(*ctx->pre_g_128, &g_128j, WINDOW_G);
174+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
130175
}
131176
#endif
132177
}
@@ -208,11 +253,11 @@ static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w)
208253
}
209254

210255
static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng) {
211-
secp256k1_gej_t tmpj;
212-
secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
256+
secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
213257
secp256k1_ge_t tmpa;
258+
secp256k1_fe_t Z;
214259
#ifdef USE_ENDOMORPHISM
215-
secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
260+
secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
216261
secp256k1_scalar_t na_1, na_lam;
217262
/* Splitted G factors. */
218263
secp256k1_scalar_t ng_1, ng_128;
@@ -252,12 +297,21 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
252297
bits = bits_na;
253298
#endif
254299

255-
/* calculate odd multiples of a */
256-
secp256k1_ecmult_table_precomp_gej_var(pre_a, a, WINDOW_A);
300+
/* Calculate odd multiples of a.
301+
* All multiples are brought to the same Z 'denominator', which is stored
302+
* in Z. Due to secp256k1' isomorphism we can do all operations pretending
303+
* that the Z coordinate was 1, use affine addition formulae, and correct
304+
* the Z coordinate of the result once at the end.
305+
* The exception is the precomputed G table points, which are actually
306+
* affine. Compared to the base used for other points, they have a Z ratio
307+
* of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
308+
* isomorphism to efficiently add with a known Z inverse.
309+
*/
310+
secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a);
257311

258312
#ifdef USE_ENDOMORPHISM
259313
for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
260-
secp256k1_gej_mul_lambda(&pre_a_lam[i], &pre_a[i]);
314+
secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
261315
}
262316

263317
/* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
@@ -281,37 +335,41 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
281335

282336
secp256k1_gej_set_infinity(r);
283337

284-
for (i = bits-1; i >= 0; i--) {
338+
for (i = bits - 1; i >= 0; i--) {
285339
int n;
286-
secp256k1_gej_double_var(r, r);
340+
secp256k1_gej_double_var(r, r, NULL);
287341
#ifdef USE_ENDOMORPHISM
288342
if (i < bits_na_1 && (n = wnaf_na_1[i])) {
289-
ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
290-
secp256k1_gej_add_var(r, r, &tmpj);
343+
ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
344+
secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
291345
}
292346
if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
293-
ECMULT_TABLE_GET_GEJ(&tmpj, pre_a_lam, n, WINDOW_A);
294-
secp256k1_gej_add_var(r, r, &tmpj);
347+
ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
348+
secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
295349
}
296350
if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
297351
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
298-
secp256k1_gej_add_ge_var(r, r, &tmpa);
352+
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
299353
}
300354
if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
301355
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G);
302-
secp256k1_gej_add_ge_var(r, r, &tmpa);
356+
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
303357
}
304358
#else
305359
if (i < bits_na && (n = wnaf_na[i])) {
306-
ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
307-
secp256k1_gej_add_var(r, r, &tmpj);
360+
ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
361+
secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
308362
}
309363
if (i < bits_ng && (n = wnaf_ng[i])) {
310364
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
311-
secp256k1_gej_add_ge_var(r, r, &tmpa);
365+
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
312366
}
313367
#endif
314368
}
369+
370+
if (!r->infinity) {
371+
secp256k1_fe_mul(&r->z, &r->z, &Z);
372+
}
315373
}
316374

317375
#endif

src/group.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a);
6262
/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
6363
static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a);
6464

65+
/** Set a batch of group elements equal to the inputs given in jacobian
66+
* coordinates (with known z-ratios). zr must contain the known z-ratios such
67+
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
68+
static void secp256k1_ge_set_table_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a, const secp256k1_fe_t *zr);
69+
70+
/** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to
71+
* the same global z "denominator". zr must contain the known z-ratios such
72+
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y
73+
* coordinates of the result are stored in r, the common z coordinate is
74+
* stored in globalz. */
75+
static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge_t *r, secp256k1_fe_t *globalz, const secp256k1_gej_t *a, const secp256k1_fe_t *zr);
6576

6677
/** Set a group element (jacobian) equal to the point at infinity. */
6778
static void secp256k1_gej_set_infinity(secp256k1_gej_t *r);
@@ -81,23 +92,26 @@ static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
8192
/** Check whether a group element is the point at infinity. */
8293
static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
8394

84-
/** Set r equal to the double of a. */
85-
static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a);
95+
/** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0). */
96+
static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr);
8697

87-
/** Set r equal to the sum of a and b. */
88-
static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b);
98+
/** Set r equal to the sum of a and b. If rzr is non-NULL, r->z = a->z * *rzr (a cannot be infinity in that case). */
99+
static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b, secp256k1_fe_t *rzr);
89100

90101
/** Set r equal to the sum of a and b (with b given in affine coordinates, and not infinity). */
91102
static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
92103

93104
/** Set r equal to the sum of a and b (with b given in affine coordinates). This is more efficient
94105
than secp256k1_gej_add_var. It is identical to secp256k1_gej_add_ge but without constant-time
95-
guarantee, and b is allowed to be infinity. */
96-
static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
106+
guarantee, and b is allowed to be infinity. If rzr is non-NULL, r->z = a->z * *rzr (a cannot be infinity in that case). */
107+
static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, secp256k1_fe_t *rzr);
108+
109+
/** Set r equal to the sum of a and b (with the inverse of b's Z coordinate passed as bzinv). */
110+
static void secp256k1_gej_add_zinv_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, const secp256k1_fe_t *bzinv);
97111

98112
#ifdef USE_ENDOMORPHISM
99113
/** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */
100-
static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a);
114+
static void secp256k1_ge_mul_lambda(secp256k1_ge_t *r, const secp256k1_ge_t *a);
101115
#endif
102116

103117
/** Clear a secp256k1_gej_t to prevent leaking sensitive information. */

0 commit comments

Comments
 (0)