Skip to content

Commit 4f9791a

Browse files
peterdettmansipa
authored andcommitted
Effective affine addition in EC multiplication
* Make secp256k1_gej_add_var and secp256k1_gej_double return the Z ratio to go from a.z to r.z. * Use these Z ratios to speed up batch point conversion to affine coordinates, and to speed up batch conversion of points to a common Z coordinate. * Add a point addition function that takes a point with a known Z inverse. * Due to secp256k1's endomorphism, all additions in the EC multiplication code can work on affine coordinate (with an implicit common Z coordinate), correcting the Z coordinate of the result afterwards. Refactoring by Pieter Wuille: * Move more global-z logic into the group code. * Separate code for computing the odd multiples from the code to bring it to either storage or globalz format. * Rename functions. * Make all addition operations return Z ratios, and test them. * Make the zr table format compatible with future batch chaining (the first entry in zr becomes the ratio between the input and the first output). Original idea and code by Peter Dettman.
1 parent 22f60a6 commit 4f9791a

File tree

6 files changed

+330
-86
lines changed

6 files changed

+330
-86
lines changed

src/bench_internal.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void bench_group_double_var(void* arg) {
193193
bench_inv_t *data = (bench_inv_t*)arg;
194194

195195
for (i = 0; i < 200000; i++) {
196-
secp256k1_gej_double_var(&data->gej_x, &data->gej_x);
196+
secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
197197
}
198198
}
199199

@@ -202,7 +202,7 @@ void bench_group_add_var(void* arg) {
202202
bench_inv_t *data = (bench_inv_t*)arg;
203203

204204
for (i = 0; i < 200000; i++) {
205-
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y);
205+
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
206206
}
207207
}
208208

src/ecmult_gen_impl.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,18 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t *c
5454
/* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */
5555
precj[j*16] = numsbase;
5656
for (i = 1; i < 16; i++) {
57-
secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase);
57+
secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase, NULL);
5858
}
5959
/* Multiply gbase by 16. */
6060
for (i = 0; i < 4; i++) {
61-
secp256k1_gej_double_var(&gbase, &gbase);
61+
secp256k1_gej_double_var(&gbase, &gbase, NULL);
6262
}
6363
/* Multiply numbase by 2. */
64-
secp256k1_gej_double_var(&numsbase, &numsbase);
64+
secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
6565
if (j == 62) {
6666
/* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
6767
secp256k1_gej_neg(&numsbase, &numsbase);
68-
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej);
68+
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
6969
}
7070
}
7171
secp256k1_ge_set_all_gej_var(1024, prec, precj);

src/ecmult_impl.h

Lines changed: 90 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -24,62 +24,85 @@
2424
#define WINDOW_G 16
2525
#endif
2626

27-
/** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table.
28-
* pre will contains the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs place for
29-
* 2^(w-2) entries.
30-
*
31-
* There are two versions of this function:
32-
* - secp256k1_ecmult_precomp_wnaf_gej, which operates on group elements in jacobian notation,
33-
* fast to precompute, but slower to use in later additions.
34-
* - secp256k1_ecmult_precomp_wnaf_ge, which operates on group elements in affine notations,
35-
* (much) slower to precompute, but a bit faster to use in later additions.
36-
* To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as
37-
* G is constant, so it only needs to be done once in advance.
27+
/** The number of entries a table with precomputed multiples needs to have. */
28+
#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
29+
30+
/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
31+
* the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
32+
* contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
3833
*/
39-
static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
34+
static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) {
4035
secp256k1_gej_t d;
4136
int i;
42-
pre[0] = *a;
43-
secp256k1_gej_double_var(&d, &pre[0]);
44-
for (i = 1; i < (1 << (w-2)); i++) {
45-
secp256k1_gej_add_var(&pre[i], &d, &pre[i-1]);
37+
38+
VERIFY_CHECK(!a->infinity);
39+
40+
prej[0] = *a;
41+
secp256k1_gej_double_var(&d, &prej[0], NULL);
42+
secp256k1_fe_set_int(zr, 1);
43+
for (i = 1; i < n; i++) {
44+
secp256k1_gej_add_var(&prej[i], &prej[i-1], &d, &zr[i]);
4645
}
4746
}
4847

49-
static void secp256k1_ecmult_table_precomp_ge_storage_var(secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a, int w) {
50-
secp256k1_gej_t d;
48+
/** Fill a table 'pre' with precomputed odd multiples of a.
49+
*
50+
* There are two versions of this function:
51+
* - secp256k1_ecmult_odd_multiples_table_globalz_windowa which brings its
52+
* resulting point set to a single constant Z denominator, stores the X and Y
53+
* coordinates as ge_storage points in pre, and stores the global Z in rz.
54+
* It only operates on tables sized for WINDOW_A wnaf multiples.
55+
* - secp256k1_ecmult_odd_multiples_table_storage_var, which converts its
56+
* resulting point set to actually affine points, and stores those in pre.
57+
* It operates on tables of any size, but uses heap-allocated temporaries.
58+
*
59+
* To compute a*P + b*G, we compute a table for P using the first function,
60+
* and for G using the second (which requires an inverse, but it only needs to
61+
* happen once).
62+
*/
63+
static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge_t *pre, secp256k1_fe_t *globalz, const secp256k1_gej_t *a) {
64+
secp256k1_gej_t prej[ECMULT_TABLE_SIZE(WINDOW_A)];
65+
secp256k1_fe_t zr[ECMULT_TABLE_SIZE(WINDOW_A)];
66+
67+
/* Compute the odd multiples in Jacobian form. */
68+
secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
69+
/* Bring them to the same Z denominator. */
70+
secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
71+
}
72+
73+
static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a) {
74+
secp256k1_gej_t *prej = checked_malloc(sizeof(secp256k1_gej_t) * n);
75+
secp256k1_ge_t *prea = checked_malloc(sizeof(secp256k1_ge_t) * n);
76+
secp256k1_fe_t *zr = checked_malloc(sizeof(secp256k1_fe_t) * n);
5177
int i;
52-
const int table_size = 1 << (w-2);
53-
secp256k1_gej_t *prej = (secp256k1_gej_t *)checked_malloc(sizeof(secp256k1_gej_t) * table_size);
54-
secp256k1_ge_t *prea = (secp256k1_ge_t *)checked_malloc(sizeof(secp256k1_ge_t) * table_size);
55-
prej[0] = *a;
56-
secp256k1_gej_double_var(&d, a);
57-
for (i = 1; i < table_size; i++) {
58-
secp256k1_gej_add_var(&prej[i], &d, &prej[i-1]);
59-
}
60-
secp256k1_ge_set_all_gej_var(table_size, prea, prej);
61-
for (i = 0; i < table_size; i++) {
78+
79+
/* Compute the odd multiples in Jacobian form. */
80+
secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
81+
/* Convert them in batch to affine coordinates. */
82+
secp256k1_ge_set_table_gej_var(n, prea, prej, zr);
83+
/* Convert them to compact storage form. */
84+
for (i = 0; i < n; i++) {
6285
secp256k1_ge_to_storage(&pre[i], &prea[i]);
6386
}
64-
free(prej);
87+
6588
free(prea);
89+
free(prej);
90+
free(zr);
6691
}
6792

68-
/** The number of entries a table with precomputed multiples needs to have. */
69-
#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
70-
7193
/** The following two macro retrieves a particular odd multiple from a table
7294
* of precomputed multiples. */
73-
#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) do { \
95+
#define ECMULT_TABLE_GET_GE(r,pre,n,w) do { \
7496
VERIFY_CHECK(((n) & 1) == 1); \
7597
VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
7698
VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \
7799
if ((n) > 0) { \
78100
*(r) = (pre)[((n)-1)/2]; \
79101
} else { \
80-
secp256k1_gej_neg((r), &(pre)[(-(n)-1)/2]); \
102+
secp256k1_ge_neg((r), &(pre)[(-(n)-1)/2]); \
81103
} \
82104
} while(0)
105+
83106
#define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
84107
VERIFY_CHECK(((n) & 1) == 1); \
85108
VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
@@ -112,7 +135,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx) {
112135
ctx->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
113136

114137
/* precompute the tables with odd multiples */
115-
secp256k1_ecmult_table_precomp_ge_storage_var(*ctx->pre_g, &gj, WINDOW_G);
138+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
116139

117140
#ifdef USE_ENDOMORPHISM
118141
{
@@ -124,9 +147,9 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx) {
124147
/* calculate 2^128*generator */
125148
g_128j = gj;
126149
for (i = 0; i < 128; i++) {
127-
secp256k1_gej_double_var(&g_128j, &g_128j);
150+
secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
128151
}
129-
secp256k1_ecmult_table_precomp_ge_storage_var(*ctx->pre_g_128, &g_128j, WINDOW_G);
152+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
130153
}
131154
#endif
132155
}
@@ -208,11 +231,11 @@ static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w)
208231
}
209232

210233
static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng) {
211-
secp256k1_gej_t tmpj;
212-
secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
234+
secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
213235
secp256k1_ge_t tmpa;
236+
secp256k1_fe_t Z;
214237
#ifdef USE_ENDOMORPHISM
215-
secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
238+
secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
216239
secp256k1_scalar_t na_1, na_lam;
217240
/* Splitted G factors. */
218241
secp256k1_scalar_t ng_1, ng_128;
@@ -252,12 +275,21 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
252275
bits = bits_na;
253276
#endif
254277

255-
/* calculate odd multiples of a */
256-
secp256k1_ecmult_table_precomp_gej_var(pre_a, a, WINDOW_A);
278+
/* Calculate odd multiples of a.
279+
* All multiples are brought to the same Z 'denominator', which is stored
280+
* in Z. Due to secp256k1' isomorphism we can do all operations pretending
281+
* that the Z coordinate was 1, use affine addition formulae, and correct
282+
* the Z coordinate of the result once at the end.
283+
* The exception is the precomputed G table points, which are actually
284+
* affine. Compared to the base used for other points, they have a Z ratio
285+
* of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
286+
* isomorphism to efficiently add with a known Z inverse.
287+
*/
288+
secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a);
257289

258290
#ifdef USE_ENDOMORPHISM
259291
for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
260-
secp256k1_gej_mul_lambda(&pre_a_lam[i], &pre_a[i]);
292+
secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
261293
}
262294

263295
/* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
@@ -281,37 +313,41 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
281313

282314
secp256k1_gej_set_infinity(r);
283315

284-
for (i = bits-1; i >= 0; i--) {
316+
for (i = bits - 1; i >= 0; i--) {
285317
int n;
286-
secp256k1_gej_double_var(r, r);
318+
secp256k1_gej_double_var(r, r, NULL);
287319
#ifdef USE_ENDOMORPHISM
288320
if (i < bits_na_1 && (n = wnaf_na_1[i])) {
289-
ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
290-
secp256k1_gej_add_var(r, r, &tmpj);
321+
ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
322+
secp256k1_gej_add_ge_var(r, r, &tmpa);
291323
}
292324
if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
293-
ECMULT_TABLE_GET_GEJ(&tmpj, pre_a_lam, n, WINDOW_A);
294-
secp256k1_gej_add_var(r, r, &tmpj);
325+
ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
326+
secp256k1_gej_add_ge_var(r, r, &tmpa);
295327
}
296328
if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
297329
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
298-
secp256k1_gej_add_ge_var(r, r, &tmpa);
330+
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
299331
}
300332
if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
301333
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G);
302-
secp256k1_gej_add_ge_var(r, r, &tmpa);
334+
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
303335
}
304336
#else
305337
if (i < bits_na && (n = wnaf_na[i])) {
306-
ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
307-
secp256k1_gej_add_var(r, r, &tmpj);
338+
ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
339+
secp256k1_gej_add_ge_var(r, r, &tmpa);
308340
}
309341
if (i < bits_ng && (n = wnaf_ng[i])) {
310342
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
311-
secp256k1_gej_add_ge_var(r, r, &tmpa);
343+
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
312344
}
313345
#endif
314346
}
347+
348+
if (!r->infinity) {
349+
secp256k1_fe_mul(&r->z, &r->z, &Z);
350+
}
315351
}
316352

317353
#endif

src/group.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a);
6262
/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
6363
static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a);
6464

65+
/** Set a batch of group elements equal to the inputs given in jacobian
66+
* coordinates (with known z-ratios). zr must contain the known z-ratios such
67+
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
68+
static void secp256k1_ge_set_table_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a, const secp256k1_fe_t *zr);
69+
70+
/** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to
71+
* the same global z "denominator". zr must contain the known z-ratios such
72+
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y
73+
* coordinates of the result are stored in r, the common z coordinate is
74+
* stored in globalz. */
75+
static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge_t *r, secp256k1_fe_t *globalz, const secp256k1_gej_t *a, const secp256k1_fe_t *zr);
6576

6677
/** Set a group element (jacobian) equal to the point at infinity. */
6778
static void secp256k1_gej_set_infinity(secp256k1_gej_t *r);
@@ -81,11 +92,11 @@ static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
8192
/** Check whether a group element is the point at infinity. */
8293
static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
8394

84-
/** Set r equal to the double of a. */
85-
static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a);
95+
/** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0). */
96+
static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr);
8697

87-
/** Set r equal to the sum of a and b. */
88-
static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b);
98+
/** Set r equal to the sum of a and b. If rzr is non-NULL, r->z = a->z * *rzr (a cannot be infinity in that case). */
99+
static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b, secp256k1_fe_t *rzr);
89100

90101
/** Set r equal to the sum of a and b (with b given in affine coordinates, and not infinity). */
91102
static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
@@ -95,9 +106,12 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
95106
guarantee, and b is allowed to be infinity. */
96107
static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
97108

109+
/** Set r equal to the sum of a and b (with the inverse of b's Z coordinate passed as bzinv). */
110+
static void secp256k1_gej_add_zinv_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, const secp256k1_fe_t *bzinv);
111+
98112
#ifdef USE_ENDOMORPHISM
99113
/** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */
100-
static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a);
114+
static void secp256k1_ge_mul_lambda(secp256k1_ge_t *r, const secp256k1_ge_t *a);
101115
#endif
102116

103117
/** Clear a secp256k1_gej_t to prevent leaking sensitive information. */

0 commit comments

Comments
 (0)