Line data Source code
1 : /* mpi-pow.c - MPI functions for exponentiation
2 : * Copyright (C) 1994, 1996, 1998, 2000, 2002
3 : * 2003 Free Software Foundation, Inc.
4 : * 2013 g10 Code GmbH
5 : *
6 : * This file is part of Libgcrypt.
7 : *
8 : * Libgcrypt is free software; you can redistribute it and/or modify
9 : * it under the terms of the GNU Lesser General Public License as
10 : * published by the Free Software Foundation; either version 2.1 of
11 : * the License, or (at your option) any later version.
12 : *
13 : * Libgcrypt is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : * GNU Lesser General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU Lesser General Public
19 : * License along with this program; if not, see <http://www.gnu.org/licenses/>.
20 : *
21 : * Note: This code is heavily based on the GNU MP Library.
22 : * Actually it's the same code with only minor changes in the
23 : * way the data is stored; this is to support the abstraction
24 : * of an optional secure memory allocation which may be used
25 : * to avoid revealing of sensitive data due to paging etc.
26 : */
27 :
28 : #include <config.h>
29 : #include <stdio.h>
30 : #include <stdlib.h>
31 : #include <string.h>
32 :
33 : #include "mpi-internal.h"
34 : #include "longlong.h"
35 :
36 :
37 : /*
38 : * When you need old implementation, please add compilation option
39 : * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
40 : * or expose this line:
41 : #define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
42 : */
43 :
44 : #if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
45 : /****************
46 : * RES = BASE ^ EXPO mod MOD
47 : */
48 : void
49 : _gcry_mpi_powm (gcry_mpi_t res,
50 : gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
51 : {
52 : /* Pointer to the limbs of the arguments, their size and signs. */
53 : mpi_ptr_t rp, ep, mp, bp;
54 : mpi_size_t esize, msize, bsize, rsize;
55 : int msign, bsign, rsign;
56 : /* Flags telling the secure allocation status of the arguments. */
57 : int esec, msec, bsec;
58 : /* Size of the result including space for temporary values. */
59 : mpi_size_t size;
60 : /* Helper. */
61 : int mod_shift_cnt;
62 : int negative_result;
63 : mpi_ptr_t mp_marker = NULL;
64 : mpi_ptr_t bp_marker = NULL;
65 : mpi_ptr_t ep_marker = NULL;
66 : mpi_ptr_t xp_marker = NULL;
67 : unsigned int mp_nlimbs = 0;
68 : unsigned int bp_nlimbs = 0;
69 : unsigned int ep_nlimbs = 0;
70 : unsigned int xp_nlimbs = 0;
71 : mpi_ptr_t tspace = NULL;
72 : mpi_size_t tsize = 0;
73 :
74 :
75 : esize = expo->nlimbs;
76 : msize = mod->nlimbs;
77 : size = 2 * msize;
78 : msign = mod->sign;
79 :
80 : esec = mpi_is_secure(expo);
81 : msec = mpi_is_secure(mod);
82 : bsec = mpi_is_secure(base);
83 :
84 : rp = res->d;
85 : ep = expo->d;
86 :
87 : if (!msize)
88 : _gcry_divide_by_zero();
89 :
90 : if (!esize)
91 : {
92 : /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
93 : on if MOD equals 1. */
94 : res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
95 : if (res->nlimbs)
96 : {
97 : RESIZE_IF_NEEDED (res, 1);
98 : rp = res->d;
99 : rp[0] = 1;
100 : }
101 : res->sign = 0;
102 : goto leave;
103 : }
104 :
105 : /* Normalize MOD (i.e. make its most significant bit set) as
106 : required by mpn_divrem. This will make the intermediate values
107 : in the calculation slightly larger, but the correct result is
108 : obtained after a final reduction using the original MOD value. */
109 : mp_nlimbs = msec? msize:0;
110 : mp = mp_marker = mpi_alloc_limb_space(msize, msec);
111 : count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
112 : if (mod_shift_cnt)
113 : _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
114 : else
115 : MPN_COPY( mp, mod->d, msize );
116 :
117 : bsize = base->nlimbs;
118 : bsign = base->sign;
119 : if (bsize > msize)
120 : {
121 : /* The base is larger than the module. Reduce it.
122 :
123 : Allocate (BSIZE + 1) with space for remainder and quotient.
124 : (The quotient is (bsize - msize + 1) limbs.) */
125 : bp_nlimbs = bsec ? (bsize + 1):0;
126 : bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
127 : MPN_COPY ( bp, base->d, bsize );
128 : /* We don't care about the quotient, store it above the
129 : * remainder, at BP + MSIZE. */
130 : _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
131 : bsize = msize;
132 : /* Canonicalize the base, since we are going to multiply with it
133 : quite a few times. */
134 : MPN_NORMALIZE( bp, bsize );
135 : }
136 : else
137 : bp = base->d;
138 :
139 : if (!bsize)
140 : {
141 : res->nlimbs = 0;
142 : res->sign = 0;
143 : goto leave;
144 : }
145 :
146 :
147 : /* Make BASE, EXPO and MOD not overlap with RES. */
148 : if ( rp == bp )
149 : {
150 : /* RES and BASE are identical. Allocate temp. space for BASE. */
151 : gcry_assert (!bp_marker);
152 : bp_nlimbs = bsec? bsize:0;
153 : bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
154 : MPN_COPY(bp, rp, bsize);
155 : }
156 : if ( rp == ep )
157 : {
158 : /* RES and EXPO are identical. Allocate temp. space for EXPO. */
159 : ep_nlimbs = esec? esize:0;
160 : ep = ep_marker = mpi_alloc_limb_space( esize, esec );
161 : MPN_COPY(ep, rp, esize);
162 : }
163 : if ( rp == mp )
164 : {
165 : /* RES and MOD are identical. Allocate temporary space for MOD.*/
166 : gcry_assert (!mp_marker);
167 : mp_nlimbs = msec?msize:0;
168 : mp = mp_marker = mpi_alloc_limb_space( msize, msec );
169 : MPN_COPY(mp, rp, msize);
170 : }
171 :
172 : /* Copy base to the result. */
173 : if (res->alloced < size)
174 : {
175 : mpi_resize (res, size);
176 : rp = res->d;
177 : }
178 : MPN_COPY ( rp, bp, bsize );
179 : rsize = bsize;
180 : rsign = 0;
181 :
182 : /* Main processing. */
183 : {
184 : mpi_size_t i;
185 : mpi_ptr_t xp;
186 : int c;
187 : mpi_limb_t e;
188 : mpi_limb_t carry_limb;
189 : struct karatsuba_ctx karactx;
190 :
191 : xp_nlimbs = msec? (2 * (msize + 1)):0;
192 : xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
193 :
194 : memset( &karactx, 0, sizeof karactx );
195 : negative_result = (ep[0] & 1) && bsign;
196 :
197 : i = esize - 1;
198 : e = ep[i];
199 : count_leading_zeros (c, e);
200 : e = (e << c) << 1; /* Shift the expo bits to the left, lose msb. */
201 : c = BITS_PER_MPI_LIMB - 1 - c;
202 :
203 : /* Main loop.
204 :
205 : Make the result be pointed to alternately by XP and RP. This
206 : helps us avoid block copying, which would otherwise be
207 : necessary with the overlap restrictions of
208 : _gcry_mpih_divmod. With 50% probability the result after this
209 : loop will be in the area originally pointed by RP (==RES->d),
210 : and with 50% probability in the area originally pointed to by XP. */
211 : for (;;)
212 : {
213 : while (c)
214 : {
215 : mpi_ptr_t tp;
216 : mpi_size_t xsize;
217 :
218 : /*mpih_mul_n(xp, rp, rp, rsize);*/
219 : if ( rsize < KARATSUBA_THRESHOLD )
220 : _gcry_mpih_sqr_n_basecase( xp, rp, rsize );
221 : else
222 : {
223 : if ( !tspace )
224 : {
225 : tsize = 2 * rsize;
226 : tspace = mpi_alloc_limb_space( tsize, 0 );
227 : }
228 : else if ( tsize < (2*rsize) )
229 : {
230 : _gcry_mpi_free_limb_space (tspace, 0);
231 : tsize = 2 * rsize;
232 : tspace = mpi_alloc_limb_space (tsize, 0 );
233 : }
234 : _gcry_mpih_sqr_n (xp, rp, rsize, tspace);
235 : }
236 :
237 : xsize = 2 * rsize;
238 : if ( xsize > msize )
239 : {
240 : _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
241 : xsize = msize;
242 : }
243 :
244 : tp = rp; rp = xp; xp = tp;
245 : rsize = xsize;
246 :
247 : /* To mitigate the Yarom/Falkner flush+reload cache
248 : * side-channel attack on the RSA secret exponent, we do
249 : * the multiplication regardless of the value of the
250 : * high-bit of E. But to avoid this performance penalty
251 : * we do it only if the exponent has been stored in secure
252 : * memory and we can thus assume it is a secret exponent. */
253 : if (esec || (mpi_limb_signed_t)e < 0)
254 : {
255 : /*mpih_mul( xp, rp, rsize, bp, bsize );*/
256 : if( bsize < KARATSUBA_THRESHOLD )
257 : _gcry_mpih_mul ( xp, rp, rsize, bp, bsize );
258 : else
259 : _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, bp, bsize,
260 : &karactx);
261 :
262 : xsize = rsize + bsize;
263 : if ( xsize > msize )
264 : {
265 : _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
266 : xsize = msize;
267 : }
268 : }
269 : if ( (mpi_limb_signed_t)e < 0 )
270 : {
271 : tp = rp; rp = xp; xp = tp;
272 : rsize = xsize;
273 : }
274 : e <<= 1;
275 : c--;
276 : }
277 :
278 : i--;
279 : if ( i < 0 )
280 : break;
281 : e = ep[i];
282 : c = BITS_PER_MPI_LIMB;
283 : }
284 :
285 : /* We shifted MOD, the modulo reduction argument, left
286 : MOD_SHIFT_CNT steps. Adjust the result by reducing it with the
287 : original MOD.
288 :
289 : Also make sure the result is put in RES->d (where it already
290 : might be, see above). */
291 : if ( mod_shift_cnt )
292 : {
293 : carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
294 : rp = res->d;
295 : if ( carry_limb )
296 : {
297 : rp[rsize] = carry_limb;
298 : rsize++;
299 : }
300 : }
301 : else if (res->d != rp)
302 : {
303 : MPN_COPY (res->d, rp, rsize);
304 : rp = res->d;
305 : }
306 :
307 : if ( rsize >= msize )
308 : {
309 : _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
310 : rsize = msize;
311 : }
312 :
313 : /* Remove any leading zero words from the result. */
314 : if ( mod_shift_cnt )
315 : _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
316 : MPN_NORMALIZE (rp, rsize);
317 :
318 : _gcry_mpih_release_karatsuba_ctx (&karactx );
319 : }
320 :
321 : /* Fixup for negative results. */
322 : if ( negative_result && rsize )
323 : {
324 : if ( mod_shift_cnt )
325 : _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
326 : _gcry_mpih_sub( rp, mp, msize, rp, rsize);
327 : rsize = msize;
328 : rsign = msign;
329 : MPN_NORMALIZE(rp, rsize);
330 : }
331 : gcry_assert (res->d == rp);
332 : res->nlimbs = rsize;
333 : res->sign = rsign;
334 :
335 : leave:
336 : if (mp_marker)
337 : _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
338 : if (bp_marker)
339 : _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
340 : if (ep_marker)
341 : _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
342 : if (xp_marker)
343 : _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
344 : if (tspace)
345 : _gcry_mpi_free_limb_space( tspace, 0 );
346 : }
347 : #else
348 : /**
349 : * Internal function to compute
350 : *
351 : * X = R * S mod M
352 : *
353 : * and set the size of X at the pointer XSIZE_P.
354 : * Use karatsuba structure at KARACTX_P.
355 : *
356 : * Condition:
357 : * RSIZE >= SSIZE
358 : * Enough space for X is allocated beforehand.
359 : *
360 : * For generic cases, we can/should use gcry_mpi_mulm.
361 : * This function is use for specific internal case.
362 : */
363 : static void
364 8615087 : mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
365 : mpi_ptr_t rp, mpi_size_t rsize,
366 : mpi_ptr_t sp, mpi_size_t ssize,
367 : mpi_ptr_t mp, mpi_size_t msize,
368 : struct karatsuba_ctx *karactx_p)
369 : {
370 8615087 : if( ssize < KARATSUBA_THRESHOLD )
371 4158360 : _gcry_mpih_mul ( xp, rp, rsize, sp, ssize );
372 : else
373 4456727 : _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
374 :
375 8615087 : if (rsize + ssize > msize)
376 : {
377 8485955 : _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
378 8485955 : *xsize_p = msize;
379 : }
380 : else
381 129132 : *xsize_p = rsize + ssize;
382 8615087 : }
383 :
384 : #define SIZE_PRECOMP ((1 << (5 - 1)))
385 :
386 : /****************
387 : * RES = BASE ^ EXPO mod MOD
388 : *
389 : * To mitigate the Yarom/Falkner flush+reload cache side-channel
390 : * attack on the RSA secret exponent, we don't use the square
391 : * routine but multiplication.
392 : *
393 : * Reference:
394 : * Handbook of Applied Cryptography
395 : * Algorithm 14.83: Modified left-to-right k-ary exponentiation
396 : */
397 : void
398 212425 : _gcry_mpi_powm (gcry_mpi_t res,
399 : gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
400 : {
401 : /* Pointer to the limbs of the arguments, their size and signs. */
402 : mpi_ptr_t rp, ep, mp, bp;
403 : mpi_size_t esize, msize, bsize, rsize;
404 : int msign, bsign, rsign;
405 : /* Flags telling the secure allocation status of the arguments. */
406 : int esec, msec, bsec;
407 : /* Size of the result including space for temporary values. */
408 : mpi_size_t size;
409 : /* Helper. */
410 : int mod_shift_cnt;
411 : int negative_result;
412 212425 : mpi_ptr_t mp_marker = NULL;
413 212425 : mpi_ptr_t bp_marker = NULL;
414 212425 : mpi_ptr_t ep_marker = NULL;
415 212425 : mpi_ptr_t xp_marker = NULL;
416 212425 : unsigned int mp_nlimbs = 0;
417 212425 : unsigned int bp_nlimbs = 0;
418 212425 : unsigned int ep_nlimbs = 0;
419 212425 : unsigned int xp_nlimbs = 0;
420 : mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */
421 : mpi_size_t precomp_size[SIZE_PRECOMP];
422 : mpi_size_t W;
423 : mpi_ptr_t base_u;
424 : mpi_size_t base_u_size;
425 : mpi_size_t max_u_size;
426 :
427 212425 : esize = expo->nlimbs;
428 212425 : msize = mod->nlimbs;
429 212425 : size = 2 * msize;
430 212425 : msign = mod->sign;
431 :
432 212425 : if (esize * BITS_PER_MPI_LIMB > 512)
433 3913 : W = 5;
434 208512 : else if (esize * BITS_PER_MPI_LIMB > 256)
435 3164 : W = 4;
436 205348 : else if (esize * BITS_PER_MPI_LIMB > 128)
437 5084 : W = 3;
438 200264 : else if (esize * BITS_PER_MPI_LIMB > 64)
439 2177 : W = 2;
440 : else
441 198087 : W = 1;
442 :
443 212425 : esec = mpi_is_secure(expo);
444 212425 : msec = mpi_is_secure(mod);
445 212425 : bsec = mpi_is_secure(base);
446 :
447 212425 : rp = res->d;
448 212425 : ep = expo->d;
449 :
450 212425 : if (!msize)
451 0 : _gcry_divide_by_zero();
452 :
453 212425 : if (!esize)
454 : {
455 : /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
456 : on if MOD equals 1. */
457 0 : res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
458 0 : if (res->nlimbs)
459 : {
460 0 : RESIZE_IF_NEEDED (res, 1);
461 0 : rp = res->d;
462 0 : rp[0] = 1;
463 : }
464 0 : res->sign = 0;
465 0 : goto leave;
466 : }
467 :
468 : /* Normalize MOD (i.e. make its most significant bit set) as
469 : required by mpn_divrem. This will make the intermediate values
470 : in the calculation slightly larger, but the correct result is
471 : obtained after a final reduction using the original MOD value. */
472 212425 : mp_nlimbs = msec? msize:0;
473 212425 : mp = mp_marker = mpi_alloc_limb_space(msize, msec);
474 212425 : count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
475 212425 : if (mod_shift_cnt)
476 100759 : _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
477 : else
478 111666 : MPN_COPY( mp, mod->d, msize );
479 :
480 212425 : bsize = base->nlimbs;
481 212425 : bsign = base->sign;
482 212425 : if (bsize > msize)
483 : {
484 : /* The base is larger than the module. Reduce it.
485 :
486 : Allocate (BSIZE + 1) with space for remainder and quotient.
487 : (The quotient is (bsize - msize + 1) limbs.) */
488 4567 : bp_nlimbs = bsec ? (bsize + 1):0;
489 4567 : bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
490 4567 : MPN_COPY ( bp, base->d, bsize );
491 : /* We don't care about the quotient, store it above the
492 : * remainder, at BP + MSIZE. */
493 4567 : _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
494 4567 : bsize = msize;
495 : /* Canonicalize the base, since we are going to multiply with it
496 : quite a few times. */
497 4567 : MPN_NORMALIZE( bp, bsize );
498 : }
499 : else
500 207858 : bp = base->d;
501 :
502 212425 : if (!bsize)
503 : {
504 0 : res->nlimbs = 0;
505 0 : res->sign = 0;
506 0 : goto leave;
507 : }
508 :
509 :
510 : /* Make BASE, EXPO not overlap with RES. We don't need to check MOD
511 : because that has already been copied to the MP var. */
512 212425 : if ( rp == bp )
513 : {
514 : /* RES and BASE are identical. Allocate temp. space for BASE. */
515 4533 : gcry_assert (!bp_marker);
516 4533 : bp_nlimbs = bsec? bsize:0;
517 4533 : bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
518 4533 : MPN_COPY(bp, rp, bsize);
519 : }
520 212425 : if ( rp == ep )
521 : {
522 : /* RES and EXPO are identical. Allocate temp. space for EXPO. */
523 3 : ep_nlimbs = esec? esize:0;
524 3 : ep = ep_marker = mpi_alloc_limb_space( esize, esec );
525 3 : MPN_COPY(ep, rp, esize);
526 : }
527 :
528 : /* Copy base to the result. */
529 212425 : if (res->alloced < size)
530 : {
531 8358 : mpi_resize (res, size);
532 8358 : rp = res->d;
533 : }
534 :
535 : /* Main processing. */
536 : {
537 : mpi_size_t i, j, k;
538 : mpi_ptr_t xp;
539 : mpi_size_t xsize;
540 : int c;
541 : mpi_limb_t e;
542 : mpi_limb_t carry_limb;
543 : struct karatsuba_ctx karactx;
544 : mpi_ptr_t tp;
545 :
546 212425 : xp_nlimbs = msec? (2 * (msize + 1)):0;
547 212425 : xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
548 :
549 212425 : memset( &karactx, 0, sizeof karactx );
550 212425 : negative_result = (ep[0] & 1) && bsign;
551 :
552 : /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
553 212425 : if (W > 1) /* X := BASE^2 */
554 14338 : mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
555 212425 : base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
556 212425 : base_u_size = max_u_size = precomp_size[0] = bsize;
557 212425 : MPN_COPY (precomp[0], bp, bsize);
558 310697 : for (i = 1; i < (1 << (W - 1)); i++)
559 : { /* PRECOMP[i] = BASE^(2 * i + 1) */
560 98272 : if (xsize >= base_u_size)
561 57492 : mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
562 : mp, msize, &karactx);
563 : else
564 40780 : mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
565 : mp, msize, &karactx);
566 98272 : base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
567 98272 : base_u_size = precomp_size[i] = rsize;
568 98272 : if (max_u_size < base_u_size)
569 24330 : max_u_size = base_u_size;
570 98272 : MPN_COPY (precomp[i], rp, rsize);
571 : }
572 :
573 212425 : base_u = mpi_alloc_limb_space (max_u_size, esec);
574 212425 : MPN_ZERO (base_u, max_u_size);
575 :
576 212425 : i = esize - 1;
577 :
578 : /* Main loop.
579 :
580 : Make the result be pointed to alternately by XP and RP. This
581 : helps us avoid block copying, which would otherwise be
582 : necessary with the overlap restrictions of
583 : _gcry_mpih_divmod. With 50% probability the result after this
584 : loop will be in the area originally pointed by RP (==RES->d),
585 : and with 50% probability in the area originally pointed to by XP. */
586 212425 : rsign = 0;
587 212425 : if (W == 1)
588 : {
589 198087 : rsize = bsize;
590 : }
591 : else
592 : {
593 14338 : rsize = msize;
594 14338 : MPN_ZERO (rp, rsize);
595 : }
596 212425 : MPN_COPY ( rp, bp, bsize );
597 :
598 212425 : e = ep[i];
599 212425 : count_leading_zeros (c, e);
600 212425 : e = (e << c) << 1;
601 212425 : c = BITS_PER_MPI_LIMB - 1 - c;
602 :
603 212425 : j = 0;
604 :
605 : for (;;)
606 1719511 : if (e == 0)
607 : {
608 238521 : j += c;
609 238521 : i--;
610 238521 : if ( i < 0 )
611 : {
612 203581 : c = 0;
613 203581 : break;
614 : }
615 :
616 34940 : e = ep[i];
617 34940 : c = BITS_PER_MPI_LIMB;
618 : }
619 : else
620 : {
621 : int c0;
622 : mpi_limb_t e0;
623 :
624 1480990 : count_leading_zeros (c0, e);
625 1480990 : e = (e << c0);
626 1480990 : c -= c0;
627 1480990 : j += c0;
628 :
629 1480990 : if (c >= W)
630 : {
631 1412364 : e0 = (e >> (BITS_PER_MPI_LIMB - W));
632 1412364 : e = (e << W);
633 1412364 : c -= W;
634 : }
635 : else
636 : {
637 68626 : i--;
638 68626 : if ( i < 0 )
639 : {
640 8844 : e = (e >> (BITS_PER_MPI_LIMB - c));
641 8844 : break;
642 : }
643 :
644 59782 : c0 = c;
645 119564 : e0 = (e >> (BITS_PER_MPI_LIMB - W))
646 59782 : | (ep[i] >> (BITS_PER_MPI_LIMB - W + c0));
647 59782 : e = (ep[i] << (W - c0));
648 59782 : c = BITS_PER_MPI_LIMB - W + c0;
649 : }
650 :
651 1472146 : count_trailing_zeros (c0, e0);
652 1472146 : e0 = (e0 >> c0) >> 1;
653 :
654 8384388 : for (j += W - c0; j; j--)
655 : {
656 6912242 : mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
657 6912242 : tp = rp; rp = xp; xp = tp;
658 6912242 : rsize = xsize;
659 : }
660 :
661 : /*
662 : * base_u <= precomp[e0]
663 : * base_u_size <= precomp_size[e0]
664 : */
665 1472146 : base_u_size = 0;
666 16105433 : for (k = 0; k < (1<< (W - 1)); k++)
667 : {
668 : struct gcry_mpi w, u;
669 14633287 : w.alloced = w.nlimbs = precomp_size[k];
670 14633287 : u.alloced = u.nlimbs = precomp_size[k];
671 14633287 : w.sign = u.sign = 0;
672 14633287 : w.flags = u.flags = 0;
673 14633287 : w.d = base_u;
674 14633287 : u.d = precomp[k];
675 :
676 14633287 : mpi_set_cond (&w, &u, k == e0);
677 14633287 : base_u_size |= (precomp_size[k] & ((mpi_size_t)0 - (k == e0)) );
678 : }
679 :
680 1472146 : mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
681 : mp, msize, &karactx);
682 1472146 : tp = rp; rp = xp; xp = tp;
683 1472146 : rsize = xsize;
684 :
685 1472146 : j = c0;
686 1507086 : }
687 :
688 212425 : if (c != 0)
689 : {
690 8844 : j += c;
691 8844 : count_trailing_zeros (c, e);
692 8844 : e = (e >> c);
693 8844 : j -= c;
694 : }
695 :
696 531769 : while (j--)
697 : {
698 106919 : mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
699 106919 : tp = rp; rp = xp; xp = tp;
700 106919 : rsize = xsize;
701 : }
702 :
703 212425 : if (e != 0)
704 : {
705 : /*
706 : * base_u <= precomp[(e>>1)]
707 : * base_u_size <= precomp_size[(e>>1)]
708 : */
709 8844 : base_u_size = 0;
710 85132 : for (k = 0; k < (1<< (W - 1)); k++)
711 : {
712 : struct gcry_mpi w, u;
713 76288 : w.alloced = w.nlimbs = precomp_size[k];
714 76288 : u.alloced = u.nlimbs = precomp_size[k];
715 76288 : w.sign = u.sign = 0;
716 76288 : w.flags = u.flags = 0;
717 76288 : w.d = base_u;
718 76288 : u.d = precomp[k];
719 :
720 76288 : mpi_set_cond (&w, &u, k == (e>>1));
721 76288 : base_u_size |= (precomp_size[k] & ((mpi_size_t)0 - (k == (e>>1))) );
722 : }
723 :
724 8844 : mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
725 : mp, msize, &karactx);
726 8844 : tp = rp; rp = xp; xp = tp;
727 8844 : rsize = xsize;
728 :
729 11170 : for (; c; c--)
730 : {
731 2326 : mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
732 2326 : tp = rp; rp = xp; xp = tp;
733 2326 : rsize = xsize;
734 : }
735 : }
736 :
737 : /* We shifted MOD, the modulo reduction argument, left
738 : MOD_SHIFT_CNT steps. Adjust the result by reducing it with the
739 : original MOD.
740 :
741 : Also make sure the result is put in RES->d (where it already
742 : might be, see above). */
743 212425 : if ( mod_shift_cnt )
744 : {
745 100759 : carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
746 100759 : rp = res->d;
747 100759 : if ( carry_limb )
748 : {
749 49891 : rp[rsize] = carry_limb;
750 49891 : rsize++;
751 : }
752 : }
753 111666 : else if (res->d != rp)
754 : {
755 6619 : MPN_COPY (res->d, rp, rsize);
756 6619 : rp = res->d;
757 : }
758 :
759 212425 : if ( rsize >= msize )
760 : {
761 166865 : _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
762 166865 : rsize = msize;
763 : }
764 :
765 : /* Remove any leading zero words from the result. */
766 212425 : if ( mod_shift_cnt )
767 100759 : _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
768 212425 : MPN_NORMALIZE (rp, rsize);
769 :
770 212425 : _gcry_mpih_release_karatsuba_ctx (&karactx );
771 523122 : for (i = 0; i < (1 << (W - 1)); i++)
772 310697 : _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 );
773 212425 : _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0);
774 : }
775 :
776 : /* Fixup for negative results. */
777 212425 : if ( negative_result && rsize )
778 : {
779 44940 : if ( mod_shift_cnt )
780 21159 : _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
781 44940 : _gcry_mpih_sub( rp, mp, msize, rp, rsize);
782 44940 : rsize = msize;
783 44940 : rsign = msign;
784 44940 : MPN_NORMALIZE(rp, rsize);
785 : }
786 212425 : gcry_assert (res->d == rp);
787 212425 : res->nlimbs = rsize;
788 212425 : res->sign = rsign;
789 :
790 : leave:
791 212425 : if (mp_marker)
792 212425 : _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
793 212425 : if (bp_marker)
794 9100 : _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
795 212425 : if (ep_marker)
796 3 : _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
797 212425 : if (xp_marker)
798 212425 : _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
799 212425 : }
800 : #endif
|