Line data Source code
1 : /* mpih-div.c - MPI helper functions
2 : * Copyright (C) 1994, 1996, 1998, 2000,
3 : * 2001, 2002 Free Software Foundation, Inc.
4 : *
5 : * This file is part of Libgcrypt.
6 : *
7 : * Libgcrypt is free software; you can redistribute it and/or modify
8 : * it under the terms of the GNU Lesser General Public License as
9 : * published by the Free Software Foundation; either version 2.1 of
10 : * the License, or (at your option) any later version.
11 : *
12 : * Libgcrypt is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : * GNU Lesser General Public License for more details.
16 : *
17 : * You should have received a copy of the GNU Lesser General Public
18 : * License along with this program; if not, write to the Free Software
19 : * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
20 : *
21 : * Note: This code is heavily based on the GNU MP Library.
22 : * Actually it's the same code with only minor changes in the
23 : * way the data is stored; this is to support the abstraction
24 : * of an optional secure memory allocation which may be used
25 : * to avoid revealing of sensitive data due to paging etc.
26 : */
27 :
28 : #include <config.h>
29 : #include <stdio.h>
30 : #include <stdlib.h>
31 : #include "mpi-internal.h"
32 : #include "longlong.h"
33 :
34 : #ifndef UMUL_TIME
35 : #define UMUL_TIME 1
36 : #endif
37 : #ifndef UDIV_TIME
38 : #define UDIV_TIME UMUL_TIME
39 : #endif
40 :
41 : /* FIXME: We should be using invert_limb (or invert_normalized_limb)
42 : * here (not udiv_qrnnd).
43 : */
44 :
45 : mpi_limb_t
46 3265089 : _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
47 : mpi_limb_t divisor_limb)
48 : {
49 : mpi_size_t i;
50 : mpi_limb_t n1, n0, r;
51 : mpi_limb_t dummy GCC_ATTR_UNUSED;
52 :
53 : /* Botch: Should this be handled at all? Rely on callers? */
54 3265089 : if( !dividend_size )
55 0 : return 0;
56 :
57 : /* If multiplication is much faster than division, and the
58 : * dividend is large, pre-invert the divisor, and use
59 : * only multiplications in the inner loop.
60 : *
61 : * This test should be read:
62 : * Does it ever help to use udiv_qrnnd_preinv?
63 : * && Does what we save compensate for the inversion overhead?
64 : */
65 : if( UDIV_TIME > (2 * UMUL_TIME + 6)
66 : && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
67 : int normalization_steps;
68 :
69 : count_leading_zeros( normalization_steps, divisor_limb );
70 : if( normalization_steps ) {
71 : mpi_limb_t divisor_limb_inverted;
72 :
73 : divisor_limb <<= normalization_steps;
74 :
75 : /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
76 : * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
77 : * most significant bit (with weight 2**N) implicit.
78 : *
79 : * Special case for DIVISOR_LIMB == 100...000.
80 : */
81 : if( !(divisor_limb << 1) )
82 : divisor_limb_inverted = ~(mpi_limb_t)0;
83 : else
84 : udiv_qrnnd(divisor_limb_inverted, dummy,
85 : -divisor_limb, 0, divisor_limb);
86 :
87 : n1 = dividend_ptr[dividend_size - 1];
88 : r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
89 :
90 : /* Possible optimization:
91 : * if (r == 0
92 : * && divisor_limb > ((n1 << normalization_steps)
93 : * | (dividend_ptr[dividend_size - 2] >> ...)))
94 : * ...one division less...
95 : */
96 : for( i = dividend_size - 2; i >= 0; i--) {
97 : n0 = dividend_ptr[i];
98 : UDIV_QRNND_PREINV(dummy, r, r,
99 : ((n1 << normalization_steps)
100 : | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
101 : divisor_limb, divisor_limb_inverted);
102 : n1 = n0;
103 : }
104 : UDIV_QRNND_PREINV(dummy, r, r,
105 : n1 << normalization_steps,
106 : divisor_limb, divisor_limb_inverted);
107 : return r >> normalization_steps;
108 : }
109 : else {
110 : mpi_limb_t divisor_limb_inverted;
111 :
112 : /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
113 : * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
114 : * most significant bit (with weight 2**N) implicit.
115 : *
116 : * Special case for DIVISOR_LIMB == 100...000.
117 : */
118 : if( !(divisor_limb << 1) )
119 : divisor_limb_inverted = ~(mpi_limb_t)0;
120 : else
121 : udiv_qrnnd(divisor_limb_inverted, dummy,
122 : -divisor_limb, 0, divisor_limb);
123 :
124 : i = dividend_size - 1;
125 : r = dividend_ptr[i];
126 :
127 : if( r >= divisor_limb )
128 : r = 0;
129 : else
130 : i--;
131 :
132 : for( ; i >= 0; i--) {
133 : n0 = dividend_ptr[i];
134 : UDIV_QRNND_PREINV(dummy, r, r,
135 : n0, divisor_limb, divisor_limb_inverted);
136 : }
137 : return r;
138 : }
139 : }
140 : else {
141 : if( UDIV_NEEDS_NORMALIZATION ) {
142 : int normalization_steps;
143 :
144 : count_leading_zeros(normalization_steps, divisor_limb);
145 : if( normalization_steps ) {
146 : divisor_limb <<= normalization_steps;
147 :
148 : n1 = dividend_ptr[dividend_size - 1];
149 : r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
150 :
151 : /* Possible optimization:
152 : * if (r == 0
153 : * && divisor_limb > ((n1 << normalization_steps)
154 : * | (dividend_ptr[dividend_size - 2] >> ...)))
155 : * ...one division less...
156 : */
157 : for(i = dividend_size - 2; i >= 0; i--) {
158 : n0 = dividend_ptr[i];
159 : udiv_qrnnd (dummy, r, r,
160 : ((n1 << normalization_steps)
161 : | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
162 : divisor_limb);
163 : n1 = n0;
164 : }
165 : udiv_qrnnd (dummy, r, r,
166 : n1 << normalization_steps,
167 : divisor_limb);
168 : return r >> normalization_steps;
169 : }
170 : }
171 : /* No normalization needed, either because udiv_qrnnd doesn't require
172 : * it, or because DIVISOR_LIMB is already normalized. */
173 3265089 : i = dividend_size - 1;
174 3265089 : r = dividend_ptr[i];
175 :
176 3265089 : if(r >= divisor_limb)
177 3231309 : r = 0;
178 : else
179 33780 : i--;
180 :
181 68263703 : for(; i >= 0; i--) {
182 64998614 : n0 = dividend_ptr[i];
183 64998614 : udiv_qrnnd (dummy, r, r, n0, divisor_limb);
184 : }
185 3265089 : return r;
186 : }
187 : }
188 :
189 : /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
190 : * the NSIZE-DSIZE least significant quotient limbs at QP
191 : * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
192 : * non-zero, generate that many fraction bits and append them after the
193 : * other quotient limbs.
194 : * Return the most significant limb of the quotient, this is always 0 or 1.
195 : *
196 : * Preconditions:
197 : * 0. NSIZE >= DSIZE.
198 : * 1. The most significant bit of the divisor must be set.
199 : * 2. QP must either not overlap with the input operands at all, or
200 : * QP + DSIZE >= NP must hold true. (This means that it's
201 : * possible to put the quotient in the high part of NUM, right after the
202 : * remainder in NUM.
203 : * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
204 : */
205 :
206 : mpi_limb_t
207 52804823 : _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
208 : mpi_ptr_t np, mpi_size_t nsize,
209 : mpi_ptr_t dp, mpi_size_t dsize)
210 : {
211 52804823 : mpi_limb_t most_significant_q_limb = 0;
212 :
213 52804823 : switch(dsize) {
214 : case 0:
215 0 : _gcry_divide_by_zero();
216 : break;
217 :
218 : case 1:
219 : {
220 : mpi_size_t i;
221 : mpi_limb_t n1;
222 : mpi_limb_t d;
223 :
224 12767 : d = dp[0];
225 12767 : n1 = np[nsize - 1];
226 :
227 12767 : if( n1 >= d ) {
228 0 : n1 -= d;
229 0 : most_significant_q_limb = 1;
230 : }
231 :
232 12767 : qp += qextra_limbs;
233 25534 : for( i = nsize - 2; i >= 0; i--)
234 12767 : udiv_qrnnd( qp[i], n1, n1, np[i], d );
235 12767 : qp -= qextra_limbs;
236 :
237 12767 : for( i = qextra_limbs - 1; i >= 0; i-- )
238 0 : udiv_qrnnd (qp[i], n1, n1, 0, d);
239 :
240 12767 : np[0] = n1;
241 : }
242 12767 : break;
243 :
244 : case 2:
245 : {
246 : mpi_size_t i;
247 : mpi_limb_t n1, n0, n2;
248 : mpi_limb_t d1, d0;
249 :
250 304844 : np += nsize - 2;
251 304844 : d1 = dp[1];
252 304844 : d0 = dp[0];
253 304844 : n1 = np[1];
254 304844 : n0 = np[0];
255 :
256 304844 : if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) {
257 970 : sub_ddmmss (n1, n0, n1, n0, d1, d0);
258 970 : most_significant_q_limb = 1;
259 : }
260 :
261 899053 : for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) {
262 : mpi_limb_t q;
263 : mpi_limb_t r;
264 :
265 594209 : if( i >= qextra_limbs )
266 594209 : np--;
267 : else
268 0 : np[0] = 0;
269 :
270 594209 : if( n1 == d1 ) {
271 : /* Q should be either 111..111 or 111..110. Need special
272 : * treatment of this rare case as normal division would
273 : * give overflow. */
274 0 : q = ~(mpi_limb_t)0;
275 :
276 0 : r = n0 + d1;
277 0 : if( r < d1 ) { /* Carry in the addition? */
278 0 : add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 );
279 0 : qp[i] = q;
280 0 : continue;
281 : }
282 0 : n1 = d0 - (d0 != 0?1:0);
283 0 : n0 = -d0;
284 : }
285 : else {
286 594209 : udiv_qrnnd (q, r, n1, n0, d1);
287 594209 : umul_ppmm (n1, n0, d0, q);
288 : }
289 :
290 594209 : n2 = np[0];
291 : q_test:
292 663547 : if( n1 > r || (n1 == r && n0 > n2) ) {
293 : /* The estimated Q was too large. */
294 113738 : q--;
295 113738 : sub_ddmmss (n1, n0, n1, n0, 0, d0);
296 113738 : r += d1;
297 113738 : if( r >= d1 ) /* If not carry, test Q again. */
298 69338 : goto q_test;
299 : }
300 :
301 594209 : qp[i] = q;
302 594209 : sub_ddmmss (n1, n0, r, n2, n1, n0);
303 : }
304 304844 : np[1] = n1;
305 304844 : np[0] = n0;
306 : }
307 304844 : break;
308 :
309 : default:
310 : {
311 : mpi_size_t i;
312 : mpi_limb_t dX, d1, n0;
313 :
314 52487212 : np += nsize - dsize;
315 52487212 : dX = dp[dsize - 1];
316 52487212 : d1 = dp[dsize - 2];
317 52487212 : n0 = np[dsize - 1];
318 :
319 52487212 : if( n0 >= dX ) {
320 136613 : if(n0 > dX || _gcry_mpih_cmp(np, dp, dsize - 1) >= 0 ) {
321 118451 : _gcry_mpih_sub_n(np, np, dp, dsize);
322 118451 : n0 = np[dsize - 1];
323 118451 : most_significant_q_limb = 1;
324 : }
325 : }
326 :
327 439743959 : for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
328 : mpi_limb_t q;
329 : mpi_limb_t n1, n2;
330 : mpi_limb_t cy_limb;
331 :
332 387256747 : if( i >= qextra_limbs ) {
333 387256747 : np--;
334 387256747 : n2 = np[dsize];
335 : }
336 : else {
337 0 : n2 = np[dsize - 1];
338 0 : MPN_COPY_DECR (np + 1, np, dsize - 1);
339 0 : np[0] = 0;
340 : }
341 :
342 387256747 : if( n0 == dX ) {
343 : /* This might over-estimate q, but it's probably not worth
344 : * the extra code here to find out. */
345 10638 : q = ~(mpi_limb_t)0;
346 : }
347 : else {
348 : mpi_limb_t r;
349 :
350 387246109 : udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
351 387246109 : umul_ppmm(n1, n0, d1, q);
352 :
353 823371583 : while( n1 > r || (n1 == r && n0 > np[dsize - 2])) {
354 125156920 : q--;
355 125156920 : r += dX;
356 125156920 : if( r < dX ) /* I.e. "carry in previous addition?" */
357 76277555 : break;
358 48879365 : n1 -= n0 < d1;
359 48879365 : n0 -= d1;
360 : }
361 : }
362 :
363 : /* Possible optimization: We already have (q * n0) and (1 * n1)
364 : * after the calculation of q. Taking advantage of that, we
365 : * could make this loop make two iterations less. */
366 387256747 : cy_limb = _gcry_mpih_submul_1(np, dp, dsize, q);
367 :
368 387256747 : if( n2 != cy_limb ) {
369 8799 : _gcry_mpih_add_n(np, np, dp, dsize);
370 8799 : q--;
371 : }
372 :
373 387256747 : qp[i] = q;
374 387256747 : n0 = np[dsize - 1];
375 : }
376 : }
377 : }
378 :
379 52804823 : return most_significant_q_limb;
380 : }
381 :
382 :
383 : /****************
384 : * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
385 : * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
386 : * Return the single-limb remainder.
387 : * There are no constraints on the value of the divisor.
388 : *
389 : * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
390 : */
391 :
392 : mpi_limb_t
393 63 : _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr,
394 : mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
395 : mpi_limb_t divisor_limb)
396 : {
397 : mpi_size_t i;
398 : mpi_limb_t n1, n0, r;
399 : mpi_limb_t dummy GCC_ATTR_UNUSED;
400 :
401 63 : if( !dividend_size )
402 0 : return 0;
403 :
404 : /* If multiplication is much faster than division, and the
405 : * dividend is large, pre-invert the divisor, and use
406 : * only multiplications in the inner loop.
407 : *
408 : * This test should be read:
409 : * Does it ever help to use udiv_qrnnd_preinv?
410 : * && Does what we save compensate for the inversion overhead?
411 : */
412 : if( UDIV_TIME > (2 * UMUL_TIME + 6)
413 : && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
414 : int normalization_steps;
415 :
416 : count_leading_zeros( normalization_steps, divisor_limb );
417 : if( normalization_steps ) {
418 : mpi_limb_t divisor_limb_inverted;
419 :
420 : divisor_limb <<= normalization_steps;
421 :
422 : /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
423 : * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
424 : * most significant bit (with weight 2**N) implicit.
425 : */
426 : /* Special case for DIVISOR_LIMB == 100...000. */
427 : if( !(divisor_limb << 1) )
428 : divisor_limb_inverted = ~(mpi_limb_t)0;
429 : else
430 : udiv_qrnnd(divisor_limb_inverted, dummy,
431 : -divisor_limb, 0, divisor_limb);
432 :
433 : n1 = dividend_ptr[dividend_size - 1];
434 : r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
435 :
436 : /* Possible optimization:
437 : * if (r == 0
438 : * && divisor_limb > ((n1 << normalization_steps)
439 : * | (dividend_ptr[dividend_size - 2] >> ...)))
440 : * ...one division less...
441 : */
442 : for( i = dividend_size - 2; i >= 0; i--) {
443 : n0 = dividend_ptr[i];
444 : UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r,
445 : ((n1 << normalization_steps)
446 : | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
447 : divisor_limb, divisor_limb_inverted);
448 : n1 = n0;
449 : }
450 : UDIV_QRNND_PREINV( quot_ptr[0], r, r,
451 : n1 << normalization_steps,
452 : divisor_limb, divisor_limb_inverted);
453 : return r >> normalization_steps;
454 : }
455 : else {
456 : mpi_limb_t divisor_limb_inverted;
457 :
458 : /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
459 : * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
460 : * most significant bit (with weight 2**N) implicit.
461 : */
462 : /* Special case for DIVISOR_LIMB == 100...000. */
463 : if( !(divisor_limb << 1) )
464 : divisor_limb_inverted = ~(mpi_limb_t) 0;
465 : else
466 : udiv_qrnnd(divisor_limb_inverted, dummy,
467 : -divisor_limb, 0, divisor_limb);
468 :
469 : i = dividend_size - 1;
470 : r = dividend_ptr[i];
471 :
472 : if( r >= divisor_limb )
473 : r = 0;
474 : else
475 : quot_ptr[i--] = 0;
476 :
477 : for( ; i >= 0; i-- ) {
478 : n0 = dividend_ptr[i];
479 : UDIV_QRNND_PREINV( quot_ptr[i], r, r,
480 : n0, divisor_limb, divisor_limb_inverted);
481 : }
482 : return r;
483 : }
484 : }
485 : else {
486 : if(UDIV_NEEDS_NORMALIZATION) {
487 : int normalization_steps;
488 :
489 : count_leading_zeros (normalization_steps, divisor_limb);
490 : if( normalization_steps ) {
491 : divisor_limb <<= normalization_steps;
492 :
493 : n1 = dividend_ptr[dividend_size - 1];
494 : r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
495 :
496 : /* Possible optimization:
497 : * if (r == 0
498 : * && divisor_limb > ((n1 << normalization_steps)
499 : * | (dividend_ptr[dividend_size - 2] >> ...)))
500 : * ...one division less...
501 : */
502 : for( i = dividend_size - 2; i >= 0; i--) {
503 : n0 = dividend_ptr[i];
504 : udiv_qrnnd (quot_ptr[i + 1], r, r,
505 : ((n1 << normalization_steps)
506 : | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
507 : divisor_limb);
508 : n1 = n0;
509 : }
510 : udiv_qrnnd (quot_ptr[0], r, r,
511 : n1 << normalization_steps,
512 : divisor_limb);
513 : return r >> normalization_steps;
514 : }
515 : }
516 : /* No normalization needed, either because udiv_qrnnd doesn't require
517 : * it, or because DIVISOR_LIMB is already normalized. */
518 63 : i = dividend_size - 1;
519 63 : r = dividend_ptr[i];
520 :
521 63 : if(r >= divisor_limb)
522 63 : r = 0;
523 : else
524 0 : quot_ptr[i--] = 0;
525 :
526 1099 : for(; i >= 0; i--) {
527 1036 : n0 = dividend_ptr[i];
528 1036 : udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb );
529 : }
530 63 : return r;
531 : }
532 : }
|