-
+ 2D6F3C1ECB3331CDFB54C9EFA53246CAC6A6DFA16465D2E354C6801974964229CB173E7AFF19C4C92E896B8C65CFBC6DA79EF03C20022772F5C299F1C3CBC063
mpi/mpih-mul.c
(0 . 0)(1 . 527)
9316 /* mpihelp-mul.c - MPI helper functions
9317 * Copyright (C) 1994, 1996, 1998, 1999,
9318 * 2000 Free Software Foundation, Inc.
9319 *
9320 * This file is part of GnuPG.
9321 *
9322 * GnuPG is free software; you can redistribute it and/or modify
9323 * it under the terms of the GNU General Public License as published by
9324 * the Free Software Foundation; either version 3 of the License, or
9325 * (at your option) any later version.
9326 *
9327 * GnuPG is distributed in the hope that it will be useful,
9328 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9329 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9330 * GNU General Public License for more details.
9331 *
9332 * You should have received a copy of the GNU General Public License
9333 * along with this program; if not, see <http://www.gnu.org/licenses/>.
9334 *
9335 * Note: This code is heavily based on the GNU MP Library.
9336 * Actually it's the same code with only minor changes in the
9337 * way the data is stored; this is to support the abstraction
9338 * of an optional secure memory allocation which may be used
9339 * to avoid revealing of sensitive data due to paging etc.
9340 * The GNU MP Library itself is published under the LGPL;
9341 * however I decided to publish this code under the plain GPL.
9342 */
9343
9344 #include <config.h>
9345 #include <stdio.h>
9346 #include <stdlib.h>
9347 #include <string.h>
9348 #include "mpi-internal.h"
9349 #include "longlong.h"
9350
9351
9352
/* Dispatch a SIZE-limb multiply: schoolbook base case below the
 * Karatsuba threshold, recursive mul_n() above it.  Wrapped in
 * do { ... } while (0) WITHOUT a trailing semicolon so an invocation
 * behaves as a single statement (e.g. inside "if (x) MACRO(...); else").
 * The original trailing ';' created an extra empty statement that would
 * break such usage. */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
    do {                                               \
        if( (size) < KARATSUBA_THRESHOLD )             \
            mul_n_basecase (prodp, up, vp, size);      \
        else                                           \
            mul_n (prodp, up, vp, size, tspace);       \
    } while (0)
9360
/* Squaring analogue of MPN_MUL_N_RECURSE: base-case square below the
 * Karatsuba threshold, recursive mpih_sqr_n() above it.  No trailing
 * semicolon after while (0) so the macro expands to exactly one
 * statement (the original ';' broke the do/while(0) idiom). */
#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)     \
    do {                                               \
        if ((size) < KARATSUBA_THRESHOLD)              \
            mpih_sqr_n_basecase (prodp, up, size);     \
        else                                           \
            mpih_sqr_n (prodp, up, size, tspace);      \
    } while (0)
9368
9369
9370
9371
9372 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
9373 * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
9374 * always stored. Return the most significant limb.
9375 *
9376 * Argument constraints:
9377 * 1. PRODP != UP and PRODP != VP, i.e. the destination
9378 * must be distinct from the multiplier and the multiplicand.
9379 *
9380 *
9381 * Handle simple cases with traditional multiplication.
9382 *
9383 * This is the most critical code of multiplication. All multiplies rely
9384 * on this, both small and huge. Small ones arrive here immediately. Huge
9385 * ones arrive here as this is the base case for Karatsuba's recursive
9386 * algorithm below.
9387 */
9388
9389 static mpi_limb_t
9390 mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
9391 mpi_ptr_t vp, mpi_size_t size)
9392 {
9393 mpi_size_t i;
9394 mpi_limb_t cy;
9395 mpi_limb_t v_limb;
9396
9397 /* Multiply by the first limb in V separately, as the result can be
9398 * stored (not added) to PROD. We also avoid a loop for zeroing. */
9399 v_limb = vp[0];
9400 if( v_limb <= 1 ) {
9401 if( v_limb == 1 )
9402 MPN_COPY( prodp, up, size );
9403 else
9404 MPN_ZERO( prodp, size );
9405 cy = 0;
9406 }
9407 else
9408 cy = mpihelp_mul_1( prodp, up, size, v_limb );
9409
9410 prodp[size] = cy;
9411 prodp++;
9412
9413 /* For each iteration in the outer loop, multiply one limb from
9414 * U with one limb from V, and add it to PROD. */
9415 for( i = 1; i < size; i++ ) {
9416 v_limb = vp[i];
9417 if( v_limb <= 1 ) {
9418 cy = 0;
9419 if( v_limb == 1 )
9420 cy = mpihelp_add_n(prodp, prodp, up, size);
9421 }
9422 else
9423 cy = mpihelp_addmul_1(prodp, up, size, v_limb);
9424
9425 prodp[size] = cy;
9426 prodp++;
9427 }
9428
9429 return cy;
9430 }
9431
9432
/* Karatsuba multiplication of the SIZE-limb operands UP and VP into
 * PRODP (2*SIZE limbs are written).  TSPACE is scratch space; the
 * callers in this file allocate 2*SIZE limbs for it.  Only reached
 * for SIZE >= KARATSUBA_THRESHOLD (see MPN_MUL_N_RECURSE).
 * NOTE(review): the add/sub sequence below operates in place on
 * overlapping halves of PRODP; statement order is load-bearing. */
static void
mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
       mpi_size_t size, mpi_ptr_t tspace )
{
    if( size & 1 ) {
	/* The size is odd, and the code below doesn't handle that.
	 * Multiply the least significant (size - 1) limbs with a recursive
	 * call, and handle the most significant limb of S1 and S2
	 * separately.
	 * A slightly faster way to do this would be to make the Karatsuba
	 * code below behave as if the size were even, and let it check for
	 * odd size in the end. I.e., in essence move this code to the end.
	 * Doing so would save us a recursive call, and potentially make the
	 * stack grow a lot less.
	 */
	mpi_size_t esize = size - 1;	 /* even size */
	mpi_limb_t cy_limb;

	MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
	/* Add u[0..esize-1] * v[esize] at limb offset esize ... */
	cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
	prodp[esize + esize] = cy_limb;
	/* ... then v[0..size-1] * u[esize]; the full SIZE here makes the
	 * cross term u[esize]*v[esize] contribute exactly once. */
	cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
	prodp[esize + size] = cy_limb;
    }
    else {
	/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
	 *
	 * Split U in two pieces, U1 and U0, such that
	 * U = U0 + U1*(B**n),
	 * and V in V1 and V0, such that
	 * V = V0 + V1*(B**n).
	 *
	 * UV is then computed recursively using the identity
	 *
	 *	  2n   n	  n			n
	 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
	 *		  1 1	    1  0   0  1		      0 0
	 *
	 * Where B = 2**BITS_PER_MP_LIMB.
	 */
	mpi_size_t hsize = size >> 1;
	mpi_limb_t cy;
	int negflg;		/* set when the middle product M is negative */

	/* Product H.	   ________________  ________________
	 *		  |_____U1 x V1____||____U0 x V0_____|
	 * Put result in upper part of PROD and pass low part of TSPACE
	 * as new TSPACE.
	 */
	MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);

	/* Product M.	   ________________
	 *		  |_(U1-U0)(V0-V1)_|
	 * Form |U1-U0| and |V1-V0| in the low half of PROD, tracking the
	 * combined sign of the difference product in NEGFLG.
	 */
	if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
	    mpihelp_sub_n(prodp, up + hsize, up, hsize);
	    negflg = 0;
	}
	else {
	    mpihelp_sub_n(prodp, up, up + hsize, hsize);
	    negflg = 1;
	}
	if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
	    mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
	    negflg ^= 1;
	}
	else {
	    mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
	    /* No change of NEGFLG. */
	}
	/* Read temporary operands from low part of PROD.
	 * Put result in low part of TSPACE using upper part of TSPACE
	 * as new TSPACE.
	 */
	MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);

	/* Add/copy product H. */
	MPN_COPY (prodp + hsize, prodp + size, hsize);
	cy = mpihelp_add_n( prodp + size, prodp + size,
			    prodp + size + hsize, hsize);

	/* Add product M (if NEGFLG M is a negative number) */
	if(negflg)
	    cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
	else
	    cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);

	/* Product L.	   ________________  ________________
	 *		  |________________||____U0 x V0_____|
	 * Read temporary operands from low part of PROD.
	 * Put result in low part of TSPACE using upper part of TSPACE
	 * as new TSPACE.
	 */
	MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);

	/* Add/copy Product L (twice) */

	cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
	if( cy )
	    mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);

	MPN_COPY(prodp, tspace, hsize);
	cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
	if( cy )
	    mpihelp_add_1(prodp + size, prodp + size, size, 1);
    }
}
9540
9541
9542 void
9543 mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
9544 {
9545 mpi_size_t i;
9546 mpi_limb_t cy_limb;
9547 mpi_limb_t v_limb;
9548
9549 /* Multiply by the first limb in V separately, as the result can be
9550 * stored (not added) to PROD. We also avoid a loop for zeroing. */
9551 v_limb = up[0];
9552 if( v_limb <= 1 ) {
9553 if( v_limb == 1 )
9554 MPN_COPY( prodp, up, size );
9555 else
9556 MPN_ZERO(prodp, size);
9557 cy_limb = 0;
9558 }
9559 else
9560 cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );
9561
9562 prodp[size] = cy_limb;
9563 prodp++;
9564
9565 /* For each iteration in the outer loop, multiply one limb from
9566 * U with one limb from V, and add it to PROD. */
9567 for( i=1; i < size; i++) {
9568 v_limb = up[i];
9569 if( v_limb <= 1 ) {
9570 cy_limb = 0;
9571 if( v_limb == 1 )
9572 cy_limb = mpihelp_add_n(prodp, prodp, up, size);
9573 }
9574 else
9575 cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
9576
9577 prodp[size] = cy_limb;
9578 prodp++;
9579 }
9580 }
9581
9582
/* Karatsuba squaring of the SIZE-limb operand UP into PRODP (2*SIZE
 * limbs written).  TSPACE is scratch; callers in this file allocate
 * 2*SIZE limbs for it.  Mirrors mul_n() with V == U; the middle term
 * (U1-U0)(U0-U1) is always <= 0, so no sign flag is needed. */
void
mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
    if( size & 1 ) {
	/* The size is odd, and the code below doesn't handle that.
	 * Multiply the least significant (size - 1) limbs with a recursive
	 * call, and handle the most significant limb of S1 and S2
	 * separately.
	 * A slightly faster way to do this would be to make the Karatsuba
	 * code below behave as if the size were even, and let it check for
	 * odd size in the end. I.e., in essence move this code to the end.
	 * Doing so would save us a recursive call, and potentially make the
	 * stack grow a lot less.
	 */
	mpi_size_t esize = size - 1;	 /* even size */
	mpi_limb_t cy_limb;

	MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
	/* Add u[0..esize-1] * u[esize] at offset esize ... */
	cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
	prodp[esize + esize] = cy_limb;
	/* ... then u[0..size-1] * u[esize]; the full SIZE in this second
	 * call supplies the u[esize]^2 term exactly once. */
	cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );

	prodp[esize + size] = cy_limb;
    }
    else {
	mpi_size_t hsize = size >> 1;
	mpi_limb_t cy;

	/* Product H.	   ________________  ________________
	 *		  |_____U1 x U1____||____U0 x U0_____|
	 * Put result in upper part of PROD and pass low part of TSPACE
	 * as new TSPACE.
	 */
	MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);

	/* Product M.	   ________________
	 *		  |_(U1-U0)(U0-U1)_|
	 * Only |U1-U0| is needed; (U1-U0)(U0-U1) = -|U1-U0|^2, so the
	 * middle product is unconditionally subtracted below.
	 */
	if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
	    mpihelp_sub_n( prodp, up + hsize, up, hsize);
	else
	    mpihelp_sub_n (prodp, up, up + hsize, hsize);

	/* Read temporary operands from low part of PROD.
	 * Put result in low part of TSPACE using upper part of TSPACE
	 * as new TSPACE.  */
	MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);

	/* Add/copy product H  */
	MPN_COPY(prodp + hsize, prodp + size, hsize);
	cy = mpihelp_add_n(prodp + size, prodp + size,
			   prodp + size + hsize, hsize);

	/* Add product M (if NEGFLG M is a negative number).  */
	cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);

	/* Product L.	   ________________  ________________
	 *		  |________________||____U0 x U0_____|
	 * Read temporary operands from low part of PROD.
	 * Put result in low part of TSPACE using upper part of TSPACE
	 * as new TSPACE.  */
	MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

	/* Add/copy Product L (twice).	*/
	cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
	if( cy )
	    mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
			  hsize, cy);

	MPN_COPY(prodp, tspace, hsize);
	cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
	if( cy )
	    mpihelp_add_1 (prodp + size, prodp + size, size, 1);
    }
}
9658
9659
9660 /* This should be made into an inline function in gmp.h. */
9661 void
9662 mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
9663 {
9664 int secure;
9665
9666 if( up == vp ) {
9667 if( size < KARATSUBA_THRESHOLD )
9668 mpih_sqr_n_basecase( prodp, up, size );
9669 else {
9670 mpi_ptr_t tspace;
9671 secure = m_is_secure( up );
9672 tspace = mpi_alloc_limb_space( 2 * size, secure );
9673 mpih_sqr_n( prodp, up, size, tspace );
9674 mpi_free_limb_space( tspace );
9675 }
9676 }
9677 else {
9678 if( size < KARATSUBA_THRESHOLD )
9679 mul_n_basecase( prodp, up, vp, size );
9680 else {
9681 mpi_ptr_t tspace;
9682 secure = m_is_secure( up ) || m_is_secure( vp );
9683 tspace = mpi_alloc_limb_space( 2 * size, secure );
9684 mul_n (prodp, up, vp, size, tspace);
9685 mpi_free_limb_space( tspace );
9686 }
9687 }
9688 }
9689
9690
9691
/* Multiply U (USIZE limbs) by V (VSIZE limbs) into PRODP by processing
 * U in VSIZE-limb chunks, each chunk handled with MPN_MUL_N_RECURSE and
 * accumulated into the product.  Scratch buffers are cached in CTX so
 * repeated calls (e.g. from mpihelp_mul) reuse them; release with
 * mpihelp_release_karatsuba_ctx().  Assumes USIZE >= VSIZE — the caller
 * mpihelp_mul documents that constraint; TODO confirm for other callers. */
void
mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
			    mpi_ptr_t up, mpi_size_t usize,
			    mpi_ptr_t vp, mpi_size_t vsize,
			    struct karatsuba_ctx *ctx )
{
    mpi_limb_t cy;

    /* (Re)allocate the cached scratch if absent or too small for VSIZE. */
    if( !ctx->tspace || ctx->tspace_size < vsize ) {
	if( ctx->tspace )
	    mpi_free_limb_space( ctx->tspace );
	ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
				       m_is_secure( up ) || m_is_secure( vp ) );
	ctx->tspace_size = vsize;
    }

    /* First chunk: product is stored directly into PRODP. */
    MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );

    prodp += vsize;
    up += vsize;
    usize -= vsize;
    if( usize >= vsize ) {
	/* Cached buffer for each full chunk's 2*VSIZE-limb product. */
	if( !ctx->tp || ctx->tp_size < vsize ) {
	    if( ctx->tp )
		mpi_free_limb_space( ctx->tp );
	    ctx->tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up )
						      || m_is_secure( vp ) );
	    ctx->tp_size = vsize;
	}

	do {
	    MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
	    /* Low half of the chunk product is added into PRODP; the high
	     * half plus the resulting carry is written to the next region. */
	    cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
	    mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
	    prodp += vsize;
	    up += vsize;
	    usize -= vsize;
	} while( usize >= vsize );
    }

    /* Partial final chunk: multiply the remaining USIZE < VSIZE limbs
     * (operands swapped so the larger one comes first) into tspace,
     * then accumulate into the product tail. */
    if( usize ) {
	if( usize < KARATSUBA_THRESHOLD ) {
	    mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
	}
	else {
	    /* Recurse with a chained context so nested scratch buffers
	     * are cached independently of this level's. */
	    if( !ctx->next ) {
		ctx->next = xmalloc_clear( sizeof *ctx );
	    }
	    mpihelp_mul_karatsuba_case( ctx->tspace,
					vp, vsize,
					up, usize,
					ctx->next );
	}

	cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
	mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
    }
}
9750
9751
9752 void
9753 mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
9754 {
9755 struct karatsuba_ctx *ctx2;
9756
9757 if( ctx->tp )
9758 mpi_free_limb_space( ctx->tp );
9759 if( ctx->tspace )
9760 mpi_free_limb_space( ctx->tspace );
9761 for( ctx=ctx->next; ctx; ctx = ctx2 ) {
9762 ctx2 = ctx->next;
9763 if( ctx->tp )
9764 mpi_free_limb_space( ctx->tp );
9765 if( ctx->tspace )
9766 mpi_free_limb_space( ctx->tspace );
9767 xfree( ctx );
9768 }
9769 }
9770
9771 /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
9772 * and v (pointed to by VP, with VSIZE limbs), and store the result at
9773 * PRODP. USIZE + VSIZE limbs are always stored, but if the input
9774 * operands are normalized. Return the most significant limb of the
9775 * result.
9776 *
9777 * NOTE: The space pointed to by PRODP is overwritten before finished
9778 * with U and V, so overlap is an error.
9779 *
9780 * Argument constraints:
9781 * 1. USIZE >= VSIZE.
9782 * 2. PRODP != UP and PRODP != VP, i.e. the destination
9783 * must be distinct from the multiplier and the multiplicand.
9784 */
9785
mpi_limb_t
mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
	     mpi_ptr_t vp, mpi_size_t vsize)
{
    /* Remember the top limb's location now; PRODP is advanced below. */
    mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
    mpi_limb_t cy;
    struct karatsuba_ctx ctx;

    if( vsize < KARATSUBA_THRESHOLD ) {
	mpi_size_t i;
	mpi_limb_t v_limb;

	if( !vsize )		/* empty multiplier: nothing is stored */
	    return 0;

	/* Multiply by the first limb in V separately, as the result can be
	 * stored (not added) to PROD.	We also avoid a loop for zeroing.  */
	v_limb = vp[0];
	if( v_limb <= 1 ) {
	    if( v_limb == 1 )
		MPN_COPY( prodp, up, usize );
	    else
		MPN_ZERO( prodp, usize );
	    cy = 0;
	}
	else
	    cy = mpihelp_mul_1( prodp, up, usize, v_limb );

	prodp[usize] = cy;
	prodp++;

	/* For each iteration in the outer loop, multiply one limb from
	 * U with one limb from V, and add it to PROD.	*/
	for( i = 1; i < vsize; i++ ) {
	    v_limb = vp[i];
	    if( v_limb <= 1 ) {
		cy = 0;
		if( v_limb == 1 )
		   cy = mpihelp_add_n(prodp, prodp, up, usize);
	    }
	    else
		cy = mpihelp_addmul_1(prodp, up, usize, v_limb);

	    prodp[usize] = cy;
	    prodp++;
	}

	return cy;
    }

    /* Large multiplier: use the chunked Karatsuba driver with a
     * throwaway context, then return the most significant limb. */
    memset( &ctx, 0, sizeof ctx );
    mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
    mpihelp_release_karatsuba_ctx( &ctx );
    return *prod_endp;
}
9841
9842