-
+ 1C5BD39C13569D58A5CFD542D96DF998B63EFD14EDFCDB9488975B69DD7796B21EF656577E9DA950CECE9D9122B7030395F0714CBBA2983B3DAFC860B34C1B85
smg_comms/mpi/mpih-mul.c
(0 . 0)(1 . 519)
6493 /* mpihelp-mul.c - MPI helper functions
6494 * Modified by No Such Labs. (C) 2015. See README.
6495 *
6496 * This file was originally part of Gnu Privacy Guard (GPG), ver. 1.4.10,
6497 * SHA256(gnupg-1.4.10.tar.gz):
6498 * 0bfd74660a2f6cedcf7d8256db4a63c996ffebbcdc2cf54397bfb72878c5a85a
6499 * (C) 1994-2005 Free Software Foundation, Inc.
6500 *
6501 * This program is free software: you can redistribute it and/or modify
6502 * it under the terms of the GNU General Public License as published by
6503 * the Free Software Foundation, either version 3 of the License, or
6504 * (at your option) any later version.
6505 *
6506 * This program is distributed in the hope that it will be useful,
6507 * but WITHOUT ANY WARRANTY; without even the implied warranty of
6508 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6509 * GNU General Public License for more details.
6510 *
6511 * You should have received a copy of the GNU General Public License
6512 * along with this program. If not, see <http://www.gnu.org/licenses/>.
6513 */
6514
6515 #include <stdio.h>
6516 #include <stdlib.h>
6517 #include <string.h>
6518
6519 #include "knobs.h"
6520 #include "mpi-internal.h"
6521 #include "longlong.h"
6522
6523
/* Size dispatcher for the n x n product: schoolbook multiply below
 * KARATSUBA_THRESHOLD, Karatsuba recursion otherwise.
 * NOTE(review): the trailing ';' after "while (0)" defeats the usual
 * do{...}while(0) idiom -- this macro cannot be used as the body of an
 * unbraced if/else.  All call sites in this file use it as a standalone
 * statement, so it is harmless here, but beware before reusing it.
 */
6524 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
6525 do { \
6526 if( (size) < KARATSUBA_THRESHOLD ) \
6527 mul_n_basecase (prodp, up, vp, size); \
6528 else \
6529 mul_n (prodp, up, vp, size, tspace); \
6530 } while (0);
6531
/* Size dispatcher for squaring, mirroring MPN_MUL_N_RECURSE: schoolbook
 * squaring below KARATSUBA_THRESHOLD, Karatsuba recursion otherwise.
 * NOTE(review): same trailing-';' defect as MPN_MUL_N_RECURSE above --
 * unusable as an unbraced if/else body; fine for the standalone-statement
 * uses in this file.
 */
6532 #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
6533 do { \
6534 if ((size) < KARATSUBA_THRESHOLD) \
6535 mpih_sqr_n_basecase (prodp, up, size); \
6536 else \
6537 mpih_sqr_n (prodp, up, size, tspace); \
6538 } while (0);
6539
6540
6541 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
6542 * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
6543 * always stored. Return the most significant limb.
6544 *
6545 * Argument constraints:
6546 * 1. PRODP != UP and PRODP != VP, i.e. the destination
6547 * must be distinct from the multiplier and the multiplicand.
6548 *
6549 *
6550 * Handle simple cases with traditional multiplication.
6551 *
6552 * This is the most critical code of multiplication. All multiplies rely
6553 * on this, both small and huge. Small ones arrive here immediately. Huge
6554 * ones arrive here as this is the base case for Karatsuba's recursive
6555 * algorithm below.
6556 */
6557
/* Schoolbook O(size^2) multiply: stores the 2*size-limb product of the
 * size-limb operands at UP and VP into PRODP and returns the most
 * significant limb (the carry out of the last addmul pass).
 * Limbs of V equal to 0 or 1 are special-cased so the expensive
 * mul/addmul primitives are skipped for them.
 * Assumes size >= 1: vp[0] is read unconditionally, and the returned
 * "cy" is only initialized by the first-limb pass.
 * PRODP must not overlap UP or VP (see contract comment above).
 */
6558 static mpi_limb_t
6559 mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
6560 mpi_ptr_t vp, mpi_size_t size)
6561 {
6562 mpi_size_t i;
6563 mpi_limb_t cy;
6564 mpi_limb_t v_limb;
6565
6566 /* Multiply by the first limb in V separately, as the result can be
6567 * stored (not added) to PROD. We also avoid a loop for zeroing. */
6568 v_limb = vp[0];
6569 if( v_limb <= 1 ) {
6570 if( v_limb == 1 )
6571 MPN_COPY( prodp, up, size );
6572 else
6573 MPN_ZERO( prodp, size );
6574 cy = 0;
6575 }
6576 else
6577 cy = mpihelp_mul_1( prodp, up, size, v_limb );
6578
6579 prodp[size] = cy;
6580 prodp++;
6581
6582 /* For each iteration in the outer loop, multiply one limb from
6583 * U with one limb from V, and add it to PROD. */
6584 for( i = 1; i < size; i++ ) {
6585 v_limb = vp[i];
6586 if( v_limb <= 1 ) {
6587 cy = 0;
6588 if( v_limb == 1 )
6589 cy = mpihelp_add_n(prodp, prodp, up, size);
6590 }
6591 else
6592 cy = mpihelp_addmul_1(prodp, up, size, v_limb);
6593
6594 prodp[size] = cy;
6595 prodp++;
6596 }
6597
6598 return cy;
6599 }
6600
6601
/* Karatsuba multiply: stores the 2*size-limb product of the size-limb
 * operands UP and VP into PRODP.  TSPACE must provide 2*size limbs of
 * scratch (each recursion level passes "tspace + size" down, halving the
 * remaining scratch along with the operand size).
 * Odd sizes are reduced to the even case: the low (size-1)-limb square
 * block is multiplied recursively, then the top limb of each operand is
 * folded in with two addmul_1 passes.
 * Even sizes follow the three-product identity spelled out in the comment
 * below (products H = U1*V1, L = U0*V0, and the middle term
 * M = (U1-U0)(V0-V1)); "negflg" tracks the sign of M, since the code
 * always subtracts the smaller half from the larger to keep the
 * mpihelp_sub_n results non-negative.
 * PRODP must not overlap UP or VP (see contract comment above).
 */
6602 static void
6603 mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
6604 mpi_size_t size, mpi_ptr_t tspace )
6605 {
6606 if( size & 1 ) {
6607 /* The size is odd, and the code below doesn't handle that.
6608 * Multiply the least significant (size - 1) limbs with a recursive
6609 * call, and handle the most significant limb of S1 and S2
6610 * separately.
6611 * A slightly faster way to do this would be to make the Karatsuba
6612 * code below behave as if the size were even, and let it check for
6613 * odd size in the end. I.e., in essence move this code to the end.
6614 * Doing so would save us a recursive call, and potentially make the
6615 * stack grow a lot less.
6616 */
6617 mpi_size_t esize = size - 1; /* even size */
6618 mpi_limb_t cy_limb;
6619
6620 MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
6621 cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
6622 prodp[esize + esize] = cy_limb;
6623 cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
6624 prodp[esize + size] = cy_limb;
6625 }
6626 else {
6627 /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
6628 *
6629 * Split U in two pieces, U1 and U0, such that
6630 * U = U0 + U1*(B**n),
6631 * and V in V1 and V0, such that
6632 * V = V0 + V1*(B**n).
6633 *
6634 * UV is then computed recursively using the identity
6635 *
6636 * 2n n n n
6637 * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
6638 * 1 1 1 0 0 1 0 0
6639 *
6640 * Where B = 2**BITS_PER_MP_LIMB.
6641 */
6642 mpi_size_t hsize = size >> 1;
6643 mpi_limb_t cy;
6644 int negflg;
6645
6646 /* Product H. ________________ ________________
6647 * |_____U1 x V1____||____U0 x V0_____|
6648 * Put result in upper part of PROD and pass low part of TSPACE
6649 * as new TSPACE.
6650 */
6651 MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);
6652
6653 /* Product M. ________________
6654 * |_(U1-U0)(V0-V1)_|
6655 */
6656 if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
6657 mpihelp_sub_n(prodp, up + hsize, up, hsize);
6658 negflg = 0;
6659 }
6660 else {
6661 mpihelp_sub_n(prodp, up, up + hsize, hsize);
6662 negflg = 1;
6663 }
6664 if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
6665 mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
6666 negflg ^= 1;
6667 }
6668 else {
6669 mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
6670 /* No change of NEGFLG. */
6671 }
6672 /* Read temporary operands from low part of PROD.
6673 * Put result in low part of TSPACE using upper part of TSPACE
6674 * as new TSPACE.
6675 */
6676 MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);
6677
6678 /* Add/copy product H. */
6679 MPN_COPY (prodp + hsize, prodp + size, hsize);
6680 cy = mpihelp_add_n( prodp + size, prodp + size,
6681 prodp + size + hsize, hsize);
6682
6683 /* Add product M (if NEGFLG M is a negative number) */
6684 if(negflg)
6685 cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
6686 else
6687 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
6688
6689 /* Product L. ________________ ________________
6690 * |________________||____U0 x V0_____|
6691 * Read temporary operands from low part of PROD.
6692 * Put result in low part of TSPACE using upper part of TSPACE
6693 * as new TSPACE.
6694 */
6695 MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
6696
6697 /* Add/copy Product L (twice) */
6698
6699 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
6700 if( cy )
6701 mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);
6702
6703 MPN_COPY(prodp, tspace, hsize);
6704 cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
6705 if( cy )
6706 mpihelp_add_1(prodp + size, prodp + size, size, 1);
6707 }
6708 }
6709
6710
/* Schoolbook squaring: stores the 2*size-limb square of the size-limb
 * operand UP into PRODP.  Structurally identical to mul_n_basecase with
 * vp == up -- it does NOT exploit the symmetry of a square, so it costs
 * the same as a general multiply.  Unlike mul_n_basecase it returns
 * nothing; the top limb is still written via prodp[size] on the last pass.
 * Assumes size >= 1 (up[0] is read unconditionally).
 */
6711 void
6712 mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
6713 {
6714 mpi_size_t i;
6715 mpi_limb_t cy_limb;
6716 mpi_limb_t v_limb;
6717
6718 /* Multiply by the first limb in V separately, as the result can be
6719 * stored (not added) to PROD. We also avoid a loop for zeroing. */
6720 v_limb = up[0];
6721 if( v_limb <= 1 ) {
6722 if( v_limb == 1 )
6723 MPN_COPY( prodp, up, size );
6724 else
6725 MPN_ZERO(prodp, size);
6726 cy_limb = 0;
6727 }
6728 else
6729 cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );
6730
6731 prodp[size] = cy_limb;
6732 prodp++;
6733
6734 /* For each iteration in the outer loop, multiply one limb from
6735 * U with one limb from V, and add it to PROD. */
6736 for( i=1; i < size; i++) {
6737 v_limb = up[i];
6738 if( v_limb <= 1 ) {
6739 cy_limb = 0;
6740 if( v_limb == 1 )
6741 cy_limb = mpihelp_add_n(prodp, prodp, up, size);
6742 }
6743 else
6744 cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
6745
6746 prodp[size] = cy_limb;
6747 prodp++;
6748 }
6749 }
6750
6751
/* Karatsuba squaring: stores the 2*size-limb square of the size-limb
 * operand UP into PRODP.  TSPACE must provide 2*size limbs of scratch,
 * as in mul_n.  Mirrors mul_n's structure, but the middle term
 * M = (U1-U0)(U0-U1) = -(U1-U0)^2 can never be positive, so no "negflg"
 * is needed: M is unconditionally subtracted (the "cy -= mpihelp_sub_n"
 * line below) and only the magnitude |U1-U0| is formed, in either order,
 * to keep the subtraction non-negative.
 */
6752 void
6753 mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
6754 {
6755 if( size & 1 ) {
6756 /* The size is odd, and the code below doesn't handle that.
6757 * Multiply the least significant (size - 1) limbs with a recursive
6758 * call, and handle the most significant limb of S1 and S2
6759 * separately.
6760 * A slightly faster way to do this would be to make the Karatsuba
6761 * code below behave as if the size were even, and let it check for
6762 * odd size in the end. I.e., in essence move this code to the end.
6763 * Doing so would save us a recursive call, and potentially make the
6764 * stack grow a lot less.
6765 */
6766 mpi_size_t esize = size - 1; /* even size */
6767 mpi_limb_t cy_limb;
6768
6769 MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
6770 cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
6771 prodp[esize + esize] = cy_limb;
6772 cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );
6773
6774 prodp[esize + size] = cy_limb;
6775 }
6776 else {
6777 mpi_size_t hsize = size >> 1;
6778 mpi_limb_t cy;
6779
6780 /* Product H. ________________ ________________
6781 * |_____U1 x U1____||____U0 x U0_____|
6782 * Put result in upper part of PROD and pass low part of TSPACE
6783 * as new TSPACE.
6784 */
6785 MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
6786
6787 /* Product M. ________________
6788 * |_(U1-U0)(U0-U1)_|
6789 */
6790 if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
6791 mpihelp_sub_n( prodp, up + hsize, up, hsize);
6792 else
6793 mpihelp_sub_n (prodp, up, up + hsize, hsize);
6794
6795 /* Read temporary operands from low part of PROD.
6796 * Put result in low part of TSPACE using upper part of TSPACE
6797 * as new TSPACE. */
6798 MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
6799
6800 /* Add/copy product H */
6801 MPN_COPY(prodp + hsize, prodp + size, hsize);
6802 cy = mpihelp_add_n(prodp + size, prodp + size,
6803 prodp + size + hsize, hsize);
6804
6805 /* Add product M (if NEGFLG M is a negative number). */
6806 cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);
6807
6808 /* Product L. ________________ ________________
6809 * |________________||____U0 x U0_____|
6810 * Read temporary operands from low part of PROD.
6811 * Put result in low part of TSPACE using upper part of TSPACE
6812 * as new TSPACE. */
6813 MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
6814
6815 /* Add/copy Product L (twice). */
6816 cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
6817 if( cy )
6818 mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
6819 hsize, cy);
6820
6821 MPN_COPY(prodp, tspace, hsize);
6822 cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
6823 if( cy )
6824 mpihelp_add_1 (prodp + size, prodp + size, size, 1);
6825 }
6826 }
6827
6828
6829 /* This should be made into an inline function in gmp.h. */
/* Multiply two equal-size (size-limb) operands into PRODP (2*size limbs
 * stored).  Detects squaring by pointer equality (up == vp) and uses the
 * dedicated squaring routines in that case.  For operands at or above
 * KARATSUBA_THRESHOLD, 2*size limbs of scratch are allocated per call --
 * in secure memory if either operand lives in secure memory -- and freed
 * before returning.
 */
6830 void
6831 mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
6832 {
6833 int secure;
6834
6835 if( up == vp ) {
6836 if( size < KARATSUBA_THRESHOLD )
6837 mpih_sqr_n_basecase( prodp, up, size );
6838 else {
6839 mpi_ptr_t tspace;
6840 secure = m_is_secure( up );
6841 tspace = mpi_alloc_limb_space( 2 * size, secure );
6842 mpih_sqr_n( prodp, up, size, tspace );
6843 mpi_free_limb_space( tspace );
6844 }
6845 }
6846 else {
6847 if( size < KARATSUBA_THRESHOLD )
6848 mul_n_basecase( prodp, up, vp, size );
6849 else {
6850 mpi_ptr_t tspace;
6851 secure = m_is_secure( up ) || m_is_secure( vp );
6852 tspace = mpi_alloc_limb_space( 2 * size, secure );
6853 mul_n (prodp, up, vp, size, tspace);
6854 mpi_free_limb_space( tspace );
6855 }
6856 }
6857 }
6858
6859
6860
/* Unbalanced multiply driver (expects usize >= vsize, as in mpihelp_mul):
 * walks U in vsize-limb windows, forming each window's vsize x vsize
 * product via MPN_MUL_N_RECURSE and accumulating it into PRODP with
 * add_n/add_1 carry propagation.  Any sub-vsize tail of U is handled by
 * mpihelp_mul (small tail) or by recursing with operands swapped through
 * a chained ctx->next context (large tail).
 * Scratch buffers (ctx->tspace and ctx->tp, 2*vsize limbs each) are
 * cached in *ctx across calls and reallocated only when the cached size
 * is smaller than the current vsize; they are placed in secure memory if
 * either operand is secure.  The caller must zero-initialize *ctx before
 * first use and release it with mpihelp_release_karatsuba_ctx() (see the
 * memset/release pair in mpihelp_mul below).
 */
6861 void
6862 mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
6863 mpi_ptr_t up, mpi_size_t usize,
6864 mpi_ptr_t vp, mpi_size_t vsize,
6865 struct karatsuba_ctx *ctx )
6866 {
6867 mpi_limb_t cy;
6868
6869 if( !ctx->tspace || ctx->tspace_size < vsize ) {
6870 if( ctx->tspace )
6871 mpi_free_limb_space( ctx->tspace );
6872 ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
6873 m_is_secure( up ) || m_is_secure( vp ) );
6874 ctx->tspace_size = vsize;
6875 }
6876
6877 MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );
6878
6879 prodp += vsize;
6880 up += vsize;
6881 usize -= vsize;
6882 if( usize >= vsize ) {
6883 if( !ctx->tp || ctx->tp_size < vsize ) {
6884 if( ctx->tp )
6885 mpi_free_limb_space( ctx->tp );
6886 ctx->tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up )
6887 || m_is_secure( vp ) );
6888 ctx->tp_size = vsize;
6889 }
6890
6891 do {
6892 MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
6893 cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
6894 mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
6895 prodp += vsize;
6896 up += vsize;
6897 usize -= vsize;
6898 } while( usize >= vsize );
6899 }
6900
6901 if( usize ) {
6902 if( usize < KARATSUBA_THRESHOLD ) {
6903 mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
6904 }
6905 else {
6906 if( !ctx->next ) {
6907 ctx->next = xmalloc_clear( sizeof *ctx );
6908 }
6909 mpihelp_mul_karatsuba_case( ctx->tspace,
6910 vp, vsize,
6911 up, usize,
6912 ctx->next );
6913 }
6914
6915 cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
6916 mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
6917 }
6918 }
6919
6920
/* Release all scratch held by a karatsuba_ctx chain: frees the tp/tspace
 * limb buffers of the head context and of every chained ctx->next node,
 * and xfree()s the chained nodes themselves (they were xmalloc_clear'd in
 * mpihelp_mul_karatsuba_case).  The head ctx struct is NOT freed -- it is
 * caller-owned, typically a stack variable (see mpihelp_mul below).
 */
6921 void
6922 mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
6923 {
6924 struct karatsuba_ctx *ctx2;
6925
6926 if( ctx->tp )
6927 mpi_free_limb_space( ctx->tp );
6928 if( ctx->tspace )
6929 mpi_free_limb_space( ctx->tspace );
6930 for( ctx=ctx->next; ctx; ctx = ctx2 ) {
6931 ctx2 = ctx->next;
6932 if( ctx->tp )
6933 mpi_free_limb_space( ctx->tp );
6934 if( ctx->tspace )
6935 mpi_free_limb_space( ctx->tspace );
6936 xfree( ctx );
6937 }
6938 }
6939
6940 /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
6941 * and v (pointed to by VP, with VSIZE limbs), and store the result at
6942 * PRODP. USIZE + VSIZE limbs are always stored, even when the most
6943 * significant limb is zero. Return the most significant limb of the
6944 * result.
6945 *
6946 * NOTE: The space pointed to by PRODP is overwritten before finished
6947 * with U and V, so overlap is an error.
6948 *
6949 * Argument constraints:
6950 * 1. USIZE >= VSIZE.
6951 * 2. PRODP != UP and PRODP != VP, i.e. the destination
6952 * must be distinct from the multiplier and the multiplicand.
6953 */
6954
/* Entry point for the general USIZE x VSIZE multiply; contract is in the
 * comment block above (usize >= vsize, no overlap with prodp).
 * Small V (below KARATSUBA_THRESHOLD) takes the inline schoolbook path,
 * which returns the carry of its final pass directly; vsize == 0 returns 0
 * with nothing stored.  Large V delegates to the chunked Karatsuba driver
 * with a zeroed stack-local ctx, released before returning.
 * prod_endp is captured before the schoolbook loop advances prodp; the
 * Karatsuba branch returns the top result limb through it.
 */
6955 mpi_limb_t
6956 mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
6957 mpi_ptr_t vp, mpi_size_t vsize)
6958 {
6959 mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
6960 mpi_limb_t cy;
6961 struct karatsuba_ctx ctx;
6962
6963 if( vsize < KARATSUBA_THRESHOLD ) {
6964 mpi_size_t i;
6965 mpi_limb_t v_limb;
6966
6967 if( !vsize )
6968 return 0;
6969
6970 /* Multiply by the first limb in V separately, as the result can be
6971 * stored (not added) to PROD. We also avoid a loop for zeroing. */
6972 v_limb = vp[0];
6973 if( v_limb <= 1 ) {
6974 if( v_limb == 1 )
6975 MPN_COPY( prodp, up, usize );
6976 else
6977 MPN_ZERO( prodp, usize );
6978 cy = 0;
6979 }
6980 else
6981 cy = mpihelp_mul_1( prodp, up, usize, v_limb );
6982
6983 prodp[usize] = cy;
6984 prodp++;
6985
6986 /* For each iteration in the outer loop, multiply one limb from
6987 * U with one limb from V, and add it to PROD. */
6988 for( i = 1; i < vsize; i++ ) {
6989 v_limb = vp[i];
6990 if( v_limb <= 1 ) {
6991 cy = 0;
6992 if( v_limb == 1 )
6993 cy = mpihelp_add_n(prodp, prodp, up, usize);
6994 }
6995 else
6996 cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
6997
6998 prodp[usize] = cy;
6999 prodp++;
7000 }
7001
7002 return cy;
7003 }
7004
7005 memset( &ctx, 0, sizeof ctx );
7006 mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
7007 mpihelp_release_karatsuba_ctx( &ctx );
7008 return *prod_endp;
7009 }
7010
7011