/* mpihelp-mul.c  -  MPI helper functions
 * Modified by No Such Labs. (C) 2015. See README.
 *
 * This file was originally part of Gnu Privacy Guard (GPG), ver. 1.4.10,
 * SHA256(gnupg-1.4.10.tar.gz):
 * 0bfd74660a2f6cedcf7d8256db4a63c996ffebbcdc2cf54397bfb72878c5a85a
 * (C) 1994-2005 Free Software Foundation, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "knobs.h"
#include "mpi-internal.h"
#include "longlong.h"


#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)  \
    do {                                                \
        if( (size) < KARATSUBA_THRESHOLD )              \
            mul_n_basecase (prodp, up, vp, size);       \
        else                                            \
            mul_n (prodp, up, vp, size, tspace);        \
    } while (0)

#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)      \
    do {                                                \
        if ((size) < KARATSUBA_THRESHOLD)               \
            mpih_sqr_n_basecase (prodp, up, size);      \
        else                                            \
            mpih_sqr_n (prodp, up, size, tspace);       \
    } while (0)

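/* Both macros dispatch on KARATSUBA_THRESHOLD (16 limbs by default in
 * the GPG 1.4.10 tree this file descends from): below the threshold the
 * O(n^2) schoolbook routines win on constant factors; at or above it
 * the operands are split and the recursion continues.
 */
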
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
 * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
 * always stored. Return the most significant limb.
 *
 * Argument constraints:
 * 1. PRODP != UP and PRODP != VP, i.e. the destination
 *    must be distinct from the multiplier and the multiplicand.
 *
 * Handle simple cases with traditional multiplication.
 *
 * This is the most critical code of multiplication. All multiplies rely
 * on this, both small and huge. Small ones arrive here immediately. Huge
 * ones arrive here as this is the base case for Karatsuba's recursive
 * algorithm below.
 */

static mpi_limb_t
mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
                mpi_ptr_t vp, mpi_size_t size)
{
    mpi_size_t i;
    mpi_limb_t cy;
    mpi_limb_t v_limb;

    /* Multiply by the first limb of V separately, as the result can be
     * stored in (not added to) PROD. We also avoid a loop for zeroing. */
    v_limb = vp[0];
    if( v_limb <= 1 ) {
        if( v_limb == 1 )
            MPN_COPY( prodp, up, size );
        else
            MPN_ZERO( prodp, size );
        cy = 0;
    }
    else
        cy = mpihelp_mul_1( prodp, up, size, v_limb );

    prodp[size] = cy;
    prodp++;

    /* For each iteration of the outer loop, multiply the whole of U by
     * one limb of V, and add the product into PROD. */
    for( i = 1; i < size; i++ ) {
        v_limb = vp[i];
        if( v_limb <= 1 ) {
            cy = 0;
            if( v_limb == 1 )
                cy = mpihelp_add_n(prodp, prodp, up, size);
        }
        else
            cy = mpihelp_addmul_1(prodp, up, size, v_limb);

        prodp[size] = cy;
        prodp++;
    }

    return cy;
}


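/* Multiply the SIZE-limb operands at UP and VP into 2 * SIZE limbs at
 * PRODP, splitting via Karatsuba and recursing through
 * MPN_MUL_N_RECURSE. Only reached for SIZE >= KARATSUBA_THRESHOLD.
 * TSPACE is caller-provided scratch; going by the allocation in
 * mpihelp_mul_n below, 2 * SIZE limbs must be available.
 */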
static void
mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
       mpi_size_t size, mpi_ptr_t tspace )
{
    if( size & 1 ) {
        /* The size is odd, and the code below doesn't handle that.
         * Multiply the least significant (size - 1) limbs with a recursive
         * call, and handle the most significant limb of U and V
         * separately.
         * A slightly faster way to do this would be to make the Karatsuba
         * code below behave as if the size were even, and let it check for
         * odd size in the end. I.e., in essence move this code to the end.
         * Doing so would save us a recursive call, and potentially make the
         * stack grow a lot less.
         */
        mpi_size_t esize = size - 1; /* even size */
        mpi_limb_t cy_limb;

        MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
        cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
        prodp[esize + esize] = cy_limb;
        cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
        prodp[esize + size] = cy_limb;
    }
    else {
        /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
         *
         * Split U in two pieces, U1 and U0, such that
         *   U = U0 + U1*(B**n),
         * and V in V1 and V0, such that
         *   V = V0 + V1*(B**n).
         *
         * UV is then computed recursively using the identity
         *
         *   UV = (B**2n + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1) + (B**n + 1)*U0*V0
         *
         * where B = 2**BITS_PER_MP_LIMB.
         */
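        /* Expanding the right-hand side:
         *   (B**2n + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1) + (B**n + 1)*U0*V0
         *     = B**2n*U1*V1 + B**n*(U1*V0 + U0*V1) + U0*V0
         *     = (U0 + U1*B**n)*(V0 + V1*B**n) = U*V,
         * so three half-size multiplies (H = U1*V1, M = (U1-U0)*(V0-V1),
         * L = U0*V0) do the work of the schoolbook method's four.
         */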
        mpi_size_t hsize = size >> 1;
        mpi_limb_t cy;
        int negflg;

        /* Product H.      ________________  ________________
         *                |_____U1 x V1____||____U0 x V0_____|
         * Put result in upper part of PROD and pass low part of TSPACE
         * as new TSPACE.
         */
        MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);

        /* Product M.      ________________
         *                |_(U1-U0)(V0-V1)_|
         */
        if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
            mpihelp_sub_n(prodp, up + hsize, up, hsize);
            negflg = 0;
        }
        else {
            mpihelp_sub_n(prodp, up, up + hsize, hsize);
            negflg = 1;
        }
        if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
            mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
            negflg ^= 1;
        }
        else {
            mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
            /* No change of NEGFLG. */
        }
        /* Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE.
         */
        MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);

        /* Add/copy product H. */
        MPN_COPY (prodp + hsize, prodp + size, hsize);
        cy = mpihelp_add_n( prodp + size, prodp + size,
                            prodp + size + hsize, hsize);

        /* Add product M (if NEGFLG is set, M is a negative number). */
        if(negflg)
            cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
        else
            cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);

        /* Product L.      ________________  ________________
         *                |________________||____U0 x V0_____|
         * Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE.
         */
        MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);

        /* Add/copy Product L (twice). */

        cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
        if( cy )
            mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);

        MPN_COPY(prodp, tspace, hsize);
        cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
        if( cy )
            mpihelp_add_1(prodp + size, prodp + size, size, 1);
    }
}


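/* Square the SIZE-limb number at UP, storing the 2 * SIZE limb result
 * at PRODP. Same schoolbook structure as mul_n_basecase above, with
 * both operands read from UP (the v_limb name is kept from the
 * multiply code).
 */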
void
mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
{
    mpi_size_t i;
    mpi_limb_t cy_limb;
    mpi_limb_t v_limb;

    /* Multiply by the first limb of U separately, as the result can be
     * stored in (not added to) PROD. We also avoid a loop for zeroing. */
    v_limb = up[0];
    if( v_limb <= 1 ) {
        if( v_limb == 1 )
            MPN_COPY( prodp, up, size );
        else
            MPN_ZERO(prodp, size);
        cy_limb = 0;
    }
    else
        cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );

    prodp[size] = cy_limb;
    prodp++;

    /* For each iteration of the outer loop, multiply the whole of U by
     * one of its own limbs, and add the product into PROD. */
    for( i=1; i < size; i++) {
        v_limb = up[i];
        if( v_limb <= 1 ) {
            cy_limb = 0;
            if( v_limb == 1 )
                cy_limb = mpihelp_add_n(prodp, prodp, up, size);
        }
        else
            cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);

        prodp[size] = cy_limb;
        prodp++;
    }
}


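/* Karatsuba squaring: the analogue of mul_n above for the UP == VP
 * case. Since (U1-U0)*(U0-U1) = -(U1-U0)**2 can never be positive, the
 * middle product is always subtracted and no sign flag is needed.
 * TSPACE is scratch as in mul_n, i.e. 2 * SIZE limbs going by the
 * allocation in mpihelp_mul_n below.
 */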
void
mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
    if( size & 1 ) {
        /* The size is odd, and the code below doesn't handle that.
         * Multiply the least significant (size - 1) limbs with a recursive
         * call, and handle the most significant limb of U separately.
         * A slightly faster way to do this would be to make the Karatsuba
         * code below behave as if the size were even, and let it check for
         * odd size in the end. I.e., in essence move this code to the end.
         * Doing so would save us a recursive call, and potentially make the
         * stack grow a lot less.
         */
        mpi_size_t esize = size - 1; /* even size */
        mpi_limb_t cy_limb;

        MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
        cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
        prodp[esize + esize] = cy_limb;
        cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );

        prodp[esize + size] = cy_limb;
    }
    else {
        mpi_size_t hsize = size >> 1;
        mpi_limb_t cy;

        /* Product H.      ________________  ________________
         *                |_____U1 x U1____||____U0 x U0_____|
         * Put result in upper part of PROD and pass low part of TSPACE
         * as new TSPACE.
         */
        MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);

        /* Product M.      ________________
         *                |_(U1-U0)(U0-U1)_|
         */
        if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
            mpihelp_sub_n( prodp, up + hsize, up, hsize);
        else
            mpihelp_sub_n (prodp, up, up + hsize, hsize);

        /* Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE. */
        MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);

        /* Add/copy product H. */
        MPN_COPY(prodp + hsize, prodp + size, hsize);
        cy = mpihelp_add_n(prodp + size, prodp + size,
                           prodp + size + hsize, hsize);

        /* Subtract product M (here M = -(U1-U0)**2 is never positive,
         * so it is always subtracted; no sign flag as in mul_n). */
        cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);

        /* Product L.      ________________  ________________
         *                |________________||____U0 x U0_____|
         * Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE. */
        MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

        /* Add/copy Product L (twice). */
        cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
        if( cy )
            mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
                          hsize, cy);

        MPN_COPY(prodp, tspace, hsize);
        cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
        if( cy )
            mpihelp_add_1 (prodp + size, prodp + size, size, 1);
    }
}


/* This should be made into an inline function in gmp.h. */
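/* Multiply the two SIZE-limb numbers at UP and VP into 2 * SIZE limbs
 * at PRODP, dispatching to the squaring routines when UP == VP, and
 * allocating the Karatsuba scratch space (in secure memory if an
 * operand lives there) only when the threshold is crossed.
 */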
void
mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
{
    int secure;

    if( up == vp ) {
        if( size < KARATSUBA_THRESHOLD )
            mpih_sqr_n_basecase( prodp, up, size );
        else {
            mpi_ptr_t tspace;
            secure = m_is_secure( up );
            tspace = mpi_alloc_limb_space( 2 * size, secure );
            mpih_sqr_n( prodp, up, size, tspace );
            mpi_free_limb_space( tspace );
        }
    }
    else {
        if( size < KARATSUBA_THRESHOLD )
            mul_n_basecase( prodp, up, vp, size );
        else {
            mpi_ptr_t tspace;
            secure = m_is_secure( up ) || m_is_secure( vp );
            tspace = mpi_alloc_limb_space( 2 * size, secure );
            mul_n (prodp, up, vp, size, tspace);
            mpi_free_limb_space( tspace );
        }
    }
}


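/* Multiply U (USIZE limbs) by V (VSIZE limbs), USIZE >= VSIZE, storing
 * the result at PRODP. U is walked in VSIZE-limb chunks: each chunk is
 * multiplied by the whole of V with MPN_MUL_N_RECURSE and accumulated
 * into the product; a final chunk shorter than VSIZE is handled by
 * mpihelp_mul or, above the threshold, by recursing with a chained
 * context. CTX caches the scratch buffers between calls and must
 * eventually be released with mpihelp_release_karatsuba_ctx().
 */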
void
mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
                            mpi_ptr_t up, mpi_size_t usize,
                            mpi_ptr_t vp, mpi_size_t vsize,
                            struct karatsuba_ctx *ctx )
{
    mpi_limb_t cy;

    if( !ctx->tspace || ctx->tspace_size < vsize ) {
        if( ctx->tspace )
            mpi_free_limb_space( ctx->tspace );
        ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
                                 m_is_secure( up ) || m_is_secure( vp ) );
        ctx->tspace_size = vsize;
    }

    MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );

    prodp += vsize;
    up += vsize;
    usize -= vsize;
    if( usize >= vsize ) {
        if( !ctx->tp || ctx->tp_size < vsize ) {
            if( ctx->tp )
                mpi_free_limb_space( ctx->tp );
            ctx->tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up )
                                                    || m_is_secure( vp ) );
            ctx->tp_size = vsize;
        }

        do {
            MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
            cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
            mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
            prodp += vsize;
            up += vsize;
            usize -= vsize;
        } while( usize >= vsize );
    }

    if( usize ) {
        if( usize < KARATSUBA_THRESHOLD ) {
            mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
        }
        else {
            if( !ctx->next ) {
                ctx->next = xmalloc_clear( sizeof *ctx );
            }
            mpihelp_mul_karatsuba_case( ctx->tspace,
                                        vp, vsize,
                                        up, usize,
                                        ctx->next );
        }

        cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
        mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
    }
}


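/* Free the scratch buffers hanging off CTX, walking the chain of
 * contexts that mpihelp_mul_karatsuba_case() may have grown. The head
 * context itself is the caller's storage and is not freed.
 */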
void
mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
{
    struct karatsuba_ctx *ctx2;

    if( ctx->tp )
        mpi_free_limb_space( ctx->tp );
    if( ctx->tspace )
        mpi_free_limb_space( ctx->tspace );
    for( ctx=ctx->next; ctx; ctx = ctx2 ) {
        ctx2 = ctx->next;
        if( ctx->tp )
            mpi_free_limb_space( ctx->tp );
        if( ctx->tspace )
            mpi_free_limb_space( ctx->tspace );
        xfree( ctx );
    }
}

/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
 * and v (pointed to by VP, with VSIZE limbs), and store the result at
 * PRODP. USIZE + VSIZE limbs are always stored, even when the most
 * significant limb of the result is zero. Return the most significant
 * limb of the result.
 *
 * NOTE: The space pointed to by PRODP is overwritten before U and V
 * are fully read, so overlap is an error.
 *
 * Argument constraints:
 * 1. USIZE >= VSIZE.
 * 2. PRODP != UP and PRODP != VP, i.e. the destination
 *    must be distinct from the multiplier and the multiplicand.
 */

mpi_limb_t
mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
             mpi_ptr_t vp, mpi_size_t vsize)
{
    mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
    mpi_limb_t cy;
    struct karatsuba_ctx ctx;

    if( vsize < KARATSUBA_THRESHOLD ) {
        mpi_size_t i;
        mpi_limb_t v_limb;

        if( !vsize )
            return 0;

        /* Multiply by the first limb of V separately, as the result can be
         * stored in (not added to) PROD. We also avoid a loop for zeroing. */
        v_limb = vp[0];
        if( v_limb <= 1 ) {
            if( v_limb == 1 )
                MPN_COPY( prodp, up, usize );
            else
                MPN_ZERO( prodp, usize );
            cy = 0;
        }
        else
            cy = mpihelp_mul_1( prodp, up, usize, v_limb );

        prodp[usize] = cy;
        prodp++;

        /* For each iteration of the outer loop, multiply the whole of U
         * by one limb of V, and add the product into PROD. */
        for( i = 1; i < vsize; i++ ) {
            v_limb = vp[i];
            if( v_limb <= 1 ) {
                cy = 0;
                if( v_limb == 1 )
                    cy = mpihelp_add_n(prodp, prodp, up, usize);
            }
            else
                cy = mpihelp_addmul_1(prodp, up, usize, v_limb);

            prodp[usize] = cy;
            prodp++;
        }

        return cy;
    }

    memset( &ctx, 0, sizeof ctx );
    mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
    mpihelp_release_karatsuba_ctx( &ctx );
    return *prod_endp;
}
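

/* Illustrative use (a sketch only, not part of the library interface;
 * assumes stack arrays are an acceptable stand-in for limb space from
 * mpi_alloc_limb_space(), and writes B for 2**BITS_PER_MP_LIMB):
 *
 *     mpi_limb_t u[2] = { 5, 1 };   // u = 1*B + 5
 *     mpi_limb_t v[1] = { 3 };      // v = 3
 *     mpi_limb_t prod[3];           // usize + vsize limbs, disjoint from u, v
 *     mpi_limb_t msl = mpihelp_mul( prod, u, 2, v, 1 );
 *     // prod = { 15, 3, 0 }: u*v = 3*B + 15; msl == 0, the top limb.
 */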