/* mpihelp-mul.c  -  MPI helper functions
 * Modified by No Such Labs. (C) 2015. See README.
 *
 * This file was originally part of GNU Privacy Guard (GPG), ver. 1.4.10,
 * SHA256(gnupg-1.4.10.tar.gz):
 * 0bfd74660a2f6cedcf7d8256db4a63c996ffebbcdc2cf54397bfb72878c5a85a
 * (C) 1994-2005 Free Software Foundation, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "knobs.h"
#include "mpi-internal.h"
#include "longlong.h"

#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
    do {                                               \
        if( (size) < KARATSUBA_THRESHOLD )             \
            mul_n_basecase (prodp, up, vp, size);      \
        else                                           \
            mul_n (prodp, up, vp, size, tspace);       \
    } while (0)

#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
    do {                                           \
        if ((size) < KARATSUBA_THRESHOLD)          \
            mpih_sqr_n_basecase (prodp, up, size); \
        else                                       \
            mpih_sqr_n (prodp, up, size, tspace);  \
    } while (0)
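
/* Both recursion helpers wrap an if/else in do { ... } while (0) so
 * that a macro invocation followed by a semicolon parses as a single
 * statement, e.g. as the unbraced branch of an outer if/else. */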


/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
 * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
 * always stored.  Return the most significant limb.
 *
 * Argument constraints:
 * 1. PRODP != UP and PRODP != VP, i.e. the destination
 *    must be distinct from the multiplier and the multiplicand.
 *
 * Handle simple cases with traditional multiplication.
 *
 * This is the most critical code of multiplication.  All multiplies rely
 * on this, both small and huge.  Small ones arrive here immediately.  Huge
 * ones arrive here as this is the base case for Karatsuba's recursive
 * algorithm below.
 */

static mpi_limb_t
mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
                mpi_ptr_t vp, mpi_size_t size)
{
    mpi_size_t i;
    mpi_limb_t cy;
    mpi_limb_t v_limb;

    /* Multiply by the first limb in V separately, as the result can be
     * stored (not added) to PROD.  We also avoid a loop for zeroing. */
    v_limb = vp[0];
    if( v_limb <= 1 ) {
        if( v_limb == 1 )
            MPN_COPY( prodp, up, size );
        else
            MPN_ZERO( prodp, size );
        cy = 0;
    }
    else
        cy = mpihelp_mul_1( prodp, up, size, v_limb );

    prodp[size] = cy;
    prodp++;

    /* For each iteration in the outer loop, multiply one limb from
     * U with one limb from V, and add it to PROD. */
    for( i = 1; i < size; i++ ) {
        v_limb = vp[i];
        if( v_limb <= 1 ) {
            cy = 0;
            if( v_limb == 1 )
                cy = mpihelp_add_n(prodp, prodp, up, size);
        }
        else
            cy = mpihelp_addmul_1(prodp, up, size, v_limb);

        prodp[size] = cy;
        prodp++;
    }

    return cy;
}
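
/* Illustrative sketch (not part of the original GnuPG source): the same
 * schoolbook algorithm as mul_n_basecase above, written out with explicit
 * 32-bit limbs and 64-bit intermediate products instead of the mpihelp_*
 * primitives.  The limb width is an assumption for exposition; the block
 * is kept under #if 0 so it is never compiled here. */
#if 0
#include <stdint.h>

static uint32_t
toy_mul_basecase( uint32_t *prodp, const uint32_t *up,
                  const uint32_t *vp, int size )
{
    uint64_t acc;
    int i, j;

    /* First row: partial products of U with vp[0] are stored. */
    acc = 0;
    for( j = 0; j < size; j++ ) {
        acc += (uint64_t)up[j] * vp[0];
        prodp[j] = (uint32_t)acc;
        acc >>= 32;             /* carry into the next limb */
    }
    prodp[size] = (uint32_t)acc;

    /* Remaining rows: partial products of U with vp[i] are added in
     * at limb offset i. */
    for( i = 1; i < size; i++ ) {
        acc = 0;
        for( j = 0; j < size; j++ ) {
            acc += (uint64_t)up[j] * vp[i] + prodp[i + j];
            prodp[i + j] = (uint32_t)acc;
            acc >>= 32;
        }
        prodp[i + size] = (uint32_t)acc;
    }
    return (uint32_t)acc;       /* most significant limb */
}
#endif
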
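/* Karatsuba multiplication of two SIZE-limb operands.  TSPACE must
 * provide 2 * SIZE limbs of scratch; each recursive step hands the
 * upper half (tspace + size) down as the sub-scratch, which is what
 * the callers' 2 * SIZE allocations account for. */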
static void
mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
       mpi_size_t size, mpi_ptr_t tspace )
{
    if( size & 1 ) {
        /* The size is odd, and the code below doesn't handle that.
         * Multiply the least significant (size - 1) limbs with a recursive
         * call, and handle the most significant limb of S1 and S2
         * separately.
         * A slightly faster way to do this would be to make the Karatsuba
         * code below behave as if the size were even, and let it check for
         * odd size in the end.  I.e., in essence move this code to the end.
         * Doing so would save us a recursive call, and potentially make the
         * stack grow a lot less.
         */
        mpi_size_t esize = size - 1;  /* even size */
        mpi_limb_t cy_limb;

        MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
        cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
        prodp[esize + esize] = cy_limb;
        cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
        prodp[esize + size] = cy_limb;
    }
    else {
        /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
         *
         * Split U in two pieces, U1 and U0, such that
         *   U = U0 + U1*(B**n),
         * and V in V1 and V0, such that
         *   V = V0 + V1*(B**n).
         *
         * UV is then computed recursively using the identity
         *
         *   UV = (B**(2n) + B**n)*U1*V1 + B**n*(U1 - U0)*(V0 - V1)
         *      + (B**n + 1)*U0*V0
         *
         * where B = 2**BITS_PER_MP_LIMB.
         */
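        /* Worked example (illustrative; not in the original comment):
         * with B = 10 and n = 1, take U = 23, V = 45, so U1 = 2, U0 = 3,
         * V1 = 4, V0 = 5.  Then
         *   (B**2 + B)*U1*V1      = 110 * 8  =  880
         *   B*(U1 - U0)*(V0 - V1) = 10 * -1  =  -10
         *   (B + 1)*U0*V0         = 11 * 15  =  165
         * and 880 - 10 + 165 = 1035 = 23 * 45.  Three half-size products
         * replace four, which is what gives Karatsuba its O(n**1.585)
         * running time. */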
        mpi_size_t hsize = size >> 1;
        mpi_limb_t cy;
        int negflg;

        /* Product H.      ________________  ________________
         *                |_____U1 x V1____||____U0 x V0_____|
         * Put result in upper part of PROD and pass low part of TSPACE
         * as new TSPACE.
         */
        MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);

        /* Product M.      ________________
         *                |_(U1-U0)(V0-V1)_|
         */
        if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
            mpihelp_sub_n(prodp, up + hsize, up, hsize);
            negflg = 0;
        }
        else {
            mpihelp_sub_n(prodp, up, up + hsize, hsize);
            negflg = 1;
        }
        if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
            mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
            negflg ^= 1;
        }
        else {
            mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
            /* No change of NEGFLG. */
        }
        /* Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE.
         */
        MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);

        /* Add/copy product H. */
        MPN_COPY (prodp + hsize, prodp + size, hsize);
        cy = mpihelp_add_n( prodp + size, prodp + size,
                            prodp + size + hsize, hsize);

        /* Add product M (if NEGFLG, M is a negative number). */
        if( negflg )
            cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
        else
            cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);

        /* Product L.       ________________  ________________
         *                 |________________||____U0 x V0_____|
         * Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE.
         */
        MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);

        /* Add/copy product L (twice). */
        cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
        if( cy )
            mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);

        MPN_COPY(prodp, tspace, hsize);
        cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
        if( cy )
            mpihelp_add_1(prodp + size, prodp + size, size, 1);
    }
}


void
mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
{
    mpi_size_t i;
    mpi_limb_t cy_limb;
    mpi_limb_t v_limb;

    /* Multiply by the first limb of U separately, as the result can be
     * stored (not added) to PROD.  We also avoid a loop for zeroing. */
    v_limb = up[0];
    if( v_limb <= 1 ) {
        if( v_limb == 1 )
            MPN_COPY( prodp, up, size );
        else
            MPN_ZERO(prodp, size);
        cy_limb = 0;
    }
    else
        cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );

    prodp[size] = cy_limb;
    prodp++;

    /* For each iteration in the outer loop, multiply U by one further
     * limb of U, and add it to PROD. */
    for( i = 1; i < size; i++ ) {
        v_limb = up[i];
        if( v_limb <= 1 ) {
            cy_limb = 0;
            if( v_limb == 1 )
                cy_limb = mpihelp_add_n(prodp, prodp, up, size);
        }
        else
            cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);

        prodp[size] = cy_limb;
        prodp++;
    }
}


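/* Karatsuba squaring; the structure mirrors mul_n above, with both
 * operands equal to U.  TSPACE must again provide 2 * SIZE limbs of
 * scratch. */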
void
mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
    if( size & 1 ) {
        /* The size is odd, and the code below doesn't handle that.
         * Multiply the least significant (size - 1) limbs with a recursive
         * call, and handle the most significant limb of S1 and S2
         * separately.
         * A slightly faster way to do this would be to make the Karatsuba
         * code below behave as if the size were even, and let it check for
         * odd size in the end.  I.e., in essence move this code to the end.
         * Doing so would save us a recursive call, and potentially make the
         * stack grow a lot less.
         */
        mpi_size_t esize = size - 1;  /* even size */
        mpi_limb_t cy_limb;

        MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
        cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
        prodp[esize + esize] = cy_limb;
        cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );
        prodp[esize + size] = cy_limb;
    }
    else {
        mpi_size_t hsize = size >> 1;
        mpi_limb_t cy;

        /* Product H.      ________________  ________________
         *                |_____U1 x U1____||____U0 x U0_____|
         * Put result in upper part of PROD and pass low part of TSPACE
         * as new TSPACE.
         */
        MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);

        /* Product M.      ________________
         *                |_(U1-U0)(U0-U1)_|
         */
        if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
            mpihelp_sub_n( prodp, up + hsize, up, hsize);
        else
            mpihelp_sub_n (prodp, up, up + hsize, hsize);

        /* Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE. */
        MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);

        /* Add/copy product H. */
        MPN_COPY(prodp + hsize, prodp + size, hsize);
        cy = mpihelp_add_n(prodp + size, prodp + size,
                           prodp + size + hsize, hsize);

        /* Subtract product M: (U1-U0)*(U0-U1) = -(U1-U0)**2 is never
         * positive, so unlike mul_n there is no sign flag to track. */
        cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);

        /* Product L.       ________________  ________________
         *                 |________________||____U0 x U0_____|
         * Read temporary operands from low part of PROD.
         * Put result in low part of TSPACE using upper part of TSPACE
         * as new TSPACE. */
        MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

        /* Add/copy product L (twice). */
        cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
        if( cy )
            mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
                          hsize, cy);

        MPN_COPY(prodp, tspace, hsize);
        cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
        if( cy )
            mpihelp_add_1 (prodp + size, prodp + size, size, 1);
    }
}


/* This should be made into an inline function in gmp.h. */
void
mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
{
    int secure;

    if( up == vp ) {
        if( size < KARATSUBA_THRESHOLD )
            mpih_sqr_n_basecase( prodp, up, size );
        else {
            mpi_ptr_t tspace;
            secure = m_is_secure( up );
            tspace = mpi_alloc_limb_space( 2 * size, secure );
            mpih_sqr_n( prodp, up, size, tspace );
            mpi_free_limb_space( tspace );
        }
    }
    else {
        if( size < KARATSUBA_THRESHOLD )
            mul_n_basecase( prodp, up, vp, size );
        else {
            mpi_ptr_t tspace;
            secure = m_is_secure( up ) || m_is_secure( vp );
            tspace = mpi_alloc_limb_space( 2 * size, secure );
            mul_n (prodp, up, vp, size, tspace);
            mpi_free_limb_space( tspace );
        }
    }
}


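/* Multiply u (pointed to by UP, USIZE limbs) by v (pointed to by VP,
 * VSIZE limbs), with USIZE >= VSIZE, and store the result at PRODP.
 * u is processed in VSIZE-limb chunks: each chunk is multiplied by v
 * with the Karatsuba recursion and the partial products are combined
 * at increasing limb offsets.  Scratch space is cached in *CTX (and in
 * chained contexts for the final, narrower chunk) so that repeated
 * calls can reuse the allocations. */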
void
mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
                            mpi_ptr_t up, mpi_size_t usize,
                            mpi_ptr_t vp, mpi_size_t vsize,
                            struct karatsuba_ctx *ctx )
{
    mpi_limb_t cy;

    if( !ctx->tspace || ctx->tspace_size < vsize ) {
        if( ctx->tspace )
            mpi_free_limb_space( ctx->tspace );
        ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
                                m_is_secure( up ) || m_is_secure( vp ) );
        ctx->tspace_size = vsize;
    }

    MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );

    prodp += vsize;
    up += vsize;
    usize -= vsize;
    if( usize >= vsize ) {
        if( !ctx->tp || ctx->tp_size < vsize ) {
            if( ctx->tp )
                mpi_free_limb_space( ctx->tp );
            ctx->tp = mpi_alloc_limb_space( 2 * vsize,
                                m_is_secure( up ) || m_is_secure( vp ) );
            ctx->tp_size = vsize;
        }

        do {
            MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
            cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
            mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
            prodp += vsize;
            up += vsize;
            usize -= vsize;
        } while( usize >= vsize );
    }

    if( usize ) {
        if( usize < KARATSUBA_THRESHOLD ) {
            mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
        }
        else {
            if( !ctx->next ) {
                ctx->next = xmalloc_clear( sizeof *ctx );
            }
            mpihelp_mul_karatsuba_case( ctx->tspace,
                                        vp, vsize,
                                        up, usize,
                                        ctx->next );
        }

        cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
        mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
    }
}


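/* Release all scratch space held by CTX and by any chained contexts.
 * The head context itself lives in the caller's frame (as in
 * mpihelp_mul below) and is not freed; the chained contexts were
 * heap-allocated above and are freed here. */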
void
mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
{
    struct karatsuba_ctx *ctx2;

    if( ctx->tp )
        mpi_free_limb_space( ctx->tp );
    if( ctx->tspace )
        mpi_free_limb_space( ctx->tspace );
    for( ctx = ctx->next; ctx; ctx = ctx2 ) {
        ctx2 = ctx->next;
        if( ctx->tp )
            mpi_free_limb_space( ctx->tp );
        if( ctx->tspace )
            mpi_free_limb_space( ctx->tspace );
        xfree( ctx );
    }
}

/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
 * and v (pointed to by VP, with VSIZE limbs), and store the result at
 * PRODP.  USIZE + VSIZE limbs are always stored; the most significant
 * limb of the result may be zero even if the input operands are
 * normalized.  Return the most significant limb of the result.
 *
 * NOTE: The space pointed to by PRODP is overwritten before U and V
 * are fully read, so it must not overlap either operand.
 *
 * Argument constraints:
 * 1. USIZE >= VSIZE.
 * 2. PRODP != UP and PRODP != VP, i.e. the destination
 *    must be distinct from the multiplier and the multiplicand.
 */

mpi_limb_t
mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
             mpi_ptr_t vp, mpi_size_t vsize)
{
    mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
    mpi_limb_t cy;
    struct karatsuba_ctx ctx;

    if( vsize < KARATSUBA_THRESHOLD ) {
        mpi_size_t i;
        mpi_limb_t v_limb;

        if( !vsize )
            return 0;

        /* Multiply by the first limb in V separately, as the result can be
         * stored (not added) to PROD.  We also avoid a loop for zeroing. */
        v_limb = vp[0];
        if( v_limb <= 1 ) {
            if( v_limb == 1 )
                MPN_COPY( prodp, up, usize );
            else
                MPN_ZERO( prodp, usize );
            cy = 0;
        }
        else
            cy = mpihelp_mul_1( prodp, up, usize, v_limb );

        prodp[usize] = cy;
        prodp++;

        /* For each iteration in the outer loop, multiply one limb from
         * U with one limb from V, and add it to PROD. */
        for( i = 1; i < vsize; i++ ) {
            v_limb = vp[i];
            if( v_limb <= 1 ) {
                cy = 0;
                if( v_limb == 1 )
                    cy = mpihelp_add_n(prodp, prodp, up, usize);
            }
            else
                cy = mpihelp_addmul_1(prodp, up, usize, v_limb);

            prodp[usize] = cy;
            prodp++;
        }

        return cy;
    }

    memset( &ctx, 0, sizeof ctx );
    mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
    mpihelp_release_karatsuba_ctx( &ctx );
    return *prod_endp;
}
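
/* Usage sketch (illustrative only; example_mul is a hypothetical caller,
 * not part of the original file).  The destination buffer must hold
 * usize + vsize limbs, must not overlap either operand, and the longer
 * operand must be passed first. */
#if 0
static void
example_mul( mpi_ptr_t prod, mpi_ptr_t u, mpi_size_t usize,
             mpi_ptr_t v, mpi_size_t vsize )
{
    mpi_limb_t msl;

    /* Caller guarantees usize >= vsize and room for usize + vsize limbs. */
    msl = mpihelp_mul( prod, u, usize, v, vsize );
    if( !msl ) {
        /* Top limb came out zero; a caller tracking normalized sizes
         * would record usize + vsize - 1 significant limbs. */
    }
}
#endif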