| /* mpn_mul_n -- Multiply two natural numbers of length n. |
| |
| Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. |
| |
| This file is part of the GNU MP Library. |
| |
| The GNU MP Library is free software; you can redistribute it and/or modify |
| it under the terms of the GNU Lesser General Public License as published by |
| the Free Software Foundation; either version 2.1 of the License, or (at your |
| option) any later version. |
| |
| The GNU MP Library is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public License |
| along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| MA 02111-1307, USA. */ |
| |
| #include <config.h> |
| #include "gmp-impl.h" |
| |
| /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), |
| both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are |
| always stored. Return the most significant limb. |
| |
| Argument constraints: |
| 1. PRODP != UP and PRODP != VP, i.e. the destination |
| must be distinct from the multiplier and the multiplicand. */ |
| |
| /* If KARATSUBA_THRESHOLD is not already defined, define it to a |
| value which is good on most machines. */ |
| #ifndef KARATSUBA_THRESHOLD |
| #define KARATSUBA_THRESHOLD 32 |
| #endif |
| |
| /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ |
| #if KARATSUBA_THRESHOLD < 2 |
| #undef KARATSUBA_THRESHOLD |
| #define KARATSUBA_THRESHOLD 2 |
| #endif |
| |
| /* Handle simple cases with traditional multiplication. |
| |
| This is the most critical code of multiplication. All multiplies rely |
| on this, both small and huge. Small ones arrive here immediately. Huge |
| ones arrive here as this is the base case for Karatsuba's recursive |
| algorithm below. */ |
| |
| void |
| #if __STDC__ |
| impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) |
| #else |
| impn_mul_n_basecase (prodp, up, vp, size) |
| mp_ptr prodp; |
| mp_srcptr up; |
| mp_srcptr vp; |
| mp_size_t size; |
| #endif |
| { |
| mp_size_t i; |
| mp_limb_t cy_limb; |
| mp_limb_t v_limb; |
| |
| /* Multiply by the first limb in V separately, as the result can be |
| stored (not added) to PROD. We also avoid a loop for zeroing. */ |
| v_limb = vp[0]; |
| if (v_limb <= 1) |
| { |
| if (v_limb == 1) |
| MPN_COPY (prodp, up, size); |
| else |
| MPN_ZERO (prodp, size); |
| cy_limb = 0; |
| } |
| else |
| cy_limb = mpn_mul_1 (prodp, up, size, v_limb); |
| |
| prodp[size] = cy_limb; |
| prodp++; |
| |
| /* For each iteration in the outer loop, multiply one limb from |
| U with one limb from V, and add it to PROD. */ |
| for (i = 1; i < size; i++) |
| { |
| v_limb = vp[i]; |
| if (v_limb <= 1) |
| { |
| cy_limb = 0; |
| if (v_limb == 1) |
| cy_limb = mpn_add_n (prodp, prodp, up, size); |
| } |
| else |
| cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); |
| |
| prodp[size] = cy_limb; |
| prodp++; |
| } |
| } |
| |
| void |
| #if __STDC__ |
| impn_mul_n (mp_ptr prodp, |
| mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace) |
| #else |
| impn_mul_n (prodp, up, vp, size, tspace) |
| mp_ptr prodp; |
| mp_srcptr up; |
| mp_srcptr vp; |
| mp_size_t size; |
| mp_ptr tspace; |
| #endif |
| { |
| if ((size & 1) != 0) |
| { |
| /* The size is odd, the code code below doesn't handle that. |
| Multiply the least significant (size - 1) limbs with a recursive |
| call, and handle the most significant limb of S1 and S2 |
| separately. */ |
| /* A slightly faster way to do this would be to make the Karatsuba |
| code below behave as if the size were even, and let it check for |
| odd size in the end. I.e., in essence move this code to the end. |
| Doing so would save us a recursive call, and potentially make the |
| stack grow a lot less. */ |
| |
| mp_size_t esize = size - 1; /* even size */ |
| mp_limb_t cy_limb; |
| |
| MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace); |
| cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); |
| prodp[esize + esize] = cy_limb; |
| cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]); |
| |
| prodp[esize + size] = cy_limb; |
| } |
| else |
| { |
| /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. |
| |
| Split U in two pieces, U1 and U0, such that |
| U = U0 + U1*(B**n), |
| and V in V1 and V0, such that |
| V = V0 + V1*(B**n). |
| |
| UV is then computed recursively using the identity |
| |
| 2n n n n |
| UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V |
| 1 1 1 0 0 1 0 0 |
| |
| Where B = 2**BITS_PER_MP_LIMB. */ |
| |
| mp_size_t hsize = size >> 1; |
| mp_limb_t cy; |
| int negflg; |
| |
| /*** Product H. ________________ ________________ |
| |_____U1 x V1____||____U0 x V0_____| */ |
| /* Put result in upper part of PROD and pass low part of TSPACE |
| as new TSPACE. */ |
| MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace); |
| |
| /*** Product M. ________________ |
| |_(U1-U0)(V0-V1)_| */ |
| if (mpn_cmp (up + hsize, up, hsize) >= 0) |
| { |
| mpn_sub_n (prodp, up + hsize, up, hsize); |
| negflg = 0; |
| } |
| else |
| { |
| mpn_sub_n (prodp, up, up + hsize, hsize); |
| negflg = 1; |
| } |
| if (mpn_cmp (vp + hsize, vp, hsize) >= 0) |
| { |
| mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); |
| negflg ^= 1; |
| } |
| else |
| { |
| mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); |
| /* No change of NEGFLG. */ |
| } |
| /* Read temporary operands from low part of PROD. |
| Put result in low part of TSPACE using upper part of TSPACE |
| as new TSPACE. */ |
| MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size); |
| |
| /*** Add/copy product H. */ |
| MPN_COPY (prodp + hsize, prodp + size, hsize); |
| cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); |
| |
| /*** Add product M (if NEGFLG M is a negative number). */ |
| if (negflg) |
| cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); |
| else |
| cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); |
| |
| /*** Product L. ________________ ________________ |
| |________________||____U0 x V0_____| */ |
| /* Read temporary operands from low part of PROD. |
| Put result in low part of TSPACE using upper part of TSPACE |
| as new TSPACE. */ |
| MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size); |
| |
| /*** Add/copy Product L (twice). */ |
| |
| cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); |
| if (cy) |
| mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); |
| |
| MPN_COPY (prodp, tspace, hsize); |
| cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); |
| if (cy) |
| mpn_add_1 (prodp + size, prodp + size, size, 1); |
| } |
| } |