raw
ffa_ch12_karatsub...    1 ------------------------------------------------------------------------------
ffa_ch12_karatsub... 2 ------------------------------------------------------------------------------
ffa_ch12_karatsub... 3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. --
ffa_ch12_karatsub... 4 -- --
ffa_ch15_gcd.kv 5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) --
ffa_ch12_karatsub... 6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html --
ffa_ch12_karatsub... 7 -- --
ffa_ch12_karatsub... 8 -- You do not have, nor can you ever acquire the right to use, copy or --
ffa_ch12_karatsub... 9 -- distribute this software ; Should you use this software for any purpose, --
ffa_ch12_karatsub... 10 -- or copy and distribute it to anyone or in any manner, you are breaking --
ffa_ch12_karatsub... 11 -- the laws of whatever soi-disant jurisdiction, and you promise to --
ffa_ch12_karatsub... 12 -- continue doing so for the indefinite future. In any case, please --
ffa_ch12_karatsub... 13 -- always : read and understand any software ; verify any PGP signatures --
ffa_ch12_karatsub... 14 -- that you use - for any purpose. --
ffa_ch12_karatsub... 15 -- --
ffa_ch12_karatsub... 16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . --
ffa_ch12_karatsub... 17 ------------------------------------------------------------------------------
ffa_ch12_karatsub... 18 ------------------------------------------------------------------------------
ffa_ch12_karatsub... 19
ffa_ch12_karatsub... 20 with Words; use Words;
ffa_ch12_karatsub... 21 with Word_Ops; use Word_Ops;
ffa_ch12_karatsub... 22 with W_Mul; use W_Mul;
ffa_ch12_karatsub... 23 with FZ_Arith; use FZ_Arith;
ffa_ch12_karatsub... 24
ffa_ch12_karatsub... 25
ffa_ch12_karatsub... 26 package body FZ_Sqr is
ffa_ch12_karatsub... 27
ffa_ch12_karatsub... 28 -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED)
ffa_ch12_karatsub... 29 procedure FZ_Sqr_Comba(X : in FZ;
ffa_ch12_karatsub... 30 XX : out FZ) is
ffa_ch12_karatsub... 31
ffa_ch12_karatsub... 32 -- Words in each multiplicand
ffa_ch12_karatsub... 33 L : constant Word_Index := X'Length;
ffa_ch12_karatsub... 34
ffa_ch12_karatsub... 35 -- Length of Product, i.e. double the length of X
ffa_ch12_karatsub... 36 LP : constant Word_Index := 2 * L;
ffa_ch12_karatsub... 37
ffa_ch12_karatsub... 38 -- 3-word Accumulator
ffa_ch12_karatsub... 39 A2, A1, A0 : Word := 0;
ffa_ch12_karatsub... 40
ffa_ch12_karatsub... 41 Lo, Hi : Word; -- Output of WxW multiply/square
ffa_ch12_karatsub... 42
ffa_ch12_karatsub... 43 -- Type for referring to a column of XX
ffa_ch12_karatsub... 44 subtype ColXX is Word_Index range 0 .. LP - 1;
ffa_ch12_karatsub... 45
ffa_ch12_karatsub... 46 procedure Accum is
ffa_ch12_karatsub... 47 C : WBool; -- Carry for the Accumulator addition
ffa_ch12_karatsub... 48 Sum : Word; -- Sum for Accumulator addition
ffa_ch12_karatsub... 49 begin
ffa_ch12_karatsub... 50 -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0:
ffa_ch12_karatsub... 51 -- A0 += Lo; C := Carry
ffa_ch12_karatsub... 52 Sum := A0 + Lo;
ffa_ch12_karatsub... 53 C := W_Carry(A0, Lo, Sum);
ffa_ch12_karatsub... 54 A0 := Sum;
ffa_ch12_karatsub... 55 -- A1 += Hi + C; C := Carry
ffa_ch12_karatsub... 56 Sum := A1 + Hi + C;
ffa_ch12_karatsub... 57 C := W_Carry(A1, Hi, Sum);
ffa_ch12_karatsub... 58 A1 := Sum;
ffa_ch12_karatsub... 59 -- A2 += A2 + C
ffa_ch12_karatsub... 60 A2 := A2 + C;
ffa_ch12_karatsub... 61 end Accum;
ffa_ch12_karatsub... 62 pragma Inline_Always(Accum);
ffa_ch12_karatsub... 63
ffa_ch12_karatsub... 64 procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is
ffa_ch12_karatsub... 65 begin
ffa_ch12_karatsub... 66 for j in From .. To loop
ffa_ch12_karatsub... 67 -- Hi:Lo := j-th * (N - j)-th Word, and then,
ffa_ch12_karatsub... 68 Mul_Word(X(X'First + j),
ffa_ch12_karatsub... 69 X(X'First - j + N),
ffa_ch12_karatsub... 70 Lo, Hi);
ffa_ch12_karatsub... 71 Accum;
ffa_ch12_karatsub... 72 Accum; -- Accum += 2 * (Hi:Lo)
ffa_ch12_karatsub... 73 end loop;
ffa_ch12_karatsub... 74 end SymmDigits;
ffa_ch12_karatsub... 75 pragma Inline_Always(SymmDigits);
ffa_ch12_karatsub... 76
ffa_ch12_karatsub... 77 procedure SqrDigit(N : in ColXX) is
ffa_ch12_karatsub... 78 begin
ffa_ch12_karatsub... 79 Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit)
ffa_ch12_karatsub... 80 Accum; -- Accum += Hi:Lo
ffa_ch12_karatsub... 81 end SqrDigit;
ffa_ch12_karatsub... 82 pragma Inline_Always(SqrDigit);
ffa_ch12_karatsub... 83
ffa_ch12_karatsub... 84 procedure HaveDigit(N : in ColXX) is
ffa_ch12_karatsub... 85 begin
ffa_ch12_karatsub... 86 -- Save the Nth (indexed from zero) word of XX:
ffa_ch12_karatsub... 87 XX(XX'First + N) := A0;
ffa_ch12_karatsub... 88 -- Right-Shift the Accumulator by one Word width:
ffa_ch12_karatsub... 89 A0 := A1;
ffa_ch12_karatsub... 90 A1 := A2;
ffa_ch12_karatsub... 91 A2 := 0;
ffa_ch12_karatsub... 92 end HaveDigit;
ffa_ch12_karatsub... 93 pragma Inline_Always(HaveDigit);
ffa_ch12_karatsub... 94
ffa_ch12_karatsub... 95 -- Compute the Nth (indexed from zero) column of the Product
ffa_ch12_karatsub... 96 procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is
ffa_ch12_karatsub... 97 begin
ffa_ch12_karatsub... 98 -- The branch pattern depends only on FZ wordness
ffa_ch12_karatsub... 99 if N mod 2 = 0 then -- If we're doing an EVEN-numbered column:
ffa_ch12_karatsub... 100 SymmDigits(N, U, V - 1); -- Stop before V: it is the square case
ffa_ch12_karatsub... 101 SqrDigit(V); -- Compute the square case at V
ffa_ch12_karatsub... 102 else -- If we're doing an ODD-numbered column:
ffa_ch12_karatsub... 103 SymmDigits(N, U, V); -- All of them are the symmetric case
ffa_ch12_karatsub... 104 end if; -- After either even or odd column:
ffa_ch12_karatsub... 105 HaveDigit(N); -- We have the N-th digit of the result.
ffa_ch12_karatsub... 106 end Col;
ffa_ch12_karatsub... 107 pragma Inline_Always(Col);
ffa_ch12_karatsub... 108
ffa_ch12_karatsub... 109 begin
ffa_ch12_karatsub... 110 -- First col always exists:
ffa_ch12_karatsub... 111 SqrDigit(ColXX'First);
ffa_ch12_karatsub... 112 HaveDigit(ColXX'First);
ffa_ch12_karatsub... 113
ffa_ch12_karatsub... 114 -- Compute the lower half of the Product:
ffa_ch12_karatsub... 115 for i in 1 .. L - 1 loop
ffa_ch12_karatsub... 116 Col(i, 0, i / 2);
ffa_ch12_karatsub... 117 end loop;
ffa_ch12_karatsub... 118
ffa_ch12_karatsub... 119 -- Compute the upper half (sans last Word) of the Product:
ffa_ch12_karatsub... 120 for i in L .. LP - 2 loop
ffa_ch12_karatsub... 121 Col(i, i - L + 1, i / 2);
ffa_ch12_karatsub... 122 end loop;
ffa_ch12_karatsub... 123
ffa_ch12_karatsub... 124 -- The very last Word of the Product:
ffa_ch12_karatsub... 125 XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0;
ffa_ch12_karatsub... 126 end FZ_Sqr_Comba;
ffa_ch12_karatsub... 127
ffa_ch12_karatsub... 128
ffa_ch12_karatsub... 129 -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
ffa_ch12_karatsub... 130 procedure Sqr_Karatsuba(X : in FZ;
ffa_ch12_karatsub... 131 XX : out FZ) is
ffa_ch12_karatsub... 132
ffa_ch12_karatsub... 133 -- L is the wordness of X. Guaranteed to be a power of two.
ffa_ch12_karatsub... 134 L : constant Word_Count := X'Length;
ffa_ch12_karatsub... 135
ffa_ch12_karatsub... 136 -- An 'LSeg' is the same length as X.
ffa_ch12_karatsub... 137 subtype LSeg is FZ(1 .. L);
ffa_ch12_karatsub... 138
ffa_ch12_karatsub... 139 -- K is HALF of the length of X.
ffa_ch12_karatsub... 140 K : constant Word_Index := L / 2;
ffa_ch12_karatsub... 141
ffa_ch12_karatsub... 142 -- A 'KSeg' is the same length as HALF of X.
ffa_ch12_karatsub... 143 subtype KSeg is FZ(1 .. K);
ffa_ch12_karatsub... 144
ffa_ch12_karatsub... 145 -- The three L-sized variables of the product equation, i.e.:
ffa_ch12_karatsub... 146 -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH
ffa_ch12_karatsub... 147 LL, DD, HH : LSeg;
ffa_ch12_karatsub... 148
ffa_ch12_karatsub... 149 -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2
ffa_ch12_karatsub... 150 Dx : KSeg;
ffa_ch12_karatsub... 151
ffa_ch12_karatsub... 152 -- IGNORED Subtraction borrow, sign of (XL - XH)
ffa_ch12_karatsub... 153 Cx : WBool; -- given as DD := Dx^2, DD is always positive
ffa_ch12_karatsub... 154 pragma Unreferenced(Cx);
ffa_ch12_karatsub... 155
ffa_ch12_karatsub... 156 -- Bottom and Top K-sized halves of X.
ffa_ch12_karatsub... 157 XLo : KSeg renames X( X'First .. X'Last - K );
ffa_ch12_karatsub... 158 XHi : KSeg renames X( X'First + K .. X'Last );
ffa_ch12_karatsub... 159
ffa_ch12_karatsub... 160 -- L-sized middle segment of the product XX (+/- K from the midpoint).
ffa_ch12_karatsub... 161 XXMid : LSeg renames XX( XX'First + K .. XX'Last - K );
ffa_ch12_karatsub... 162
ffa_ch12_karatsub... 163 -- Bottom and Top L-sized halves of the product XX.
ffa_ch12_karatsub... 164 XXLo : LSeg renames XX( XX'First .. XX'Last - L );
ffa_ch12_karatsub... 165 XXHi : LSeg renames XX( XX'First + L .. XX'Last );
ffa_ch12_karatsub... 166
ffa_ch12_karatsub... 167 -- Topmost K-sized quarter segment of the product XX, or 'tail'
ffa_ch12_karatsub... 168 XXHiHi : KSeg renames XXHi( XXHi'First + K .. XXHi'Last );
ffa_ch12_karatsub... 169
ffa_ch12_karatsub... 170 -- Carry from addition of HH and LL terms; borrow from subtraction of DD
ffa_ch12_karatsub... 171 C : WBool;
ffa_ch12_karatsub... 172
ffa_ch12_karatsub... 173 -- Barring a cosmic ray, 0 <= TC <= 2
ffa_ch12_karatsub... 174 subtype TailCarry is Word range 0 .. 2;
ffa_ch12_karatsub... 175
ffa_ch12_karatsub... 176 -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
ffa_ch12_karatsub... 177 TC : TailCarry := 0;
ffa_ch12_karatsub... 178
ffa_ch12_karatsub... 179 -- Barring a cosmic ray, the tail ripple will NOT overflow.
ffa_ch12_karatsub... 180 FinalCarry : WZeroOrDie := 0;
ffa_ch12_karatsub... 181
ffa_ch12_karatsub... 182 begin
ffa_ch12_karatsub... 183
ffa_ch12_karatsub... 184 -- Recurse: LL := XLo^2
ffa_ch12_karatsub... 185 FZ_Square_Unbuffered(XLo, LL);
ffa_ch12_karatsub... 186
ffa_ch12_karatsub... 187 -- Recurse: HH := XHi^2
ffa_ch12_karatsub... 188 FZ_Square_Unbuffered(XHi, HH);
ffa_ch12_karatsub... 189
ffa_ch12_karatsub... 190 -- Dx := |XLo - XHi| , and we don't care about the borrow
ffa_ch12_karatsub... 191 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
ffa_ch12_karatsub... 192
ffa_ch12_karatsub... 193 -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx)
ffa_ch12_karatsub... 194 FZ_Square_Unbuffered(Dx, DD);
ffa_ch12_karatsub... 195
ffa_ch12_karatsub... 196 -- XX := LL + 2^(2 * K * Bitness) * HH
ffa_ch12_karatsub... 197 XXLo := LL;
ffa_ch12_karatsub... 198 XXHi := HH;
ffa_ch12_karatsub... 199
ffa_ch12_karatsub... 200 -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator.
ffa_ch12_karatsub... 201 FZ_Add_D(X => XXMid, Y => HH, Overflow => TC);
ffa_ch12_karatsub... 202
ffa_ch12_karatsub... 203 -- XX += 2^(K * Bitness) * LL, ...
ffa_ch12_karatsub... 204 FZ_Add_D(X => XXMid, Y => LL, Overflow => C);
ffa_ch12_karatsub... 205
ffa_ch12_karatsub... 206 -- ... add the carry from LL addition into the Tail Carry accumulator.
ffa_ch12_karatsub... 207 TC := TC + C;
ffa_ch12_karatsub... 208
ffa_ch12_karatsub... 209 -- XX -= 2^(K * Bitness) * DD
ffa_ch12_karatsub... 210 FZ_Sub_D(X => XXMid, -- Because DD is always positive,
ffa_ch12_karatsub... 211 Y => DD, -- this term is always subtractive.
ffa_ch12_karatsub... 212 Underflow => C); -- C is the borrow from DD term subtraction
ffa_ch12_karatsub... 213
ffa_ch12_karatsub... 214 -- Get final Tail Carry for the ripple by subtracting DD term's borrow
ffa_ch12_karatsub... 215 TC := TC - C;
ffa_ch12_karatsub... 216
ffa_ch12_karatsub... 217 -- Ripple the Tail Carry into the tail.
ffa_ch12_karatsub... 218 FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry);
ffa_ch12_karatsub... 219
ffa_ch12_karatsub... 220 end Sqr_Karatsuba;
ffa_ch12_karatsub... 221 -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
ffa_ch12_karatsub... 222
ffa_ch12_karatsub... 223
ffa_ch12_karatsub... 224 -- Squaring. (CAUTION: UNBUFFERED)
ffa_ch12_karatsub... 225 procedure FZ_Square_Unbuffered(X : in FZ;
ffa_ch12_karatsub... 226 XX : out FZ) is
ffa_ch12_karatsub... 227 begin
ffa_ch12_karatsub... 228
ffa_ch12_karatsub... 229 if X'Length <= Sqr_Karatsuba_Thresh then
ffa_ch12_karatsub... 230
ffa_ch12_karatsub... 231 -- Base case:
ffa_ch12_karatsub... 232 FZ_Sqr_Comba(X, XX);
ffa_ch12_karatsub... 233
ffa_ch12_karatsub... 234 else
ffa_ch12_karatsub... 235
ffa_ch12_karatsub... 236 -- Recursive case:
ffa_ch12_karatsub... 237 Sqr_Karatsuba(X, XX);
ffa_ch12_karatsub... 238
ffa_ch12_karatsub... 239 end if;
ffa_ch12_karatsub... 240
ffa_ch12_karatsub... 241 end FZ_Square_Unbuffered;
ffa_ch12_karatsub... 242
ffa_ch12_karatsub... 243
ffa_ch12_karatsub... 244 -- Squaring. Preserves the inputs.
ffa_ch12_karatsub... 245 procedure FZ_Square_Buffered(X : in FZ;
ffa_ch12_karatsub... 246 XX_Lo : out FZ;
ffa_ch12_karatsub... 247 XX_Hi : out FZ) is
ffa_ch12_karatsub... 248
ffa_ch12_karatsub... 249 -- Product buffer.
ffa_ch12_karatsub... 250 P : FZ(1 .. 2 * X'Length);
ffa_ch12_karatsub... 251
ffa_ch12_karatsub... 252 begin
ffa_ch12_karatsub... 253
ffa_ch12_karatsub... 254 FZ_Square_Unbuffered(X, P);
ffa_ch12_karatsub... 255
ffa_ch12_karatsub... 256 XX_Lo := P(P'First .. P'First + X'Length - 1);
ffa_ch12_karatsub... 257 XX_Hi := P(P'First + X'Length .. P'Last);
ffa_ch12_karatsub... 258
ffa_ch12_karatsub... 259 end FZ_Square_Buffered;
ffa_ch12_karatsub... 260
ffa_ch12_karatsub... 261 end FZ_Sqr;