ffa_ch12_karatsuba

all signers: bvt asciilifeform diana_coman

antecedents: ffa_ch11_tuning_and_api.kv

press order:

ffa_ch1_genesis.kv	asciilifeform bvt diana_coman
ffa_ch2_logicals.kv	asciilifeform bvt diana_coman
ffa_ch3_shifts.kv	asciilifeform bvt diana_coman
ffa_ch4_ffacalc.kv	asciilifeform bvt diana_coman
ffa_ch5_egypt.kv	asciilifeform bvt diana_coman
ffa_ch6_simplest_rsa.kv	asciilifeform bvt diana_coman
ffa_ch7_turbo_egyptians.kv	asciilifeform bvt diana_coman
ffa_ch8_randomism.kv	asciilifeform bvt diana_coman
ffa_ch9_exodus.kv	asciilifeform bvt diana_coman
ffa_ch10_karatsuba.kv	asciilifeform bvt diana_coman
ffa_ch11_tuning_and_api.kv	asciilifeform bvt diana_coman
ffa_ch12_karatsuba_redux.kv	asciilifeform diana_coman

patch:

- E47AB3F867F34FF2D307BF328D9FD39D51E3C0C93CFA4E328997B9322A642542F66E623A658D5F2FC87ACFD2A72555790BD08F25948D9CB227866C7A24853985
+
ffa/HISTORY.TXT

(1 . 47)(0 . 0)
  5 ###############
  6 ### HISTORY ###
  7 ###############
  8 
  9 "Chapter 1: Genesis."
 10 ffa_ch1_genesis.vpatch
 11 http://www.loper-os.org/?p=1913
 12 
 13 "Chapter 2: Logical and Bitwise Operations."
 14 ffa_ch2_logicals.vpatch
 15 http://www.loper-os.org/?p=2026
 16 
 17 "Chapter 3: Shifts."
 18 ffa_ch3_shifts.vpatch
 19 http://www.loper-os.org/?p=2032
 20 
 21 "Chapter 4: Interlude: FFACalc."
 22 ffa_ch4_ffacalc.vpatch
 23 http://www.loper-os.org/?p=2051
 24 
 25 "Chapter 5: "Egyptological" Multiplication and Division."
 26 ffa_ch5_egypt.vpatch
 27 http://www.loper-os.org/?p=2071
 28 
 29 "Chapter 6: "Geological" RSA."
 30 ffa_ch6_simplest_rsa.vpatch
 31 http://www.loper-os.org/?p=2105
 32 
 33 "Chapter 7: "Turbo Egyptians.""
 34 ffa_ch7_turbo_egyptians.vpatch
 35 http://www.loper-os.org/?p=2118
 36 
 37 "Chapter 8: Interlude: Randomism."
 38 ffa_ch8_randomism.vpatch
 39 http://www.loper-os.org/?p=2175
 40 
 41 "Chapter 9: "Exodus from Egypt" with Comba’s Algorithm."
 42 ffa_ch9_exodus.vpatch
 43 http://www.loper-os.org/?p=2186
 44 
 45 "Chapter 10: Introducing Karatsuba’s Multiplication."
 46 ffa_ch10_karatsuba.vpatch
 47 http://www.loper-os.org/?p=2238
 48 
 49 "Chapter 11: Tuning and Unified API."
 50 ffa_ch11_tuning_and_api.vpatch
 51 ### YOU ARE HERE ###

-
+ E5B1858F8D2C3AD2AEB9C4BA9E1FE28B5C79588305357385DB3AC1AFE50796268014B8480CA0C48508DCDB3416172E478B1291EF8A62B584B41CD6F9D6BCF988
ffa/MANIFEST.TXT

(0 . 0)(1 . 12)
 56 xxxxxxx ffa_ch1_genesis                "Genesis."
 57 xxxxxxx ffa_ch2_logicals               "Logical and Bitwise Operations."
 58 xxxxxxx ffa_ch3_shifts                 "Shifts."
 59 xxxxxxx ffa_ch4_ffacalc                "Interlude: FFACalc."
 60 xxxxxxx ffa_ch5_egypt                  ""Egyptological" Multiplication and Division."
 61 xxxxxxx ffa_ch6_simplest_rsa           ""Geological" RSA."
 62 xxxxxxx ffa_ch7_turbo_egyptians        ""Turbo Egyptians.""
 63 xxxxxxx ffa_ch8_randomism              "Interlude: Randomism."
 64 xxxxxxx ffa_ch9_exodus                 ""Exodus from Egypt" with Comba’s Algorithm."
 65 xxxxxxx ffa_ch10_karatsuba             "Introducing Karatsuba's Multiplication."
 66 xxxxxxx ffa_ch11_tuning_and_api        "Tuning and Unified API."
 67  551091 ffa_ch12_karatsuba_redux       "Karatsuba Redux."

- 74DCE12DC3FA30B4FD9E60AE209C1DEBA564B19DD95B413F02FA9A0E45120242CD9682A079C92B7E5A2EB19941A7EEC6F0F3EC9C2B88058F88CF891828496D58
+ 4A851245719704AF35888E2D1BC00B4CEC16E66093E7D79408E98E8A2EABA0E0F5A7ED1EEE26D0F75FA651DBE4BDDD48C31722A2B9B98464362B681F5D7B4C8A
ffa/ffacalc/ffa_calc.adb

(474 . 6)(474 . 17)
 72                end loop;
 73                Quit(0);
 74                
 75                ---------------------------------------------------------
 76                -- Ch. 12B:
 77                -- Square, give bottom and top halves
 78             when 'S' =>
 79                Want(1);
 80                Push;
 81                FFA_FZ_Square(X     => Stack(SP - 1),
 82                              XX_Lo => Stack(SP - 1),
 83                              XX_Hi => Stack(SP));
 84                ---------------------------------------------------------
 85                
 86                ----------
 87                -- NOPs --
 88                ----------

- 9BD9383546E9B4E6EBD42F6E4DC7BB07A449487CEA22AE1D5EFAC8D74AF48907D827295E6E388A1912D964E3A6D5C818A4250BFCB95505CA6E5310F2CA0FE355
+ 720237083E8D484B06D432529E8547D78ADB2EB1F72CF0934F7596CE8CA7AEC883C43F0951E133294B5139D4386E749C0939EFE3715EC46BC2515E3B23493A7B
ffa/libffa/ffa.adb

(20 . 6)(20 . 7)
 93 with FZ_Arith;
 94 with FZ_Shift;
 95 with FZ_Mul;
 96 with FZ_Sqr;
 97 
 98 
 99 -- Wrapper bodies for routines that we inline, but must enforce preconditions
(107 . 4)(108 . 13)
101                                   XY_Lo => XY_Lo, XY_Hi => XY_Hi);
102    end FFA_FZ_Multiply;
103    
104    
105    -- Square. Preserves the inputs.
106    procedure FFA_FZ_Square(X     : in  FZ;
107                            XX_Lo : out FZ;
108                            XX_Hi : out FZ) is
109    begin
110       FZ_Sqr.FZ_Square_Buffered(X => X, XX_Lo => XX_Lo, XX_Hi => XX_Hi);
111    end FFA_FZ_Square;
112    
113 end FFA;

- 98EAA2028C09D5ABFDBEB11C8665906E67392BD4E25CD0434565D0E350A9211EEDB31EE4327793BA9678722C15868C249E6CEAA06616F9CE27918375EE6BD69A
+ AA1B855DFFA6FB750CF1778F0DFB532328FC555DFA0C1FAB99B3D9DD85B6C5BC16508A1422273F9706ACC917E1B873A38AE9B358FB9A070F25F5403195CAC01C
ffa/libffa/ffa.ads

(256 . 6)(256 . 14)
118      XY_Lo'Length = XY_Hi'Length and
119      XY_Lo'Length = ((X'Length + Y'Length) / 2);
120    
121    -- Square. Preserves the inputs.
122    procedure FFA_FZ_Square(X     : in  FZ;
123                            XX_Lo : out FZ;
124                            XX_Hi : out FZ)
125      with Pre => XX_Lo'Length = X'Length and
126      XX_Hi'Length = X'Length and
127      X'Length mod 2 = 0;
128    
129    ----------------------------------------------------------------------------
130    --- Modular Operations on FZ
131    ----------------------------------------------------------------------------

- 990ABD54E1786DA34415CDE07148189077B5AD6A36E527B1DE4FB22DDE308D1BA505FC6923AF2F7AFFCC9846A59CB6C465DE9F02F215CFBE59696CFD3B036B2F
+ 118A03B1BEAB9BFC7C517C193F333B5F9A1169CD7B3364CA7B10B7050F8AA8F85E4323B925F6534041F0F02DE5242A72CC3A9829BF9E5CF25A075AA162CB1244
ffa/libffa/fz_arith.adb

(120 . 6)(120 . 26)
136    end FZ_Add_Gated;
137    
138    
139    -- Destructive Sub: X := X - Y; Underflow := Borrow
140    procedure FZ_Sub_D(X          : in out FZ;
141                       Y          : in     FZ;
142                       Underflow  : out    WBool) is
143       Borrow : WBool := 0;
144    begin
145       for i in 0 .. Word_Index(X'Length - 1) loop
146          declare
147             A : constant Word := X(X'First + i);
148             B : constant Word := Y(Y'First + i);
149             S : constant Word := A - B - Borrow;
150          begin
151             X(X'First + i) := S;
152             Borrow         := W_Borrow(A, B, S);
153          end;
154       end loop;
155       Underflow := Borrow;
156    end FZ_Sub_D;
157    
158    
159    -- Difference := X - Y; Underflow := Borrow
160    procedure FZ_Sub(X          : in  FZ;
161                     Y          : in  FZ;

- 23437D66C7048DADB0D307D41A30D3DE3C879A107D4804C6C95766CA62CA7F875C7C4CC98EFEFD1EE085D6C2AD7844A3C7CD15DD61ED79F5F7ED0017F8F90262
+ 1BE9E99C85140FDBCAA506EA97DE7B3E469ED5F0D30A252CCA92DC74C0F8D77DDEA63F7552907CD0705879984CB74259A228982504C441286C56BBF1D18A14BE
ffa/libffa/fz_arith.ads

(62 . 6)(62 . 12)
166                           Sum        : out FZ);
167    pragma Inline_Always(FZ_Add_Gated);
168    
169    -- Destructive Sub: X := X - Y; Underflow := Borrow
170    procedure FZ_Sub_D(X          : in out FZ;
171                       Y          : in     FZ;
172                       Underflow  : out    WBool);
173    pragma Inline_Always(FZ_Sub_D);
174    
175    -- Difference := X - Y; Underflow := Borrow
176    procedure FZ_Sub(X          : in  FZ;
177                     Y          : in  FZ;

- ED92BD76AE835F0C692EDDABE634ADAD968C4B698F70716192B3156C3F28F7C41D5859C05B7337F6990576B0A8C5964B9F39D84C074F7763ACAE3253DDCEE771
+ 84C9976AA492F507F8F21C8BBEB3B7DC671F8AA45C9479808DFFDA6B62FD23A1891F08740937D5D61B8D5FAD9DA7AD9E890AAA79059C0A1EE69096D877A93577
ffa/libffa/fz_modex.adb

(21 . 6)(21 . 7)
182 with FZ_Pred;  use FZ_Pred;
183 with FZ_Shift; use FZ_Shift;
184 with FZ_Mul;   use FZ_Mul;
185 with FZ_Sqr;   use FZ_Sqr;
186 with FZ_Divis; use FZ_Divis;
187 
188 
(53 . 6)(54 . 32)
190    end FZ_Mod_Mul;
191    
192    
193    -- Modular Squaring: Product := X*X mod Modulus
194    procedure FZ_Mod_Sqr(X        : in  FZ;
195                         Modulus  : in  FZ;
196                         Product  : out FZ) is
197       
198       -- The wordness of both operands is equal:
199       L     : constant Indices := X'Length;
200       
201       -- Double-width register for squaring and modulus operations
202       XX    : FZ(1 .. L * 2);
203       
204       -- To refer to the lower and upper halves of the working register:
205       XX_Lo : FZ renames XX(1     .. L);
206       XX_Hi : FZ renames XX(L + 1 .. XX'Last);
207    
208    begin
209       
210       -- XX_Lo:XX_Hi := X^2
211       FZ_Square_Buffered(X, XX_Lo, XX_Hi);
212       
213       -- Product := XX mod M
214       FZ_Mod(XX, Modulus, Product);
215       
216    end FZ_Mod_Sqr;
217    
218    
219    -- Modular Exponent: Result := Base^Exponent mod Modulus
220    procedure FZ_Mod_Exp(Base     : in  FZ;
221                         Exponent : in  FZ;
(89 . 8)(116 . 8)
223          -- Advance to the next bit of E
224          FZ_ShiftRight(E, E, 1);
225          
226          -- B := B*B mod Modulus
227          FZ_Mod_Mul(X => B, Y => B, Modulus => Modulus, Product => B);
228          -- B := B^2 mod Modulus
229          FZ_Mod_Sqr(X => B, Modulus => Modulus, Product => B);
230          
231       end loop;
232

- AFC76CF7AE5FD7F854E19546E2C6A7804C613C3CD60BE68EC4F8C7DA8C5B0E33E656017B4C402B8B668B7EF021BB3DB3DC861B54CF3A2B836D2A3D2B4F9D454B
+ 4D75A96582C1897F5A85DA8983033487653B0B0678D88BC3C7F895FCBB4A0A63B9019426D119CB3EDDF959856959CBBA295BC1D46AE36B0C4B7F4A6F9E2A00A7
ffa/libffa/fz_modex.ads

(33 . 6)(33 . 13)
237      Modulus'Length = X'Length and
238      Product'Length = Modulus'Length;
239    
240    -- Modular Square: Product := X*X mod Modulus
241    procedure FZ_Mod_Sqr(X        : in  FZ;
242                         Modulus  : in  FZ;
243                         Product  : out FZ)
244      with Pre => Modulus'Length = X'Length and
245      Product'Length = Modulus'Length;
246    
247    -- Modular Exponent: Result := Base^Exponent mod Modulus
248    procedure FZ_Mod_Exp(Base     : in  FZ;
249                         Exponent : in  FZ;

- A819415BC60308FE0B550EEE13DA2D6D4819064E4D336E9766E65A63768AEF24AC7DB9F5AC238725587F4660C2992AE17ABC85EF5047D62BEB04BA2C12D1172A
+ 92BD5F707CEBADBABC29F5F02A452F8D223B640AD39BF3B08CEA613CAA9CA92A7AE91096812FFC09E895C97BCC5ABA5054E89C30A6D550926D9CC4814B3180A7
ffa/libffa/fz_mul.adb

(166 . 8)(166 . 14)
254       -- Carry from individual term additions.
255       C          : WBool;
256       
257       -- Tail-Carry accumulator, for the final ripple
258       TC         : Word;
259       -- Barring a cosmic ray, 0 <= TC <= 2
260       subtype TailCarry is Word range 0 .. 2;
261       
262       -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
263       TC         : TailCarry := 0;
264       
265       -- Barring a cosmic ray, the tail ripple will NOT overflow.
266       FinalCarry : WZeroOrDie := 0;
267       
268    begin
269       
(212 . 15)(218 . 9)
271       -- Compute the final Tail Carry for the ripple
272       TC := TC + C - DD_Sub;
273       
274       -- Barring a cosmic ray, 0 <= TC <= 2 .
275       pragma Assert(TC <= 2);
276       
277       -- Ripple the Tail Carry into the tail.
278       FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => C);
279       FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => FinalCarry);
280       
281       -- Barring a cosmic ray, the tail ripple will NOT overflow.
282       pragma Assert(C = 0);
283 
284    end Mul_Karatsuba;
285    -- CAUTION: Inlining prohibited for Mul_Karatsuba !
286

-
+ 8F0062ED157D9EB9910C160640F3F6DA391D6724A0023DA3B21B6BB00F5391E53A35569073EA698D82A259A26F910DE0803FB8EABAC6B2694F3D43AC709AE76E
ffa/libffa/fz_sqr.adb

(0 . 0)(1 . 261)
291 ------------------------------------------------------------------------------
292 ------------------------------------------------------------------------------
293 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
294 --                                                                          --
295 -- (C) 2018 Stanislav Datskovskiy ( www.loper-os.org )                      --
296 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
297 --                                                                          --
298 -- You do not have, nor can you ever acquire the right to use, copy or      --
299 -- distribute this software ; Should you use this software for any purpose, --
300 -- or copy and distribute it to anyone or in any manner, you are breaking   --
301 -- the laws of whatever soi-disant jurisdiction, and you promise to         --
302 -- continue doing so for the indefinite future. In any case, please         --
303 -- always : read and understand any software ; verify any PGP signatures    --
304 -- that you use - for any purpose.                                          --
305 --                                                                          --
306 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
307 ------------------------------------------------------------------------------
308 ------------------------------------------------------------------------------
309 
310 with Words;    use Words;
311 with Word_Ops; use Word_Ops;
312 with W_Mul;    use W_Mul;
313 with FZ_Arith; use FZ_Arith;
314 
315 
316 package body FZ_Sqr is
317    
318    -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED)
319    procedure FZ_Sqr_Comba(X  : in  FZ;
320                           XX : out FZ) is
321       
322       -- Words in each multiplicand
323       L : constant Word_Index := X'Length;
324       
325       -- Length of Product, i.e. double the length of X
326       LP : constant Word_Index := 2 * L;
327       
328       -- 3-word Accumulator
329       A2, A1, A0 : Word := 0;
330       
331       Lo, Hi  : Word; -- Output of WxW multiply/square
332       
333       -- Type for referring to a column of XX
334       subtype ColXX is Word_Index range 0 .. LP - 1;
335       
336       procedure Accum is
337          C      : WBool; -- Carry for the Accumulator addition
338          Sum    : Word;  -- Sum for Accumulator addition
339       begin
340          -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0:
341          -- A0 += Lo; C := Carry
342          Sum := A0 + Lo;
343          C   := W_Carry(A0, Lo, Sum);
344          A0  := Sum;
345          -- A1 += Hi + C; C := Carry
346          Sum := A1 + Hi + C;
347          C   := W_Carry(A1, Hi, Sum);
348          A1  := Sum;
349          -- A2 += A2 + C
350          A2  := A2 + C;
351       end Accum;
352       pragma Inline_Always(Accum);
353       
354       procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is
355       begin
356          for j in From .. To loop
357             -- Hi:Lo := j-th * (N - j)-th Word, and then,
358             Mul_Word(X(X'First + j),
359                      X(X'First - j + N),
360                      Lo, Hi);
361             Accum;
362             Accum; -- Accum += 2 * (Hi:Lo)
363          end loop;
364       end SymmDigits;
365       pragma Inline_Always(SymmDigits);
366       
367       procedure SqrDigit(N : in ColXX) is
368       begin
369          Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit)
370          Accum;                            -- Accum += Hi:Lo
371       end SqrDigit;
372       pragma Inline_Always(SqrDigit);
373       
374       procedure HaveDigit(N : in ColXX) is
375       begin
376          -- Save the Nth (indexed from zero) word of XX:
377          XX(XX'First + N) := A0;
378          -- Right-Shift the Accumulator by one Word width:
379          A0 := A1;
380          A1 := A2;
381          A2 := 0;
382       end HaveDigit;
383       pragma Inline_Always(HaveDigit);
384       
385       -- Compute the Nth (indexed from zero) column of the Product
386       procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is
387       begin
388          -- The branch pattern depends only on FZ wordness
389          if N mod 2 = 0 then         -- If we're doing an EVEN-numbered column:
390             SymmDigits(N, U, V - 1); --   Stop before V: it is the square case
391             SqrDigit(V);             --   Compute the square case at V
392          else                        -- If we're doing an ODD-numbered column:
393             SymmDigits(N, U, V);     --   All of them are the symmetric case
394          end if;                     -- After either even or odd column:
395          HaveDigit(N);               --   We have the N-th digit of the result.
396       end Col;
397       pragma Inline_Always(Col);
398       
399    begin
400       -- First col always exists:
401       SqrDigit(ColXX'First);
402       HaveDigit(ColXX'First);
403       
404       -- Compute the lower half of the Product:
405       for i in 1 .. L - 1 loop
406          Col(i, 0, i / 2);
407       end loop;
408       
409       -- Compute the upper half (sans last Word) of the Product:
410       for i in L .. LP - 2 loop
411          Col(i, i - L + 1, i / 2);
412       end loop;
413       
414       -- The very last Word of the Product:
415       XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0;
416    end FZ_Sqr_Comba;
417    
418    
419    -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
420    procedure Sqr_Karatsuba(X  : in  FZ;
421                            XX : out FZ) is
422       
423       -- L is the wordness of X. Guaranteed to be a power of two.
424       L : constant Word_Count := X'Length;
425       
426       -- An 'LSeg' is the same length as X.
427       subtype LSeg is FZ(1 .. L);
428       
429       -- K is HALF of the length of X.
430       K : constant Word_Index := L / 2;
431       
432       -- A 'KSeg' is the same length as HALF of X.
433       subtype KSeg is FZ(1 .. K);
434       
435       -- The three L-sized variables of the product equation, i.e.:
436       -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH
437       LL, DD, HH : LSeg;
438       
439       -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2
440       Dx         : KSeg;
441       
442       -- IGNORED Subtraction borrow, sign of (XL - XH)
443       Cx         : WBool; -- given as DD := Dx^2, DD is always positive
444       pragma Unreferenced(Cx);
445       
446       -- Bottom and Top K-sized halves of X.
447       XLo        : KSeg  renames    X(    X'First       ..    X'Last - K );
448       XHi        : KSeg  renames    X(    X'First + K   ..    X'Last     );
449       
450       -- L-sized middle segment of the product XX (+/- K from the midpoint).
451       XXMid      : LSeg  renames   XX(   XX'First + K   ..   XX'Last - K );
452       
453       -- Bottom and Top L-sized halves of the product XX.
454       XXLo       : LSeg  renames   XX(   XX'First       ..   XX'Last - L );
455       XXHi       : LSeg  renames   XX(   XX'First + L   ..   XX'Last     );
456       
457       -- Topmost K-sized quarter segment of the product XX, or 'tail'
458       XXHiHi     : KSeg  renames XXHi( XXHi'First + K   .. XXHi'Last     );
459       
460       -- Carry from addition of HH and LL terms; borrow from subtraction of DD
461       C          : WBool;
462       
463       -- Barring a cosmic ray, 0 <= TC <= 2
464       subtype TailCarry is Word range 0 .. 2;
465       
466       -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
467       TC         : TailCarry := 0;
468       
469       -- Barring a cosmic ray, the tail ripple will NOT overflow.
470       FinalCarry : WZeroOrDie := 0;
471       
472    begin
473       
474       -- Recurse: LL := XLo^2
475       FZ_Square_Unbuffered(XLo, LL);
476       
477       -- Recurse: HH := XHi^2
478       FZ_Square_Unbuffered(XHi, HH);
479       
480       -- Dx := |XLo - XHi| , and we don't care about the borrow
481       FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
482       
483       -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx)
484       FZ_Square_Unbuffered(Dx, DD);
485       
486       -- XX := LL + 2^(2 * K * Bitness) * HH
487       XXLo := LL;
488       XXHi := HH;
489       
490       -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator.
491       FZ_Add_D(X => XXMid, Y => HH, Overflow => TC);
492       
493       -- XX += 2^(K * Bitness) * LL, ...
494       FZ_Add_D(X => XXMid, Y => LL, Overflow => C);
495       
496       -- ... add the carry from LL addition into the Tail Carry accumulator.
497       TC := TC + C;
498       
499       -- XX -= 2^(K * Bitness) * DD
500       FZ_Sub_D(X         => XXMid, -- Because DD is always positive,
501                Y         => DD,    -- this term is always subtractive.
502                Underflow => C); -- C is the borrow from DD term subtraction
503       
504       -- Get final Tail Carry for the ripple by subtracting DD term's borrow
505       TC := TC - C;
506       
507       -- Ripple the Tail Carry into the tail.
508       FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry);
509       
510    end Sqr_Karatsuba;
511    -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
512    
513    
514    -- Squaring. (CAUTION: UNBUFFERED)
515    procedure FZ_Square_Unbuffered(X     : in  FZ;
516                                   XX    : out FZ) is
517    begin
518       
519       if X'Length <= Sqr_Karatsuba_Thresh then
520          
521          -- Base case:
522          FZ_Sqr_Comba(X, XX);
523          
524       else
525          
526          -- Recursive case:
527          Sqr_Karatsuba(X, XX);
528          
529       end if;
530       
531    end FZ_Square_Unbuffered;
532    
533    
534    -- Squaring. Preserves the inputs.
535    procedure FZ_Square_Buffered(X     : in  FZ;
536                                 XX_Lo : out FZ;
537                                 XX_Hi : out FZ) is
538       
539       -- Product buffer.
540       P : FZ(1 .. 2 * X'Length);
541       
542    begin
543       
544       FZ_Square_Unbuffered(X, P);
545       
546       XX_Lo := P(P'First            .. P'First + X'Length - 1);
547       XX_Hi := P(P'First + X'Length .. P'Last);
548       
549    end FZ_Square_Buffered;
550    
551 end FZ_Sqr;

-
+ EB5CBE08505A865FDBEC2D54434B3C1A2460D1BBF78B0CC90499B198B57BE364844BAA8A8921056A13B6F0EBC35C281E5021D7D60D931FF42F9BBCA8EF253575
ffa/libffa/fz_sqr.ads

(0 . 0)(1 . 53)
556 ------------------------------------------------------------------------------
557 ------------------------------------------------------------------------------
558 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
559 --                                                                          --
560 -- (C) 2018 Stanislav Datskovskiy ( www.loper-os.org )                      --
561 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
562 --                                                                          --
563 -- You do not have, nor can you ever acquire the right to use, copy or      --
564 -- distribute this software ; Should you use this software for any purpose, --
565 -- or copy and distribute it to anyone or in any manner, you are breaking   --
566 -- the laws of whatever soi-disant jurisdiction, and you promise to         --
567 -- continue doing so for the indefinite future. In any case, please         --
568 -- always : read and understand any software ; verify any PGP signatures    --
569 -- that you use - for any purpose.                                          --
570 --                                                                          --
571 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
572 ------------------------------------------------------------------------------
573 ------------------------------------------------------------------------------
574 
575 with FZ_Type; use FZ_Type;
576 
577 
578 package FZ_Sqr is
579    
580    pragma Pure;
581    
582    -- Karatsuba Threshhold - at or below this many Words, we use Comba mult.
583    Sqr_Karatsuba_Thresh : constant Indices := 8;
584    
585    -- Square. (CAUTION: UNBUFFERED)
586    procedure FZ_Square_Unbuffered(X     : in  FZ;
587                                   XX    : out FZ);
588    pragma Inline_Always(FZ_Square_Unbuffered);
589    
590    -- Comba's squaring. (CAUTION: UNBUFFERED)
591    procedure FZ_Sqr_Comba(X     : in  FZ;
592                           XX    : out FZ);
593    pragma Inline_Always(FZ_Sqr_Comba);
594    
595    -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
596    procedure Sqr_Karatsuba(X  : in  FZ;
597                            XX : out FZ)
598      with Pre => XX'Length = 2 * X'Length and
599      X'Length mod 2 = 0;
600    -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
601    
602    -- Squaring. Preserves the inputs.
603    procedure FZ_Square_Buffered(X     : in  FZ;
604                                 XX_Lo : out FZ;
605                                 XX_Hi : out FZ);
606    pragma Inline_Always(FZ_Square_Buffered);
607    
608 end FZ_Sqr;

- B37749E920BE9F2CA06679BA00C8D924C0E0D9F309921DDD36096A4770B6380DDD4DEB9A56A84F535A8D02DEB2E379C13C45B2CA40045E8BC4B230BD1C1F3EF5
+ F79F6F0C2069D6D35E407CA4BDAB3F1B1F90CB9CA55664E64E73E7130D03AD3056C62E0DEEAC005DCB3728F3FF5CA1357AE608277F8B062BC610F7459B93E561
ffa/libffa/w_mul.adb

(134 . 4)(134 . 46)
613       
614    end Mul_Word;
615    
616    ---------------------------------------------------------------------------
617    -- LET A CURSE FALL FOREVER on the authors of GCC, and on the Ada committee,
618    -- neither of whom saw it fit to decree a primitive which returns both
619    -- upper and lower halves of an iron MUL instruction's result. Consequently,
620    -- portable Mul_Word demands ~four~ MULs (and several additions and shifts);
621    -- while portable Sqr_Word demands ~three~ MULs (and likewise adds/shifts.)
622    -- If it were not for their idiocy, BOTH routines would weigh 1 CPU instr.!
623    ---------------------------------------------------------------------------
624    
625    -- Carry out X*X squaring, return lower word XX_LW and upper word XX_HW.
626    procedure Sqr_Word(X       : in  Word;
627                       XX_LW   : out Word;
628                       XX_HW   : out Word) is
629       
630       -- Bottom half of multiplicand X
631       XL : constant HalfWord := BottomHW(X);
632       
633       -- Top half of multiplicand X
634       XH : constant HalfWord := TopHW(X);
635       
636       -- XL^2
637       LL : constant Word := Mul_HalfWord_Iron(XL, XL);
638       
639       -- XL * XH
640       LH : constant Word := Mul_HalfWord_Iron(XL, XH);
641       
642       -- XH^2
643       HH : constant Word := Mul_HalfWord_Iron(XH, XH);
644       
645       -- Carry
646       CL : constant Word := TopHW(TopHW(LL) + Shift_Left(BottomHW(LH), 1));
647       
648    begin
649       
650       -- Get the bottom half of the Product:
651       XX_LW := LL + Shift_Left(LH, HalfBitness + 1);
652       
653       -- Get the top half of the Product:
654       XX_HW := HH + Shift_Left(TopHW(LH), 1) + CL;
655       
656    end Sqr_Word;
657    
658 end W_Mul;

- 0D60E63CF1527DE3C76D96FE3F58151B0E02814EEF0FBA0D7F4AB5E28B938FFE0430D3B05B378695D942217110D323EB41934C818B187F5EE50D6C402CA35C57
+ CBC2184B13F61F3540BAA6350673BC3735E29C3B9F4EA9B2EBCD7525D75A51CC9A11B84FDF5F92B61661DAF3607643D6A935CB2113F8F5EDD0AE46D37F41BB9F
ffa/libffa/w_mul.ads

(52 . 4)(52 . 10)
663                       XY_LW : out Word; XY_HW : out Word);
664    pragma Inline_Always(Mul_Word);
665    
666    -- Carry out X*X squaring, return lower word XX_LW and upper word XX_HW.
667    procedure Sqr_Word(X       : in  Word;
668                       XX_LW   : out Word;
669                       XX_HW   : out Word);
670    pragma Inline_Always(Sqr_Word);
671    
672 end W_Mul;

- C84E7E88846A8C51CCD53F1B704B66F7E23107FD56DA9DBD5054A55DBA963E730A9F5A8E62846D5A451B3BBAE26B976746547DF526689AB7548B261FE6B4FB2D
+ 864714838C024EE2B0F4A2F8B3FCF5CCAE3B055429B3E1763FDFB596B9115BF964C95214674E8EEA859ADA4CAF41691362DF3083CB4EF25DD218BC9237F28DFA
ffa/libffa/words.ads

(49 . 4)(49 . 7)
677    -- When we must refer to individual bit positions of a machine word:
678    subtype WBit_Index is Natural range 0 .. Bitness - 1;
679    
680    -- For when a computed Word is mandatorily expected to equal zero:
681    subtype WZeroOrDie is Word range 0 .. 0;
682    
683 end Words;