------------------------------------------------------------------------------ ------------------------------------------------------------------------------ -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. -- -- -- -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) -- -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html -- -- -- -- You do not have, nor can you ever acquire the right to use, copy or -- -- distribute this software ; Should you use this software for any purpose, -- -- or copy and distribute it to anyone or in any manner, you are breaking -- -- the laws of whatever soi-disant jurisdiction, and you promise to -- -- continue doing so for the indefinite future. In any case, please -- -- always : read and understand any software ; verify any PGP signatures -- -- that you use - for any purpose. -- -- -- -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . -- ------------------------------------------------------------------------------ ------------------------------------------------------------------------------ with Words; use Words; with Word_Ops; use Word_Ops; with W_Mul; use W_Mul; with FZ_Arith; use FZ_Arith; package body FZ_Sqr is -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED) procedure FZ_Sqr_Comba(X : in FZ; XX : out FZ) is -- Words in each multiplicand L : constant Word_Index := X'Length; -- Length of Product, i.e. double the length of X LP : constant Word_Index := 2 * L; -- 3-word Accumulator A2, A1, A0 : Word := 0; Lo, Hi : Word; -- Output of WxW multiply/square -- Type for referring to a column of XX subtype ColXX is Word_Index range 0 .. LP - 1; procedure Accum is C : WBool; -- Carry for the Accumulator addition Sum : Word; -- Sum for Accumulator addition begin -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0: -- A0 += Lo; C := Carry Sum := A0 + Lo; C := W_Carry(A0, Lo, Sum); A0 := Sum; -- A1 += Hi + C; C := Carry Sum := A1 + Hi + C; C := W_Carry(A1, Hi, Sum); A1 := Sum; -- A2 += A2 + C A2 := A2 + C; end Accum; pragma Inline_Always(Accum); procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is begin for j in From .. To loop -- Hi:Lo := j-th * (N - j)-th Word, and then, Mul_Word(X(X'First + j), X(X'First - j + N), Lo, Hi); Accum; Accum; -- Accum += 2 * (Hi:Lo) end loop; end SymmDigits; pragma Inline_Always(SymmDigits); procedure SqrDigit(N : in ColXX) is begin Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit) Accum; -- Accum += Hi:Lo end SqrDigit; pragma Inline_Always(SqrDigit); procedure HaveDigit(N : in ColXX) is begin -- Save the Nth (indexed from zero) word of XX: XX(XX'First + N) := A0; -- Right-Shift the Accumulator by one Word width: A0 := A1; A1 := A2; A2 := 0; end HaveDigit; pragma Inline_Always(HaveDigit); -- Compute the Nth (indexed from zero) column of the Product procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is begin -- The branch pattern depends only on FZ wordness if N mod 2 = 0 then -- If we're doing an EVEN-numbered column: SymmDigits(N, U, V - 1); -- Stop before V: it is the square case SqrDigit(V); -- Compute the square case at V else -- If we're doing an ODD-numbered column: SymmDigits(N, U, V); -- All of them are the symmetric case end if; -- After either even or odd column: HaveDigit(N); -- We have the N-th digit of the result. end Col; pragma Inline_Always(Col); begin -- First col always exists: SqrDigit(ColXX'First); HaveDigit(ColXX'First); -- Compute the lower half of the Product: for i in 1 .. L - 1 loop Col(i, 0, i / 2); end loop; -- Compute the upper half (sans last Word) of the Product: for i in L .. LP - 2 loop Col(i, i - L + 1, i / 2); end loop; -- The very last Word of the Product: XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0; end FZ_Sqr_Comba; -- Karatsuba's Squaring. (CAUTION: UNBUFFERED) procedure Sqr_Karatsuba(X : in FZ; XX : out FZ) is -- L is the wordness of X. Guaranteed to be a power of two. L : constant Word_Count := X'Length; -- An 'LSeg' is the same length as X. subtype LSeg is FZ(1 .. L); -- K is HALF of the length of X. K : constant Word_Index := L / 2; -- A 'KSeg' is the same length as HALF of X. subtype KSeg is FZ(1 .. K); -- The three L-sized variables of the product equation, i.e.: -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH LL, DD, HH : LSeg; -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2 Dx : KSeg; -- IGNORED Subtraction borrow, sign of (XL - XH) Cx : WBool; -- given as DD := Dx^2, DD is always positive pragma Unreferenced(Cx); -- Bottom and Top K-sized halves of X. XLo : KSeg renames X( X'First .. X'Last - K ); XHi : KSeg renames X( X'First + K .. X'Last ); -- L-sized middle segment of the product XX (+/- K from the midpoint). XXMid : LSeg renames XX( XX'First + K .. XX'Last - K ); -- Bottom and Top L-sized halves of the product XX. XXLo : LSeg renames XX( XX'First .. XX'Last - L ); XXHi : LSeg renames XX( XX'First + L .. XX'Last ); -- Topmost K-sized quarter segment of the product XX, or 'tail' XXHiHi : KSeg renames XXHi( XXHi'First + K .. XXHi'Last ); -- Carry from addition of HH and LL terms; borrow from subtraction of DD C : WBool; -- Barring a cosmic ray, 0 <= TC <= 2 subtype TailCarry is Word range 0 .. 2; -- Tail-Carry accumulator, for the final ripple-out into XXHiHi TC : TailCarry := 0; -- Barring a cosmic ray, the tail ripple will NOT overflow. FinalCarry : WZeroOrDie := 0; begin -- Recurse: LL := XLo^2 FZ_Square_Unbuffered(XLo, LL); -- Recurse: HH := XHi^2 FZ_Square_Unbuffered(XHi, HH); -- Dx := |XLo - XHi| , and we don't care about the borrow FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx); -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx) FZ_Square_Unbuffered(Dx, DD); -- XX := LL + 2^(2 * K * Bitness) * HH XXLo := LL; XXHi := HH; -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator. FZ_Add_D(X => XXMid, Y => HH, Overflow => TC); -- XX += 2^(K * Bitness) * LL, ... FZ_Add_D(X => XXMid, Y => LL, Overflow => C); -- ... add the carry from LL addition into the Tail Carry accumulator. TC := TC + C; -- XX -= 2^(K * Bitness) * DD FZ_Sub_D(X => XXMid, -- Because DD is always positive, Y => DD, -- this term is always subtractive. Underflow => C); -- C is the borrow from DD term subtraction -- Get final Tail Carry for the ripple by subtracting DD term's borrow TC := TC - C; -- Ripple the Tail Carry into the tail. FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry); end Sqr_Karatsuba; -- CAUTION: Inlining prohibited for Sqr_Karatsuba ! -- Squaring. (CAUTION: UNBUFFERED) procedure FZ_Square_Unbuffered(X : in FZ; XX : out FZ) is begin if X'Length <= Sqr_Karatsuba_Thresh then -- Base case: FZ_Sqr_Comba(X, XX); else -- Recursive case: Sqr_Karatsuba(X, XX); end if; end FZ_Square_Unbuffered; -- Squaring. Preserves the inputs. procedure FZ_Square_Buffered(X : in FZ; XX_Lo : out FZ; XX_Hi : out FZ) is -- Product buffer. P : FZ(1 .. 2 * X'Length); begin FZ_Square_Unbuffered(X, P); XX_Lo := P(P'First .. P'First + X'Length - 1); XX_Hi := P(P'First + X'Length .. P'Last); end FZ_Square_Buffered; end FZ_Sqr;