------------------------------------------------------------------------------
------------------------------------------------------------------------------
-- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
--                                                                          --
-- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org )                      --
-- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
--                                                                          --
-- You do not have, nor can you ever acquire the right to use, copy or      --
-- distribute this software ; Should you use this software for any purpose, --
-- or copy and distribute it to anyone or in any manner, you are breaking   --
-- the laws of whatever soi-disant jurisdiction, and you promise to         --
-- continue doing so for the indefinite future. In any case, please         --
-- always : read and understand any software ; verify any PGP signatures    --
-- that you use - for any purpose.                                          --
--                                                                          --
-- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
------------------------------------------------------------------------------

-----------------------------------------------------------------------------
--          BEFORE YOU EVEN *THINK* ABOUT MODIFYING THIS PROGRAM:          --
-----------------------------------------------------------------------------
--                                    `dMMd`   +NMMMMMMMNo                 --
--                                 .dM++Md..oNMMMMMMmo`                    --
--                                 /mM+  +MmmMMMMMMNo.                     --
--                               /NM+    +MMMMMMNo`     /                  --
--                              /Nd-   `sNMMMMMy.-oohNNm`                  --
--                            `yNd-   -yMMMMMMMNNNMMMMs.                   --
--                            hMd::. -mMMMMMdNMMMMMMm+`                    --
--                          :hNs.`.:yNyyyo--/sNMMMMy`                      --
--                         -o..    `..        `.sNh`                       --
--                       ..`RRR   EEE   AA  DDD  !+:`                      --
--                     `:   R  R  E    A  A D  D ! .o-                     --
--                    .s    RRR   EEE  AAAA D  D !  .:`                    --
--                    .. `` R  R  E    A  A D  D     ys.                   --
--                   -h  /: R   R EEE  A  A DDD  !/  :mm-                  --
--                  -mm  `/-    THE PROOFS!!!    -y   sMm-                 --
--                 -mNy `++` YES THAT MEANS YOU! .s. .`-Nm-                --
--               `oNN-`:/y``:////:`       `-////``-o.+. -NNo`              --
--              `oNN: `+:::hNMMMMNyo.   `smNMMMMmy`++    :NNo`             --
--             `oNy-   .: sMMMMMMMMM:   -MMMMMMMMMs/s.    -yNh-            --
--            -dNy.   `s. sMMMMMMMmo.----mMMMMMMMNo `.`    .yMd-           --
--           .dmo.    `o` /mNNNmyo.`sNMMy.+ymNNNNh  `-`     .omd.          --
--          .mN/       -o` .---.  `oNMNMNs  .----. -/.        /Nm.         --
--         +mN/        .hhs:.. `  .hMN-MMy   ` `.-+-`          /Nm+        --
--        +NN:        :hMMMs/m`d   -y- dy    -`:/y              :NN+       --
--       +Nd:    /: `hMMMmm/ y:Ns::.`````.:oh--                  :dNs.     --
--     .sNh.    .h+:hMMMy./-  -yMMMyyod+dssMM:. `:                .hMh.    --
--    .hMy.     +MNMMMMh`   `  `yNMhmsNsmhNh:   /`                  +Mh.   --
--   -hN+      -dMMMMMNso+- :s   .ymmNMmyh+-   +                     +Nh-  --
--  `MN+       /MMMMMMh:-    :-      :: :    .+                       +NM` --
--  `Md///////+mMMMMys////////sh/-         -yy/////////////////////////dM` --
--   -ssssssssymssssssssssssssssso-      .+ossssssssssssssssssssssssssss-  --
--                                                                         --
--Ch. 14A: Barrett's Modular Reduction:   http://www.loper-os.org/?p=2842  --
--Ch. 14A-Bis: Barrett's Physical Bounds: http://www.loper-os.org/?p=2875  --
--                                                                         --
-----------------------------------------------------------------------------

with W_Pred;   use W_Pred;
with W_Shifts; use W_Shifts;
with FZ_Basic; use FZ_Basic;
with FZ_Shift; use FZ_Shift;
with FZ_Arith; use FZ_Arith;
with FZ_Mul;   use FZ_Mul;
with FZ_LoMul; use FZ_LoMul;
with FZ_Measr; use FZ_Measr;
with FZ_QShft; use FZ_QShft;


package body FZ_Barr is
   
   -- Prepare the precomputed Barrettoid corresponding to a given Modulus
   procedure FZ_Make_Barrettoid(Modulus    : in  FZ;
                                Result     : out Barretoid) is
      
      -- Length of Modulus and Remainder
      Lm : constant Indices := Modulus'Length;
      
      -- Remainder register, starts as zero
      Remainder : FZ(1 .. Lm) := (others => 0);
      
      -- Length of Quotient, with an extra Word for top bit (if Degenerate)
      Lq : constant Indices := (2 * Lm) + 1;
      
      -- Valid indices into Quotient, using the above
      subtype Quotient_Index is Word_Index range 1 .. Lq;
      
      -- The Quotient we need, i.e. 2^(2 * ModulusBitness) / Modulus
      Quotient : FZ(Quotient_Index);
      
      -- Permissible 'cuts' for the Slice operation
      subtype Divisor_Cuts is Word_Index range 2 .. Lm;
      
      -- Current bit of Pseudo-Dividend; high bit is 1, all others 0
      Pb  : WBool := 1;
      
      -- Performs Restoring Division on a given segment
      procedure Slice(Index : Quotient_Index;
                      Cut   : Divisor_Cuts;
                      Bits  : Positive) is
      begin
         
         declare
            
            -- Borrow, from comparator
            C   : WBool;
            
            -- Left-Shift Overflow
            LsO : WBool;
            
            -- Current cut of Remainder register
            Rs  : FZ renames Remainder(1 .. Cut);
            
            -- Current cut of Divisor
            Ds  : FZ renames   Modulus(1 .. Cut);
            
            -- Current Word of Quotient being made, starting from the highest
            W   : Word := 0;
            
            -- Current bit of Quotient (inverted)
            nQb : WBool;
            
         begin
            
            -- For each bit in the current Pseudo-Dividend Word:
            for b in 1 .. Bits loop
               
               -- Advance Rs, shifting in the current Pseudo-Dividend bit:
               FZ_ShiftLeft_O_I(N        => Rs,
                                ShiftedN => Rs,
                                Count    => 1,
                                OF_In    => Pb, -- Current Pseudo-Dividend bit
                                Overflow => LsO);
               
               -- Subtract Divisor-Cut from R-Cut; Underflow goes into C:
               FZ_Sub(X => Rs, Y => Ds, Difference => Rs, Underflow => C);
               
               -- Negation of current Quotient bit
               nQb := C and W_Not(LsO);
               
               -- If C=1, the subtraction underflowed, and we must undo it:
               FZ_Add_Gated(X => Rs, Y => Ds, Sum => Rs,
                            Gate => nQb);
               
               -- Save the bit of Quotient that we have found:
               W := Shift_Left(W, 1) or (1 - nQb); -- i.e. inverse of nQb
               
            end loop;
            
            -- We made a complete Word of the Quotient; save it:
            Quotient(Quotient'Last + 1 - Index) := W; -- Indexed from end
            
         end;
         
      end Slice;
      pragma Inline_Always(Slice);
      
      -- Measure of the Modulus
      ModulusMeasure : constant FZBit_Index := FZ_Measure(Modulus);
      
   begin
      
      -- First, process the high Word of the Pseudo-Dividend:
      Slice(1, 2, 1); -- ... it has just one bit: a 1 at the bottom position
      
      -- Once we ate the top 1 bit of Pseudo-Dividend, rest of its bits are 0
      Pb := 0;
      
      -- Process the Modulus-sized segment below the top Word:
      for i in 2 .. Lm - 1 loop
         
         Slice(i, i + 1, Bitness); -- stay ahead by a word to handle carry
         
      end loop;
      
      -- Process remaining Words:
      for i in Lm .. Lq loop
         
         Slice(i, Lm, Bitness);
         
      end loop;
      
      -- Record the Quotient (i.e. the Barrettoid proper, Bm)
      Result.B                    := Quotient(Result.B'Range);
      
      -- Record whether we have the Degenerate Case (1 iff Modulus = 1)
      Result.Degenerate           := W_NZeroP(Quotient(Lq));
      
      -- Record a copy of the Modulus, extended with zero Word:
      Result.ZXM(Modulus'Range)   := Modulus;
      Result.ZXM(Result.ZXM'Last) := 0;
      
      -- Record the parameter Jm:
      Result.J                    := ModulusMeasure - 1;
      
      -- Wm - Jm
      Result.ZSlide :=
        FZBit_Index(Bitness * Modulus'Length) - ModulusMeasure + 1;
      
   end FZ_Make_Barrettoid;
   
   
   -- Reduce X using the given precomputed Barrettoid.
   procedure FZ_Barrett_Reduce(X          : in     FZ;
                               Bar        : in     Barretoid;
                               XReduced   : in out FZ) is
      
      -- Wordness of X, the quantity being reduced
      Xl      : constant Indices := X'Length;
      
      -- Wordness of XReduced (result), one-half of Xl, and same as of Modulus
      Ml      : constant Indices := XReduced'Length; -- i.e. # of Words in Wm.
      
      -- The Modulus we will reduce X by
      Modulus : FZ renames Bar.ZXM(1 .. Ml);
      
      -- Shifted X
      Xs      : FZ(X'Range);
      
      -- Z := Xs * Bm (has twice the length of X)
      Z       : FZ(1 .. 2 * Xl);
      
      -- Upper 3Wm-bit segment of Z that gets shifted to form Zs
      ZHi     : FZ renames   Z(Ml       + 1  ..  Z'Last       );
      
      -- Middle 2Wm-bit segment of Z, that gets multiplied by M to form Q
      Zs      : FZ renames   Z(Z'First  + Ml ..  Z'Last  - Ml );
      
      -- Sub-terms of the Zs * M multiplication:
      ZsLo    : FZ renames  Zs(Zs'First      .. Zs'Last  - Ml );
      ZsHi    : FZ renames  Zs(Zs'First + Ml .. Zs'Last       );
      ZsHiM   : FZ(1 .. Ml);
      
      -- Q := Modulus * Zs, i.e. floor(X / M)*M + E*M
      Q       : FZ(1 .. Xl);
      QHi     : FZ renames   Q(Q'First  + Ml ..  Q'Last       );
      
      -- R is made one Word longer than Modulus (see proof re: why)
      Rl      : constant Indices := Ml + 1;
      
      -- Reduction estimate, overshot by 0, 1, or 2 multiple of Modulus
      R       : FZ(1 .. Rl);
      
      -- Scratch for the outputs of the gated subtractions
      S       : FZ(1 .. Rl);
      
      -- Borrow from the gated subtractions
      C       : WBool;
      
      -- Barring cosmic ray, no underflow can take place in (4) and (5)
      NoCarry : WZeroOrDie := 0;
      
   begin
      
      -- Result is initially zero (and will stay zero if Modulus = 1)
      FZ_Clear(XReduced);
      
      -- (1) Xs := X >> Jm
      FZ_Quiet_ShiftRight(N => X, ShiftedN => Xs, Count => Bar.J);
      
      -- (2) Z  := Xs * Bm
      FZ_Multiply_Unbuffered(X => Bar.B, Y => Xs, XY => Z);
      
      -- (3) Zs := Z >> 2Wm - Jm (already thrown lower Wm, so only Wm - Jm now)
      FZ_Quiet_ShiftRight(N => ZHi, ShiftedN => ZHi, Count => Bar.ZSlide);
      
      -- (4) Q  := Zs * M (this is split into three operations, see below)
      
      -- ... first, Q := ZsLo * M,
      FZ_Multiply_Unbuffered(ZsLo, Modulus, Q);
      
      -- ... then, compute ZsHiM := ZsHi * M,
      FZ_Low_Multiply_Unbuffered(ZsHi, Modulus, ZsHiM);
      
      -- ... finally, add ZsHiM to upper half of Q.
      FZ_Add_D(X => QHi, Y => ZsHiM, Overflow => NoCarry);
      
      -- (5) R  := X - Q (we only need Rl-sized segments of X and Q here)
      FZ_Sub(X => X(1 .. Rl), Y => Q(1 .. Rl),
             Difference => R, Underflow => NoCarry);
      
      -- (6) S1 := R - M, C1 := Borrow (1st gated subtraction of Modulus)
      FZ_Sub(X => R, Y => Bar.ZXM, Difference => S, Underflow => C);
      
      -- (7) R := {C1=0 -> S1, C1=1 -> R}
      FZ_Mux(X => S, Y => R, Result => R, Sel => C);
      
      -- (8) S2 := R - M, C2 := Borrow (2nd gated subtraction of Modulus)
      FZ_Sub(X => R, Y => Bar.ZXM, Difference => S, Underflow => C);
      
      -- (9) R := {C2=0 -> S2, C2=1 -> R}
      FZ_Mux(X => S, Y => R, Result => R, Sel => C);
      
      -- (10) RFinal := {DM=0 -> R, DM=1 -> 0} (handle the degenerate case)
      FZ_Mux(X => R(1 .. Ml), Y => XReduced, Result => XReduced,
             Sel => Bar.Degenerate); -- If Modulus = 1, then XReduced is 0.
      
   end FZ_Barrett_Reduce;
   
end FZ_Barr;