-- S-expression parser implementation.
with Ada.Text_IO; use Ada.Text_IO;

package body Parser is
   -- Predicates on input characters
   WhitespaceP : constant array (Character) of Boolean :=
     (' ' | ASCII.Nul | ASCII.HT | ASCII.CR | ASCII.LF => True, others => False);
   DigitP : constant array (Character) of Boolean :=
     ('0' .. '9' => True, others => False);
   
   -- Reserved characters, although '.' and '#' can technically speaking
   --  be part of symbol names.
   ReservedP : constant array (Character) of Boolean :=
     ('(' | ')' | '#' | '.' | ''' => True, others => False);
   
   C : Character := ' ';
   I : Long_Integer;
   
   -- Given a string, check if all its characters are digits. If so,
   --  accumulate them in I.
   procedure Parse_Integer(Str : in MemPtr;
                           Success : out Boolean;
                           I : out Long_Integer) is
      P : MemPtr := Str;
      PC : MemPtr;
      C : Character;
      Negative : Boolean := False;
      Result : Long_Integer;
   begin
      Result := 0;
      Success := True;
      
      pragma Assert (P /= 0, "Parse_Integer received a NIL string!");
      
      -- Check for leading +/- signs first
      PC := Get_Car(AMem(P));
      C := Get_Char(AMem(PC));
      if C = '-' then
         Negative := True;
      end if;
      if C = '-' or C = '+' then
         P := Get_Cdr(AMem(P));
         -- If we don't have other characters after + or -, then this is
         --  not a number.
         if P = 0 then
            Success := False;
            return;
         end if;
      end if;
      
      while P /= 0 loop
         -- Check list and its car.
         PC := Get_Car(AMem(P));
         C := Get_Char(AMem(PC));
         
         -- Do we have a non-digit?
         if not DigitP(C) then
            Success := False;
            exit;
         end if;
         
         -- If we're still in the all-digits game, gather them, hoping
         --  we get a number.
         Result := Result * 10 + (Character'Pos(C) - Character'Pos('0'));
         
         -- Move on.
         P := Get_Cdr(AMem(P));
      end loop;

      if Negative then
         Result := -Result;
      end if;
      
      -- Set the output value if and only if we succeeded.
      if Success then
         I := Result;
      end if;
   end Parse_Integer;
   
   procedure Eat_Whitespace is
   begin
      while WhitespaceP(C) loop
         Get(C);
      end loop;
   end Eat_Whitespace;
   
   -- Parse a list of characters that may be a symbol or an integer.
   procedure Parse_Atom(P : out MemPtr) is
      CharP, TempP : MemPtr;
      ListP : MemPtr := 0;
      ListTailP : MemPtr := 0;
   begin
      pragma Assert (not ReservedP(C),
                     "Parse_Atom received a reserved character!");
      
      loop
         -- Get a new char cell
         Alloc_Char(C, CharP);
         -- Save old list tail
         TempP := ListTailP;
         -- Cons cell to be appended to the list
         Alloc_Cons(CharP, 0, ListTailP);
         
         -- Does the old list tail point to a cons cell? If so, set the
         --  old cdr to the new list tail, otherwise set the list
         --  pointer to the tail.
         if TempP /= 0 then
            Set_Cdr(AMem(TempP), ListTailP);
         else
            ListP := ListTailP;
         end if;
         
         -- Get a new character and verify the exit condition
         Get(C);
         exit when WhitespaceP(C);
         exit when C = '(' or C = ')';
      end loop;
      -- Assign output parameter to our list.
      P := ListP;
   end Parse_Atom;
   
   -- Parse hash-prepended expression.
   procedure Parse_Hash(P : out MemPtr; TID : out TokenID) is
   begin
      pragma Assert (C = '#',
                     "Parse_Hash does not begin with a hash.");
      
      -- We support the following hash-prepended expressions:
      --
      -- . booleans (#t or #f)
      -- . characters (e.g. #\a, #\b, ...)
      
      Get(C);
      
      if (C = 't') then -- true
         Alloc_Bool(True, P);
         TID := Bool_Token;
      elsif (C = 'f') then -- false
         Alloc_Bool(False, P);
         TID := Bool_Token;
      elsif (C = '\') then -- char
         -- XXX should do more elaborate parsing here, e.g. #\space
         --  etc.
         Get(C);
         Alloc_Char(C, P);
         TID := Char_Token;
      else -- unknown
         pragma Assert (False, "Unknown hash expression.");
      end if;
      
      -- Emulate a space for future calls of Parse
      C := ' ';
   end Parse_Hash;
   
   -- Parse cons objects, i.e. lists and pairs.
   procedure Parse_Cons(P : out MemPtr) is
      ListP : MemPtr := 0;
      ListTailP : MemPtr := 0;
      TID : TokenID;
   begin
      pragma Assert (C = '(', "Parse_Cons should receive an open paren.");
      
      -- Emulate a space for the first call to Parse
      C := ' ';
      
      -- Iterate through the list elements and add them to the list.
      loop
         declare
            ElemP, TempP : MemPtr;
         begin
            -- Parse current element
            Parse(ElemP, TID);
            
            -- Exit conditions
            exit when TID = ListE_Token; -- list end
            exit when TID = ListP_Token; -- pair marker
            
            -- Save old list tail
            TempP := ListTailP;
            -- Add new element to the list: if the TempP is NIL, then we
            --  have a fresh list that we can populate with the new
            --  element.
            Alloc_Cons(ElemP, 0, ListTailP);
            if TempP = 0 then
               ListP := ListTailP;
            else
               Set_Cdr(AMem(TempP), ListTailP);
            end if;
         end;
      end loop;
      
      -- If we received a pair marker, then we have one more element to
      --  parse.
      if TID = ListP_Token then
         declare
            ElemP : MemPtr;
         begin
            pragma Assert (ListTailP /= 0, "Syntax error parsing pair.");
            -- Emulate space
            C := ' ';
            -- Parse element
            Parse(ElemP, TID);
            
            pragma Assert(TID = Bool_Token or TID = Num_Token or
                            TID = List_Token or TID = Char_Token or
                            TID = Symbol_Token,
                          "Syntax error parsing pair.");
            -- Point cdr of list tail to element
            Set_Cdr(AMem(ListTailP), ElemP);
         end;
      elsif TID /= ListE_Token then
         Put_Line("Impossible to get here!");
         P := 0;
         return;
      end if;
      
      -- Set the output and emulate a space for whatever comes next.
      P := ListP;
      C := ' ';
   end Parse_Cons;
   
   -- Parse quoted S-expression.
   procedure Parse_Quoted(P : out MemPtr; TID : out TokenID) is
      ExprP : MemPtr;
   begin
      pragma Assert (C = ''', "Parse_Quoted not given a quote");
      
      -- Emulate a space and parse whatever comes after the quote.
      C := ' ';
      Parse(ExprP, TID);
      
      -- If the result of Parse is an atomic constant, we return it as
      --  it is. Otherwise we wrap whatever expr we get in a quote,
      --  i.e. (quote expr).
      case TID is
         when Bool_Token | Num_Token | Char_Token =>
            P := ExprP;
         when others =>
            -- (expr)
            Alloc_Cons(ExprP, 0, ExprP);
            -- (quote expr)
            Alloc_Cons(Quote_Sym, ExprP, ExprP);
            -- Assign output
            P := ExprP;
            TID := Quoted_Token;
      end case;      
   end Parse_Quoted;
   
   -- Parse any S-expression
   procedure Parse(P : out MemPtr; TID : out TokenID) is
      TempTID : TokenID := Error_Token;
      AtomP : MemPtr;
      TempP1, TempP2, TempP3 : MemPtr := 0;
      Is_Number : Boolean;
   begin
      P := 0;
      TID := Error_Token;
      
      Eat_Whitespace;
      
      -- Not (, ), #, . or '
      if not ReservedP(C) then -- atom
         -- Read atom from stdin
         Parse_Atom(AtomP);
         
         -- Try to parse a number; if we succeed, then return it, else
         --  return the atom as an interned symbol.
         Parse_Integer(AtomP, Is_Number, I);
         if Is_Number then
            Alloc_Fixnum(I, P);
            TID := Num_Token;
         else
            -- Alloc symbol cell
            Lookup_Or_Create_Symbol(AtomP, P);
            TID := Symbol_Token;
         end if;
      elsif C = '#' then -- hash token
         Parse_Hash(P, TID);
      elsif C = '(' then -- list/pair
         Parse_Cons(P);
         -- Instantiate TID
         TID := List_Token;
      elsif C = ')' then -- list/pair end
         TID := ListE_Token;
      elsif C = '.' then -- pair marker
         TID := ListP_Token;
      elsif C = ''' then -- quote marker
         Parse_Quoted(P, TID);
      end if;
   end Parse;
end Parser;
