-
+ D343C9D00E533B72E6B07F6E9849BFB796DF42AE0E0D376EF7B459C261124311AAB7402EA6E9608BB84DD2037AB837A6C06F9935D371A6D7EFF75A619C054670
adalisp/src/parser.adb
(0 . 0)(1 . 291)
2023 -- S-expression parser implementation.
2024 with Ada.Text_IO; use Ada.Text_IO;
2025
2026 package body Parser is
2027 -- Predicates on input characters
2028 WhitespaceP : constant array (Character) of Boolean :=
2029 (' ' | ASCII.Nul | ASCII.HT | ASCII.CR | ASCII.LF => True, others => False);
2030 DigitP : constant array (Character) of Boolean :=
2031 ('0' .. '9' => True, others => False);
2032
2033 -- Reserved characters, although '.' and '#' can technically speaking
2034 -- be part of symbol names.
2035 ReservedP : constant array (Character) of Boolean :=
2036 ('(' | ')' | '#' | '.' | ''' => True, others => False);
2037
2038 C : Character := ' ';
2039 I : Long_Integer;
2040
2041 -- Given a string, check if all its characters are digits. If so,
2042 -- accumulate them in I.
2043 procedure Parse_Integer(Str : in MemPtr;
2044 Success : out Boolean;
2045 I : out Long_Integer) is
2046 P : MemPtr := Str;
2047 PC : MemPtr;
2048 C : Character;
2049 Negative : Boolean := False;
2050 Result : Long_Integer;
2051 begin
2052 Result := 0;
2053 Success := True;
2054
2055 pragma Assert (P /= 0, "Parse_Integer received a NIL string!");
2056
2057 -- Check for leading +/- signs first
2058 PC := Get_Car(AMem(P));
2059 C := Get_Char(AMem(PC));
2060 if C = '-' then
2061 Negative := True;
2062 end if;
2063 if C = '-' or C = '+' then
2064 P := Get_Cdr(AMem(P));
2065 -- If we don't have other characters after + or -, then this is
2066 -- not a number.
2067 if P = 0 then
2068 Success := False;
2069 return;
2070 end if;
2071 end if;
2072
2073 while P /= 0 loop
2074 -- Check list and its car.
2075 PC := Get_Car(AMem(P));
2076 C := Get_Char(AMem(PC));
2077
2078 -- Do we have a non-digit?
2079 if not DigitP(C) then
2080 Success := False;
2081 exit;
2082 end if;
2083
2084 -- If we're still in the all-digits game, gather them, hoping
2085 -- we get a number.
2086 Result := Result * 10 + (Character'Pos(C) - Character'Pos('0'));
2087
2088 -- Move on.
2089 P := Get_Cdr(AMem(P));
2090 end loop;
2091
2092 if Negative then
2093 Result := -Result;
2094 end if;
2095
2096 -- Set the output value if and only if we succeeded.
2097 if Success then
2098 I := Result;
2099 end if;
2100 end Parse_Integer;
2101
2102 procedure Eat_Whitespace is
2103 begin
2104 while WhitespaceP(C) loop
2105 Get(C);
2106 end loop;
2107 end Eat_Whitespace;
2108
2109 -- Parse a list of characters that may be a symbol or an integer.
2110 procedure Parse_Atom(P : out MemPtr) is
2111 CharP, TempP : MemPtr;
2112 ListP : MemPtr := 0;
2113 ListTailP : MemPtr := 0;
2114 begin
2115 pragma Assert (not ReservedP(C),
2116 "Parse_Atom received a reserved character!");
2117
2118 loop
2119 -- Get a new char cell
2120 Alloc_Char(C, CharP);
2121 -- Save old list tail
2122 TempP := ListTailP;
2123 -- Cons cell to be appended to the list
2124 Alloc_Cons(CharP, 0, ListTailP);
2125
2126 -- Does the old list tail point to a cons cell? If so, set the
2127 -- old cdr to the new list tail, otherwise set the list
2128 -- pointer to the tail.
2129 if TempP /= 0 then
2130 Set_Cdr(AMem(TempP), ListTailP);
2131 else
2132 ListP := ListTailP;
2133 end if;
2134
2135 -- Get a new character and verify the exit condition
2136 Get(C);
2137 exit when WhitespaceP(C);
2138 exit when C = '(' or C = ')';
2139 end loop;
2140 -- Assign output parameter to our list.
2141 P := ListP;
2142 end Parse_Atom;
2143
2144 -- Parse hash-prepended expression.
2145 procedure Parse_Hash(P : out MemPtr; TID : out TokenID) is
2146 begin
2147 pragma Assert (C = '#',
2148 "Parse_Hash does not begin with a hash.");
2149
2150 -- We support the following hash-prepended expressions:
2151 --
2152 -- . booleans (#t or #f)
2153 -- . characters (e.g. #\a, #\b, ...)
2154
2155 Get(C);
2156
2157 if (C = 't') then -- true
2158 Alloc_Bool(True, P);
2159 TID := Bool_Token;
2160 elsif (C = 'f') then -- false
2161 Alloc_Bool(False, P);
2162 TID := Bool_Token;
2163 elsif (C = '\') then -- char
2164 -- XXX should do more elaborate parsing here, e.g. #\space
2165 -- etc.
2166 Get(C);
2167 Alloc_Char(C, P);
2168 TID := Char_Token;
2169 else -- unknown
2170 pragma Assert (False, "Unknown hash expression.");
2171 end if;
2172
2173 -- Emulate a space for future calls of Parse
2174 C := ' ';
2175 end Parse_Hash;
2176
2177 -- Parse cons objects, i.e. lists and pairs.
2178 procedure Parse_Cons(P : out MemPtr) is
2179 ListP : MemPtr := 0;
2180 ListTailP : MemPtr := 0;
2181 TID : TokenID;
2182 begin
2183 pragma Assert (C = '(', "Parse_Cons should receive an open paren.");
2184
2185 -- Emulate a space for the first call to Parse
2186 C := ' ';
2187
2188 -- Iterate through the list elements and add them to the list.
2189 loop
2190 declare
2191 ElemP, TempP : MemPtr;
2192 begin
2193 -- Parse current element
2194 Parse(ElemP, TID);
2195
2196 -- Exit conditions
2197 exit when TID = ListE_Token; -- list end
2198 exit when TID = ListP_Token; -- pair marker
2199
2200 -- Save old list tail
2201 TempP := ListTailP;
2202 -- Add new element to the list: if the TempP is NIL, then we
2203 -- have a fresh list that we can populate with the new
2204 -- element.
2205 Alloc_Cons(ElemP, 0, ListTailP);
2206 if TempP = 0 then
2207 ListP := ListTailP;
2208 else
2209 Set_Cdr(AMem(TempP), ListTailP);
2210 end if;
2211 end;
2212 end loop;
2213
2214 -- If we received a pair marker, then we have one more element to
2215 -- parse.
2216 if TID = ListP_Token then
2217 declare
2218 ElemP : MemPtr;
2219 begin
2220 pragma Assert (ListTailP /= 0, "Syntax error parsing pair.");
2221 -- Emulate space
2222 C := ' ';
2223 -- Parse element
2224 Parse(ElemP, TID);
2225
2226 pragma Assert(TID = Bool_Token or TID = Num_Token or
2227 TID = List_Token or TID = Char_Token or
2228 TID = Symbol_Token,
2229 "Syntax error parsing pair.");
2230 -- Point cdr of list tail to element
2231 Set_Cdr(AMem(ListTailP), ElemP);
2232 end;
2233 elsif TID /= ListE_Token then
2234 Put_Line("Impossible to get here!");
2235 P := 0;
2236 return;
2237 end if;
2238
2239 -- Set the output and emulate a space for whatever comes next.
2240 P := ListP;
2241 C := ' ';
2242 end Parse_Cons;
2243
2244 -- Parse quoted S-expression.
2245 procedure Parse_Quoted(P : out MemPtr; TID : out TokenID) is
2246 ExprP : MemPtr;
2247 begin
2248 pragma Assert (C = ''', "Parse_Quoted not given a quote");
2249
2250 -- Emulate a space and parse whatever comes after the quote.
2251 C := ' ';
2252 Parse(ExprP, TID);
2253
2254 -- If the result of Parse is an atomic constant, we return it as
2255 -- it is. Otherwise we wrap whatever expr we get in a quote,
2256 -- i.e. (quote expr).
2257 case TID is
2258 when Bool_Token | Num_Token | Char_Token =>
2259 P := ExprP;
2260 when others =>
2261 -- (expr)
2262 Alloc_Cons(ExprP, 0, ExprP);
2263 -- (quote expr)
2264 Alloc_Cons(Quote_Sym, ExprP, ExprP);
2265 -- Assign output
2266 P := ExprP;
2267 TID := Quoted_Token;
2268 end case;
2269 end Parse_Quoted;
2270
2271 -- Parse any S-expression
2272 procedure Parse(P : out MemPtr; TID : out TokenID) is
2273 TempTID : TokenID := Error_Token;
2274 AtomP : MemPtr;
2275 TempP1, TempP2, TempP3 : MemPtr := 0;
2276 Is_Number : Boolean;
2277 begin
2278 P := 0;
2279 TID := Error_Token;
2280
2281 Eat_Whitespace;
2282
2283 -- Not (, ), #, . or '
2284 if not ReservedP(C) then -- atom
2285 -- Read atom from stdin
2286 Parse_Atom(AtomP);
2287
2288 -- Try to parse a number; if we succeed, then return it, else
2289 -- return the atom as an interned symbol.
2290 Parse_Integer(AtomP, Is_Number, I);
2291 if Is_Number then
2292 Alloc_Fixnum(I, P);
2293 TID := Num_Token;
2294 else
2295 -- Alloc symbol cell
2296 Lookup_Or_Create_Symbol(AtomP, P);
2297 TID := Symbol_Token;
2298 end if;
2299 elsif C = '#' then -- hash token
2300 Parse_Hash(P, TID);
2301 elsif C = '(' then -- list/pair
2302 Parse_Cons(P);
2303 -- Instantiate TID
2304 TID := List_Token;
2305 elsif C = ')' then -- list/pair end
2306 TID := ListE_Token;
2307 elsif C = '.' then -- pair marker
2308 TID := ListP_Token;
2309 elsif C = ''' then -- quote marker
2310 Parse_Quoted(P, TID);
2311 end if;
2312 end Parse;
2313 end Parser;