-
+ 37B242324EBF0BAB3BC67812F23111C84D70420348F47F1CB617856E9F18A5E11A30D2CD0CEBB802BC435588BFC49AF3A50D8EB24674A795B83102809C52D3AE
tinyscheme/hack.txt
(0 . 0)(1 . 244)
1227
1228 How to hack TinyScheme
1229 ----------------------
1230
1231 TinyScheme is easy to learn and modify. It is structured like a
1232 meta-interpreter, only it is written in C. All data are Scheme
1233 objects, which facilitates both understanding/modifying the
1234 code and reifying the interpreter workings.
1235
1236 In place of a dry description, we will pace through the addition
1237 of a useful new datatype: garbage-collected memory blocks.
1238 The interface will be:
1239
1240 (make-block <n> [<fill>]) makes a new block of the specified size
1241 optionally filling it with a specified byte
1242 (block? <obj>)
1243 (block-length <block>)
1244 (block-ref <block> <index>) retrieves byte at location
1245 (block-set! <block> <index> <byte>) modifies byte at location
1246
1247 In the sequel, lines that begin with '>' denote lines to add to the
1248 code. Lines that begin with '|' are just citations of existing code.
1249 Lines that begin with X denote lines to be removed from the code.
1250
1251 First of all, we need to assign a typeid to our new type. Typeids
1252 in TinyScheme are small integers declared in the scheme_types enum
1253 located near the top of the scheme.c file; it begins with T_STRING.
1254 Add a new entry at the end, say T_MEMBLOCK. Remember to adjust the
1255 value of T_LAST_SYTEM_TYPE when adding new entries. There can be at
1256 most 31 types, but you don't have to worry about that limit yet.
1257
1258 | T_ENVIRONMENT=14,
1259 X T_LAST_SYSTEM_TYPE=14
1260 > T_MEMBLOCK=15,
1261 > T_LAST_SYSTEM_TYPE=15
1262 | };
1263
1264
1265 Then, some helper macros would be useful. Go to where is_string()
1266 and the rest are defined and add:
1267
1268 > INTERFACE INLINE int is_memblock(pointer p) { return (type(p)==T_MEMBLOCK); }
1269
1270 This actually is a function, because it is meant to be exported by
1271 scheme.h. If no foreign function will ever manipulate a memory block,
1272 you can instead define it as a macro:
1273
1274 > #define is_memblock(p) (type(p)==T_MEMBLOCK)
1275
1276 Then we make space for the new type in the main data structure:
1277 struct cell. As it happens, the _string part of the union _object
1278 (that is used to hold character strings) has two fields that suit us:
1279
1280 | struct {
1281 | char *_svalue;
1282 | int _keynum;
1283 | } _string;
1284
1285 We can use _svalue to hold the actual pointer and _keynum to hold its
1286 length. If we couln't reuse existing fields, we could always add other
1287 alternatives in union _object.
1288
1289 We then proceed to write the function that actually makes a new block.
1290 For conformance reasons, we name it mk_memblock
1291
1292 > static pointer mk_memblock(scheme *sc, int len, char fill) {
1293 > pointer x;
1294 > char *p=(char*)sc->malloc(len);
1295 >
1296 > if(p==0) {
1297 > return sc->NIL;
1298 > }
1299 > x = get_cell(sc, sc->NIL, sc->NIL);
1300 >
1301 > typeflag(x) = T_MEMBLOCK|T_ATOM;
1302 > strvalue(x)=p;
1303 > keynum(x)=len;
1304 > memset(p,fill,len);
1305 > return (x);
1306 > }
1307
1308 The memory used by the MEMBLOCK will have to be freed when the cell
1309 is reclaimed during garbage collection. There is a placeholder for
1310 that staff, function finalize_cell(), currently handling strings only.
1311
1312 | static void finalize_cell(scheme *sc, pointer a) {
1313 | if(is_string(a)) {
1314 | sc->free(strvalue(a));
1315 > } else if(is_memblock(a)) {
1316 > sc->free(strvalue(a));
1317 | } else if(is_port(a)) {
1318
1319 There are no MEMBLOCK literals, so we don't concern ourselves with
1320 the READER part (yet!). We must cater to the PRINTER, though. We
1321 add one case more in atom2str().
1322
1323 | } else if (iscontinuation(l)) {
1324 | p = "#<CONTINUATION>";
1325 > } else if (is_memblock(l)) {
1326 > p = "#<MEMORY BLOCK>";
1327 | } else {
1328
1329 Whenever a MEMBLOCK is displayed, it will look like that.
1330 Now, we must add the interface functions: constructor, predicate,
1331 accessor, modifier. We must in fact create new op-codes for the virtual
1332 machine underlying TinyScheme. Since version 1.30, TinyScheme uses
1333 macros and a single source text to keep the enums and the dispatch table
1334 in sync. The op-codes are defined in the opdefines.h file with one line
1335 for each op-code. The lines in the file have six columns between the
1336 starting _OPDEF( and ending ): A, B, C, D, E, and OP.
1337 Note that this file uses unusually long lines to accomodate all the
1338 information; adjust your editor to handle this.
1339
1340 The purpose of the columns is:
1341 - Column A is the name of the subroutine that handles the op-code.
1342 - Column B is the name of the op-code function.
1343 - Columns C and D are the minimum and maximum number of arguments
1344 that are accepted by the op-code.
1345 - Column E is a set of flags that tells the interpreter the type of
1346 each of the arguments expected by the op-code.
1347 - Column OP is used in the scheme_opcodes enum located in the
1348 scheme-private.h file.
1349
1350 Op-codes are really just tags for a huge C switch, only this switch
1351 is broken up in to a number of different opexe_X functions. The
1352 correspondence is made in table "dispatch_table". There, we assign
1353 the new op-codes to opexe_2, where the equivalent ones for vectors
1354 are situated. We also assign a name for them, and specify the minimum
1355 and maximum arity (number of expected arguments). INF_ARG as a maximum
1356 arity means "unlimited".
1357
1358 For reasons of consistency, we add the new op-codes right after those
1359 for vectors:
1360
1361 | _OP_DEF(opexe_2, "vector-set!", 3, 3, TST_VECTOR TST_NATURAL TST_ANY, OP_VECSET )
1362 > _OP_DEF(opexe_2, "make-block", 1, 2, TST_NATURAL TST_CHAR, OP_MKBLOCK )
1363 > _OP_DEF(opexe_2, "block-length", 1, 1, T_MEMBLOCK, OP_BLOCKLEN )
1364 > _OP_DEF(opexe_2, "block-ref", 2, 2, T_MEMBLOCK TST_NATURAL, OP_BLOCKREF )
1365 > _OP_DEF(opexe_2, "block-set!", 1, 1, T_MEMBLOCK TST_NATURAL TST_CHAR, OP_BLOCKSET )
1366 | _OP_DEF(opexe_3, "not", 1, 1, TST_NONE, OP_NOT )
1367
1368 We add the predicate along with the other predicates in opexe_3:
1369
1370 | _OP_DEF(opexe_3, "vector?", 1, 1, TST_ANY, OP_VECTORP )
1371 > _OP_DEF(opexe_3, "block?", 1, 1, TST_ANY, OP_BLOCKP )
1372 | _OP_DEF(opexe_3, "eq?", 2, 2, TST_ANY, OP_EQ )
1373
1374 All that remains is to write the actual code to do the processing and
1375 add it to the switch statement in opexe_2, after the OP_VECSET case.
1376
1377 > case OP_MKBLOCK: { /* make-block */
1378 > int fill=0;
1379 > int len;
1380 >
1381 > if(!isnumber(car(sc->args))) {
1382 > Error_1(sc,"make-block: not a number:",car(sc->args));
1383 > }
1384 > len=ivalue(car(sc->args));
1385 > if(len<=0) {
1386 > Error_1(sc,"make-block: not positive:",car(sc->args));
1387 > }
1388 >
1389 > if(cdr(sc->args)!=sc->NIL) {
1390 > if(!isnumber(cadr(sc->args)) || ivalue(cadr(sc->args))<0) {
1391 > Error_1(sc,"make-block: not a positive number:",cadr(sc->args));
1392 > }
1393 > fill=charvalue(cadr(sc->args))%255;
1394 > }
1395 > s_return(sc,mk_memblock(sc,len,(char)fill));
1396 > }
1397 >
1398 > case OP_BLOCKLEN: /* block-length */
1399 > if(!ismemblock(car(sc->args))) {
1400 > Error_1(sc,"block-length: not a memory block:",car(sc->args));
1401 > }
1402 > s_return(sc,mk_integer(sc,keynum(car(sc->args))));
1403 >
1404 > case OP_BLOCKREF: { /* block-ref */
1405 > char *str;
1406 > int index;
1407 >
1408 > if(!ismemblock(car(sc->args))) {
1409 > Error_1(sc,"block-ref: not a memory block:",car(sc->args));
1410 > }
1411 > str=strvalue(car(sc->args));
1412 >
1413 > if(cdr(sc->args)==sc->NIL) {
1414 > Error_0(sc,"block-ref: needs two arguments");
1415 > }
1416 > if(!isnumber(cadr(sc->args))) {
1417 > Error_1(sc,"block-ref: not a number:",cadr(sc->args));
1418 > }
1419 > index=ivalue(cadr(sc->args));
1420 >
1421 > if(index<0 || index>=keynum(car(sc->args))) {
1422 > Error_1(sc,"block-ref: out of bounds:",cadr(sc->args));
1423 > }
1424 >
1425 > s_return(sc,mk_integer(sc,str[index]));
1426 > }
1427 >
1428 > case OP_BLOCKSET: { /* block-set! */
1429 > char *str;
1430 > int index;
1431 > int c;
1432 >
1433 > if(!ismemblock(car(sc->args))) {
1434 > Error_1(sc,"block-set!: not a memory block:",car(sc->args));
1435 > }
1436 > if(isimmutable(car(sc->args))) {
1437 > Error_1(sc,"block-set!: unable to alter immutable memory block:",car(sc->args));
1438 > }
1439 > str=strvalue(car(sc->args));
1440 >
1441 > if(cdr(sc->args)==sc->NIL) {
1442 > Error_0(sc,"block-set!: needs three arguments");
1443 > }
1444 > if(!isnumber(cadr(sc->args))) {
1445 > Error_1(sc,"block-set!: not a number:",cadr(sc->args));
1446 > }
1447 > index=ivalue(cadr(sc->args));
1448 > if(index<0 || index>=keynum(car(sc->args))) {
1449 > Error_1(sc,"block-set!: out of bounds:",cadr(sc->args));
1450 > }
1451 >
1452 > if(cddr(sc->args)==sc->NIL) {
1453 > Error_0(sc,"block-set!: needs three arguments");
1454 > }
1455 > if(!isinteger(caddr(sc->args))) {
1456 > Error_1(sc,"block-set!: not an integer:",caddr(sc->args));
1457 > }
1458 > c=ivalue(caddr(sc->args))%255;
1459 >
1460 > str[index]=(char)c;
1461 > s_return(sc,car(sc->args));
1462 > }
1463
1464 Finally, do the same for the predicate in opexe_3.
1465
1466 | case OP_VECTORP: /* vector? */
1467 | s_retbool(is_vector(car(sc->args)));
1468 > case OP_BLOCKP: /* block? */
1469 > s_retbool(is_memblock(car(sc->args)));
1470 | case OP_EQ: /* eq? */