-
+ 17F92FC88844A70C26B1CCD4CC068F81EB9320557802E744A3190C944EAB08E5DBCB58CAF37008DE8031261D3BA6E584F9795F24B11B3B47B2243FAEA18F180B
vtools/lib/hash.c
(0 . 0)(1 . 1152)
/* A generic hash table package. */

/* Define |USE_OBSTACK| to 1 if you want the allocator to use obstacks
   instead of |malloc|. If you change |USE_OBSTACK|, you have to
   recompile! */

#include "hash.h"
#include "system.h"

#include <stdlib.h>
#include <limits.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry {
    void *data;
    struct hash_entry *next;
};

struct hash_table {
    /* The array of buckets starts at |bucket| and extends to
       |bucket_limit-1|, for a total of |n_buckets| possible buckets.
       Among those, |n_buckets_used| buckets are not empty; there are
       |n_entries| active entries in the table. */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure. */
    const Hash_tuning *tuning;

    /* Three functions are given to |hash_initialize|, see the
       documentation block for that function. In a word, |hasher|
       randomizes a user entry into a number from 0 up to some
       maximum minus 1; |comparator| returns true if two user entries
       compare equally; and |data_freer| is the cleanup function for a
       user entry. */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct |hash_entry| structs. */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all
       overflowed entries into a single stack, so they all can be
       freed in a single operation. It is not clear if the speedup is
       worth the trouble. */
    struct obstack entry_stack;
#endif
};

/* A hash table contains many internal entries, each holding a pointer
   to some user-provided data (also called a user entry). An entry
   indistinctly refers to both the internal entry and its associated
   user entry. A user entry's contents may be hashed by a
   randomization function (the hashing function, or just "hasher" for
   short) into a number (or "slot") between 0 and the current table
   size minus 1. At each slot position in the hash table starts a
   linked chain of entries for which the user data all hash to this
   slot. A bucket is the collection of all entries hashing to the
   same slot.

   A good "hasher" function will distribute entries rather evenly in
   buckets. In the ideal case, the length of each bucket is roughly
   the number of entries divided by the table size. Finding the slot
   for a given piece of data is usually done in constant time by the
   "hasher", and the later finding of a precise entry is linear in
   time with the size of the bucket. Consequently, a larger hash
   table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, {\bf given} the "hasher" function behaves
   properly.

   Long buckets slow down the lookup algorithm. One might use big
   hash table sizes in the hope of reducing the average length of
   buckets, but this might become inordinate, as unused slots in the
   hash table take some space. The best bet is to make sure you are
   using a good "hasher" function (beware that those are not that
   easy to write! :-), and to use a table size larger than the actual
   number of entries. */

/* If an insertion makes the ratio of nonempty buckets to table size
   larger than the growth threshold (a number between 0.0 and 1.0),
   then increase the table size by multiplying by the growth factor (a
   number greater than 1.0). The growth threshold defaults to 0.8,
   and the growth factor defaults to 1.414, meaning that the table
   will have doubled its size every second time 80\% of the buckets
   get used. */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets
   to table size to become smaller than the shrink threshold (a number
   between 0.0 and 1.0), then shrink the table by multiplying by the
   shrink factor (a number greater than the shrink threshold but
   smaller than 1.0). The shrink threshold and factor default to 0.0
   and 1.0, meaning that the table never shrinks. */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a |tuning| structure to some
   sensible values. */
static const Hash_tuning default_tuning =
{
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
};
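
/* For illustration only: a sketch of how a caller might relax the
   growth policy, assuming the |Hash_tuning| field names declared in
   hash.h and used throughout this file. Kept under |#if 0|, like the
   testing code at the end of this file, since it is not part of the
   package proper. */
#if 0
static Hash_table *
new_sparse_table(size_t hint)
{
    static Hash_tuning tuning;

    hash_reset_tuning(&tuning);     /* start from the defaults */
    tuning.growth_threshold = 0.5f; /* grow once half the buckets are used */
    tuning.growth_factor = 2.0f;    /* and double the size when growing */

    /* A |NULL| hasher and comparator select |raw_hasher| and
       |raw_comparator| below; see |hash_initialize|. */
    return hash_initialize(hint, &tuning, NULL, NULL, NULL);
}
#endif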

/* Information and lookup. */

/* The following few functions provide information about the overall
   hash table organization: the number of entries, number of buckets
   and maximum length of buckets. */

/* Return the number of buckets in the hash table. The table size,
   the total number of buckets (used plus unused), and the maximum
   number of slots are all the same quantity. */

size_t
hash_get_n_buckets(const Hash_table *table) {
    return table->n_buckets;
}

/* Return the number of slots in use (non-empty buckets). */

size_t
hash_get_n_buckets_used(const Hash_table *table) {
    return table->n_buckets_used;
}

/* Return the number of active entries. */

size_t
hash_get_n_entries(const Hash_table *table) {
    return table->n_entries;
}

/* Return the length of the longest chain (bucket). */

size_t
hash_get_max_bucket_length(const Hash_table *table) {
    struct hash_entry const *bucket;
    size_t max_bucket_length = 0;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            struct hash_entry const *cursor = bucket;
            size_t bucket_length = 1;

            while (cursor = cursor->next, cursor)
                bucket_length++;

            if (bucket_length > max_bucket_length)
                max_bucket_length = bucket_length;
        }
    }

    return max_bucket_length;
}

/* Do a mild validation of a hash table, by traversing it and checking
   two statistics. */

bool
hash_table_ok(const Hash_table *table) {
    struct hash_entry const *bucket;
    size_t n_buckets_used = 0;
    size_t n_entries = 0;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            struct hash_entry const *cursor = bucket;

            /* Count bucket head. */
            n_buckets_used++;
            n_entries++;

            /* Count bucket overflow. */
            while (cursor = cursor->next, cursor)
                n_entries++;
        }
    }

    if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
        return true;

    return false;
}

void
hash_print_statistics(const Hash_table *table, FILE *stream) {
    size_t n_entries = hash_get_n_entries(table);
    size_t n_buckets = hash_get_n_buckets(table);
    size_t n_buckets_used = hash_get_n_buckets_used(table);
    size_t max_bucket_length = hash_get_max_bucket_length(table);

    fprintf(stream, "# entries: %lu\n", (unsigned long int) n_entries);
    fprintf(stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
    fprintf(stream, "# buckets used: %lu (%.2f%%)\n",
            (unsigned long int) n_buckets_used,
            (100.0 * n_buckets_used) / n_buckets);
    fprintf(stream, "max bucket length: %lu\n",
            (unsigned long int) max_bucket_length);
}

/* Hash |key| and return a pointer to the selected bucket. If
   |table->hasher| misbehaves, abort. */
static struct hash_entry *
safe_hasher(const Hash_table *table, const void *key) {
    size_t n = table->hasher(key, table->n_buckets);
    if (!(n < table->n_buckets))
        abort();
    return table->bucket + n;
}

/* If |entry| matches an entry already in the hash table, return the
   entry from the table. Otherwise, return |NULL|. */

void *
hash_lookup(const Hash_table *table, const void *entry) {
    struct hash_entry const *bucket = safe_hasher(table, entry);
    struct hash_entry const *cursor;

    if (bucket->data == NULL)
        return NULL;

    for (cursor = bucket; cursor; cursor = cursor->next)
        if (entry == cursor->data || table->comparator(entry, cursor->data))
            return cursor->data;

    return NULL;
}

/* Walking. */

/* The functions in this page traverse the hash table and process the
   contained entries. For the traversal to work properly, the hash
   table should not be resized nor modified while any particular entry
   is being processed. In particular, entries should not be added,
   and an entry may be removed only if there is no shrink threshold
   and the entry being removed has already been passed to
   |hash_get_next|. */

/* Return the first data in the table, or |NULL| if the table is
   empty. */

void *
hash_get_first(const Hash_table *table) {
    struct hash_entry const *bucket;

    if (table->n_entries == 0)
        return NULL;

    for (bucket = table->bucket;; bucket++)
        if (!(bucket < table->bucket_limit))
            abort();
        else if (bucket->data)
            return bucket->data;
}

/* Return the user data for the entry following |entry|, where |entry|
   has been returned by a previous call to either |hash_get_first| or
   |hash_get_next|. Return |NULL| if there are no more entries. */

void *
hash_get_next(const Hash_table *table, const void *entry) {
    struct hash_entry const *bucket = safe_hasher(table, entry);
    struct hash_entry const *cursor;

    /* Find next entry in the same bucket. */
    cursor = bucket;
    do {
        if (cursor->data == entry && cursor->next)
            return cursor->next->data;
        cursor = cursor->next;
    } while (cursor != NULL);

    /* Find first entry in any subsequent bucket. */
    while (++bucket < table->bucket_limit)
        if (bucket->data)
            return bucket->data;

    /* None found. */
    return NULL;
}
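
/* For illustration only: the idiomatic walking loop built from the
   two functions above. A minimal sketch, assuming the entries were
   inserted as |char *| strings; kept under |#if 0| like the testing
   code at the end of this file. */
#if 0
static void
print_all(const Hash_table *table)
{
    void *entry;

    /* Entries come out in internal (bucket) order, not insertion
       order. The table must not be modified during the walk, per
       the rules above. */
    for (entry = hash_get_first(table); entry != NULL;
         entry = hash_get_next(table, entry))
        printf("%s\n", (char *) entry);
}
#endif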

/* Fill |buffer| with pointers to active user entries in the hash
   table, then return the number of pointers copied. Do not copy more
   than |buffer_size| pointers. */

size_t
hash_get_entries(const Hash_table *table, void **buffer,
                 size_t buffer_size) {
    size_t counter = 0;
    struct hash_entry const *bucket;
    struct hash_entry const *cursor;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            for (cursor = bucket; cursor; cursor = cursor->next) {
                if (counter >= buffer_size)
                    return counter;
                buffer[counter++] = cursor->data;
            }
        }
    }

    return counter;
}

/* Call a |processor| function for each entry of a hash table, and
   return the number of entries for which the processor function
   returned success. A pointer to some |processor_data| is passed
   along and made available to each call of the processor function.
   The |processor| accepts two arguments: the first is the user entry
   being walked, the second is the value of |processor_data| as
   received. The walking continues for as long as the |processor|
   function returns nonzero; when it returns zero, the walking is
   interrupted. */

size_t
hash_do_for_each(const Hash_table *table, Hash_processor processor,
                 void *processor_data) {
    size_t counter = 0;
    struct hash_entry const *bucket;
    struct hash_entry const *cursor;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            for (cursor = bucket; cursor; cursor = cursor->next) {
                if (!processor(cursor->data, processor_data))
                    return counter;
                counter++;
            }
        }
    }

    return counter;
}
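
/* For illustration only: a |Hash_processor| that totals the lengths
   of string entries. A minimal sketch which assumes, per hash.h,
   that |Hash_processor| is |bool (*)(void *, void *)|. Kept under
   |#if 0| like the testing code at the end of this file. */
#if 0
static bool
sum_lengths(void *entry, void *accumulator)
{
    *(size_t *) accumulator += strlen((char *) entry);
    return true;            /* nonzero: keep walking */
}

/* ... then:
       size_t total = 0;
       hash_do_for_each(table, sum_lengths, &total);  */
#endif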

/* Allocation and clean-up. */

/* Return a hash index for a NUL-terminated |string| between |0| and
   |n_buckets-1|. This is a convenience routine for constructing
   other hashing functions. */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01:
   ``Please see B. J. McKenzie, R. Harries \& T. Bell, Selecting a
   hashing algorithm, Software--practice \& experience 20, 2 (Feb
   1990), 209-224. Good hash algorithms tend to be domain-specific,
   so what's good for [diffutils'] |io.c| may not be good for your
   application.'' */

/* Rotate |x| left by |n| bits; the counterpart of |rotr_sz| defined
   further below, and needed by |HASH_ONE_CHAR|. */
static inline size_t
rotl_sz(size_t x, int n) {
    return ((x << n) | (x >> ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
}

size_t
hash_string(const char *string, size_t n_buckets) {
# define HASH_ONE_CHAR(Value, Byte) \
    ((Byte) + rotl_sz(Value, 7))

    size_t value = 0;
    unsigned char ch;

    for (; (ch = *string); string++)
        value = HASH_ONE_CHAR(value, ch);
    return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else

/* This one comes from |recode|, and performs a bit better than the
   above as per a few experiments. It is inspired by a hashing
   routine found in the very old Cyber `snoop', itself written in
   typical Greg Mansfield style. (By the way, what happened to this
   excellent man? Is he still alive?) */

size_t
hash_string(const char *string, size_t n_buckets) {
    size_t value = 0;
    unsigned char ch;

    for (; (ch = *string); string++)
        value = (value * 31 + ch) % n_buckets;
    return value;
}

#endif
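
/* For illustration only: how |hash_string| is typically wrapped when
   the user entries are structs keyed by a string field. The |item|
   struct and its functions are hypothetical names. Kept under
   |#if 0| like the testing code at the end of this file. */
#if 0
struct item {
    char *name;             /* the key */
    int payload;
};

static size_t
item_hasher(const void *entry, size_t n_buckets)
{
    return hash_string(((const struct item *) entry)->name, n_buckets);
}

static bool
item_comparator(const void *a, const void *b)
{
    return strcmp(((const struct item *) a)->name,
                  ((const struct item *) b)->name) == 0;
}
#endif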

/* Return true if |candidate| is a prime number. |candidate| should
   be an odd number at least equal to 11. */

static bool
is_prime(size_t candidate) {
    size_t divisor = 3;
    size_t square = divisor * divisor;

    /* Step through odd divisors; the square is maintained
       incrementally, using (d + 2)^2 = d^2 + 4 * (d + 1). */
    while (square < candidate && (candidate % divisor)) {
        divisor++;
        square += 4 * divisor;
        divisor++;
    }

    return (candidate % divisor ? true : false);
}

/* Round a given |candidate| number up to the nearest prime, and
   return that prime. Primes lower than 10 are merely skipped. */

static size_t
next_prime(size_t candidate) {
    /* Skip small primes. */
    if (candidate < 10)
        candidate = 10;

    /* Make it definitely odd. */
    candidate |= 1;

    while (SIZE_MAX != candidate && !is_prime(candidate))
        candidate += 2;

    return candidate;
}

void
hash_reset_tuning(Hash_tuning *tuning) {
    *tuning = default_tuning;
}

/* Given a |size_t| argument |x|, return the value corresponding to
   rotating the bits |n| steps to the right. |n| must be between 1
   and |(CHAR_BIT * sizeof (size_t) - 1)| inclusive. */
static inline size_t
rotr_sz(size_t x, int n) {
    return ((x >> n) | (x << ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
}

/* If the user passes a |NULL| hasher, we hash the raw pointer. */
static size_t
raw_hasher(const void *data, size_t n) {
    /* When hashing unique pointers, it is often the case that they
       were generated by malloc and thus have the property that the
       low-order bits are 0. As this tends to give poorer performance
       with small tables, we rotate the pointer value before
       performing division, in an attempt to improve hash quality. */
    size_t val = rotr_sz((size_t) data, 3);
    return val % n;
}

/* If the user passes a |NULL| comparator, we use pointer
   comparison. */
static bool
raw_comparator(const void *a, const void *b) {
    return a == b;
}


/* For the given hash |table|, check the user supplied tuning
   structure for reasonable values, and return true if there is no
   gross error with it. Otherwise, definitively reset the |tuning|
   field to some acceptable default in the hash table (that is, the
   user loses the right of further modifying tuning arguments), and
   return false. */

static bool
check_tuning(Hash_table *table) {
    const Hash_tuning *tuning = table->tuning;
    float epsilon;
    if (tuning == &default_tuning)
        return true;

    /* Be a bit stricter than mathematics would require, so that
       rounding errors in size calculations do not cause allocations
       to fail to grow or shrink as they should. The smallest
       allocation is 11 (due to |next_prime|'s algorithm), so an
       epsilon of 0.1 should be good enough. */
    epsilon = 0.1f;

    if (epsilon < tuning->growth_threshold
        && tuning->growth_threshold < 1 - epsilon
        && 1 + epsilon < tuning->growth_factor
        && 0 <= tuning->shrink_threshold
        && tuning->shrink_threshold + epsilon < tuning->shrink_factor
        && tuning->shrink_factor <= 1
        && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
        return true;

    table->tuning = &default_tuning;
    return false;
}

/* Compute the size of the bucket array for the given |candidate| and
   |tuning|, or return 0 if there is no possible way to allocate that
   many entries. */

static size_t
compute_bucket_size(size_t candidate, const Hash_tuning *tuning) {
    if (!tuning->is_n_buckets) {
        float new_candidate = candidate / tuning->growth_threshold;
        if (SIZE_MAX <= new_candidate)
            return 0;
        candidate = new_candidate;
    }
    candidate = next_prime(candidate);
    if (xalloc_oversized(candidate, sizeof(struct hash_entry *)))
        return 0;
    return candidate;
}
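
/* A worked example of the sizing arithmetic above, under the default
   tuning: a |candidate| of 100 entries is first inflated to
   100 / 0.8 = 125 candidate buckets, so that 100 entries fit before
   the growth threshold is crossed; |next_prime| then rounds 125 up
   through 127, which is prime, so the bucket array gets 127 slots. */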

/* Allocate and return a new hash table, or |NULL| upon failure. The
   initial number of buckets is automatically selected so as to {\it
   guarantee} that you may insert at least |candidate| different user
   entries before any growth of the hash table size occurs. So, if
   you have a reasonably tight a-priori upper bound on the number of
   entries you intend to insert in the hash table, you may save some
   table memory and insertion time, by specifying it here. If the
   |is_n_buckets| field of the |tuning| structure is true, the
   |candidate| argument has its meaning changed to the wanted number
   of buckets.

   |tuning| points to a structure of user-supplied values, in case
   some fine tuning is wanted over the default behavior of the hasher.
   If |tuning| is |NULL|, the default tuning parameters are used
   instead. If |tuning| is provided but the values requested are out
   of bounds or might cause rounding errors, return |NULL|.

   The user-supplied |hasher| function, when not |NULL|, accepts two
   arguments |entry| and |table_size|. It computes, by hashing
   |entry| contents, a slot number for that entry which should be in
   the range |0..table_size-1|. This slot number is then returned.

   The user-supplied |comparator| function, when not |NULL|, accepts
   two arguments pointing to user data; it returns true for a pair of
   entries that compare equal, or false otherwise. This function is
   internally called on entries which are already known to hash to
   the same bucket index, but which are distinct pointers.

   The user-supplied |data_freer| function, when not |NULL|, may be
   later called with the user data as an argument, just before the
   entry containing the data gets freed. This happens from within
   |hash_free| or |hash_clear|. You should specify this function only
   if you want these functions to free all of your |data| data. This
   is typically the case when your data is simply an auxiliary struct
   that you have |malloc|'d to aggregate several values. */

Hash_table *
hash_initialize(size_t candidate, const Hash_tuning *tuning,
                Hash_hasher hasher, Hash_comparator comparator,
                Hash_data_freer data_freer) {
    Hash_table *table;

    if (hasher == NULL)
        hasher = raw_hasher;
    if (comparator == NULL)
        comparator = raw_comparator;

    table = malloc(sizeof *table);
    if (table == NULL)
        return NULL;

    if (!tuning)
        tuning = &default_tuning;
    table->tuning = tuning;
    if (!check_tuning(table)) {
        /* Fail if the tuning options are invalid. This is the only
           occasion when the user gets some feedback about it. Once
           the table is created, if the user provides invalid tuning
           options, we silently revert to using the defaults, and
           ignore further requests to change the tuning options. */
        goto fail;
    }

    table->n_buckets = compute_bucket_size(candidate, tuning);
    if (!table->n_buckets)
        goto fail;

    table->bucket = calloc(table->n_buckets, sizeof *table->bucket);
    if (table->bucket == NULL)
        goto fail;
    table->bucket_limit = table->bucket + table->n_buckets;
    table->n_buckets_used = 0;
    table->n_entries = 0;

    table->hasher = hasher;
    table->comparator = comparator;
    table->data_freer = data_freer;

    table->free_entry_list = NULL;
#if USE_OBSTACK
    obstack_init(&table->entry_stack);
#endif
    return table;

fail:
    free(table);
    return NULL;
}
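
/* For illustration only: a complete lifecycle under the default
   tuning, with |char *| entries hashed by contents rather than by
   pointer. A minimal sketch, kept under |#if 0| like the testing
   code at the end of this file; |string_hasher|, |string_comparator|
   and |demo| are hypothetical names. */
#if 0
static size_t
string_hasher(const void *entry, size_t n_buckets)
{
    return hash_string(entry, n_buckets);
}

static bool
string_comparator(const void *a, const void *b)
{
    return strcmp(a, b) == 0;
}

static void
demo(void)
{
    /* Room for at least 100 entries before the first rehash; |free|
       as |data_freer| so |hash_free| releases the strdup'd keys. */
    Hash_table *table = hash_initialize(100, NULL, string_hasher,
                                        string_comparator, free);
    if (!table)
        abort();

    if (!hash_insert(table, strdup("alpha")))
        abort();            /* out of memory */

    if (hash_lookup(table, "alpha"))
        printf("found\n");

    hash_free(table);       /* frees the entries and the keys */
}
#endif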

/* Make all buckets empty, placing any chained entries on the free
   list. Apply the user-specified function |data_freer| (if any) to
   the data of any affected entries. */

void
hash_clear(Hash_table *table) {
    struct hash_entry *bucket;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            struct hash_entry *cursor;
            struct hash_entry *next;

            /* Free the bucket overflow. */
            for (cursor = bucket->next; cursor; cursor = next) {
                if (table->data_freer)
                    table->data_freer(cursor->data);
                cursor->data = NULL;

                next = cursor->next;
                /* Relinking is done one entry at a time, as it is to
                   be expected that overflows are either rare or
                   short. */
                cursor->next = table->free_entry_list;
                table->free_entry_list = cursor;
            }

            /* Free the bucket head. */
            if (table->data_freer)
                table->data_freer(bucket->data);
            bucket->data = NULL;
            bucket->next = NULL;
        }
    }

    table->n_buckets_used = 0;
    table->n_entries = 0;
}

/* Reclaim all storage associated with a hash table. If a
   |data_freer| function has been supplied by the user when the hash
   table was created, this function applies it to the data of each
   entry before freeing that entry. */

void
hash_free(Hash_table *table) {
    struct hash_entry *bucket;
    struct hash_entry *cursor;
    struct hash_entry *next;

    /* Call the user |data_freer| function. */
    if (table->data_freer && table->n_entries) {
        for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
            if (bucket->data) {
                for (cursor = bucket; cursor; cursor = cursor->next)
                    table->data_freer(cursor->data);
            }
        }
    }

#if USE_OBSTACK

    obstack_free(&table->entry_stack, NULL);

#else

    /* Free all bucket overflowed entries. */
    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        for (cursor = bucket->next; cursor; cursor = next) {
            next = cursor->next;
            free(cursor);
        }
    }

    /* Also reclaim the internal list of previously freed entries. */
    for (cursor = table->free_entry_list; cursor; cursor = next) {
        next = cursor->next;
        free(cursor);
    }

#endif

    /* Free the remainder of the hash table structure. */
    free(table->bucket);
    free(table);
}

/* Insertion and deletion. */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one. If this is not possible, allocate a new
   one. */

static struct hash_entry *
allocate_entry(Hash_table *table) {
    struct hash_entry *new;

    if (table->free_entry_list) {
        new = table->free_entry_list;
        table->free_entry_list = new->next;
    } else {
#if USE_OBSTACK
        new = obstack_alloc(&table->entry_stack, sizeof *new);
#else
        new = malloc(sizeof *new);
#endif
    }

    return new;
}

/* Free a hash entry which was part of some bucket overflow, saving it
   for later recycling. */

static void
free_entry(Hash_table *table, struct hash_entry *entry) {
    entry->data = NULL;
    entry->next = table->free_entry_list;
    table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.
   When |entry| matches an entry in the table, return a pointer to the
   corresponding user data and set |*bucket_head| to the head of the
   selected bucket. Otherwise, return |NULL|. When |delete| is true
   and |entry| matches an entry in the table, unlink the matching
   entry. */

static void *
hash_find_entry(Hash_table *table, const void *entry,
                struct hash_entry **bucket_head, bool delete) {
    struct hash_entry *bucket = safe_hasher(table, entry);
    struct hash_entry *cursor;

    *bucket_head = bucket;

    /* Test for empty bucket. */
    if (bucket->data == NULL)
        return NULL;

    /* See if the entry is the first in the bucket. */
    if (entry == bucket->data || table->comparator(entry, bucket->data)) {
        void *data = bucket->data;

        if (delete) {
            if (bucket->next) {
                struct hash_entry *next = bucket->next;

                /* Bump the first overflow entry into the bucket head,
                   then save the previous first overflow entry for
                   later recycling. */
                *bucket = *next;
                free_entry(table, next);
            } else {
                bucket->data = NULL;
            }
        }

        return data;
    }

    /* Scan the bucket overflow. */
    for (cursor = bucket; cursor->next; cursor = cursor->next) {
        if (entry == cursor->next->data
            || table->comparator(entry, cursor->next->data)) {
            void *data = cursor->next->data;

            if (delete) {
                struct hash_entry *next = cursor->next;

                /* Unlink the entry to delete, then save the freed
                   entry for later recycling. */
                cursor->next = next->next;
                free_entry(table, next);
            }

            return data;
        }
    }

    /* No entry found. */
    return NULL;
}

/* Internal helper, to move entries from |src| to |dst|. Both tables
   must share the same free entry list. If |safe|, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur. Return false if the free entry list is exhausted and an
   allocation fails. */

static bool
transfer_entries(Hash_table *dst, Hash_table *src, bool safe) {
    struct hash_entry *bucket;
    struct hash_entry *cursor;
    struct hash_entry *next;
    for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
        if (bucket->data) {
            void *data;
            struct hash_entry *new_bucket;

            /* Within each bucket, transfer overflow entries first and
               then the bucket head, to minimize memory pressure.
               After all, the only time we might allocate is when
               moving the bucket head, but moving overflow entries
               first may create free entries that can be recycled by
               the time we finally get to the bucket head. */
            for (cursor = bucket->next; cursor; cursor = next) {
                data = cursor->data;
                new_bucket = safe_hasher(dst, data);

                next = cursor->next;

                if (new_bucket->data) {
                    /* Merely relink an existing entry, when moving
                       from a bucket overflow into a bucket
                       overflow. */
                    cursor->next = new_bucket->next;
                    new_bucket->next = cursor;
                } else {
                    /* Free an existing entry, when moving from a
                       bucket overflow into a bucket header. */
                    new_bucket->data = data;
                    dst->n_buckets_used++;
                    free_entry(dst, cursor);
                }
            }
            /* Now move the bucket head. Be sure that if we fail due
               to an allocation failure, the src table is left in a
               consistent state. */
            data = bucket->data;
            bucket->next = NULL;
            if (safe)
                continue;
            new_bucket = safe_hasher(dst, data);

            if (new_bucket->data) {
                /* Allocate or recycle an entry, when moving from a
                   bucket header into a bucket overflow. */
                struct hash_entry *new_entry = allocate_entry(dst);

                if (new_entry == NULL)
                    return false;

                new_entry->data = data;
                new_entry->next = new_bucket->next;
                new_bucket->next = new_entry;
            } else {
                /* Move from one bucket header to another. */
                new_bucket->data = data;
                dst->n_buckets_used++;
            }
            bucket->data = NULL;
            src->n_buckets_used--;
        }
    return true;
}

/* For an already existing hash table, change the number of buckets
   through specifying |candidate|. The contents of the hash table are
   preserved. The new number of buckets is automatically selected so
   as to {\it guarantee} that the table may receive at least
   |candidate| different user entries, including those already in the
   table, before any other growth of the hash table size occurs. If
   |tuning->is_n_buckets| is true, then |candidate| specifies the
   exact number of buckets desired. Return true iff the rehash
   succeeded. */

bool
hash_rehash(Hash_table *table, size_t candidate) {
    Hash_table storage;
    Hash_table *new_table;
    size_t new_size = compute_bucket_size(candidate, table->tuning);

    if (!new_size)
        return false;
    if (new_size == table->n_buckets)
        return true;
    new_table = &storage;
    new_table->bucket = calloc(new_size, sizeof *new_table->bucket);
    if (new_table->bucket == NULL)
        return false;
    new_table->n_buckets = new_size;
    new_table->bucket_limit = new_table->bucket + new_size;
    new_table->n_buckets_used = 0;
    new_table->n_entries = 0;
    new_table->tuning = table->tuning;
    new_table->hasher = table->hasher;
    new_table->comparator = table->comparator;
    new_table->data_freer = table->data_freer;

    /* In order for the transfer to successfully complete, we need
       additional overflow entries when distinct buckets in the old
       table collide into a common bucket in the new table. The worst
       case possible is a hasher that gives a good spread with the old
       size, but returns a constant with the new size; if we were to
       guarantee |table->n_buckets_used-1| free entries in advance, then
       the transfer would be guaranteed to not allocate memory.
       However, for large tables, a guarantee of no further allocation
       introduces a lot of extra memory pressure, all for an unlikely
       corner case (most rehashes reduce, rather than increase, the
       number of overflow entries needed). So, we instead ensure that
       the transfer process can be reversed if we hit a memory
       allocation failure mid-transfer. */

    /* Merely reuse the extra old space into the new table. */
#if USE_OBSTACK
    new_table->entry_stack = table->entry_stack;
#endif
    new_table->free_entry_list = table->free_entry_list;

    if (transfer_entries(new_table, table, false)) {
        /* Entries transferred successfully; tie up the loose ends. */
        free(table->bucket);
        table->bucket = new_table->bucket;
        table->bucket_limit = new_table->bucket_limit;
        table->n_buckets = new_table->n_buckets;
        table->n_buckets_used = new_table->n_buckets_used;
        table->free_entry_list = new_table->free_entry_list;
        /* |table->n_entries| and |table->entry_stack| already hold
           their values. */
        return true;
    }

    /* We've allocated |new_table->bucket| (and possibly some
       entries), exhausted the free list, and moved some but not all
       entries into |new_table|. We must undo the partial move before
       returning failure. The only way to get into this situation is
       if |new_table| uses fewer buckets than the old table, so we
       will reclaim some free entries as overflows in the new table
       are put back into distinct buckets in the old table.

       There are some pathological cases where a single pass through
       the table requires more intermediate overflow entries than
       using two passes. Two passes give worse cache performance and
       take longer, but at this point, we're already out of memory,
       so slow and safe is better than failure. */
    table->free_entry_list = new_table->free_entry_list;
    if (!(transfer_entries(table, new_table, true)
          && transfer_entries(table, new_table, false)))
        abort();
    /* |table->n_entries| already holds its value. */
    free(new_table->bucket);
    return false;
}
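
/* For illustration only: |hash_rehash| can pre-size a live table
   before a bulk load, so that the insertions trigger no further
   growth along the way. A minimal sketch; |bulk_load|, |names| and
   |expected| are hypothetical names. Kept under |#if 0| like the
   testing code at the end of this file. */
#if 0
static void
bulk_load(Hash_table *table, char **names, size_t expected)
{
    size_t i;

    /* One rehash up front instead of several along the way. A
       failed pre-size is not fatal: the table still works and will
       simply grow incrementally as entries are inserted. */
    (void) hash_rehash(table, expected);

    for (i = 0; i < expected; i++)
        if (!hash_insert(table, names[i]))
            abort();        /* out of memory */
}
#endif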

/* Insert |entry| into hash |table| if there is not already a matching
   entry.

   Return -1 upon memory allocation failure.
   Return 1 if insertion succeeded.
   Return 0 if there is already a matching entry in the table, and in
   that case, if |matched_ent| is non-|NULL|, set |*matched_ent| to
   that entry.

   This interface is easier to use than |hash_insert| when you must
   distinguish between the latter two cases. More importantly,
   |hash_insert| is unusable for some types of |entry| values. When
   using |hash_insert|, the only way to distinguish those cases is to
   compare the return value and |entry|. That works only when you can
   have two different |entry| values that point to data that compares
   "equal". Thus, when the |entry| value is a simple scalar, you must
   use |hash_insert_if_absent|. |entry| must not be |NULL|. */
int
hash_insert_if_absent(Hash_table *table, void const *entry,
                      void const **matched_ent) {
    void *data;
    struct hash_entry *bucket;

    /* The caller cannot insert a |NULL| entry, since |hash_lookup|
       returns |NULL| to indicate "not found", and |hash_find_entry|
       uses |bucket->data == NULL| to indicate an empty bucket. */
    if (!entry)
        abort();

    /* If there's a matching entry already in the table, return that. */
    if ((data = hash_find_entry(table, entry, &bucket, false)) != NULL) {
        if (matched_ent)
            *matched_ent = data;
        return 0;
    }

    /* If the growth threshold of the buckets in use has been reached,
       increase the table size and rehash. There's no point in
       checking the number of entries: if the hashing function is
       ill-conditioned, rehashing is not likely to improve it. */

    if (table->n_buckets_used
        > table->tuning->growth_threshold * table->n_buckets) {
        /* Check more fully, before starting real work. If tuning
           arguments became invalid, the second check will rely on
           proper defaults. */
        check_tuning(table);
        if (table->n_buckets_used
            > table->tuning->growth_threshold * table->n_buckets) {
            const Hash_tuning *tuning = table->tuning;
            float candidate =
                (tuning->is_n_buckets
                 ? (table->n_buckets * tuning->growth_factor)
                 : (table->n_buckets * tuning->growth_factor
                    * tuning->growth_threshold));

            if (SIZE_MAX <= candidate)
                return -1;

            /* If the rehash fails, arrange to return -1. */
            if (!hash_rehash(table, candidate))
                return -1;

            /* Update the bucket we are interested in. */
            if (hash_find_entry(table, entry, &bucket, false) != NULL)
                abort();
        }
    }

    /* |entry| is not matched, it should be inserted. */

    if (bucket->data) {
        struct hash_entry *new_entry = allocate_entry(table);

        if (new_entry == NULL)
            return -1;

        /* Add |entry| in the overflow of the bucket. */

        new_entry->data = (void *) entry;
        new_entry->next = bucket->next;
        bucket->next = new_entry;
        table->n_entries++;
        return 1;
    }

    /* Add |entry| right in the bucket head. */

    bucket->data = (void *) entry;
    table->n_entries++;
    table->n_buckets_used++;

    return 1;
}
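
/* For illustration only: telling the three |hash_insert_if_absent|
   outcomes apart, which matters when entries are scalars (here,
   nonzero ints cast to pointers, a hypothetical encoding; zero is
   excluded since |NULL| cannot be stored). Kept under |#if 0| like
   the testing code at the end of this file. */
#if 0
static bool
note_value(Hash_table *table, int value)
{
    void const *matched;

    switch (hash_insert_if_absent(table, (void *) (size_t) value,
                                  &matched)) {
    case 1:
        return true;        /* newly inserted */
    case 0:
        return false;       /* |matched| is the existing entry */
    default:
        abort();            /* -1: out of memory */
    }
}
#endif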

/* If |entry| matches an entry already in the hash table, return the
   pointer to the entry from the table. Otherwise, insert |entry| and
   return |entry|. Return |NULL| if the storage required for
   insertion cannot be allocated. This implementation does not
   support duplicate entries or insertion of |NULL|. */

void *
hash_insert(Hash_table *table, void const *entry) {
    void const *matched_ent;
    int err = hash_insert_if_absent(table, entry, &matched_ent);
    return (err == -1
            ? NULL
            : (void *) (err == 0 ? matched_ent : entry));
}

/* If |entry| is already in the table, remove it and return the
   just-deleted data (the user may want to deallocate its storage).
   If |entry| is not in the table, don't modify the table and return
   |NULL|. */

void *
hash_delete(Hash_table *table, const void *entry) {
    void *data;
    struct hash_entry *bucket;

    data = hash_find_entry(table, entry, &bucket, true);
    if (!data)
        return NULL;

    table->n_entries--;
    if (!bucket->data) {
        table->n_buckets_used--;

        /* If the shrink threshold of the buckets in use has been
           reached, rehash into a smaller table. */

        if (table->n_buckets_used
            < table->tuning->shrink_threshold * table->n_buckets) {
            /* Check more fully, before starting real work. If tuning
               arguments became invalid, the second check will rely on
               proper defaults. */
            check_tuning(table);
            if (table->n_buckets_used
                < table->tuning->shrink_threshold * table->n_buckets) {
                const Hash_tuning *tuning = table->tuning;
                size_t candidate =
                    (tuning->is_n_buckets
                     ? table->n_buckets * tuning->shrink_factor
                     : (table->n_buckets * tuning->shrink_factor
                        * tuning->growth_threshold));

                if (!hash_rehash(table, candidate)) {
                    /* Failure to allocate memory in an attempt to
                       shrink the table is not fatal. But since
                       memory is low, we can at least be kind and free
                       any spare entries, rather than keeping them
                       tied up in the free entry list. */
#if !USE_OBSTACK
                    struct hash_entry *cursor = table->free_entry_list;
                    struct hash_entry *next;
                    while (cursor) {
                        next = cursor->next;
                        free(cursor);
                        cursor = next;
                    }
                    table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

    return data;
}
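
/* For illustration only: deleting an entry and releasing its
   storage, which |hash_delete| itself never does, even when a
   |data_freer| was supplied. A minimal sketch assuming |malloc|'d
   entries; |drop_entry| is a hypothetical name. Kept under |#if 0|
   like the testing code below. */
#if 0
static void
drop_entry(Hash_table *table, const void *key)
{
    void *data = hash_delete(table, key);

    /* The entry is already unlinked from the table; freeing its
       storage is the caller's job. */
    if (data)
        free(data);
}
#endif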

/* Testing. */

#if 0

void
hash_print(const Hash_table *table) {
    struct hash_entry *bucket = (struct hash_entry *) table->bucket;

    for (; bucket < table->bucket_limit; bucket++) {
        struct hash_entry *cursor;

        if (bucket->data)
            printf("%lu:\n", (unsigned long int) (bucket - table->bucket));

        for (cursor = bucket; cursor; cursor = cursor->next) {
            char const *s = cursor->data;
            /* FIXME */
            if (s)
                printf(" %s\n", s);
        }
    }
}

#endif