-
+ 17F92FC88844A70C26B1CCD4CC068F81EB9320557802E744A3190C944EAB08E5DBCB58CAF37008DE8031261D3BA6E584F9795F24B11B3B47B2243FAEA18F180B
vtools/lib/hash.c
(0 . 0)(1 . 1152)
/* A generic hash table package. */

/* Define |USE_OBSTACK| to 1 if you want the allocator to use obstacks
   instead of |malloc|. If you change |USE_OBSTACK|, you have to
   recompile! */

#include "hash.h"
#include "system.h"

#include <stdlib.h>
#include <limits.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry {
    void *data;
    struct hash_entry *next;
};

struct hash_table {
    /* The array of buckets starts at |bucket| and extends to
       |bucket_limit-1|, for a total of |n_buckets| possible buckets.
       Among those, |n_buckets_used| buckets are not empty; there are
       |n_entries| active entries in the table. */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure. */
    const Hash_tuning *tuning;

    /* Three functions are given to |hash_initialize|, see the
       documentation block for that function. In a word, |hasher|
       randomizes a user entry into a number from 0 up to some
       maximum minus 1; |comparator| returns true if two user entries
       compare equally; and |data_freer| is the cleanup function for a
       user entry. */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct |hash_entry| structs. */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all
       overflowed entries into a single stack, so they all can be
       freed in a single operation. It is not clear if the speedup is
       worth the trouble. */
    struct obstack entry_stack;
#endif
};

/* A hash table contains many internal entries, each holding a pointer
   to some user-provided data (also called a user entry). An entry
   indistinctly refers to both the internal entry and its associated
   user entry. A user entry's contents may be hashed by a
   randomization function (the hashing function, or just "hasher" for
   short) into a number (or "slot") between 0 and the current table
   size minus 1. At each slot position in the hash table starts a
   linked chain of entries for which the user data all hash to this
   slot. A bucket is the collection of all entries hashing to the
   same slot.

   A good "hasher" function will distribute entries rather evenly in
   buckets. In the ideal case, the length of each bucket is roughly
   the number of entries divided by the table size. Finding the slot
   for a given piece of data is usually done in constant time by the
   "hasher", and the later finding of a precise entry is linear in
   time with the size of the bucket. Consequently, a larger hash
   table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, {\bf given} the "hasher" function behaves
   properly.

   Long buckets slow down the lookup algorithm. One might use big
   hash table sizes in the hope of reducing the average length of
   buckets, but this might become inordinate, as unused slots in the
   hash table take some space. The best bet is to make sure you are
   using a good "hasher" function (beware that those are not that
   easy to write! :-), and to use a table size larger than the actual
   number of entries. */

/* If an insertion makes the ratio of nonempty buckets to table size
   larger than the growth threshold (a number between 0.0 and 1.0),
   then increase the table size by multiplying by the growth factor (a
   number greater than 1.0). The growth threshold defaults to 0.8,
   and the growth factor defaults to 1.414, meaning that the table
   will have doubled its size every second time 80\% of the buckets
   get used. */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets
   to table size to become smaller than the shrink threshold (a number
   between 0.0 and 1.0), then shrink the table by multiplying by the
   shrink factor (a number greater than the shrink threshold but
   smaller than 1.0). The shrink threshold and factor default to 0.0
   and 1.0, meaning that the table never shrinks. */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a |tuning| structure to some
   sensible values. */
static const Hash_tuning default_tuning =
{
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
};
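
/* For illustration only: a sketch of how a caller might relax the
   growth policy, assuming the |Hash_tuning| field names declared in
   hash.h and used throughout this file. Kept under |#if 0|, like the
   testing code at the end of this file, since it is not part of the
   package proper. */
#if 0
static Hash_table *
new_sparse_table(size_t hint)
{
    static Hash_tuning tuning;

    hash_reset_tuning(&tuning);     /* start from the defaults */
    tuning.growth_threshold = 0.5f; /* grow once half the buckets are used */
    tuning.growth_factor = 2.0f;    /* and double the size when growing */

    /* A |NULL| hasher and comparator select |raw_hasher| and
       |raw_comparator| below; see |hash_initialize|. */
    return hash_initialize(hint, &tuning, NULL, NULL, NULL);
}
#endif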

/* Information and lookup. */

/* The following few functions provide information about the overall
   hash table organization: the number of entries, number of buckets
   and maximum length of buckets. */

/* Return the number of buckets in the hash table. The table size,
   the total number of buckets (used plus unused), and the maximum
   number of slots are all the same quantity. */

size_t
hash_get_n_buckets(const Hash_table *table) {
    return table->n_buckets;
}

/* Return the number of slots in use (non-empty buckets). */

size_t
hash_get_n_buckets_used(const Hash_table *table) {
    return table->n_buckets_used;
}

/* Return the number of active entries. */

size_t
hash_get_n_entries(const Hash_table *table) {
    return table->n_entries;
}

/* Return the length of the longest chain (bucket). */

size_t
hash_get_max_bucket_length(const Hash_table *table) {
    struct hash_entry const *bucket;
    size_t max_bucket_length = 0;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            struct hash_entry const *cursor = bucket;
            size_t bucket_length = 1;

            while (cursor = cursor->next, cursor)
                bucket_length++;

            if (bucket_length > max_bucket_length)
                max_bucket_length = bucket_length;
        }
    }

    return max_bucket_length;
}

/* Do a mild validation of a hash table, by traversing it and checking
   two statistics. */

bool
hash_table_ok(const Hash_table *table) {
    struct hash_entry const *bucket;
    size_t n_buckets_used = 0;
    size_t n_entries = 0;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            struct hash_entry const *cursor = bucket;

            /* Count bucket head. */
            n_buckets_used++;
            n_entries++;

            /* Count bucket overflow. */
            while (cursor = cursor->next, cursor)
                n_entries++;
        }
    }

    if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
        return true;

    return false;
}

void
hash_print_statistics(const Hash_table *table, FILE *stream) {
    size_t n_entries = hash_get_n_entries(table);
    size_t n_buckets = hash_get_n_buckets(table);
    size_t n_buckets_used = hash_get_n_buckets_used(table);
    size_t max_bucket_length = hash_get_max_bucket_length(table);

    fprintf(stream, "# entries: %lu\n", (unsigned long int) n_entries);
    fprintf(stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
    fprintf(stream, "# buckets used: %lu (%.2f%%)\n",
            (unsigned long int) n_buckets_used,
            (100.0 * n_buckets_used) / n_buckets);
    fprintf(stream, "max bucket length: %lu\n",
            (unsigned long int) max_bucket_length);
}

/* Hash |key| and return a pointer to the selected bucket. If
   |table->hasher| misbehaves, abort. */
static struct hash_entry *
safe_hasher(const Hash_table *table, const void *key) {
    size_t n = table->hasher(key, table->n_buckets);
    if (!(n < table->n_buckets))
        abort();
    return table->bucket + n;
}

/* If |entry| matches an entry already in the hash table, return the
   entry from the table. Otherwise, return |NULL|. */

void *
hash_lookup(const Hash_table *table, const void *entry) {
    struct hash_entry const *bucket = safe_hasher(table, entry);
    struct hash_entry const *cursor;

    if (bucket->data == NULL)
        return NULL;

    for (cursor = bucket; cursor; cursor = cursor->next)
        if (entry == cursor->data || table->comparator(entry, cursor->data))
            return cursor->data;

    return NULL;
}

/* Walking. */

/* The functions in this page traverse the hash table and process the
   contained entries. For the traversal to work properly, the hash
   table should not be resized nor modified while any particular entry
   is being processed. In particular, entries should not be added,
   and an entry may be removed only if there is no shrink threshold
   and the entry being removed has already been passed to
   |hash_get_next|. */

/* Return the first data in the table, or |NULL| if the table is
   empty. */

void *
hash_get_first(const Hash_table *table) {
    struct hash_entry const *bucket;

    if (table->n_entries == 0)
        return NULL;

    for (bucket = table->bucket;; bucket++)
        if (!(bucket < table->bucket_limit))
            abort();
        else if (bucket->data)
            return bucket->data;
}

/* Return the user data for the entry following |entry|, where |entry|
   has been returned by a previous call to either |hash_get_first| or
   |hash_get_next|. Return |NULL| if there are no more entries. */

void *
hash_get_next(const Hash_table *table, const void *entry) {
    struct hash_entry const *bucket = safe_hasher(table, entry);
    struct hash_entry const *cursor;

    /* Find next entry in the same bucket. */
    cursor = bucket;
    do {
        if (cursor->data == entry && cursor->next)
            return cursor->next->data;
        cursor = cursor->next;
    } while (cursor != NULL);

    /* Find first entry in any subsequent bucket. */
    while (++bucket < table->bucket_limit)
        if (bucket->data)
            return bucket->data;

    /* None found. */
    return NULL;
}
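
/* For illustration only: the idiomatic walking loop built from the
   two functions above. A minimal sketch, assuming the entries were
   inserted as |char *| strings; kept under |#if 0| like the testing
   code at the end of this file. */
#if 0
static void
print_all(const Hash_table *table)
{
    void *entry;

    /* Entries come out in internal (bucket) order, not insertion
       order. The table must not be modified during the walk, per
       the rules above. */
    for (entry = hash_get_first(table); entry != NULL;
         entry = hash_get_next(table, entry))
        printf("%s\n", (char *) entry);
}
#endif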

/* Fill |buffer| with pointers to active user entries in the hash
   table, then return the number of pointers copied. Do not copy more
   than |buffer_size| pointers. */

size_t
hash_get_entries(const Hash_table *table, void **buffer,
                 size_t buffer_size) {
    size_t counter = 0;
    struct hash_entry const *bucket;
    struct hash_entry const *cursor;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            for (cursor = bucket; cursor; cursor = cursor->next) {
                if (counter >= buffer_size)
                    return counter;
                buffer[counter++] = cursor->data;
            }
        }
    }

    return counter;
}

/* Call a |processor| function for each entry of a hash table, and
   return the number of entries for which the processor function
   returned success. A pointer to some |processor_data| is passed
   along and made available to each call of the processor function.
   The |processor| accepts two arguments: the first is the user entry
   being walked, the second is the value of |processor_data| as
   received. The walking continues for as long as the |processor|
   function returns nonzero; when it returns zero, the walking is
   interrupted. */

size_t
hash_do_for_each(const Hash_table *table, Hash_processor processor,
                 void *processor_data) {
    size_t counter = 0;
    struct hash_entry const *bucket;
    struct hash_entry const *cursor;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            for (cursor = bucket; cursor; cursor = cursor->next) {
                if (!processor(cursor->data, processor_data))
                    return counter;
                counter++;
            }
        }
    }

    return counter;
}
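
/* For illustration only: a |Hash_processor| that totals the lengths
   of string entries. A minimal sketch which assumes, per hash.h,
   that |Hash_processor| is |bool (*)(void *, void *)|. Kept under
   |#if 0| like the testing code at the end of this file. */
#if 0
static bool
sum_lengths(void *entry, void *accumulator)
{
    *(size_t *) accumulator += strlen((char *) entry);
    return true;            /* nonzero: keep walking */
}

/* ... then:
       size_t total = 0;
       hash_do_for_each(table, sum_lengths, &total);  */
#endif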

/* Allocation and clean-up. */

/* Return a hash index for a NUL-terminated |string| between |0| and
   |n_buckets-1|. This is a convenience routine for constructing
   other hashing functions. */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01:
   ``Please see B. J. McKenzie, R. Harries \& T. Bell, Selecting a
   hashing algorithm, Software--practice \& experience 20, 2 (Feb
   1990), 209-224. Good hash algorithms tend to be domain-specific,
   so what's good for [diffutils'] |io.c| may not be good for your
   application.'' */

/* Rotate |x| left by |n| bits; the counterpart of |rotr_sz| defined
   further below, and needed by |HASH_ONE_CHAR|. */
static inline size_t
rotl_sz(size_t x, int n) {
    return ((x << n) | (x >> ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
}

size_t
hash_string(const char *string, size_t n_buckets) {
# define HASH_ONE_CHAR(Value, Byte) \
    ((Byte) + rotl_sz(Value, 7))

    size_t value = 0;
    unsigned char ch;

    for (; (ch = *string); string++)
        value = HASH_ONE_CHAR(value, ch);
    return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else

/* This one comes from |recode|, and performs a bit better than the
   above as per a few experiments. It is inspired by a hashing
   routine found in the very old Cyber `snoop', itself written in
   typical Greg Mansfield style. (By the way, what happened to this
   excellent man? Is he still alive?) */

size_t
hash_string(const char *string, size_t n_buckets) {
    size_t value = 0;
    unsigned char ch;

    for (; (ch = *string); string++)
        value = (value * 31 + ch) % n_buckets;
    return value;
}

#endif
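
/* For illustration only: how |hash_string| is typically wrapped when
   the user entries are structs keyed by a string field. The |item|
   struct and its functions are hypothetical names. Kept under
   |#if 0| like the testing code at the end of this file. */
#if 0
struct item {
    char *name;             /* the key */
    int payload;
};

static size_t
item_hasher(const void *entry, size_t n_buckets)
{
    return hash_string(((const struct item *) entry)->name, n_buckets);
}

static bool
item_comparator(const void *a, const void *b)
{
    return strcmp(((const struct item *) a)->name,
                  ((const struct item *) b)->name) == 0;
}
#endif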

/* Return true if |candidate| is a prime number. |candidate| should
   be an odd number at least equal to 11. */

static bool
is_prime(size_t candidate) {
    size_t divisor = 3;
    size_t square = divisor * divisor;

    /* Step through odd divisors; the square is maintained
       incrementally, using (d + 2)^2 = d^2 + 4 * (d + 1). */
    while (square < candidate && (candidate % divisor)) {
        divisor++;
        square += 4 * divisor;
        divisor++;
    }

    return (candidate % divisor ? true : false);
}

/* Round a given |candidate| number up to the nearest prime, and
   return that prime. Primes lower than 10 are merely skipped. */

static size_t
next_prime(size_t candidate) {
    /* Skip small primes. */
    if (candidate < 10)
        candidate = 10;

    /* Make it definitely odd. */
    candidate |= 1;

    while (SIZE_MAX != candidate && !is_prime(candidate))
        candidate += 2;

    return candidate;
}

void
hash_reset_tuning(Hash_tuning *tuning) {
    *tuning = default_tuning;
}

/* Given a |size_t| argument |x|, return the value corresponding to
   rotating the bits |n| steps to the right. |n| must be between 1
   and |(CHAR_BIT * sizeof (size_t) - 1)| inclusive. */
static inline size_t
rotr_sz(size_t x, int n) {
    return ((x >> n) | (x << ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
}

/* If the user passes a |NULL| hasher, we hash the raw pointer. */
static size_t
raw_hasher(const void *data, size_t n) {
    /* When hashing unique pointers, it is often the case that they
       were generated by malloc and thus have the property that the
       low-order bits are 0. As this tends to give poorer performance
       with small tables, we rotate the pointer value before
       performing division, in an attempt to improve hash quality. */
    size_t val = rotr_sz((size_t) data, 3);
    return val % n;
}

/* If the user passes a |NULL| comparator, we use pointer
   comparison. */
static bool
raw_comparator(const void *a, const void *b) {
    return a == b;
}


/* For the given hash |table|, check the user supplied tuning
   structure for reasonable values, and return true if there is no
   gross error with it. Otherwise, definitively reset the |tuning|
   field to some acceptable default in the hash table (that is, the
   user loses the right of further modifying tuning arguments), and
   return false. */

static bool
check_tuning(Hash_table *table) {
    const Hash_tuning *tuning = table->tuning;
    float epsilon;
    if (tuning == &default_tuning)
        return true;

    /* Be a bit stricter than mathematics would require, so that
       rounding errors in size calculations do not cause allocations
       to fail to grow or shrink as they should. The smallest
       allocation is 11 (due to |next_prime|'s algorithm), so an
       epsilon of 0.1 should be good enough. */
    epsilon = 0.1f;

    if (epsilon < tuning->growth_threshold
        && tuning->growth_threshold < 1 - epsilon
        && 1 + epsilon < tuning->growth_factor
        && 0 <= tuning->shrink_threshold
        && tuning->shrink_threshold + epsilon < tuning->shrink_factor
        && tuning->shrink_factor <= 1
        && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
        return true;

    table->tuning = &default_tuning;
    return false;
}

/* Compute the size of the bucket array for the given |candidate| and
   |tuning|, or return 0 if there is no possible way to allocate that
   many entries. */

static size_t
compute_bucket_size(size_t candidate, const Hash_tuning *tuning) {
    if (!tuning->is_n_buckets) {
        float new_candidate = candidate / tuning->growth_threshold;
        if (SIZE_MAX <= new_candidate)
            return 0;
        candidate = new_candidate;
    }
    candidate = next_prime(candidate);
    if (xalloc_oversized(candidate, sizeof(struct hash_entry *)))
        return 0;
    return candidate;
}
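
/* A worked example of the sizing arithmetic above, under the default
   tuning: a |candidate| of 100 entries is first inflated to
   100 / 0.8 = 125 candidate buckets, so that 100 entries fit before
   the growth threshold is crossed; |next_prime| then rounds 125 up
   through 127, which is prime, so the bucket array gets 127 slots. */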

/* Allocate and return a new hash table, or |NULL| upon failure. The
   initial number of buckets is automatically selected so as to {\it
   guarantee} that you may insert at least |candidate| different user
   entries before any growth of the hash table size occurs. So, if
   you have a reasonably tight a-priori upper bound on the number of
   entries you intend to insert in the hash table, you may save some
   table memory and insertion time, by specifying it here. If the
   |is_n_buckets| field of the |tuning| structure is true, the
   |candidate| argument has its meaning changed to the wanted number
   of buckets.

   |tuning| points to a structure of user-supplied values, in case
   some fine tuning is wanted over the default behavior of the hasher.
   If |tuning| is |NULL|, the default tuning parameters are used
   instead. If |tuning| is provided but the values requested are out
   of bounds or might cause rounding errors, return |NULL|.

   The user-supplied |hasher| function, when not |NULL|, accepts two
   arguments |entry| and |table_size|. It computes, by hashing
   |entry| contents, a slot number for that entry which should be in
   the range |0..table_size-1|. This slot number is then returned.

   The user-supplied |comparator| function, when not |NULL|, accepts
   two arguments pointing to user data; it returns true for a pair of
   entries that compare equal, or false otherwise. This function is
   internally called on entries which are already known to hash to
   the same bucket index, but which are distinct pointers.

   The user-supplied |data_freer| function, when not |NULL|, may be
   later called with the user data as an argument, just before the
   entry containing the data gets freed. This happens from within
   |hash_free| or |hash_clear|. You should specify this function only
   if you want these functions to free all of your |data| data. This
   is typically the case when your data is simply an auxiliary struct
   that you have |malloc|'d to aggregate several values. */

Hash_table *
hash_initialize(size_t candidate, const Hash_tuning *tuning,
                Hash_hasher hasher, Hash_comparator comparator,
                Hash_data_freer data_freer) {
    Hash_table *table;

    if (hasher == NULL)
        hasher = raw_hasher;
    if (comparator == NULL)
        comparator = raw_comparator;

    table = malloc(sizeof *table);
    if (table == NULL)
        return NULL;

    if (!tuning)
        tuning = &default_tuning;
    table->tuning = tuning;
    if (!check_tuning(table)) {
        /* Fail if the tuning options are invalid. This is the only
           occasion when the user gets some feedback about it. Once
           the table is created, if the user provides invalid tuning
           options, we silently revert to using the defaults, and
           ignore further requests to change the tuning options. */
        goto fail;
    }

    table->n_buckets = compute_bucket_size(candidate, tuning);
    if (!table->n_buckets)
        goto fail;

    table->bucket = calloc(table->n_buckets, sizeof *table->bucket);
    if (table->bucket == NULL)
        goto fail;
    table->bucket_limit = table->bucket + table->n_buckets;
    table->n_buckets_used = 0;
    table->n_entries = 0;

    table->hasher = hasher;
    table->comparator = comparator;
    table->data_freer = data_freer;

    table->free_entry_list = NULL;
#if USE_OBSTACK
    obstack_init(&table->entry_stack);
#endif
    return table;

fail:
    free(table);
    return NULL;
}
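
/* For illustration only: a complete lifecycle under the default
   tuning, with |char *| entries hashed by contents rather than by
   pointer. A minimal sketch, kept under |#if 0| like the testing
   code at the end of this file; |string_hasher|, |string_comparator|
   and |demo| are hypothetical names. */
#if 0
static size_t
string_hasher(const void *entry, size_t n_buckets)
{
    return hash_string(entry, n_buckets);
}

static bool
string_comparator(const void *a, const void *b)
{
    return strcmp(a, b) == 0;
}

static void
demo(void)
{
    /* Room for at least 100 entries before the first rehash; |free|
       as |data_freer| so |hash_free| releases the strdup'd keys. */
    Hash_table *table = hash_initialize(100, NULL, string_hasher,
                                        string_comparator, free);
    if (!table)
        abort();

    if (!hash_insert(table, strdup("alpha")))
        abort();            /* out of memory */

    if (hash_lookup(table, "alpha"))
        printf("found\n");

    hash_free(table);       /* frees the entries and the keys */
}
#endif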

/* Make all buckets empty, placing any chained entries on the free
   list. Apply the user-specified function |data_freer| (if any) to
   the data of any affected entries. */

void
hash_clear(Hash_table *table) {
    struct hash_entry *bucket;

    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        if (bucket->data) {
            struct hash_entry *cursor;
            struct hash_entry *next;

            /* Free the bucket overflow. */
            for (cursor = bucket->next; cursor; cursor = next) {
                if (table->data_freer)
                    table->data_freer(cursor->data);
                cursor->data = NULL;

                next = cursor->next;
                /* Relinking is done one entry at a time, as it is to
                   be expected that overflows are either rare or
                   short. */
                cursor->next = table->free_entry_list;
                table->free_entry_list = cursor;
            }

            /* Free the bucket head. */
            if (table->data_freer)
                table->data_freer(bucket->data);
            bucket->data = NULL;
            bucket->next = NULL;
        }
    }

    table->n_buckets_used = 0;
    table->n_entries = 0;
}

/* Reclaim all storage associated with a hash table. If a
   |data_freer| function has been supplied by the user when the hash
   table was created, this function applies it to the data of each
   entry before freeing that entry. */

void
hash_free(Hash_table *table) {
    struct hash_entry *bucket;
    struct hash_entry *cursor;
    struct hash_entry *next;

    /* Call the user |data_freer| function. */
    if (table->data_freer && table->n_entries) {
        for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
            if (bucket->data) {
                for (cursor = bucket; cursor; cursor = cursor->next)
                    table->data_freer(cursor->data);
            }
        }
    }

#if USE_OBSTACK

    obstack_free(&table->entry_stack, NULL);

#else

    /* Free all bucket overflowed entries. */
    for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
        for (cursor = bucket->next; cursor; cursor = next) {
            next = cursor->next;
            free(cursor);
        }
    }

    /* Also reclaim the internal list of previously freed entries. */
    for (cursor = table->free_entry_list; cursor; cursor = next) {
        next = cursor->next;
        free(cursor);
    }

#endif

    /* Free the remainder of the hash table structure. */
    free(table->bucket);
    free(table);
}

/* Insertion and deletion. */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one. If this is not possible, allocate a new
   one. */

static struct hash_entry *
allocate_entry(Hash_table *table) {
    struct hash_entry *new;

    if (table->free_entry_list) {
        new = table->free_entry_list;
        table->free_entry_list = new->next;
    } else {
#if USE_OBSTACK
        new = obstack_alloc(&table->entry_stack, sizeof *new);
#else
        new = malloc(sizeof *new);
#endif
    }

    return new;
}

/* Free a hash entry which was part of some bucket overflow, saving it
   for later recycling. */

static void
free_entry(Hash_table *table, struct hash_entry *entry) {
    entry->data = NULL;
    entry->next = table->free_entry_list;
    table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.
   When |entry| matches an entry in the table, return a pointer to the
   corresponding user data and set |*bucket_head| to the head of the
   selected bucket. Otherwise, return |NULL|. When |delete| is true
   and |entry| matches an entry in the table, unlink the matching
   entry. */

static void *
hash_find_entry(Hash_table *table, const void *entry,
                struct hash_entry **bucket_head, bool delete) {
    struct hash_entry *bucket = safe_hasher(table, entry);
    struct hash_entry *cursor;

    *bucket_head = bucket;

    /* Test for empty bucket. */
    if (bucket->data == NULL)
        return NULL;

    /* See if the entry is the first in the bucket. */
    if (entry == bucket->data || table->comparator(entry, bucket->data)) {
        void *data = bucket->data;

        if (delete) {
            if (bucket->next) {
                struct hash_entry *next = bucket->next;

                /* Bump the first overflow entry into the bucket head,
                   then save the previous first overflow entry for
                   later recycling. */
                *bucket = *next;
                free_entry(table, next);
            } else {
                bucket->data = NULL;
            }
        }

        return data;
    }

    /* Scan the bucket overflow. */
    for (cursor = bucket; cursor->next; cursor = cursor->next) {
        if (entry == cursor->next->data
            || table->comparator(entry, cursor->next->data)) {
            void *data = cursor->next->data;

            if (delete) {
                struct hash_entry *next = cursor->next;

                /* Unlink the entry to delete, then save the freed
                   entry for later recycling. */
                cursor->next = next->next;
                free_entry(table, next);
            }

            return data;
        }
    }

    /* No entry found. */
    return NULL;
}

/* Internal helper, to move entries from |src| to |dst|. Both tables
   must share the same free entry list. If |safe|, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur. Return false if the free entry list is exhausted and an
   allocation fails. */

static bool
transfer_entries(Hash_table *dst, Hash_table *src, bool safe) {
    struct hash_entry *bucket;
    struct hash_entry *cursor;
    struct hash_entry *next;
    for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
        if (bucket->data) {
            void *data;
            struct hash_entry *new_bucket;

            /* Within each bucket, transfer overflow entries first and
               then the bucket head, to minimize memory pressure.
               After all, the only time we might allocate is when
               moving the bucket head, but moving overflow entries
               first may create free entries that can be recycled by
               the time we finally get to the bucket head. */
            for (cursor = bucket->next; cursor; cursor = next) {
                data = cursor->data;
                new_bucket = safe_hasher(dst, data);

                next = cursor->next;

                if (new_bucket->data) {
                    /* Merely relink an existing entry, when moving
                       from a bucket overflow into a bucket
                       overflow. */
                    cursor->next = new_bucket->next;
                    new_bucket->next = cursor;
                } else {
                    /* Free an existing entry, when moving from a
                       bucket overflow into a bucket header. */
                    new_bucket->data = data;
                    dst->n_buckets_used++;
                    free_entry(dst, cursor);
                }
            }
            /* Now move the bucket head. Be sure that if we fail due
               to an allocation failure, the src table is left in a
               consistent state. */
            data = bucket->data;
            bucket->next = NULL;
            if (safe)
                continue;
            new_bucket = safe_hasher(dst, data);

            if (new_bucket->data) {
                /* Allocate or recycle an entry, when moving from a
                   bucket header into a bucket overflow. */
                struct hash_entry *new_entry = allocate_entry(dst);

                if (new_entry == NULL)
                    return false;

                new_entry->data = data;
                new_entry->next = new_bucket->next;
                new_bucket->next = new_entry;
            } else {
                /* Move from one bucket header to another. */
                new_bucket->data = data;
                dst->n_buckets_used++;
            }
            bucket->data = NULL;
            src->n_buckets_used--;
        }
    return true;
}

/* For an already existing hash table, change the number of buckets
   through specifying |candidate|. The contents of the hash table are
   preserved. The new number of buckets is automatically selected so
   as to {\it guarantee} that the table may receive at least
   |candidate| different user entries, including those already in the
   table, before any other growth of the hash table size occurs. If
   |tuning->is_n_buckets| is true, then |candidate| specifies the
   exact number of buckets desired. Return true iff the rehash
   succeeded. */

bool
hash_rehash(Hash_table *table, size_t candidate) {
    Hash_table storage;
    Hash_table *new_table;
    size_t new_size = compute_bucket_size(candidate, table->tuning);

    if (!new_size)
        return false;
    if (new_size == table->n_buckets)
        return true;
    new_table = &storage;
    new_table->bucket = calloc(new_size, sizeof *new_table->bucket);
    if (new_table->bucket == NULL)
        return false;
    new_table->n_buckets = new_size;
    new_table->bucket_limit = new_table->bucket + new_size;
    new_table->n_buckets_used = 0;
    new_table->n_entries = 0;
    new_table->tuning = table->tuning;
    new_table->hasher = table->hasher;
    new_table->comparator = table->comparator;
    new_table->data_freer = table->data_freer;

    /* In order for the transfer to successfully complete, we need
       additional overflow entries when distinct buckets in the old
       table collide into a common bucket in the new table. The worst
       case possible is a hasher that gives a good spread with the old
       size, but returns a constant with the new size; if we were to
       guarantee |table->n_buckets_used-1| free entries in advance, then
       the transfer would be guaranteed to not allocate memory.
       However, for large tables, a guarantee of no further allocation
       introduces a lot of extra memory pressure, all for an unlikely
       corner case (most rehashes reduce, rather than increase, the
       number of overflow entries needed). So, we instead ensure that
       the transfer process can be reversed if we hit a memory
       allocation failure mid-transfer. */

    /* Merely reuse the extra old space into the new table. */
#if USE_OBSTACK
    new_table->entry_stack = table->entry_stack;
#endif
    new_table->free_entry_list = table->free_entry_list;

    if (transfer_entries(new_table, table, false)) {
        /* Entries transferred successfully; tie up the loose ends. */
        free(table->bucket);
        table->bucket = new_table->bucket;
        table->bucket_limit = new_table->bucket_limit;
        table->n_buckets = new_table->n_buckets;
        table->n_buckets_used = new_table->n_buckets_used;
        table->free_entry_list = new_table->free_entry_list;
        /* |table->n_entries| and |table->entry_stack| already hold
           their values. */
        return true;
    }

    /* We've allocated |new_table->bucket| (and possibly some
       entries), exhausted the free list, and moved some but not all
       entries into |new_table|. We must undo the partial move before
       returning failure. The only way to get into this situation is
       if |new_table| uses fewer buckets than the old table, so we
       will reclaim some free entries as overflows in the new table
       are put back into distinct buckets in the old table.

       There are some pathological cases where a single pass through
       the table requires more intermediate overflow entries than
       using two passes. Two passes give worse cache performance and
       take longer, but at this point, we're already out of memory,
       so slow and safe is better than failure. */
    table->free_entry_list = new_table->free_entry_list;
    if (!(transfer_entries(table, new_table, true)
          && transfer_entries(table, new_table, false)))
        abort();
    /* |table->n_entries| already holds its value. */
    free(new_table->bucket);
    return false;
}
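
/* For illustration only: |hash_rehash| can pre-size a live table
   before a bulk load, so that the insertions trigger no further
   growth along the way. A minimal sketch; |bulk_load|, |names| and
   |expected| are hypothetical names. Kept under |#if 0| like the
   testing code at the end of this file. */
#if 0
static void
bulk_load(Hash_table *table, char **names, size_t expected)
{
    size_t i;

    /* One rehash up front instead of several along the way. A
       failed pre-size is not fatal: the table still works and will
       simply grow incrementally as entries are inserted. */
    (void) hash_rehash(table, expected);

    for (i = 0; i < expected; i++)
        if (!hash_insert(table, names[i]))
            abort();        /* out of memory */
}
#endif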

/* Insert |entry| into hash |table| if there is not already a matching
   entry.

   Return -1 upon memory allocation failure.
   Return 1 if insertion succeeded.
   Return 0 if there is already a matching entry in the table, and in
   that case, if |matched_ent| is non-|NULL|, set |*matched_ent| to
   that entry.

   This interface is easier to use than |hash_insert| when you must
   distinguish between the latter two cases. More importantly,
   |hash_insert| is unusable for some types of |entry| values. When
   using |hash_insert|, the only way to distinguish those cases is to
   compare the return value and |entry|. That works only when you can
   have two different |entry| values that point to data that compares
   "equal". Thus, when the |entry| value is a simple scalar, you must
   use |hash_insert_if_absent|. |entry| must not be |NULL|. */
int
hash_insert_if_absent(Hash_table *table, void const *entry,
                      void const **matched_ent) {
    void *data;
    struct hash_entry *bucket;

    /* The caller cannot insert a |NULL| entry, since |hash_lookup|
       returns |NULL| to indicate "not found", and |hash_find_entry|
       uses |bucket->data == NULL| to indicate an empty bucket. */
    if (!entry)
        abort();

    /* If there's a matching entry already in the table, return that. */
    if ((data = hash_find_entry(table, entry, &bucket, false)) != NULL) {
        if (matched_ent)
            *matched_ent = data;
        return 0;
    }

    /* If the growth threshold of the buckets in use has been reached,
       increase the table size and rehash. There's no point in
       checking the number of entries: if the hashing function is
       ill-conditioned, rehashing is not likely to improve it. */

    if (table->n_buckets_used
        > table->tuning->growth_threshold * table->n_buckets) {
        /* Check more fully, before starting real work. If tuning
           arguments became invalid, the second check will rely on
           proper defaults. */
        check_tuning(table);
        if (table->n_buckets_used
            > table->tuning->growth_threshold * table->n_buckets) {
            const Hash_tuning *tuning = table->tuning;
            float candidate =
                (tuning->is_n_buckets
                 ? (table->n_buckets * tuning->growth_factor)
                 : (table->n_buckets * tuning->growth_factor
                    * tuning->growth_threshold));

            if (SIZE_MAX <= candidate)
                return -1;

            /* If the rehash fails, arrange to return -1. */
            if (!hash_rehash(table, candidate))
                return -1;

            /* Update the bucket we are interested in. */
            if (hash_find_entry(table, entry, &bucket, false) != NULL)
                abort();
        }
    }

    /* |entry| is not matched, it should be inserted. */

    if (bucket->data) {
        struct hash_entry *new_entry = allocate_entry(table);

        if (new_entry == NULL)
            return -1;

        /* Add |entry| in the overflow of the bucket. */

        new_entry->data = (void *) entry;
        new_entry->next = bucket->next;
        bucket->next = new_entry;
        table->n_entries++;
        return 1;
    }

    /* Add |entry| right in the bucket head. */

    bucket->data = (void *) entry;
    table->n_entries++;
    table->n_buckets_used++;

    return 1;
}
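
/* For illustration only: telling the three |hash_insert_if_absent|
   outcomes apart, which matters when entries are scalars (here,
   nonzero ints cast to pointers, a hypothetical encoding; zero is
   excluded since |NULL| cannot be stored). Kept under |#if 0| like
   the testing code at the end of this file. */
#if 0
static bool
note_value(Hash_table *table, int value)
{
    void const *matched;

    switch (hash_insert_if_absent(table, (void *) (size_t) value,
                                  &matched)) {
    case 1:
        return true;        /* newly inserted */
    case 0:
        return false;       /* |matched| is the existing entry */
    default:
        abort();            /* -1: out of memory */
    }
}
#endif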

/* If |entry| matches an entry already in the hash table, return the
   pointer to the entry from the table. Otherwise, insert |entry| and
   return |entry|. Return |NULL| if the storage required for
   insertion cannot be allocated. This implementation does not
   support duplicate entries or insertion of |NULL|. */

void *
hash_insert(Hash_table *table, void const *entry) {
    void const *matched_ent;
    int err = hash_insert_if_absent(table, entry, &matched_ent);
    return (err == -1
            ? NULL
            : (void *) (err == 0 ? matched_ent : entry));
}

/* If |entry| is already in the table, remove it and return the
   just-deleted data (the user may want to deallocate its storage).
   If |entry| is not in the table, don't modify the table and return
   |NULL|. */

void *
hash_delete(Hash_table *table, const void *entry) {
    void *data;
    struct hash_entry *bucket;

    data = hash_find_entry(table, entry, &bucket, true);
    if (!data)
        return NULL;

    table->n_entries--;
    if (!bucket->data) {
        table->n_buckets_used--;

        /* If the shrink threshold of the buckets in use has been
           reached, rehash into a smaller table. */

        if (table->n_buckets_used
            < table->tuning->shrink_threshold * table->n_buckets) {
            /* Check more fully, before starting real work. If tuning
               arguments became invalid, the second check will rely on
               proper defaults. */
            check_tuning(table);
            if (table->n_buckets_used
                < table->tuning->shrink_threshold * table->n_buckets) {
                const Hash_tuning *tuning = table->tuning;
                size_t candidate =
                    (tuning->is_n_buckets
                     ? table->n_buckets * tuning->shrink_factor
                     : (table->n_buckets * tuning->shrink_factor
                        * tuning->growth_threshold));

                if (!hash_rehash(table, candidate)) {
                    /* Failure to allocate memory in an attempt to
                       shrink the table is not fatal. But since
                       memory is low, we can at least be kind and free
                       any spare entries, rather than keeping them
                       tied up in the free entry list. */
#if !USE_OBSTACK
                    struct hash_entry *cursor = table->free_entry_list;
                    struct hash_entry *next;
                    while (cursor) {
                        next = cursor->next;
                        free(cursor);
                        cursor = next;
                    }
                    table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

    return data;
}
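
/* For illustration only: deleting an entry and releasing its
   storage, which |hash_delete| itself never does, even when a
   |data_freer| was supplied. A minimal sketch assuming |malloc|'d
   entries; |drop_entry| is a hypothetical name. Kept under |#if 0|
   like the testing code below. */
#if 0
static void
drop_entry(Hash_table *table, const void *key)
{
    void *data = hash_delete(table, key);

    /* The entry is already unlinked from the table; freeing its
       storage is the caller's job. */
    if (data)
        free(data);
}
#endif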

/* Testing. */

#if 0

void
hash_print(const Hash_table *table) {
    struct hash_entry *bucket = (struct hash_entry *) table->bucket;

    for (; bucket < table->bucket_limit; bucket++) {
        struct hash_entry *cursor;

        if (bucket->data)
            printf("%lu:\n", (unsigned long int) (bucket - table->bucket));

        for (cursor = bucket; cursor; cursor = cursor->next) {
            char const *s = cursor->data;
            /* FIXME */
            if (s)
                printf(" %s\n", s);
        }
    }
}

#endif