src/third_party/google-sparsehash/experimental/libchash.c - incubator-pagespeed-debian - Git at Google

 /* Copyright (c) 1998 - 2005, Google Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *     * Neither the name of Google Inc. nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * ---
  * Author: Craig Silverstein
  *
  *  This library is intended to be used for in-memory hash tables,
  *  though it provides rudimentary permanent-storage capabilities.
  *  It attempts to be fast, portable, and small.  The best algorithm
  *  to fulfill these goals is an internal probing hashing algorithm,
  *  as in Knuth, _Art of Computer Programming_, vol III.  Unlike
  *  chained (open) hashing, it doesn't require a pointer for every
  *  item, yet it is still constant time lookup in practice.
  *
  *  Also to save space, we let the contents (both data and key) that
  *  you insert be a union: if the key/data is small, we store it
  *  directly in the hashtable, otherwise we store a pointer to it.
  *  To keep you from having to figure out which, use KEY_PTR and
  *  PTR_KEY to convert between the arguments to these functions and
  *  a pointer to the real data.  For instance:
  *     char key[] = "ab", *key2;
  *     HTItem *bck; HashTable *ht;
  *     HashInsert(ht, PTR_KEY(ht, key), 0);
  *     bck = HashFind(ht, PTR_KEY(ht, "ab"));
  *     key2 = KEY_PTR(ht, bck->key);
  *
  *  There are a rich set of operations supported:
  *     AllocateHashTable() -- Allocates a hashtable structure and
  *                            returns it.
  *        cchKey: if it's a positive number, then each key is a
  *                fixed-length record of that length.  If it's 0,
  *                the key is assumed to be a \0-terminated string.
  *        fSaveKey: normally, you are responsible for allocating
  *                  space for the key.  If this is 1, we make a
  *                  copy of the key for you.
  *     ClearHashTable() -- Removes everything from a hashtable
  *     FreeHashTable() -- Frees memory used by a hashtable
  *
  *     HashFind() -- takes a key (use PTR_KEY) and returns the
  *                   HTItem containing that key, or NULL if the
  *                   key is not in the hashtable.
  *     HashFindLast() -- returns the item found by last HashFind()
  *     HashFindOrInsert() -- inserts the key/data pair if the key
  *                           is not already in the hashtable, or
  *                           returns the appropraite HTItem if it is.
  *     HashFindOrInsertItem() -- takes key/data as an HTItem.
  *     HashInsert() -- adds a key/data pair to the hashtable.  What
  *                     it does if the key is already in the table
  *                     depends on the value of SAMEKEY_OVERWRITE.
  *     HashInsertItem() -- takes key/data as an HTItem.
  *     HashDelete() -- removes a key/data pair from the hashtable,
  *                     if it's there.  RETURNS 1 if it was there,
  *                     0 else.
  *        If you use sparse tables and never delete, the full data
  *        space is available.  Otherwise we steal -2 (maybe -3),
  *        so you can't have data fields with those values.
  *     HashDeleteLast() -- deletes the item returned by the last Find().
  *
  *     HashFirstBucket() -- used to iterate over the buckets in a
  *                          hashtable.  DON'T INSERT OR DELETE WHILE
  *                          ITERATING!  You can't nest iterations.
  *     HashNextBucket() -- RETURNS NULL at the end of iterating.
  *
  *     HashSetDeltaGoalSize() -- if you're going to insert 1000 items
  *                               at once, call this fn with arg 1000.
  *                               It grows the table more intelligently.
  *
  *     HashSave() -- saves the hashtable to a file.  It saves keys ok,
  *                   but it doesn't know how to interpret the data field,
  *                   so if the data field is a pointer to some complex
  *                   structure, you must send a function that takes a
  *                   file pointer and a pointer to the structure, and
  *                   write whatever you want to write.  It should return
  *                   the number of bytes written.  If the file is NULL,
  *                   it should just return the number of bytes it would
  *                   write, without writing anything.
  *                      If your data field is just an integer, not a
  *                   pointer, just send NULL for the function.
  *     HashLoad() -- loads a hashtable.  It needs a function that takes
  *                   a file and the size of the structure, and expects
  *                   you to read in the structure and return a pointer
  *                   to it.  You must do memory allocation, etc.  If
  *                   the data is just a number, send NULL.
  *     HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk
  *                       until needed.  This saves memory, but if you look
  *                       up the same key a lot, it does a disk access each
  *                       time.
  *        You can't do Insert() or Delete() on hashtables that were loaded
  *        from disk.
  *
  *  See libchash.h for parameters you can modify.  Make sure LOG_WORD_SIZE
  *  is defined correctly for your machine!  (5 for 32 bit words, 6 for 64).
  */

 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>       /* for strcmp, memcmp, etc */
 #include <sys/types.h>    /* ULTRIX needs this for in.h */
 #include <netinet/in.h>   /* for reading/writing hashtables */
 #include <assert.h>
 #include "libchash.h"     /* all the types */

    /* if keys are stored directly but cchKey is less than sizeof(ulong), */
    /* this cuts off the bits at the end */
 char grgKeyTruncMask[sizeof(ulong)][sizeof(ulong)];
 #define KEY_TRUNC(ht, key)                                                    \
    ( STORES_PTR(ht) || (ht)->cchKey == sizeof(ulong)                          \
        ? (key) : ((key) & *(ulong *)&(grgKeyTruncMask[(ht)->cchKey][0])) )

    /* round num up to a multiple of wordsize.  (LOG_WORD_SIZE-3 is in bytes) */
 #define WORD_ROUND(num)         ( ((num-1) | ((1<<(LOG_WORD_SIZE-3))-1)) + 1 )
 #define NULL_TERMINATED  0    /* val of cchKey if keys are null-term strings */

    /* Useful operations we do to keys: compare them, copy them, free them */

 #define KEY_CMP(ht, key1, key2)      ( !STORES_PTR(ht)  ? (key1) - (key2) :   \
                                        (key1) == (key2) ? 0 :                 \
                                        HashKeySize(ht) == NULL_TERMINATED ?   \
                                           strcmp((char *)key1, (char *)key2) :\
                                           memcmp((void *)key1, (void *)key2,  \
 						 HashKeySize(ht)) )

 #define COPY_KEY(ht, keyTo, keyFrom) do                                       \
    if ( !STORES_PTR(ht) || !(ht)->fSaveKeys )                                 \
       (keyTo) = (keyFrom);                     /* just copy pointer or info */\
    else if ( (ht)->cchKey == NULL_TERMINATED )        /* copy 0-term.ed str */\
    {                                                                          \
       (keyTo) = (ulong)HTsmalloc( WORD_ROUND(strlen((char *)(keyFrom))+1) );  \
       strcpy((char *)(keyTo), (char *)(keyFrom));                             \
    }                                                                          \
    else                                                                       \
    {                                                                          \
       (keyTo) = (ulong) HTsmalloc( WORD_ROUND((ht)->cchKey) );                \
       memcpy( (char *)(keyTo), (char *)(keyFrom), (ht)->cchKey);              \
    }                                                                          \
    while ( 0 )

 #define FREE_KEY(ht, key) do                                                  \
    if ( STORES_PTR(ht) && (ht)->fSaveKeys )                                   \
      if ( (ht)->cchKey == NULL_TERMINATED )                                   \
         HTfree((char *)(key), WORD_ROUND(strlen((char *)(key))+1));           \
      else                                                                     \
         HTfree((char *)(key), WORD_ROUND((ht)->cchKey));                      \
    while ( 0 )

    /* the following are useful for bitmaps */
    /* Format is like this (if 1 word = 4 bits):  3210 7654 ba98 fedc ... */
 typedef ulong          HTBitmapPart;  /* this has to be unsigned, for >> */
 typedef HTBitmapPart   HTBitmap[1<<LOG_BM_WORDS];
 typedef ulong          HTOffset; /* something big enough to hold offsets */

 #define BM_BYTES(cBuckets)   /* we must ensure it's a multiple of word size */\
    ( (((cBuckets) + 8*sizeof(ulong)-1) >> LOG_WORD_SIZE) << (LOG_WORD_SIZE-3) )
 #define MOD2(i, logmod)      ( (i) & ((1<<(logmod))-1) )
 #define DIV_NUM_ENTRIES(i)   ( (i) >> LOG_WORD_SIZE )
 #define MOD_NUM_ENTRIES(i)   ( MOD2(i, LOG_WORD_SIZE) )
 #define MODBIT(i)            ( ((ulong)1) << MOD_NUM_ENTRIES(i) )

 #define TEST_BITMAP(bm, i)   ( (bm)[DIV_NUM_ENTRIES(i)] & MODBIT(i) ? 1 : 0 )
 #define SET_BITMAP(bm, i)    (bm)[DIV_NUM_ENTRIES(i)] |= MODBIT(i)
 #define CLEAR_BITMAP(bm, i)  (bm)[DIV_NUM_ENTRIES(i)] &= ~MODBIT(i)

    /* the following are useful for reading and writing hashtables */
 #define READ_UL(fp, data)                  \
    do {                                    \
       long _ul;                            \
       fread(&_ul, sizeof(_ul), 1, (fp));   \
       data = ntohl(_ul);                   \
    } while (0)

 #define WRITE_UL(fp, data)                 \
    do {                                    \
       long _ul = htonl((long)(data));      \
       fwrite(&_ul, sizeof(_ul), 1, (fp));  \
    } while (0)

    /* Moves data from disk to memory if necessary.  Note dataRead cannot be  *
     * NULL, because then we might as well (and do) load the data into memory */
 #define LOAD_AND_RETURN(ht, loadCommand)     /* lC returns an HTItem * */     \
    if ( !(ht)->fpData )          /* data is stored in memory */               \
       return (loadCommand);                                                   \
    else                          /* must read data off of disk */             \
    {                                                                          \
       int cchData;                                                            \
       HTItem *bck;                                                            \
       if ( (ht)->bckData.data )  free((char *)(ht)->bckData.data);            \
       ht->bckData.data = (ulong)NULL;   /* needed if loadCommand fails */     \
       bck = (loadCommand);                                                    \
       if ( bck == NULL )          /* loadCommand failed: key not found */     \
          return NULL;                                                         \
       else                                                                    \
          (ht)->bckData = *bck;                                                \
       fseek(ht->fpData, (ht)->bckData.data, SEEK_SET);                        \
       READ_UL((ht)->fpData, cchData);                                         \
       (ht)->bckData.data = (ulong)(ht)->dataRead((ht)->fpData, cchData);      \
       return &((ht)->bckData);                                                \
    }


 /* ======================================================================== */
 /*                          UTILITY ROUTINES                                */
 /*                       ----------------------                             */

 /* HTsmalloc() -- safe malloc
  *    allocates memory, or crashes if the allocation fails.
  */
 static void *HTsmalloc(unsigned long size)
 {
    void *retval;

    if ( size == 0 )
       return NULL;
    retval = (void *)malloc(size);
    if ( !retval )
    {
       fprintf(stderr, "HTsmalloc: Unable to allocate %lu bytes of memory\n",
 	      size);
       exit(1);
    }
    return retval;
 }

 /* HTscalloc() -- safe calloc
  *    allocates memory and initializes it to 0, or crashes if
  *    the allocation fails.
  */
 static void *HTscalloc(unsigned long size)
 {
    void *retval;

    retval = (void *)calloc(size, 1);
    if ( !retval && size > 0 )
    {
       fprintf(stderr, "HTscalloc: Unable to allocate %lu bytes of memory\n",
 	      size);
       exit(1);
    }
    return retval;
 }

 /* HTsrealloc() -- safe calloc
  *    grows the amount of memory from a source, or crashes if
  *    the allocation fails.
  */
 static void *HTsrealloc(void *ptr, unsigned long new_size, long delta)
 {
    if ( ptr == NULL )
       return HTsmalloc(new_size);
    ptr = realloc(ptr, new_size);
    if ( !ptr && new_size > 0 )
    {
       fprintf(stderr, "HTsrealloc: Unable to reallocate %lu bytes of memory\n",
 	      new_size);
       exit(1);
    }
    return ptr;
 }

 /* HTfree() -- keep track of memory use
  *    frees memory using free, but updates count of how much memory
  *    is being used.
  */
 static void HTfree(void *ptr, unsigned long size)
 {
    if ( size > 0 )         /* some systems seem to not like freeing NULL */
       free(ptr);
 }

 /*************************************************************************\
 | HTcopy()                                                                |
 |     Sometimes we interpret data as a ulong.  But ulongs must be         |
 |     aligned on some machines, so instead of casting we copy.            |
 \*************************************************************************/

 unsigned long HTcopy(char *ul)
 {
    unsigned long retval;

    memcpy(&retval, ul, sizeof(retval));
    return retval;
 }

 /*************************************************************************\
 | HTSetupKeyTrunc()                                                       |
 |     If keys are stored directly but cchKey is less than                 |
 |     sizeof(ulong), this cuts off the bits at the end.                   |
 \*************************************************************************/

 static void HTSetupKeyTrunc(void)
 {
    int i, j;

    for ( i = 0; i < sizeof(unsigned long); i++ )
       for ( j = 0; j < sizeof(unsigned long); j++ )
 	 grgKeyTruncMask[i][j] = j < i ? 255 : 0;   /* chars have 8 bits */
 }


 /* ======================================================================== */
 /*                            TABLE ROUTINES                                */
 /*                         --------------------                             */

 /*  The idea is that a hashtable with (logically) t buckets is divided
  *  into t/M groups of M buckets each.  (M is a constant set in
  *  LOG_BM_WORDS for efficiency.)  Each group is stored sparsely.
  *  Thus, inserting into the table causes some array to grow, which is
  *  slow but still constant time.  Lookup involves doing a
  *  logical-position-to-sparse-position lookup, which is also slow but
  *  constant time.  The larger M is, the slower these operations are
  *  but the less overhead (slightly).
  *
  *  To store the sparse array, we store a bitmap B, where B[i] = 1 iff
  *  bucket i is non-empty.  Then to look up bucket i we really look up
  *  array[# of 1s before i in B].  This is constant time for fixed M.
  *
  *  Terminology: the position of an item in the overall table (from
  *  1 .. t) is called its "location."  The logical position in a group
  *  (from 1 .. M ) is called its "position."  The actual location in
  *  the array (from 1 .. # of non-empty buckets in the group) is
  *  called its "offset."
  *
  *  The following operations are supported:
  *     o Allocate an array with t buckets, all empty
  *     o Free a array (but not whatever was stored in the buckets)
  *     o Tell whether or not a bucket is empty
  *     o Return a bucket with a given location
  *     o Set the value of a bucket at a given location
  *     o Iterate through all the buckets in the array
  *     o Read and write an occupancy bitmap to disk
  *     o Return how much memory is being allocated by the array structure
  */

 #ifndef SparseBucket            /* by default, each bucket holds an HTItem */
 #define SparseBucket            HTItem
 #endif

 typedef struct SparseBin {
    SparseBucket *binSparse;
    HTBitmap bmOccupied;      /* bmOccupied[i] is 1 if bucket i has an item */
    short cOccupied;          /* size of binSparse; useful for iterators, eg */
 } SparseBin;

 typedef struct SparseIterator {
    long posGroup;
    long posOffset;
    SparseBin *binSparse;     /* state info, to avoid args for NextBucket() */
    ulong cBuckets;
 } SparseIterator;

 #define LOG_LOW_BIN_SIZE        ( LOG_BM_WORDS+LOG_WORD_SIZE )
 #define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 )

    /* we need a small function to figure out # of items set in the bm */
 static HTOffset EntriesUpto(HTBitmapPart *bm, int i)
 {                                       /* returns # of set bits in 0..i-1 */
    HTOffset retval = 0;
    static HTOffset rgcBits[256] =             /* # of bits set in one char */
       {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};

    if ( i == 0 ) return 0;
    for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ )
    {                                       /* think of it as loop unrolling */
 #if LOG_WORD_SIZE >= 3                     /* 1 byte per word, or more */
       retval += rgcBits[*bm & 255];        /* get the low byte */
 #if LOG_WORD_SIZE >= 4                     /* at least 2 bytes */
       retval += rgcBits[(*bm >> 8) & 255];
 #if LOG_WORD_SIZE >= 5                     /* at least 4 bytes */
       retval += rgcBits[(*bm >> 16) & 255];
       retval += rgcBits[(*bm >> 24) & 255];
 #if LOG_WORD_SIZE >= 6                     /* 8 bytes! */
       retval += rgcBits[(*bm >> 32) & 255];
       retval += rgcBits[(*bm >> 40) & 255];
       retval += rgcBits[(*bm >> 48) & 255];
       retval += rgcBits[(*bm >> 56) & 255];
 #if LOG_WORD_SIZE >= 7                     /* not a concern for a while... */
 #error Need to rewrite EntriesUpto to support such big words
 #endif   /* >8 bytes */
 #endif   /* 8 bytes */
 #endif   /* 4 bytes */
 #endif   /* 2 bytes */
 #endif   /* 1 byte */
    }
    switch ( i ) {                         /* from 0 to 63 */
       case 0:
 	 return retval;
 #if LOG_WORD_SIZE >= 3                     /* 1 byte per word, or more */
       case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
 	 return (retval + rgcBits[*bm & ((1 << i)-1)]);
 #if LOG_WORD_SIZE >= 4                     /* at least 2 bytes */
       case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
 	 return (retval + rgcBits[*bm & 255] +
 		 rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]);
 #if LOG_WORD_SIZE >= 5                     /* at least 4 bytes */
       case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24:
 	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
 		 rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]);
       case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32:
 	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
 		 rgcBits[(*bm >> 16) & 255] +
 		 rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]);
 #if LOG_WORD_SIZE >= 6                     /* 8 bytes! */
       case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40:
 	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
 		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
 		 rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]);
       case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48:
 	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
 		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
 		 rgcBits[(*bm >> 32) & 255] +
 		 rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]);
       case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56:
 	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
 		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
 		 rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
 		 rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]);
       case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64:
 	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
 		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
 		 rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
 		 rgcBits[(*bm >> 48) & 255] +
 		 rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]);
 #endif   /* 8 bytes */
 #endif   /* 4 bytes */
 #endif   /* 2 bytes */
 #endif   /* 1 byte */
    }
    assert("" == "word size is too big in EntriesUpto()");
    return -1;
 }
 #define SPARSE_POS_TO_OFFSET(bm, i)   ( EntriesUpto(&((bm)[0]), i) )
 #define SPARSE_BUCKET(bin, location)  \
    ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse +                     \
       SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied, \
 		           MOD2(location, LOG_LOW_BIN_SIZE)) )


 /*************************************************************************\
 | SparseAllocate()                                                        |
 | SparseFree()                                                            |
 |     Allocates, sets-to-empty, and frees a sparse array.  All you need   |
 |     to tell me is how many buckets you want.  I return the number of    |
 |     buckets I actually allocated, setting the array as a parameter.     |
 |     Note that you have to set auxilliary parameters, like cOccupied.    |
 \*************************************************************************/

 static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets)
 {
    int cGroups = SPARSE_GROUPS(cBuckets);

    *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups);
    return cGroups << LOG_LOW_BIN_SIZE;
 }

 static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets)
 {
    ulong iGroup, cGroups = SPARSE_GROUPS(cBuckets);

    for ( iGroup = 0; iGroup < cGroups; iGroup++ )
       HTfree(binSparse[iGroup].binSparse, (sizeof(*binSparse[iGroup].binSparse)
 					   * binSparse[iGroup].cOccupied));
    HTfree(binSparse, sizeof(*binSparse) * cGroups);
    return NULL;
 }

 /*************************************************************************\
 | SparseIsEmpty()                                                         |
 | SparseFind()                                                            |
 |     You give me a location (ie a number between 1 and t), and I         |
 |     return the bucket at that location, or NULL if the bucket is        |
 |     empty.  It's OK to call Find() on an empty table.                   |
 \*************************************************************************/

 static int SparseIsEmpty(SparseBin *binSparse, ulong location)
 {
    return !TEST_BITMAP(binSparse[location>>LOG_LOW_BIN_SIZE].bmOccupied,
 		       MOD2(location, LOG_LOW_BIN_SIZE));
 }

 static SparseBucket *SparseFind(SparseBin *binSparse, ulong location)
 {
    if ( SparseIsEmpty(binSparse, location) )
       return NULL;
    return SPARSE_BUCKET(binSparse, location);
 }

 /*************************************************************************\
 | SparseInsert()                                                          |
 |     You give me a location, and contents to put there, and I insert     |
 |     into that location and RETURN a pointer to the location.  If        |
 |     bucket was already occupied, I write over the contents only if      |
 |     *pfOverwrite is 1.  We set *pfOverwrite to 1 if there was someone   |
 |     there (whether or not we overwrote) and 0 else.                     |
 \*************************************************************************/

 static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert,
 				  ulong location, int *pfOverwrite)
 {
    SparseBucket *bckPlace;
    HTOffset offset;

    bckPlace = SparseFind(binSparse, location);
    if ( bckPlace )                /* means we replace old contents */
    {
       if ( *pfOverwrite )
 	 *bckPlace = *bckInsert;
       *pfOverwrite = 1;
       return bckPlace;
    }

    binSparse += (location >> LOG_LOW_BIN_SIZE);
    offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied,
 				 MOD2(location, LOG_LOW_BIN_SIZE));
    binSparse->binSparse = (SparseBucket *)
       HTsrealloc(binSparse->binSparse,
 		 sizeof(*binSparse->binSparse) * ++binSparse->cOccupied,
 		 sizeof(*binSparse->binSparse));
    memmove(binSparse->binSparse + offset+1,
 	   binSparse->binSparse + offset,
 	   (binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse));
    binSparse->binSparse[offset] = *bckInsert;
    SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));
    *pfOverwrite = 0;
    return binSparse->binSparse + offset;
 }

 /*************************************************************************\
 | SparseFirstBucket()                                                     |
 | SparseNextBucket()                                                      |
 | SparseCurrentBit()                                                      |
 |     Iterate through the occupied buckets of a dense hashtable.  You     |
 |     must, of course, have allocated space yourself for the iterator.    |
 \*************************************************************************/

 static SparseBucket *SparseNextBucket(SparseIterator *iter)
 {
    if ( iter->posOffset != -1 &&      /* not called from FirstBucket()? */
         (++iter->posOffset < iter->binSparse[iter->posGroup].cOccupied) )
       return iter->binSparse[iter->posGroup].binSparse + iter->posOffset;

    iter->posOffset = 0;                         /* start the next group */
    for ( iter->posGroup++;  iter->posGroup < SPARSE_GROUPS(iter->cBuckets);
 	 iter->posGroup++ )
       if ( iter->binSparse[iter->posGroup].cOccupied > 0 )
 	 return iter->binSparse[iter->posGroup].binSparse; /* + 0 */
    return NULL;                      /* all remaining groups were empty */
 }

 static SparseBucket *SparseFirstBucket(SparseIterator *iter,
 				       SparseBin *binSparse, ulong cBuckets)
 {
    iter->binSparse = binSparse;        /* set it up for NextBucket() */
    iter->cBuckets = cBuckets;
    iter->posOffset = -1;               /* when we advance, we're at 0 */
    iter->posGroup = -1;
    return SparseNextBucket(iter);
 }

 /*************************************************************************\
 | SparseWrite()                                                           |
 | SparseRead()                                                            |
 |     These are routines for storing a sparse hashtable onto disk.  We    |
 |     store the number of buckets and a bitmap indicating which buckets   |
 |     are allocated (occupied).  The actual contents of the buckets       |
 |     must be stored separately.                                          |
 \*************************************************************************/

 static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets)
 {
    ulong i, j;

    WRITE_UL(fp, cBuckets);
    for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )
       for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )
 	 WRITE_UL(fp, binSparse[i].bmOccupied[j]);
 }

 static ulong SparseRead(FILE *fp, SparseBin **pbinSparse)
 {
    ulong i, j, cBuckets;

    READ_UL(fp, cBuckets);                /* actually, cBuckets is stored */
    cBuckets = SparseAllocate(pbinSparse, cBuckets);
    for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )
    {
       for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )
 	 READ_UL(fp, (*pbinSparse)[i].bmOccupied[j]);
       (*pbinSparse)[i].cOccupied =
 	 SPARSE_POS_TO_OFFSET((*pbinSparse)[i].bmOccupied,1<<LOG_LOW_BIN_SIZE);
       (*pbinSparse)[i].binSparse =
 	 (SparseBucket *) HTsmalloc(sizeof(*((*pbinSparse)[i].binSparse)) *
 				    (*pbinSparse)[i].cOccupied);
    }
    return cBuckets;
 }

 /*************************************************************************\
 | SparseMemory()                                                          |
 |     SparseMemory() tells us how much memory is being allocated for      |
 |     the dense table.  You need to tell me not only how many buckets     |
 |     there are, but how many are occupied.                               |
 \*************************************************************************/

 static ulong SparseMemory(ulong cBuckets, ulong cOccupied)
 {
    return ( cOccupied * sizeof(SparseBucket) +
 	    SPARSE_GROUPS(cBuckets) * sizeof(SparseBin) );
 }


 /*  Just for fun, I also provide support for dense tables.  These are
  *  just regulr arrays.  Access is fast, but they can get big.
  *  Use Table(x) at the top of chash.h to decide which you want.
  *  A disadvantage is we need to steal more of the data space for
  *  indicating empty buckets.  We choose -3.
  */

 #ifndef DenseBucket             /* by default, each bucket holds an HTItem */
 #define DenseBucket             HTItem
 #endif

 typedef struct DenseBin {       /* needs to be a struct for C typing reasons */
    DenseBucket *rgBuckets;      /* A bin is an array of buckets */
 } DenseBin;

 typedef struct DenseIterator {
    long pos;               /* the actual iterator */
    DenseBin *bin;          /* state info, to avoid args for NextBucket() */
    ulong cBuckets;
 } DenseIterator;

 #define DENSE_IS_EMPTY(bin, i)     ( (bin)[i].data == EMPTY )
 #define DENSE_SET_EMPTY(bin, i)    (bin)[i].data = EMPTY      /* fks-hash.h */
 #define DENSE_SET_OCCUPIED(bin, i) (bin)[i].data = 1          /* not EMPTY */

 static void DenseClear(DenseBin *bin, ulong cBuckets)
 {
    while ( cBuckets-- )
       DENSE_SET_EMPTY(bin->rgBuckets, cBuckets);
 }

 static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets)
 {
    *pbin = (DenseBin *) HTsmalloc(sizeof(*pbin));
    (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets)
 						  * cBuckets);
    DenseClear(*pbin, cBuckets);
    return cBuckets;
 }

 static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets)
 {
    HTfree(bin->rgBuckets, sizeof(*bin->rgBuckets) * cBuckets);
    HTfree(bin, sizeof(*bin));
    return NULL;
 }

 static int DenseIsEmpty(DenseBin *bin, ulong location)
 {
    return DENSE_IS_EMPTY(bin->rgBuckets, location);
 }

 static DenseBucket *DenseFind(DenseBin *bin, ulong location)
 {
    if ( DenseIsEmpty(bin, location) )
       return NULL;
    return bin->rgBuckets + location;
 }

 static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert,
 				ulong location, int *pfOverwrite)
 {
    DenseBucket *bckPlace;

    bckPlace = DenseFind(bin, location);
    if ( bckPlace )                /* means something is already there */
    {
       if ( *pfOverwrite )
 	 *bckPlace = *bckInsert;
       *pfOverwrite = 1;           /* set to 1 to indicate someone was there */
       return bckPlace;
    }
    else
    {
       bin->rgBuckets[location] = *bckInsert;
       *pfOverwrite = 0;
       return bin->rgBuckets + location;
    }
 }

 static DenseBucket *DenseNextBucket(DenseIterator *iter)
 {
    for ( iter->pos++; iter->pos < iter->cBuckets; iter->pos++ )
       if ( !DenseIsEmpty(iter->bin, iter->pos) )
 	 return iter->bin->rgBuckets + iter->pos;
    return NULL;                        /* all remaining groups were empty */
 }

 static DenseBucket *DenseFirstBucket(DenseIterator *iter,
 				     DenseBin *bin, ulong cBuckets)
 {
    iter->bin = bin;                    /* set it up for NextBucket() */
    iter->cBuckets = cBuckets;
    iter->pos = -1;                     /* thus the next bucket will be 0 */
    return DenseNextBucket(iter);
 }

 static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets)
 {
    ulong pos = 0, bit, bm;

    WRITE_UL(fp, cBuckets);
    while ( pos < cBuckets )
    {
       bm = 0;
       for ( bit = 0; bit < 8*sizeof(ulong); bit++ )
       {
 	 if ( !DenseIsEmpty(bin, pos) )
 	    SET_BITMAP(&bm, bit);                /* in fks-hash.h */
 	 if ( ++pos == cBuckets )
 	    break;
       }
       WRITE_UL(fp, bm);
    }
 }

 static ulong DenseRead(FILE *fp, DenseBin **pbin)
 {
    ulong pos = 0, bit, bm, cBuckets;

    READ_UL(fp, cBuckets);
    cBuckets = DenseAllocate(pbin, cBuckets);
    while ( pos < cBuckets )
    {
       READ_UL(fp, bm);
       for ( bit = 0; bit < 8*sizeof(ulong); bit++ )
       {
 	 if ( TEST_BITMAP(&bm, bit) )            /* in fks-hash.h */
 	    DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos);
 	 else
 	    DENSE_SET_EMPTY((*pbin)->rgBuckets, pos);
 	 if ( ++pos == cBuckets )
 	    break;
       }
    }
    return cBuckets;
 }

 static ulong DenseMemory(ulong cBuckets, ulong cOccupied)
 {
    return cBuckets * sizeof(DenseBucket);
 }


 /* ======================================================================== */
 /*                          HASHING ROUTINES                                */
 /*                       ----------------------                             */

 /*  Implements a simple quadratic hashing scheme.  We have a single hash
  *  table of size t and a single hash function h(x).  When inserting an
  *  item, first we try h(x) % t.  If it's occupied, we try h(x) +
  *  i*(i-1)/2 % t for increasing values of i until we hit a not-occupied
  *  space.  To make this dynamic, we double the size of the hash table as
  *  soon as more than half the cells are occupied.  When deleting, we can
  *  choose to shrink the hashtable when less than a quarter of the
  *  cells are occupied, or we can choose never to shrink the hashtable.
  *  For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until
  *  we get a match or we hit an empty space.  Note that as a result,
  *  we can't make a cell empty on deletion, or lookups may end prematurely.
  *  Instead we mark the cell as "deleted."  We thus steal the value
  *  DELETED as a possible "data" value.  As long as data are pointers,
  *  that's ok.
  *     The hash increment we use, i(i-1)/2, is not the standard quadratic
  *  hash increment, which is i^2.  i(i-1)/2 covers the entire bucket space
  *  when the hashtable size is a power of two, as it is for us.  In fact,
  *  the first n probes cover n distinct buckets; then it repeats.  This
  *  guarantees insertion will always succeed.
  *     If you linear hashing, set JUMP in chash.h.  You can also change
  *  various other parameters there.
  */

 /*************************************************************************\
 | Hash()                                                                  |
 |     The hash function I use is due to Bob Jenkins (see                  |
 |     http://burtleburtle.net/bob/hash/evahash.html                       |
 |     According to http://burtleburtle.net/bob/c/lookup2.c,               |
 |     his implementation is public domain.)                               |
 |     It takes 36 instructions, in 18 cycles if you're lucky.             |
 |        hashing depends on the fact the hashtable size is always a       |
 |     power of 2.  cBuckets is probably ht->cBuckets.                     |
 \*************************************************************************/

 #if LOG_WORD_SIZE == 5                      /* 32 bit words */

 #define mix(a,b,c) \
 { \
   a -= b; a -= c; a ^= (c>>13); \
   b -= c; b -= a; b ^= (a<<8); \
   c -= a; c -= b; c ^= (b>>13); \
   a -= b; a -= c; a ^= (c>>12);  \
   b -= c; b -= a; b ^= (a<<16); \
   c -= a; c -= b; c ^= (b>>5); \
   a -= b; a -= c; a ^= (c>>3);  \
   b -= c; b -= a; b ^= (a<<10); \
   c -= a; c -= b; c ^= (b>>15); \
 }
 #ifdef WORD_HASH                 /* play with this on little-endian machines */
 #define WORD_AT(ptr)    ( *(ulong *)(ptr) )
 #else
 #define WORD_AT(ptr)    ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
 			  ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) )
 #endif

 #elif LOG_WORD_SIZE == 6        /* 64 bit words */

 #define mix(a,b,c) \
 { \
   a -= b; a -= c; a ^= (c>>43); \
   b -= c; b -= a; b ^= (a<<9); \
   c -= a; c -= b; c ^= (b>>8); \
   a -= b; a -= c; a ^= (c>>38); \
   b -= c; b -= a; b ^= (a<<23); \
   c -= a; c -= b; c ^= (b>>5); \
   a -= b; a -= c; a ^= (c>>35); \
   b -= c; b -= a; b ^= (a<<49); \
   c -= a; c -= b; c ^= (b>>11); \
   a -= b; a -= c; a ^= (c>>12); \
   b -= c; b -= a; b ^= (a<<18); \
   c -= a; c -= b; c ^= (b>>22); \
 }
 #ifdef WORD_HASH                 /* alpha is little-endian, btw */
 #define WORD_AT(ptr)    ( *(ulong *)(ptr) )
 #else
 #define WORD_AT(ptr)    ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
 			  ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \
 			  ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \
 			  ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) )
 #endif

 #else                            /* neither 32 or 64 bit words */
 #error This hash function can only hash 32 or 64 bit words.  Sorry.
 #endif

 static ulong Hash(HashTable *ht, char *key, ulong cBuckets)
 {
    ulong a, b, c, cchKey, cchKeyOrig;

    cchKeyOrig = ht->cchKey == NULL_TERMINATED ? strlen(key) : ht->cchKey;
    a = b = c = 0x9e3779b9;       /* the golden ratio; an arbitrary value */

    for ( cchKey = cchKeyOrig;  cchKey >= 3 * sizeof(ulong);
 	 cchKey -= 3 * sizeof(ulong),  key += 3 * sizeof(ulong) )
    {
       a += WORD_AT(key);
       b += WORD_AT(key + sizeof(ulong));
       c += WORD_AT(key + sizeof(ulong)*2);
       mix(a,b,c);
    }

    c += cchKeyOrig;
    switch ( cchKey ) {           /* deal with rest.  Cases fall through */
 #if LOG_WORD_SIZE == 5
       case 11: c += (ulong)key[10]<<24;
       case 10: c += (ulong)key[9]<<16;
       case 9 : c += (ulong)key[8]<<8;
                /* the first byte of c is reserved for the length */
       case 8 : b += WORD_AT(key+4);  a+= WORD_AT(key);  break;
       case 7 : b += (ulong)key[6]<<16;
       case 6 : b += (ulong)key[5]<<8;
       case 5 : b += key[4];
       case 4 : a += WORD_AT(key);  break;
       case 3 : a += (ulong)key[2]<<16;
       case 2 : a += (ulong)key[1]<<8;
       case 1 : a += key[0];
    /* case 0 : nothing left to add */
 #elif LOG_WORD_SIZE == 6
       case 23: c += (ulong)key[22]<<56;
       case 22: c += (ulong)key[21]<<48;
       case 21: c += (ulong)key[20]<<40;
       case 20: c += (ulong)key[19]<<32;
       case 19: c += (ulong)key[18]<<24;
       case 18: c += (ulong)key[17]<<16;
       case 17: c += (ulong)key[16]<<8;
                /* the first byte of c is reserved for the length */
       case 16: b += WORD_AT(key+8);  a+= WORD_AT(key);  break;
       case 15: b += (ulong)key[14]<<48;
       case 14: b += (ulong)key[13]<<40;
       case 13: b += (ulong)key[12]<<32;
       case 12: b += (ulong)key[11]<<24;
       case 11: b += (ulong)key[10]<<16;
       case 10: b += (ulong)key[ 9]<<8;
       case  9: b += (ulong)key[ 8];
       case  8: a += WORD_AT(key);  break;
       case  7: a += (ulong)key[ 6]<<48;
       case  6: a += (ulong)key[ 5]<<40;
       case  5: a += (ulong)key[ 4]<<32;
       case  4: a += (ulong)key[ 3]<<24;
       case  3: a += (ulong)key[ 2]<<16;
       case  2: a += (ulong)key[ 1]<<8;
       case  1: a += (ulong)key[ 0];
    /* case 0: nothing left to add */
 #endif
    }
    mix(a,b,c);
    return c & (cBuckets-1);
 }


 /*************************************************************************\
 | Rehash()                                                                |
 |     You give me a hashtable, a new size, and a bucket to follow, and    |
 |     I resize the hashtable's bin to be the new size, rehashing          |
 |     everything in it.  I keep particular track of the bucket you pass   |
 |     in, and RETURN a pointer to where the item in the bucket got to.    |
 |     (If you pass in NULL, I return an arbitrary pointer.)               |
 \*************************************************************************/

 static HTItem *Rehash(HashTable *ht, ulong cNewBuckets, HTItem *bckWatch)
 {
    Table *tableNew;
    ulong iBucketFirst;
    HTItem *bck, *bckNew = NULL;
    ulong offset;                         /* the i in h(x) + i*(i-1)/2 */
    int fOverwrite = 0;    /* not an issue: there can be no collisions */

    assert( ht->table );
    cNewBuckets = Table(Allocate)(&tableNew, cNewBuckets);
       /* Since we RETURN the new position of bckWatch, we want  *
        * to make sure it doesn't get moved due to some table    *
        * rehashing that comes after it's inserted.  Thus, we    *
        * have to put it in last.  This makes the loop weird.    */
    for ( bck = HashFirstBucket(ht); ; bck = HashNextBucket(ht) )
    {
       if ( bck == NULL )      /* we're done iterating, so look at bckWatch */
       {
 	 bck = bckWatch;
 	 if ( bck == NULL )           /* I guess bckWatch wasn't specified */
 	    break;
       }
       else if ( bck == bckWatch )
 	 continue;             /* ignore if we see it during the iteration */

       offset = 0;                              /* a new i for a new bucket */
       for ( iBucketFirst = Hash(ht, KEY_PTR(ht, bck->key), cNewBuckets);
 	    !Table(IsEmpty)(tableNew, iBucketFirst);
 	    iBucketFirst = (iBucketFirst + JUMP(KEY_PTR(ht,bck->key), offset))
 	                   & (cNewBuckets-1) )
 	 ;
       bckNew = Table(Insert)(tableNew, bck, iBucketFirst, &fOverwrite);
       if ( bck == bckWatch )       /* we're done with the last thing to do */
 	 break;
    }
    Table(Free)(ht->table, ht->cBuckets);
    ht->table = tableNew;
    ht->cBuckets = cNewBuckets;
    ht->cDeletedItems = 0;
    return bckNew;     /* new position of bckWatch, which was inserted last */
 }

 /*************************************************************************\
 | Find()                                                                  |
 |     Does the quadratic searching stuff.  RETURNS NULL if we don't       |
 |     find an object with the given key, and a pointer to the Item        |
 |     holding the key, if we do.  Also sets posLastFind.  If piEmpty is   |
 |     non-NULL, we set it to the first open bucket we pass; helpful for   |
 |     doing a later insert if the search fails, for instance.             |
 \*************************************************************************/

 static HTItem *Find(HashTable *ht, ulong key, ulong *piEmpty)
 {
    ulong iBucketFirst;
    HTItem *item;
    ulong offset = 0;              /* the i in h(x) + i*(i-1)/2 */
    int fFoundEmpty = 0;           /* set when we pass over an empty bucket */

    ht->posLastFind = NULL;        /* set up for failure: a new find starts */
    if ( ht->table == NULL )       /* empty hash table: find is bound to fail */
       return NULL;

    iBucketFirst = Hash(ht, KEY_PTR(ht, key), ht->cBuckets);
    while ( 1 )                    /* now try all i > 0 */
    {
       item = Table(Find)(ht->table, iBucketFirst);
       if ( item == NULL )         /* it's not in the table */
       {
 	 if ( piEmpty && !fFoundEmpty ) *piEmpty = iBucketFirst;
 	 return NULL;
       }
       else
       {
 	 if ( IS_BCK_DELETED(item) )      /* always 0 ifdef INSERT_ONLY */
 	 {
 	    if ( piEmpty && !fFoundEmpty )
 	    {
 	       *piEmpty = iBucketFirst;
 	       fFoundEmpty = 1;
 	    }
 	 } else
 	    if ( !KEY_CMP(ht, key, item->key) )     /* must be occupied */
 	    {
 	       ht->posLastFind = item;
 	       return item;               /* we found it! */
 	    }
       }
       iBucketFirst = ((iBucketFirst + JUMP(KEY_PTR(ht, key), offset))
 		      & (ht->cBuckets-1));
    }
 }

 /*************************************************************************\
 | Insert()                                                                |
 |     If an item with the key already exists in the hashtable, RETURNS    |
 |     a pointer to the item (replacing its data if fOverwrite is 1).      |
 |     If not, we find the first place-to-insert (which Find() is nice     |
 |     enough to set for us) and insert the item there, RETURNing a        |
 |     pointer to the item.  We might grow the hashtable if it's getting   |
 |     full.  Note we include buckets holding DELETED when determining     |
 |     fullness, because they slow down searching.                         |
 \*************************************************************************/

 static ulong NextPow2(ulong x)    /* returns next power of 2 > x, or 2^31 */
 {
    if ( ((x << 1) >> 1) != x )    /* next power of 2 overflows */
       x >>= 1;                    /* so we return highest power of 2 we can */
    while ( (x & (x-1)) != 0 )     /* blacks out all but the top bit */
       x &= (x-1);
    return x << 1;                 /* makes it the *next* power of 2 */
 }

 static HTItem *Insert(HashTable *ht, ulong key, ulong data, int fOverwrite)
 {
    HTItem *item, bckInsert;
    ulong iEmpty;                  /* first empty bucket key probes */

    if ( ht->table == NULL )       /* empty hash table: find is bound to fail */
       return NULL;
    item = Find(ht, key, &iEmpty);
    ht->posLastFind = NULL;        /* last operation is insert, not find */
    if ( item )
    {
       if ( fOverwrite )
 	 item->data = data;       /* key already matches */
       return item;
    }

    COPY_KEY(ht, bckInsert.key, key);    /* make our own copy of the key */
    bckInsert.data = data;               /* oh, and the data too */
    item = Table(Insert)(ht->table, &bckInsert, iEmpty, &fOverwrite);
    if ( fOverwrite )                    /* we overwrote a deleted bucket */
       ht->cDeletedItems--;
    ht->cItems++;                        /* insert couldn't have overwritten */
    if ( ht->cDeltaGoalSize > 0 )  /* closer to our goal size */
       ht->cDeltaGoalSize--;
    if ( ht->cItems + ht->cDeletedItems >= ht->cBuckets * OCCUPANCY_PCT
         || ht->cDeltaGoalSize < 0 ) /* we must've overestimated # of deletes */
       item = Rehash(ht,
 		    NextPow2((ulong)(((ht->cDeltaGoalSize > 0 ?
 				       ht->cDeltaGoalSize : 0)
 				      + ht->cItems) / OCCUPANCY_PCT)),
 		    item);
    return item;
 }

 /*************************************************************************\
 | Delete()                                                                |
 |     Removes the item from the hashtable, and if fShrink is 1, will      |
 |     shrink the hashtable if it's too small (ie even after halving,      |
 |     the ht would be less than half full, though in order to avoid       |
 |     oscillating table size, we insist that after halving the ht would   |
 |     be less than 40% full).  RETURNS 1 if the item was found, 0 else.   |
 |        If fLastFindSet is true, then this function is basically         |
 |     DeleteLastFind.                                                     |
 \*************************************************************************/

 static int Delete(HashTable *ht, ulong key, int fShrink, int fLastFindSet)
 {
    if ( !fLastFindSet && !Find(ht, key, NULL) )
       return 0;
    SET_BCK_DELETED(ht, ht->posLastFind);       /* find set this, how nice */
    ht->cItems--;
    ht->cDeletedItems++;
    if ( ht->cDeltaGoalSize < 0 )  /* heading towards our goal of deletion */
       ht->cDeltaGoalSize++;

    if ( fShrink && ht->cItems < ht->cBuckets * OCCUPANCY_PCT*0.4
         && ht->cDeltaGoalSize >= 0       /* wait until we're done deleting */
         && (ht->cBuckets >> 1) >= MIN_HASH_SIZE )                /* shrink */
       Rehash(ht,
 	     NextPow2((ulong)((ht->cItems+ht->cDeltaGoalSize)/OCCUPANCY_PCT)),
 	     NULL);
    ht->posLastFind = NULL;           /* last operation is delete, not find */
    return 1;
 }


 /* ======================================================================== */
 /*                          USER-VISIBLE API                                */
 /*                       ----------------------                             */

 /*************************************************************************\
 | AllocateHashTable()                                                     |
 | ClearHashTable()                                                        |
 | FreeHashTable()                                                         |
 |     Allocate() allocates a hash table and sets up size parameters.      |
 |     Free() frees it.  Clear() deletes all the items from the hash       |
 |     table, but frees not.                                               |
 |        cchKey is < 0 if the keys you send me are meant to be pointers   |
 |     to \0-terminated strings.  Then -cchKey is the maximum key size.    |
 |     If cchKey < one word (ulong), the keys you send me are the keys     |
 |     themselves; else the keys you send me are pointers to the data.     |
 |        If fSaveKeys is 1, we copy any keys given to us to insert.  We   |
 |     also free these keys when freeing the hash table.  If it's 0, the   |
 |     user is responsible for key space management.                       |
 |        AllocateHashTable() RETURNS a hash table; the others TAKE one.   |
 \*************************************************************************/

 HashTable *AllocateHashTable(int cchKey, int fSaveKeys)
 {
    HashTable *ht;

    ht = (HashTable *) HTsmalloc(sizeof(*ht));   /* set everything to 0 */
    ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);
    ht->cchKey = cchKey <= 0 ? NULL_TERMINATED : cchKey;
    ht->cItems = 0;
    ht->cDeletedItems = 0;
    ht->fSaveKeys = fSaveKeys;
    ht->cDeltaGoalSize = 0;
    ht->iter = HTsmalloc( sizeof(TableIterator) );

    ht->fpData = NULL;                           /* set by HashLoad, maybe */
    ht->bckData.data = (ulong) NULL;             /* this must be done */
    HTSetupKeyTrunc();                           /* in util.c */
    return ht;
 }

 void ClearHashTable(HashTable *ht)
 {
    HTItem *bck;

    if ( STORES_PTR(ht) && ht->fSaveKeys )       /* need to free keys */
       for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )
       {
 	 FREE_KEY(ht, bck->key);
 	 if ( ht->fSaveKeys == 2 )  /* this means key stored in one block */
 	    break;                  /* ...so only free once */
       }
    Table(Free)(ht->table, ht->cBuckets);
    ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);

    ht->cItems = 0;
    ht->cDeletedItems = 0;
    ht->cDeltaGoalSize = 0;
    ht->posLastFind = NULL;
    ht->fpData = NULL;               /* no longer HashLoading */
    if ( ht->bckData.data )  free( (char *)(ht)->bckData.data);
    ht->bckData.data = (ulong) NULL;
 }

 void FreeHashTable(HashTable *ht)
 {
    ClearHashTable(ht);
    if ( ht->iter )    HTfree(ht->iter, sizeof(TableIterator));
    if ( ht->table )   Table(Free)(ht->table, ht->cBuckets);
    free(ht);
 }

 /*************************************************************************\
 | HashFind()                                                              |
 | HashFindLast()                                                          |
 |     HashFind(): looks in h(x) + i(i-1)/2 % t as i goes up from 0        |
 |     until we either find the key or hit an empty bucket.  RETURNS a     |
 |     pointer to the item in the hit bucket, if we find it, else          |
 |     RETURNS NULL.                                                       |
 |        HashFindLast() returns the item returned by the last             |
 |     HashFind(), which may be NULL if the last HashFind() failed.        |
 |        LOAD_AND_RETURN reads the data from off disk, if necessary.      |
 \*************************************************************************/

 HTItem *HashFind(HashTable *ht, ulong key)
 {
    LOAD_AND_RETURN(ht, Find(ht, KEY_TRUNC(ht, key), NULL));
 }

 HTItem *HashFindLast(HashTable *ht)
 {
    LOAD_AND_RETURN(ht, ht->posLastFind);
 }

 /*************************************************************************\
 | HashFindOrInsert()                                                      |
 | HashFindOrInsertItem()                                                  |
 | HashInsert()                                                            |
 | HashInsertItem()                                                        |
 | HashDelete()                                                            |
 | HashDeleteLast()                                                        |
 |     Pretty obvious what these guys do.  Some take buckets (items),      |
 |     some take keys and data separately.  All things RETURN the bucket   |
 |     (a pointer into the hashtable) if appropriate.                      |
 \*************************************************************************/

 HTItem *HashFindOrInsert(HashTable *ht, ulong key, ulong dataInsert)
 {
       /* This is equivalent to Insert without samekey-overwrite */
    return Insert(ht, KEY_TRUNC(ht, key), dataInsert, 0);
 }

 HTItem *HashFindOrInsertItem(HashTable *ht, HTItem *pItem)
 {
    return HashFindOrInsert(ht, pItem->key, pItem->data);
 }

 HTItem *HashInsert(HashTable *ht, ulong key, ulong data)
 {
    return Insert(ht, KEY_TRUNC(ht, key), data, SAMEKEY_OVERWRITE);
 }

 HTItem *HashInsertItem(HashTable *ht, HTItem *pItem)
 {
    return HashInsert(ht, pItem->key, pItem->data);
 }

 int HashDelete(HashTable *ht, ulong key)
 {
    return Delete(ht, KEY_TRUNC(ht, key), !FAST_DELETE, 0);
 }

 int HashDeleteLast(HashTable *ht)
 {
    if ( !ht->posLastFind  )                /* last find failed */
       return 0;
    return Delete(ht, 0, !FAST_DELETE, 1);  /* no need to specify a key */
 }

 /*************************************************************************\
 | HashFirstBucket()                                                       |
 | HashNextBucket()                                                        |
 |     Iterates through the items in the hashtable by iterating through    |
 |     the table.  Since we know about deleted buckets and loading data    |
 |     off disk, and the table doesn't, our job is to take care of these   |
 |     things.  RETURNS a bucket, or NULL after the last bucket.           |
 \*************************************************************************/

 HTItem *HashFirstBucket(HashTable *ht)
 {
    HTItem *retval;

    for ( retval = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
 	 retval;  retval = Table(NextBucket)(ht->iter) )
       if ( !IS_BCK_DELETED(retval) )
 	 LOAD_AND_RETURN(ht, retval);
    return NULL;
 }

 HTItem *HashNextBucket(HashTable *ht)
 {
    HTItem *retval;

    while ( (retval=Table(NextBucket)(ht->iter)) )
       if ( !IS_BCK_DELETED(retval) )
 	 LOAD_AND_RETURN(ht, retval);
    return NULL;
 }

 /*************************************************************************\
 | HashSetDeltaGoalSize()                                                  |
 |     If we're going to insert 100 items, set the delta goal size to      |
 |     100 and we take that into account when inserting.  Likewise, if     |
 |     we're going to delete 10 items, set it to -100 and we won't         |
 |     rehash until all 100 have been done.  It's ok to be wrong, but      |
 |     it's efficient to be right.  Returns the delta value.               |
 \*************************************************************************/

 int HashSetDeltaGoalSize(HashTable *ht, int delta)
 {
    ht->cDeltaGoalSize = delta;
 #if FAST_DELETE == 1 || defined INSERT_ONLY
    if ( ht->cDeltaGoalSize < 0 )   /* for fast delete, we never */
       ht->cDeltaGoalSize = 0;      /* ...rehash after deletion  */
 #endif
    return ht->cDeltaGoalSize;
 }


 /*************************************************************************\
 | HashSave()                                                              |
 | HashLoad()                                                              |
 | HashLoadKeys()                                                          |
 |     Routines for saving and loading the hashtable from disk.  We can    |
 |     then use the hashtable in two ways: loading it back into memory     |
 |     (HashLoad()) or loading only the keys into memory, in which case    |
 |     the data for a given key is loaded off disk when the key is         |
 |     retrieved.  The data is freed when something new is retrieved in    |
 |     its place, so this is not a "lazy-load" scheme.                     |
 |        The key is saved automatically and restored upon load, but the   |
 |     user needs to specify a routine for reading and writing the data.   |
 |     fSaveKeys is of course set to 1 when you read in a hashtable.       |
 |     HashLoad RETURNS a newly allocated hashtable.                       |
 |        DATA_WRITE() takes an fp and a char * (representing the data     |
 |     field), and must perform two separate tasks.  If fp is NULL,        |
 |     return the number of bytes written.  If not, writes the data to     |
 |     disk at the place the fp points to.                                 |
 |        DATA_READ() takes an fp and the number of bytes in the data      |
 |     field, and returns a char * which points to wherever you've         |
 |     written the data.  Thus, you must allocate memory for the data.     |
 |        Both dataRead and dataWrite may be NULL if you just wish to      |
 |     store the data field directly, as an integer.                       |
 \*************************************************************************/

 void HashSave(FILE *fp, HashTable *ht, int (*dataWrite)(FILE *, char *))
 {
    long cchData, posStart;
    HTItem *bck;

    /* File format: magic number (4 bytes)
                  : cchKey (one word)
                  : cItems (one word)
                  : cDeletedItems (one word)
                  : table info (buckets and a bitmap)
 		 : cchAllKeys (one word)
       Then the keys, in a block.  If cchKey is NULL_TERMINATED, the keys
       are null-terminated too, otherwise this takes up cchKey*cItems bytes.
       Note that keys are not written for DELETED buckets.
       Then the data:
                  : EITHER DELETED (one word) to indicate it's a deleted bucket,
                  : OR number of bytes for this (non-empty) bucket's data
                    (one word).  This is not stored if dataWrite == NULL
                    since the size is known to be sizeof(ul).  Plus:
                  : the data for this bucket (variable length)
       All words are in network byte order. */

    fprintf(fp, "%s", MAGIC_KEY);
    WRITE_UL(fp, ht->cchKey);        /* WRITE_UL, READ_UL, etc in fks-hash.h */
    WRITE_UL(fp, ht->cItems);
    WRITE_UL(fp, ht->cDeletedItems);
    Table(Write)(fp, ht->table, ht->cBuckets);        /* writes cBuckets too */

    WRITE_UL(fp, 0);                 /* to be replaced with sizeof(key block) */
    posStart = ftell(fp);
    for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )
       fwrite(KEY_PTR(ht, bck->key), 1,
 	     (ht->cchKey == NULL_TERMINATED ?
 	      strlen(KEY_PTR(ht, bck->key))+1 : ht->cchKey), fp);
    cchData = ftell(fp) - posStart;
    fseek(fp, posStart - sizeof(unsigned long), SEEK_SET);
    WRITE_UL(fp, cchData);
    fseek(fp, 0, SEEK_END);          /* done with our sojourn at the header */

       /* Unlike HashFirstBucket, TableFirstBucket iters through deleted bcks */
    for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
 	 bck;  bck = Table(NextBucket)(ht->iter) )
       if ( dataWrite == NULL || IS_BCK_DELETED(bck) )
 	 WRITE_UL(fp, bck->data);
       else                          /* write cchData followed by the data */
       {
 	 WRITE_UL(fp, (*dataWrite)(NULL, (char *)bck->data));
 	 (*dataWrite)(fp, (char *)bck->data);
       }
 }

 static HashTable *HashDoLoad(FILE *fp, char * (*dataRead)(FILE *, int),
 			     HashTable *ht)
 {
    ulong cchKey;
    char szMagicKey[4], *rgchKeys;
    HTItem *bck;

    fread(szMagicKey, 1, 4, fp);
    if ( strncmp(szMagicKey, MAGIC_KEY, 4) )
    {
       fprintf(stderr, "ERROR: not a hash table (magic key is %4.4s, not %s)\n",
 	      szMagicKey, MAGIC_KEY);
       exit(3);
    }
    Table(Free)(ht->table, ht->cBuckets);  /* allocated in AllocateHashTable */

    READ_UL(fp, ht->cchKey);
    READ_UL(fp, ht->cItems);
    READ_UL(fp, ht->cDeletedItems);
    ht->cBuckets = Table(Read)(fp, &ht->table);    /* next is the table info */

    READ_UL(fp, cchKey);
    rgchKeys = (char *) HTsmalloc( cchKey );  /* stores all the keys */
    fread(rgchKeys, 1, cchKey, fp);
       /* We use the table iterator so we don't try to LOAD_AND_RETURN */
    for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
 	 bck;  bck = Table(NextBucket)(ht->iter) )
    {
       READ_UL(fp, bck->data);        /* all we need if dataRead is NULL */
       if ( IS_BCK_DELETED(bck) )     /* always 0 if defined(INSERT_ONLY) */
 	 continue;                   /* this is why we read the data first */
       if ( dataRead != NULL )        /* if it's null, we're done */
 	 if ( !ht->fpData )          /* load data into memory */
 	    bck->data = (ulong)dataRead(fp, bck->data);
 	 else                        /* store location of data on disk */
 	 {
 	    fseek(fp, bck->data, SEEK_CUR);  /* bck->data held size of data */
 	    bck->data = ftell(fp) - bck->data - sizeof(unsigned long);
 	 }

       if ( ht->cchKey == NULL_TERMINATED )   /* now read the key */
       {
 	 bck->key = (ulong) rgchKeys;
 	 rgchKeys = strchr(rgchKeys, '\0') + 1;    /* read past the string */
       }
       else
       {
 	 if ( STORES_PTR(ht) )               /* small keys stored directly */
 	    bck->key = (ulong) rgchKeys;
 	 else
 	    memcpy(&bck->key, rgchKeys, ht->cchKey);
 	 rgchKeys += ht->cchKey;
       }
    }
    if ( !STORES_PTR(ht) )                    /* keys are stored directly */
       HTfree(rgchKeys - cchKey, cchKey);     /* we've advanced rgchK to end */
    return ht;
 }

 HashTable *HashLoad(FILE *fp, char * (*dataRead)(FILE *, int))
 {
    HashTable *ht;
    ht = AllocateHashTable(0, 2);  /* cchKey set later, fSaveKey should be 2! */
    return HashDoLoad(fp, dataRead, ht);
 }

 HashTable *HashLoadKeys(FILE *fp, char * (*dataRead)(FILE *, int))
 {
    HashTable *ht;

    if ( dataRead == NULL )
       return HashLoad(fp, NULL);  /* no reason not to load the data here */
    ht = AllocateHashTable(0, 2);  /* cchKey set later, fSaveKey should be 2! */
    ht->fpData = fp;               /* tells HashDoLoad() to only load keys */
    ht->dataRead = dataRead;
    return HashDoLoad(fp, dataRead, ht);
 }

 /*************************************************************************\
 | PrintHashTable()                                                        |
 |     A debugging tool.  Prints the entire contents of the hash table,    |
 |     like so: <bin #>: key of the contents.  Returns number of bytes     |
 |     allocated.  If time is not -1, we print it as the time required     |
 |     for the hash.  If iForm is 0, we just print the stats.  If it's     |
 |     1, we print the keys and data too, but the keys are printed as      |
 |     ulongs.  If it's 2, we print the keys correctly (as long numbers    |
 |     or as strings).                                                     |
 \*************************************************************************/

 ulong PrintHashTable(HashTable *ht, double time, int iForm)
 {
    ulong cbData = 0, cbBin = 0, cItems = 0, cOccupied = 0;
    HTItem *item;

    printf("HASH TABLE.\n");
    if ( time > -1.0 )
    {
       printf("----------\n");
       printf("Time: %27.2f\n", time);
    }

    for ( item = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
 	 item;  item = Table(NextBucket)(ht->iter) )
    {
       cOccupied++;                    /* this includes deleted buckets */
       if ( IS_BCK_DELETED(item) )     /* we don't need you for anything else */
 	 continue;
       cItems++;                       /* this is for a sanity check */
       if ( STORES_PTR(ht) )
 	 cbData += ht->cchKey == NULL_TERMINATED ?
 	    WORD_ROUND(strlen((char *)item->key)+1) : ht->cchKey;
       else
 	 cbBin -= sizeof(item->key), cbData += sizeof(item->key);
       cbBin -= sizeof(item->data), cbData += sizeof(item->data);
       if ( iForm != 0 )      /* we want the actual contents */
       {
 	 if ( iForm == 2 && ht->cchKey == NULL_TERMINATED )
 	    printf("%s/%lu\n", (char *)item->key, item->data);
 	 else if ( iForm == 2 && STORES_PTR(ht) )
 	    printf("%.*s/%lu\n",
 		   (int)ht->cchKey, (char *)item->key, item->data);
 	 else     /* either key actually is a ulong, or iForm == 1 */
 	    printf("%lu/%lu\n", item->key, item->data);
       }
    }
    assert( cItems == ht->cItems );                   /* sanity check */
    cbBin = Table(Memory)(ht->cBuckets, cOccupied);

    printf("----------\n");
    printf("%lu buckets (%lu bytes).  %lu empty.  %lu hold deleted items.\n"
 	  "%lu items (%lu bytes).\n"
 	  "%lu bytes total.  %lu bytes (%2.1f%%) of this is ht overhead.\n",
 	  ht->cBuckets, cbBin, ht->cBuckets - cOccupied, cOccupied - ht->cItems,
 	  ht->cItems, cbData,
 	  cbData + cbBin, cbBin, cbBin*100.0/(cbBin+cbData));

    return cbData + cbBin;
 }