/*-------------------------------------------------------------------------
 *
 * hashfn.c
 *		Generic hashing functions, and hash functions for use in dynahash.c
 *		hashtables
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/common/hashfn.c
 *
 * NOTES
 *	  It is expected that every bit of a hash function's 32-bit result is
 *	  as random as every other; failure to ensure this is likely to lead
 *	  to poor performance of hash tables.  In most cases a hash
 *	  function should use hash_bytes() or its variant hash_bytes_uint32(),
 *	  or the wrappers hash_any() and hash_uint32() defined in hashfn.h.
 *
 *-------------------------------------------------------------------------
 */
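
/*
 * Illustrative usage (a hypothetical caller; "key", "keylen" and "nbuckets"
 * are placeholders, not symbols defined in this file).  Per the NOTES above,
 * callers normally obtain a full 32-bit hash and mask it down when a smaller
 * range is wanted, e.g. to pick a bucket in a power-of-2 sized table:
 *
 *		uint32	h = hash_bytes((const unsigned char *) key, keylen);
 *		uint32	bucket = h & (nbuckets - 1);	(nbuckets a power of 2)
 *
 * For a key that is itself a uint32, hash_bytes_uint32(key) gives the same
 * result as hashing its bytes with hash_bytes(), but more cheaply.
 */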

/*
 * GPDB: We carry a dependency on pthread_win32.h in elog.h, which causes
 * compilation errors when building Windows clients (as elog.h is included by
 * postgres.h).  So use postgres_fe.h instead for this case.
 */
#if defined (WIN32) && defined (FRONTEND)
#include "postgres_fe.h"
#else
#include "postgres.h"
#endif

#include "common/hashfn.h"
#include "port/pg_bitutils.h"
#include "utils/hsearch.h"


/*
 * This hash function was written by Bob Jenkins
 * (bob_jenkins@burtleburtle.net), and superficially adapted
 * for PostgreSQL by Neil Conway. For more information on this
 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
 *
 * In the current code, we have adopted Bob's 2006 update of his hash
 * function to fetch the data a word at a time when it is suitably aligned.
 * This makes for a useful speedup, at the cost of having to maintain
 * four code paths (aligned vs unaligned, and little-endian vs big-endian).
 * It also uses two separate mixing functions mix() and final(), instead
 * of a slower multi-purpose function.
 */

/* Mask of address bits that must be zero for uint32-aligned pointers */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

#define rot(x,k) pg_rotate_left32(x, k)

/*----------
 * mix -- mix 3 32-bit values reversibly.
 *
 * This is reversible, so any information in (a,b,c) before mix() is
 * still in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through
 * mix() in reverse, there are at least 32 bits of the output that
 * are sometimes the same for one pair and different for another pair.
 * This was tested for:
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c)
 * that fail to affect some output bits of (a,b,c), especially of a.  The
 * most thoroughly mixed value is c, but it doesn't really even achieve
 * avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling
 * the number of bits affected, so the goal of mixing pulls in the opposite
 * direction from the goal of parallelism.  I did what I could.  Rotates
 * seem to cost as much as shifts on every machine I could lay my hands on,
 * and rotates are much kinder to the top and bottom bits, so I used rotates.
 *----------
 */
#define mix(a,b,c) \
{ \
	a -= c; a ^= rot(c, 4); c += b; \
	b -= a; b ^= rot(a, 6); a += c; \
	c -= b; c ^= rot(b, 8); b += a; \
	a -= c; a ^= rot(c,16); c += b; \
	b -= a; b ^= rot(a,19); a += c; \
	c -= b; c ^= rot(b, 4); b += a; \
}

/*----------
 * final -- final mixing of 3 32-bit values (a,b,c) into c
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually
 * produce values of c that look totally different.  This was tested for
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * The use of separate functions for mix() and final() allows for a
 * substantial performance increase since final() does not need to
 * do well in reverse, but it does need to affect all output bits.
 * mix(), on the other hand, does not need to affect all output
 * bits (affecting 32 bits is enough).  The original hash function had
 * a single mixing operation that had to satisfy both sets of requirements
 * and was slower as a result.
 *----------
 */
#define final(a,b,c) \
{ \
	c ^= b; c -= rot(b,14); \
	a ^= c; a -= rot(c,11); \
	b ^= a; b -= rot(a,25); \
	c ^= b; c -= rot(b,16); \
	a ^= c; a -= rot(c, 4); \
	b ^= a; b -= rot(a,14); \
	c ^= b; c -= rot(b,24); \
}

/*
 * hash_bytes() -- hash a variable-length key into a 32-bit value
 *		k		: the key (the unaligned variable-length array of bytes)
 *		len		: the length of the key, counting by bytes
 *
 * Returns a uint32 value.  Every bit of the key affects every bit of
 * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
 * About 6*len+35 instructions.  The best hash table sizes are powers
 * of 2.  There is no need to do mod a prime (mod is sooo slow!).
 * If you need less than 32 bits, use a bitmask.
 *
 * This procedure must never throw elog(ERROR); the ResourceOwner code
 * relies on this not to fail.
 *
 * Note: we could easily change this function to return a 64-bit hash value
 * by using the final values of both b and c.  b is perhaps a little less
 * well mixed than c, however.
 */
uint32
hash_bytes(const unsigned char *k, int keylen)
{
	uint32		a,
				b,
				c,
				len;

	/* Set up the internal state */
	len = keylen;
	a = b = c = 0x9e3779b9 + len + 3923095;

	/* If the source pointer is word-aligned, we use word-wide fetches */
	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
	{
		/* Code path for aligned source data */
		const uint32 *ka = (const uint32 *) k;

		/* handle most of the key */
		while (len >= 12)
		{
			a += ka[0];
			b += ka[1];
			c += ka[2];
			mix(a, b, c);
			ka += 3;
			len -= 12;
		}

		/* handle the last 11 bytes */
		k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}
	else
	{
		/* Code path for non-aligned source data */

		/* handle most of the key */
		while (len >= 12)
		{
#ifdef WORDS_BIGENDIAN
			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else							/* !WORDS_BIGENDIAN */
			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif							/* WORDS_BIGENDIAN */
			mix(a, b, c);
			k += 12;
			len -= 12;
		}

		/* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += k[7];
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += k[3];
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ((uint32) k[7] << 24);
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ((uint32) k[3] << 24);
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}

	final(a, b, c);

	/* report the result */
	return c;
}
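
/*
 * A sketch relating this to the 64-bit note above ("k" and "keylen" are
 * placeholders): hash_bytes_extended() below returns exactly that wider
 * result, and with a zero seed its low 32 bits match hash_bytes():
 *
 *		uint64	h64 = hash_bytes_extended(k, keylen, 0);
 *		Assert((uint32) h64 == hash_bytes(k, keylen));
 */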

/*
 * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
 *		k		: the key (the unaligned variable-length array of bytes)
 *		len		: the length of the key, counting by bytes
 *		seed	: a 64-bit seed (0 means no seed)
 *
 * Returns a uint64 value.  Otherwise similar to hash_bytes.
 */
uint64
hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
	uint32		a,
				b,
				c,
				len;

	/* Set up the internal state */
	len = keylen;
	a = b = c = 0x9e3779b9 + len + 3923095;

	/* If the seed is non-zero, use it to perturb the internal state. */
	if (seed != 0)
	{
		/*
		 * In essence, the seed is treated as part of the data being hashed,
		 * but for simplicity, we pretend that it's padded with four bytes of
		 * zeroes so that the seed constitutes a 12-byte chunk.
		 */
		a += (uint32) (seed >> 32);
		b += (uint32) seed;
		mix(a, b, c);
	}

	/* If the source pointer is word-aligned, we use word-wide fetches */
	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
	{
		/* Code path for aligned source data */
		const uint32 *ka = (const uint32 *) k;

		/* handle most of the key */
		while (len >= 12)
		{
			a += ka[0];
			b += ka[1];
			c += ka[2];
			mix(a, b, c);
			ka += 3;
			len -= 12;
		}

		/* handle the last 11 bytes */
		k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}
	else
	{
		/* Code path for non-aligned source data */

		/* handle most of the key */
		while (len >= 12)
		{
#ifdef WORDS_BIGENDIAN
			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else							/* !WORDS_BIGENDIAN */
			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif							/* WORDS_BIGENDIAN */
			mix(a, b, c);
			k += 12;
			len -= 12;
		}

		/* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += k[7];
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += k[3];
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ((uint32) k[7] << 24);
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ((uint32) k[3] << 24);
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}

	final(a, b, c);

	/* report the result */
	return ((uint64) b << 32) | c;
}
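
/*
 * A sketch of seed usage (a hypothetical caller; "k" and "keylen" are
 * placeholders): any nonzero seed perturbs the initial state, so different
 * seeds yield differently-mixed hashes of the same key, which is handy when
 * more than one hash value per key is wanted:
 *
 *		uint64	h0 = hash_bytes_extended(k, keylen, 0);
 *		uint64	h1 = hash_bytes_extended(k, keylen, 1);
 */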

/*
 * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
 *
 * This has the same result as
 *		hash_bytes(&k, sizeof(uint32))
 * but is faster and doesn't force the caller to store k into memory.
 */
uint32
hash_bytes_uint32(uint32 k)
{
	uint32		a,
				b,
				c;

	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
	a += k;

	final(a, b, c);

	/* report the result */
	return c;
}

/*
 * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
 *
 * Like hash_bytes_uint32, this is a convenience function.
 */
uint64
hash_bytes_uint32_extended(uint32 k, uint64 seed)
{
	uint32		a,
				b,
				c;

	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;

	if (seed != 0)
	{
		a += (uint32) (seed >> 32);
		b += (uint32) seed;
		mix(a, b, c);
	}

	a += k;

	final(a, b, c);

	/* report the result */
	return ((uint64) b << 32) | c;
}

/*
 * string_hash: hash function for keys that are NUL-terminated strings.
 *
 * NOTE: this is the default hash function if none is specified.
 */
uint32
string_hash(const void *key, Size keysize)
{
	/*
	 * If the string exceeds keysize-1 bytes, we want to hash only that many,
	 * because when it is copied into the hash table it will be truncated at
	 * that length.
	 */
	Size		s_len = strlen((const char *) key);

	s_len = Min(s_len, keysize - 1);
	return hash_bytes((const unsigned char *) key, (int) s_len);
}
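
/*
 * A sketch of how a dynahash caller might select string_hash explicitly
 * (HASHCTL fields and flags as declared in utils/hsearch.h; "MyEntry",
 * "htab" and the table name are placeholders):
 *
 *		HASHCTL		ctl;
 *
 *		ctl.keysize = NAMEDATALEN;
 *		ctl.entrysize = sizeof(MyEntry);
 *		ctl.hash = string_hash;
 *		htab = hash_create("my string table", 128, &ctl,
 *						   HASH_ELEM | HASH_FUNCTION);
 */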

/*
 * tag_hash: hash function for fixed-size tag values
 */
uint32
tag_hash(const void *key, Size keysize)
{
	return hash_bytes((const unsigned char *) key, (int) keysize);
}

/*
 * uint32_hash: hash function for keys that are uint32 or int32
 *
 * (tag_hash works for this case too, but is slower)
 */
uint32
uint32_hash(const void *key, Size keysize)
{
	Assert(keysize == sizeof(uint32));
	return hash_bytes_uint32(*((const uint32 *) key));
}

/*
 * int32_hash: hash function for keys that are int32; returns the key value
 * itself as the hash (no mixing is applied).
 */
uint32
int32_hash(const void *key, Size keysize)
{
	Assert(keysize == sizeof(int32));
	return *((const uint32 *) key);
}