thirdparty/civetweb-1.9.1/src/third_party/duktape-1.5.2/src-separate/duk_heap_hashstring.c - nifi-minifi-cpp - Git at Google

 /*
  *  String hash computation (interning).
  *
  *  String hashing is performance critical because a string hash is computed
  *  for all new strings which are candidates to be added to the string table.
  *  However, strings actually added to the string table go through a codepoint
  *  length calculation which dominates performance because it goes through
  *  every byte of the input string (but only for strings added).
  *
  *  The string hash algorithm should be fast, but on the other hand provide
  *  good enough hashes to ensure both string table and object property table
  *  hash tables work reasonably well (i.e., there aren't too many collisions
  *  with real world inputs).  Unless the hash is cryptographic, it's always
  *  possible to craft inputs with maximal hash collisions.
  *
  *  NOTE: The hash algorithms must match src/dukutil.py:duk_heap_hashstring()
  *  for ROM string support!
  */

 #include "duk_internal.h"

 #if defined(DUK_USE_STRHASH_DENSE)
 /* Constants for duk_hashstring(). */
 #define DUK__STRHASH_SHORTSTRING   4096L
 #define DUK__STRHASH_MEDIUMSTRING  (256L * 1024L)
 #define DUK__STRHASH_BLOCKSIZE     256L

 DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
 	duk_uint32_t hash;

 	/* Use Murmurhash2 directly for short strings, and use "block skipping"
 	 * for long strings: hash an initial part and then sample the rest of
 	 * the string with reasonably sized chunks.  An initial offset for the
 	 * sampling is computed based on a hash of the initial part of the string;
 	 * this is done to (usually) avoid the case where all long strings have
 	 * certain offset ranges which are never sampled.
 	 *
 	 * Skip should depend on length and bound the total time to roughly
 	 * logarithmic.  With current values:
 	 *
 	 *   1M string => 256 * 241 = 61696 bytes (0.06M) of hashing
 	 *   1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing
 	 *
 	 * XXX: It would be better to compute the skip offset more "smoothly"
 	 * instead of having a few boundary values.
 	 */

 	/* note: mixing len into seed improves hashing when skipping */
 	duk_uint32_t str_seed = heap->hash_seed ^ ((duk_uint32_t) len);

 	if (len <= DUK__STRHASH_SHORTSTRING) {
 		hash = duk_util_hashbytes(str, len, str_seed);
 	} else {
 		duk_size_t off;
 		duk_size_t skip;

 		if (len <= DUK__STRHASH_MEDIUMSTRING) {
 			skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
 		} else {
 			skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
 		}

 		hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed);
 		off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256;

 		/* XXX: inefficient loop */
 		while (off < len) {
 			duk_size_t left = len - off;
 			duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left);
 			hash ^= duk_util_hashbytes(str + off, now, str_seed);
 			off += skip;
 		}
 	}

 #if defined(DUK_USE_STRHASH16)
 	/* Truncate to 16 bits here, so that a computed hash can be compared
 	 * against a hash stored in a 16-bit field.
 	 */
 	hash &= 0x0000ffffUL;
 #endif
 	return hash;
 }

 #undef DUK__STRHASH_SHORTSTRING
 #undef DUK__STRHASH_MEDIUMSTRING
 #undef DUK__STRHASH_BLOCKSIZE
 #else  /* DUK_USE_STRHASH_DENSE */
 DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
 	duk_uint32_t hash;
 	duk_size_t step;
 	duk_size_t off;

 	/* Slightly modified "Bernstein hash" from:
 	 *
 	 *     http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
 	 *
 	 * Modifications: string skipping and reverse direction similar to
 	 * Lua 5.1.5, and different hash initializer.
 	 *
 	 * The reverse direction ensures last byte it always included in the
 	 * hash which is a good default as changing parts of the string are
 	 * more often in the suffix than in the prefix.
 	 */

 	hash = heap->hash_seed ^ ((duk_uint32_t) len);  /* Bernstein hash init value is normally 5381 */
 	step = (len >> DUK_USE_STRHASH_SKIP_SHIFT) + 1;
 	for (off = len; off >= step; off -= step) {
 		DUK_ASSERT(off >= 1);  /* off >= step, and step >= 1 */
 		hash = (hash * 33) + str[off - 1];
 	}

 #if defined(DUK_USE_STRHASH16)
 	/* Truncate to 16 bits here, so that a computed hash can be compared
 	 * against a hash stored in a 16-bit field.
 	 */
 	hash &= 0x0000ffffUL;
 #endif
 	return hash;
 }
 #endif  /* DUK_USE_STRHASH_DENSE */
	/*
	* String hash computation (interning).
	*
	* String hashing is performance critical because a string hash is computed
	* for all new strings which are candidates to be added to the string table.
	* However, strings actually added to the string table go through a codepoint
	* length calculation which dominates performance because it goes through
	* every byte of the input string (but only for strings added).
	*
	* The string hash algorithm should be fast, but on the other hand provide
	* good enough hashes to ensure both string table and object property table
	* hash tables work reasonably well (i.e., there aren't too many collisions
	* with real world inputs). Unless the hash is cryptographic, it's always
	* possible to craft inputs with maximal hash collisions.
	*
	* NOTE: The hash algorithms must match src/dukutil.py:duk_heap_hashstring()
	* for ROM string support!
	*/

	#include "duk_internal.h"

	#if defined(DUK_USE_STRHASH_DENSE)
	/* Constants for duk_hashstring(). */
	#define DUK__STRHASH_SHORTSTRING 4096L
	#define DUK__STRHASH_MEDIUMSTRING (256L * 1024L)
	#define DUK__STRHASH_BLOCKSIZE 256L

	DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap heap, const duk_uint8_t str, duk_size_t len) {
	duk_uint32_t hash;

	/* Use Murmurhash2 directly for short strings, and use "block skipping"
	* for long strings: hash an initial part and then sample the rest of
	* the string with reasonably sized chunks. An initial offset for the
	* sampling is computed based on a hash of the initial part of the string;
	* this is done to (usually) avoid the case where all long strings have
	* certain offset ranges which are never sampled.
	*
	* Skip should depend on length and bound the total time to roughly
	* logarithmic. With current values:
	*
	* 1M string => 256 * 241 = 61696 bytes (0.06M) of hashing
	* 1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing
	*
	* XXX: It would be better to compute the skip offset more "smoothly"
	* instead of having a few boundary values.
	*/

	/* note: mixing len into seed improves hashing when skipping */
	duk_uint32_t str_seed = heap->hash_seed ^ ((duk_uint32_t) len);

	if (len <= DUK__STRHASH_SHORTSTRING) {
	hash = duk_util_hashbytes(str, len, str_seed);
	} else {
	duk_size_t off;
	duk_size_t skip;

	if (len <= DUK__STRHASH_MEDIUMSTRING) {
	skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
	} else {
	skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
	}

	hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed);
	off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256;

	/* XXX: inefficient loop */
	while (off < len) {
	duk_size_t left = len - off;
	duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left);
	hash ^= duk_util_hashbytes(str + off, now, str_seed);
	off += skip;
	}
	}

	#if defined(DUK_USE_STRHASH16)
	/* Truncate to 16 bits here, so that a computed hash can be compared
	* against a hash stored in a 16-bit field.
	*/
	hash &= 0x0000ffffUL;
	#endif
	return hash;
	}

	#undef DUK__STRHASH_SHORTSTRING
	#undef DUK__STRHASH_MEDIUMSTRING
	#undef DUK__STRHASH_BLOCKSIZE
	#else /* DUK_USE_STRHASH_DENSE */
	DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap heap, const duk_uint8_t str, duk_size_t len) {
	duk_uint32_t hash;
	duk_size_t step;
	duk_size_t off;

	/* Slightly modified "Bernstein hash" from:
	*
	* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
	*
	* Modifications: string skipping and reverse direction similar to
	* Lua 5.1.5, and different hash initializer.
	*
	* The reverse direction ensures last byte it always included in the
	* hash which is a good default as changing parts of the string are
	* more often in the suffix than in the prefix.
	*/

	hash = heap->hash_seed ^ ((duk_uint32_t) len); /* Bernstein hash init value is normally 5381 */
	step = (len >> DUK_USE_STRHASH_SKIP_SHIFT) + 1;
	for (off = len; off >= step; off -= step) {
	DUK_ASSERT(off >= 1); /* off >= step, and step >= 1 */
	hash = (hash * 33) + str[off - 1];
	}

	#if defined(DUK_USE_STRHASH16)
	/* Truncate to 16 bits here, so that a computed hash can be compared
	* against a hash stored in a 16-bit field.
	*/
	hash &= 0x0000ffffUL;
	#endif
	return hash;
	}
	#endif /* DUK_USE_STRHASH_DENSE */