modules/experimental/cache_hash.c - httpd - Git at Google

 /* Copyright 2002-2005 The Apache Software Foundation or its licensors, as
  * applicable.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "apr_general.h"

 #include "mod_cache.h"
 #include "cache_hash.h"

 #if APR_HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 #if APR_HAVE_STRING_H
 #include <string.h>
 #endif


 /*
  * The internal form of a hash table.
  *
  * The table is an array indexed by the hash of the key; collisions
  * are resolved by hanging a linked list of hash entries off each
  * element of the array. Although this is a really simple design it
  * isn't too bad given that pools have a low allocation overhead.
  */

 typedef struct cache_hash_entry_t cache_hash_entry_t;

 struct cache_hash_entry_t {
     cache_hash_entry_t	*next;
     unsigned int	 hash;
     const void		*key;
     apr_ssize_t		 klen;
     const void		*val;
 };

 /*
  * Data structure for iterating through a hash table.
  *
  * We keep a pointer to the next hash entry here to allow the current
  * hash entry to be freed or otherwise mangled between calls to
  * cache_hash_next().
  */
 struct cache_hash_index_t {
     cache_hash_t	 *ht;
     cache_hash_entry_t   *this, *next;
     int                  index;
 };

 /*
  * The size of the array is always a power of two. We use the maximum
  * index rather than the size so that we can use bitwise-AND for
  * modular arithmetic.
  * The count of hash entries may be greater depending on the chosen
  * collision rate.
  */
 struct cache_hash_t {
     cache_hash_entry_t   **array;
     cache_hash_index_t     iterator;  /* For cache_hash_first(NULL, ...) */
     int                  count, max;
 };

 /*
  * Hash creation functions.
  */
 static cache_hash_entry_t **alloc_array(cache_hash_t *ht, int max)
 {
    return calloc(1, sizeof(*ht->array) * (max + 1));
 }

 CACHE_DECLARE(cache_hash_t *) cache_hash_make(apr_size_t size)
 {
     cache_hash_t *ht;
     ht = malloc(sizeof(cache_hash_t));
     if (!ht) {
         return NULL;
     }
     ht->count = 0;
     ht->max = size;
     ht->array = alloc_array(ht, ht->max);
     if (!ht->array) {
         free(ht);
         return NULL;
     }
     return ht;
 }

 CACHE_DECLARE(void) cache_hash_free(cache_hash_t *ht)
 {
     if (ht) {
         if (ht->array) {
             free (ht->array);
         }
         free (ht);
     }
 }
 /*
  * Hash iteration functions.
  */

 CACHE_DECLARE(cache_hash_index_t *) cache_hash_next(cache_hash_index_t *hi)
 {
     hi->this = hi->next;
     while (!hi->this) {
 	if (hi->index > hi->ht->max)
 	    return NULL;
 	hi->this = hi->ht->array[hi->index++];
     }
     hi->next = hi->this->next;
     return hi;
 }

 CACHE_DECLARE(cache_hash_index_t *) cache_hash_first(cache_hash_t *ht)
 {
     cache_hash_index_t *hi;

     hi = &ht->iterator;
     hi->ht = ht;
     hi->index = 0;
     hi->this = NULL;
     hi->next = NULL;
     return cache_hash_next(hi);
 }

 CACHE_DECLARE(void) cache_hash_this(cache_hash_index_t *hi,
                                   const void **key,
                                   apr_ssize_t *klen,
                                   void **val)
 {
     if (key)  *key  = hi->this->key;
     if (klen) *klen = hi->this->klen;
     if (val)  *val  = (void *)hi->this->val;
 }


 /*
  * This is where we keep the details of the hash function and control
  * the maximum collision rate.
  *
  * If val is non-NULL it creates and initializes a new hash entry if
  * there isn't already one there; it returns an updatable pointer so
  * that hash entries can be removed.
  */

 static cache_hash_entry_t **find_entry(cache_hash_t *ht,
                                        const void *key,
                                        apr_ssize_t klen,
                                        const void *val)
 {
     cache_hash_entry_t **hep, *he;
     const unsigned char *p;
     unsigned int hash;
     apr_ssize_t i;

     /*
      * This is the popular `times 33' hash algorithm which is used by
      * perl and also appears in Berkeley DB. This is one of the best
      * known hash functions for strings because it is both computed
      * very fast and distributes very well.
      *
      * The originator may be Dan Bernstein but the code in Berkeley DB
      * cites Chris Torek as the source. The best citation I have found
      * is "Chris Torek, Hash function for text in C, Usenet message
      * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
      * Salz's USENIX 1992 paper about INN which can be found at
      * <http://citeseer.nj.nec.com/salz92internetnews.html>.
      *
      * The magic of number 33, i.e. why it works better than many other
      * constants, prime or not, has never been adequately explained by
      * anyone. So I try an explanation: if one experimentally tests all
      * multipliers between 1 and 256 (as I did while writing a low-level
      * data structure library some time ago) one detects that even
      * numbers are not useable at all. The remaining 128 odd numbers
      * (except for the number 1) work more or less all equally well.
      * They all distribute in an acceptable way and this way fill a hash
      * table with an average percent of approx. 86%.
      *
      * If one compares the chi^2 values of the variants (see
      * Bob Jenkins ``Hashing Frequently Asked Questions'' at
      * http://burtleburtle.net/bob/hash/hashfaq.html for a description
      * of chi^2), the number 33 not even has the best value. But the
      * number 33 and a few other equally good numbers like 17, 31, 63,
      * 127 and 129 have nevertheless a great advantage to the remaining
      * numbers in the large set of possible multipliers: their multiply
      * operation can be replaced by a faster operation based on just one
      * shift plus either a single addition or subtraction operation. And
      * because a hash function has to both distribute good _and_ has to
      * be very fast to compute, those few numbers should be preferred.
      *
      *                  -- Ralf S. Engelschall <rse@engelschall.com>
      */
     hash = 0;
     if (klen == CACHE_HASH_KEY_STRING) {
         for (p = key; *p; p++) {
             hash = hash * 33 + *p;
         }
         klen = p - (const unsigned char *)key;
     }
     else {
         for (p = key, i = klen; i; i--, p++) {
             hash = hash * 33 + *p;
         }
     }

     /* scan linked list */
     for (hep = &ht->array[hash % ht->max], he = *hep;
 	 he;
 	 hep = &he->next, he = *hep) {
 	if (he->hash == hash &&
 	    he->klen == klen &&
 	    memcmp(he->key, key, klen) == 0)
 	    break;
     }
     if (he || !val)
 	return hep;
     /* add a new entry for non-NULL values */
     he = malloc(sizeof(*he));
     if (!he) {
         return NULL;
     }
     he->next = NULL;
     he->hash = hash;
     he->key  = key;
     he->klen = klen;
     he->val  = val;
     *hep = he;
     ht->count++;
     return hep;
 }

 CACHE_DECLARE(void *) cache_hash_get(cache_hash_t *ht,
                                    const void *key,
                                    apr_ssize_t klen)
 {
     cache_hash_entry_t *he;
     he = *find_entry(ht, key, klen, NULL);
     if (he)
 	return (void *)he->val;
     else
 	return NULL;
 }

 CACHE_DECLARE(void *) cache_hash_set(cache_hash_t *ht,
                                      const void *key,
                                      apr_ssize_t klen,
                                      const void *val)
 {
     cache_hash_entry_t **hep, *tmp;
     const void *tval;
     hep = find_entry(ht, key, klen, val);
     /* If hep == NULL, then the malloc() in find_entry failed */
     if (hep && *hep) {
         if (!val) {
             /* delete entry */
             tval = (*hep)->val;
             tmp = *hep;
             *hep = (*hep)->next;
             free(tmp);
             --ht->count;
         }
         else {
             /* replace entry */
             tval = (*hep)->val;
             (*hep)->val = val;
         }
         /* Return the object just removed from the cache to let the
          * caller clean it up. Cast the constness away upon return.
          */
         return (void *) tval;
     }
     /* else key not present and val==NULL */
     return NULL;
 }

 CACHE_DECLARE(int) cache_hash_count(cache_hash_t *ht)
 {
     return ht->count;
 }
	/* Copyright 2002-2005 The Apache Software Foundation or its licensors, as
	* applicable.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "apr_general.h"

	#include "mod_cache.h"
	#include "cache_hash.h"

	#if APR_HAVE_STDLIB_H
	#include <stdlib.h>
	#endif
	#if APR_HAVE_STRING_H
	#include <string.h>
	#endif


	/*
	* The internal form of a hash table.
	*
	* The table is an array indexed by the hash of the key; collisions
	* are resolved by hanging a linked list of hash entries off each
	* element of the array. Although this is a really simple design it
	* isn't too bad given that pools have a low allocation overhead.
	*/

	typedef struct cache_hash_entry_t cache_hash_entry_t;

	struct cache_hash_entry_t {
	cache_hash_entry_t *next;
	unsigned int hash;
	const void *key;
	apr_ssize_t klen;
	const void *val;
	};

	/*
	* Data structure for iterating through a hash table.
	*
	* We keep a pointer to the next hash entry here to allow the current
	* hash entry to be freed or otherwise mangled between calls to
	* cache_hash_next().
	*/
	struct cache_hash_index_t {
	cache_hash_t *ht;
	cache_hash_entry_t this, next;
	int index;
	};

	/*
	* The size of the array is always a power of two. We use the maximum
	* index rather than the size so that we can use bitwise-AND for
	* modular arithmetic.
	* The count of hash entries may be greater depending on the chosen
	* collision rate.
	*/
	struct cache_hash_t {
	cache_hash_entry_t **array;
	cache_hash_index_t iterator; /* For cache_hash_first(NULL, ...) */
	int count, max;
	};

	/*
	* Hash creation functions.
	*/
	static cache_hash_entry_t *alloc_array(cache_hash_t ht, int max)
	{
	return calloc(1, sizeof(ht->array) (max + 1));
	}

	CACHE_DECLARE(cache_hash_t *) cache_hash_make(apr_size_t size)
	{
	cache_hash_t *ht;
	ht = malloc(sizeof(cache_hash_t));
	if (!ht) {
	return NULL;
	}
	ht->count = 0;
	ht->max = size;
	ht->array = alloc_array(ht, ht->max);
	if (!ht->array) {
	free(ht);
	return NULL;
	}
	return ht;
	}

	CACHE_DECLARE(void) cache_hash_free(cache_hash_t *ht)
	{
	if (ht) {
	if (ht->array) {
	free (ht->array);
	}
	free (ht);
	}
	}
	/*
	* Hash iteration functions.
	*/

	CACHE_DECLARE(cache_hash_index_t ) cache_hash_next(cache_hash_index_t hi)
	{
	hi->this = hi->next;
	while (!hi->this) {
	if (hi->index > hi->ht->max)
	return NULL;
	hi->this = hi->ht->array[hi->index++];
	}
	hi->next = hi->this->next;
	return hi;
	}

	CACHE_DECLARE(cache_hash_index_t ) cache_hash_first(cache_hash_t ht)
	{
	cache_hash_index_t *hi;

	hi = &ht->iterator;
	hi->ht = ht;
	hi->index = 0;
	hi->this = NULL;
	hi->next = NULL;
	return cache_hash_next(hi);
	}

	CACHE_DECLARE(void) cache_hash_this(cache_hash_index_t *hi,
	const void **key,
	apr_ssize_t *klen,
	void **val)
	{
	if (key) *key = hi->this->key;
	if (klen) *klen = hi->this->klen;
	if (val) val = (void )hi->this->val;
	}


	/*
	* This is where we keep the details of the hash function and control
	* the maximum collision rate.
	*
	* If val is non-NULL it creates and initializes a new hash entry if
	* there isn't already one there; it returns an updatable pointer so
	* that hash entries can be removed.
	*/

	static cache_hash_entry_t *find_entry(cache_hash_t ht,
	const void *key,
	apr_ssize_t klen,
	const void *val)
	{
	cache_hash_entry_t *hep, he;
	const unsigned char *p;
	unsigned int hash;
	apr_ssize_t i;

	/*
	* This is the popular `times 33' hash algorithm which is used by
	* perl and also appears in Berkeley DB. This is one of the best
	* known hash functions for strings because it is both computed
	* very fast and distributes very well.
	*
	* The originator may be Dan Bernstein but the code in Berkeley DB
	* cites Chris Torek as the source. The best citation I have found
	* is "Chris Torek, Hash function for text in C, Usenet message
	* <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
	* Salz's USENIX 1992 paper about INN which can be found at
	* <http://citeseer.nj.nec.com/salz92internetnews.html>.
	*
	* The magic of number 33, i.e. why it works better than many other
	* constants, prime or not, has never been adequately explained by
	* anyone. So I try an explanation: if one experimentally tests all
	* multipliers between 1 and 256 (as I did while writing a low-level
	* data structure library some time ago) one detects that even
	* numbers are not useable at all. The remaining 128 odd numbers
	* (except for the number 1) work more or less all equally well.
	* They all distribute in an acceptable way and this way fill a hash
	* table with an average percent of approx. 86%.
	*
	* If one compares the chi^2 values of the variants (see
	* Bob Jenkins ``Hashing Frequently Asked Questions'' at
	* http://burtleburtle.net/bob/hash/hashfaq.html for a description
	* of chi^2), the number 33 not even has the best value. But the
	* number 33 and a few other equally good numbers like 17, 31, 63,
	* 127 and 129 have nevertheless a great advantage to the remaining
	* numbers in the large set of possible multipliers: their multiply
	* operation can be replaced by a faster operation based on just one
	* shift plus either a single addition or subtraction operation. And
	* because a hash function has to both distribute good _and_ has to
	* be very fast to compute, those few numbers should be preferred.
	*
	* -- Ralf S. Engelschall <rse@engelschall.com>
	*/
	hash = 0;
	if (klen == CACHE_HASH_KEY_STRING) {
	for (p = key; *p; p++) {
	hash = hash * 33 + *p;
	}
	klen = p - (const unsigned char *)key;
	}
	else {
	for (p = key, i = klen; i; i--, p++) {
	hash = hash * 33 + *p;
	}
	}

	/* scan linked list */
	for (hep = &ht->array[hash % ht->max], he = *hep;
	he;
	hep = &he->next, he = *hep) {
	if (he->hash == hash &&
	he->klen == klen &&
	memcmp(he->key, key, klen) == 0)
	break;
	}
	if (he \|\| !val)
	return hep;
	/* add a new entry for non-NULL values */
	he = malloc(sizeof(*he));
	if (!he) {
	return NULL;
	}
	he->next = NULL;
	he->hash = hash;
	he->key = key;
	he->klen = klen;
	he->val = val;
	*hep = he;
	ht->count++;
	return hep;
	}

	CACHE_DECLARE(void ) cache_hash_get(cache_hash_t ht,
	const void *key,
	apr_ssize_t klen)
	{
	cache_hash_entry_t *he;
	he = *find_entry(ht, key, klen, NULL);
	if (he)
	return (void *)he->val;
	else
	return NULL;
	}

	CACHE_DECLARE(void ) cache_hash_set(cache_hash_t ht,
	const void *key,
	apr_ssize_t klen,
	const void *val)
	{
	cache_hash_entry_t *hep, tmp;
	const void *tval;
	hep = find_entry(ht, key, klen, val);
	/* If hep == NULL, then the malloc() in find_entry failed */
	if (hep && *hep) {
	if (!val) {
	/* delete entry */
	tval = (*hep)->val;
	tmp = *hep;
	hep = (hep)->next;
	free(tmp);
	--ht->count;
	}
	else {
	/* replace entry */
	tval = (*hep)->val;
	(*hep)->val = val;
	}
	/* Return the object just removed from the cache to let the
	* caller clean it up. Cast the constness away upon return.
	*/
	return (void *) tval;
	}
	/* else key not present and val==NULL */
	return NULL;
	}

	CACHE_DECLARE(int) cache_hash_count(cache_hash_t *ht)
	{
	return ht->count;
	}