Initial implementation
This is a quick wrapper around the kazlib hash structure. Its fairly
straight forward.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e57b0dc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+c_src/*.o
+ebin/
+test/*.beam
+priv/*.so
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..873b78b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,76 @@
+khash
+=====
+
+Files: c_src/khash.c src/* test/*.t test/util.erl test/gen_term.erl
+
+Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+
+Kazlib - Hash Table Data Type
+=============================
+
+Files: c_src/hash.*
+
+Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+
+Free Software License:
+
+All rights are reserved by the author, with the following exceptions:
+Permission is granted to freely reproduce and distribute this software,
+possibly in exchange for a fee, provided that this copyright notice appears
+intact. Permission is also granted to adapt this software to produce
+derivative works, as long as the modified versions carry this copyright
+notice and additional notices stating that the work has been modified.
+This source code may be translated into executable form and incorporated
+into proprietary software; there is no requirement for such software to
+contain a copyright notice related to this source.
+
+
+Etap
+====
+
+Files: test/etap.erl
+
+Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net>
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5ecb125
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,32 @@
+REBAR?=./rebar
+
+
+all: build
+
+
+clean:
+ $(REBAR) clean
+ rm -rf logs
+ rm -f test/*.beam
+
+
+distclean: clean
+ git clean -fxd
+
+
+build:
+ $(REBAR) compile
+
+
+etap: test/etap.beam test/util.beam test/gen_term.beam
+ prove test/*.t
+
+
+check: build etap
+
+
+%.beam: %.erl
+ erlc -o test/ $<
+
+
+.PHONY: all clean distclean build etap check
diff --git a/c_src/hash.c b/c_src/hash.c
new file mode 100644
index 0000000..e3da0da
--- /dev/null
+++ b/c_src/hash.c
@@ -0,0 +1,843 @@
+/*
+ * Hash Table Data Type
+ * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+ *
+ * Free Software License:
+ *
+ * All rights are reserved by the author, with the following exceptions:
+ * Permission is granted to freely reproduce and distribute this software,
+ * possibly in exchange for a fee, provided that this copyright notice appears
+ * intact. Permission is also granted to adapt this software to produce
+ * derivative works, as long as the modified versions carry this copyright
+ * notice and additional notices stating that the work has been modified.
+ * This source code may be translated into executable form and incorporated
+ * into proprietary software; there is no requirement for such software to
+ * contain a copyright notice related to this source.
+ *
+ * $Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $
+ * $Name: kazlib_1_20 $
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+#include <string.h>
+#define HASH_IMPLEMENTATION
+#include "hash.h"
+
+#ifdef KAZLIB_RCSID
+static const char rcsid[] = "$Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $";
+#endif
+
+#define INIT_BITS 6
+#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */
+#define INIT_MASK ((INIT_SIZE) - 1)
+
+#define next hash_next
+#define key hash_key
+#define data hash_data
+#define hkey hash_hkey
+
+#define table hash_table
+#define nchains hash_nchains
+#define nodecount hash_nodecount
+#define maxcount hash_maxcount
+#define highmark hash_highmark
+#define lowmark hash_lowmark
+#define compare hash_compare
+#define function hash_function
+#define allocnode hash_allocnode
+#define freenode hash_freenode
+#define context hash_context
+#define mask hash_mask
+#define dynamic hash_dynamic
+
+#define table hash_table
+#define chain hash_chain
+
+static hnode_t *kl_hnode_alloc(void *context);
+static void kl_hnode_free(hnode_t *node, void *context);
+static hash_val_t hash_fun_default(const void *key);
+static int hash_comp_default(const void *key1, const void *key2);
+
+int hash_val_t_bit;
+
+/*
+ * Compute the number of bits in the hash_val_t type. We know that hash_val_t
+ * is an unsigned integral type. Thus the highest value it can hold is a
+ * Mersenne number (power of two, less one). We initialize a hash_val_t
+ * object with this value and then shift bits out one by one while counting.
+ * Notes:
+ * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power
+ * of two. This means that its binary representation consists of all one
+ * bits, and hence ``val'' is initialized to all one bits.
+ * 2. While bits remain in val, we increment the bit count and shift it to the
+ * right, replacing the topmost bit by zero.
+ */
+
+static void compute_bits(void)
+{
+ hash_val_t val = HASH_VAL_T_MAX; /* 1 */
+ int bits = 0;
+
+ while (val) { /* 2 */
+ bits++;
+ val >>= 1;
+ }
+
+ hash_val_t_bit = bits;
+}
+
+/*
+ * Verify whether the given argument is a power of two.
+ */
+
+static int is_power_of_two(hash_val_t arg)
+{
+ if (arg == 0)
+ return 0;
+ while ((arg & 1) == 0)
+ arg >>= 1;
+ return (arg == 1);
+}
+
+/*
+ * Compute a shift amount from a given table size
+ */
+
+static hash_val_t compute_mask(hashcount_t size)
+{
+ assert (is_power_of_two(size));
+ assert (size >= 2);
+
+ return size - 1;
+}
+
+/*
+ * Initialize the table of pointers to null.
+ */
+
+static void clear_table(hash_t *hash)
+{
+ hash_val_t i;
+
+ for (i = 0; i < hash->nchains; i++)
+ hash->table[i] = NULL;
+}
+
+/*
+ * Double the size of a dynamic table. This works as follows. Each chain splits
+ * into two adjacent chains. The shift amount increases by one, exposing an
+ * additional bit of each hashed key. For each node in the original chain, the
+ * value of this newly exposed bit will decide which of the two new chains will
+ * receive the node: if the bit is 1, the chain with the higher index will have
+ * the node, otherwise the lower chain will receive the node. In this manner,
+ * the hash table will continue to function exactly as before without having to
+ * rehash any of the keys.
+ * Notes:
+ * 1. Overflow check.
+ * 2. The new number of chains is twice the old number of chains.
+ * 3. The new mask is one bit wider than the previous, revealing a
+ * new bit in all hashed keys.
+ * 4. Allocate a new table of chain pointers that is twice as large as the
+ * previous one.
+ * 5. If the reallocation was successful, we perform the rest of the growth
+ * algorithm, otherwise we do nothing.
+ * 6. The exposed_bit variable holds a mask with which each hashed key can be
+ * AND-ed to test the value of its newly exposed bit.
+ * 7. Now loop over each chain in the table and sort its nodes into two
+ * chains based on the value of each node's newly exposed hash bit.
+ * 8. The low chain replaces the current chain. The high chain goes
+ * into the corresponding sister chain in the upper half of the table.
+ * 9. We have finished dealing with the chains and nodes. We now update
+ * the various bookeeping fields of the hash structure.
+ */
+
+static void grow_table(hash_t *hash)
+{
+ hnode_t **newtable;
+
+ assert (2 * hash->nchains > hash->nchains); /* 1 */
+
+ newtable = realloc(hash->table,
+ sizeof *newtable * hash->nchains * 2); /* 4 */
+
+ if (newtable) { /* 5 */
+ hash_val_t mask = (hash->mask << 1) | 1; /* 3 */
+ hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */
+ hash_val_t chain;
+
+ assert (mask != hash->mask);
+
+ for (chain = 0; chain < hash->nchains; chain++) { /* 7 */
+ hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next;
+
+ for (hptr = newtable[chain]; hptr != 0; hptr = next) {
+ next = hptr->next;
+
+ if (hptr->hkey & exposed_bit) {
+ hptr->next = high_chain;
+ high_chain = hptr;
+ } else {
+ hptr->next = low_chain;
+ low_chain = hptr;
+ }
+ }
+
+ newtable[chain] = low_chain; /* 8 */
+ newtable[chain + hash->nchains] = high_chain;
+ }
+
+ hash->table = newtable; /* 9 */
+ hash->mask = mask;
+ hash->nchains *= 2;
+ hash->lowmark *= 2;
+ hash->highmark *= 2;
+ }
+ assert (kl_hash_verify(hash));
+}
+
+/*
+ * Cut a table size in half. This is done by folding together adjacent chains
+ * and populating the lower half of the table with these chains. The chains are
+ * simply spliced together. Once this is done, the whole table is reallocated
+ * to a smaller object.
+ * Notes:
+ * 1. It is illegal to have a hash table with one slot. This would mean that
+ * hash->shift is equal to hash_val_t_bit, an illegal shift value.
+ * Also, other things could go wrong, such as hash->lowmark becoming zero.
+ * 2. Looping over each pair of sister chains, the low_chain is set to
+ * point to the head node of the chain in the lower half of the table,
+ * and high_chain points to the head node of the sister in the upper half.
+ * 3. The intent here is to compute a pointer to the last node of the
+ * lower chain into the low_tail variable. If this chain is empty,
+ * low_tail ends up with a null value.
+ * 4. If the lower chain is not empty, we simply tack the upper chain onto it.
+ * If the upper chain is a null pointer, nothing happens.
+ * 5. Otherwise if the lower chain is empty but the upper one is not,
+ * If the low chain is empty, but the high chain is not, then the
+ * high chain is simply transferred to the lower half of the table.
+ * 6. Otherwise if both chains are empty, there is nothing to do.
+ * 7. All the chain pointers are in the lower half of the table now, so
+ * we reallocate it to a smaller object. This, of course, invalidates
+ * all pointer-to-pointers which reference into the table from the
+ * first node of each chain.
+ * 8. Though it's unlikely, the reallocation may fail. In this case we
+ * pretend that the table _was_ reallocated to a smaller object.
+ * 9. Finally, update the various table parameters to reflect the new size.
+ */
+
+static void shrink_table(hash_t *hash)
+{
+ hash_val_t chain, nchains;
+ hnode_t **newtable, *low_tail, *low_chain, *high_chain;
+
+ assert (hash->nchains >= 2); /* 1 */
+ nchains = hash->nchains / 2;
+
+ for (chain = 0; chain < nchains; chain++) {
+ low_chain = hash->table[chain]; /* 2 */
+ high_chain = hash->table[chain + nchains];
+ for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next)
+ ; /* 3 */
+ if (low_chain != 0) /* 4 */
+ low_tail->next = high_chain;
+ else if (high_chain != 0) /* 5 */
+ hash->table[chain] = high_chain;
+ else
+ assert (hash->table[chain] == NULL); /* 6 */
+ }
+ newtable = realloc(hash->table,
+ sizeof *newtable * nchains); /* 7 */
+ if (newtable) /* 8 */
+ hash->table = newtable;
+ hash->mask >>= 1; /* 9 */
+ hash->nchains = nchains;
+ hash->lowmark /= 2;
+ hash->highmark /= 2;
+ assert (kl_hash_verify(hash));
+}
+
+
+/*
+ * Create a dynamic hash table. Both the hash table structure and the table
+ * itself are dynamically allocated. Furthermore, the table is extendible in
+ * that it will automatically grow as its load factor increases beyond a
+ * certain threshold.
+ * Notes:
+ * 1. If the number of bits in the hash_val_t type has not been computed yet,
+ * we do so here, because this is likely to be the first function that the
+ * user calls.
+ * 2. Allocate a hash table control structure.
+ * 3. If a hash table control structure is successfully allocated, we
+ * proceed to initialize it. Otherwise we return a null pointer.
+ * 4. We try to allocate the table of hash chains.
+ * 5. If we were able to allocate the hash chain table, we can finish
+ * initializing the hash structure and the table. Otherwise, we must
+ * backtrack by freeing the hash structure.
+ * 6. INIT_SIZE should be a power of two. The high and low marks are always set
+ * to be twice the table size and half the table size respectively. When the
+ * number of nodes in the table grows beyond the high size (beyond load
+ * factor 2), it will double in size to cut the load factor down to about
+ * about 1. If the table shrinks down to or beneath load factor 0.5,
+ * it will shrink, bringing the load up to about 1. However, the table
+ * will never shrink beneath INIT_SIZE even if it's emptied.
+ * 7. This indicates that the table is dynamically allocated and dynamically
+ * resized on the fly. A table that has this value set to zero is
+ * assumed to be statically allocated and will not be resized.
+ * 8. The table of chains must be properly reset to all null pointers.
+ */
+
+hash_t *kl_hash_create(hashcount_t maxcount, hash_comp_t compfun,
+ hash_fun_t hashfun)
+{
+ hash_t *hash;
+
+ if (hash_val_t_bit == 0) /* 1 */
+ compute_bits();
+
+ hash = malloc(sizeof *hash); /* 2 */
+
+ if (hash) { /* 3 */
+ hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */
+ if (hash->table) { /* 5 */
+ hash->nchains = INIT_SIZE; /* 6 */
+ hash->highmark = INIT_SIZE * 2;
+ hash->lowmark = INIT_SIZE / 2;
+ hash->nodecount = 0;
+ hash->maxcount = maxcount;
+ hash->compare = compfun ? compfun : hash_comp_default;
+ hash->function = hashfun ? hashfun : hash_fun_default;
+ hash->allocnode = kl_hnode_alloc;
+ hash->freenode = kl_hnode_free;
+ hash->context = NULL;
+ hash->mask = INIT_MASK;
+ hash->dynamic = 1; /* 7 */
+ clear_table(hash); /* 8 */
+ assert (kl_hash_verify(hash));
+ return hash;
+ }
+ free(hash);
+ }
+
+ return NULL;
+}
+
+/*
+ * Select a different set of node allocator routines.
+ */
+
+void kl_hash_set_allocator(hash_t *hash, hnode_alloc_t al,
+ hnode_free_t fr, void *context)
+{
+ assert (kl_hash_count(hash) == 0);
+ assert ((al == 0 && fr == 0) || (al != 0 && fr != 0));
+
+ hash->allocnode = al ? al : kl_hnode_alloc;
+ hash->freenode = fr ? fr : kl_hnode_free;
+ hash->context = context;
+}
+
+/*
+ * Free every node in the hash using the hash->freenode() function pointer, and
+ * cause the hash to become empty.
+ */
+
+void kl_hash_free_nodes(hash_t *hash)
+{
+ hscan_t hs;
+ hnode_t *node;
+ kl_hash_scan_begin(&hs, hash);
+ while ((node = kl_hash_scan_next(&hs))) {
+ kl_hash_scan_delete(hash, node);
+ hash->freenode(node, hash->context);
+ }
+ hash->nodecount = 0;
+ clear_table(hash);
+}
+
+/*
+ * Obsolescent function for removing all nodes from a table,
+ * freeing them and then freeing the table all in one step.
+ */
+
+void kl_hash_free(hash_t *hash)
+{
+#ifdef KAZLIB_OBSOLESCENT_DEBUG
+ assert ("call to obsolescent function hash_free()" && 0);
+#endif
+ kl_hash_free_nodes(hash);
+ kl_hash_destroy(hash);
+}
+
+/*
+ * Free a dynamic hash table structure.
+ */
+
+void kl_hash_destroy(hash_t *hash)
+{
+ assert (hash_val_t_bit != 0);
+ assert (kl_hash_isempty(hash));
+ free(hash->table);
+ free(hash);
+}
+
+/*
+ * Initialize a user supplied hash structure. The user also supplies a table of
+ * chains which is assigned to the hash structure. The table is static---it
+ * will not grow or shrink.
+ * 1. See note 1. in hash_create().
+ * 2. The user supplied array of pointers hopefully contains nchains nodes.
+ * 3. See note 7. in hash_create().
+ * 4. We must dynamically compute the mask from the given power of two table
+ * size.
+ * 5. The user supplied table can't be assumed to contain null pointers,
+ * so we reset it here.
+ */
+
+hash_t *kl_hash_init(hash_t *hash, hashcount_t maxcount,
+ hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table,
+ hashcount_t nchains)
+{
+ if (hash_val_t_bit == 0) /* 1 */
+ compute_bits();
+
+ assert (is_power_of_two(nchains));
+
+ hash->table = table; /* 2 */
+ hash->nchains = nchains;
+ hash->nodecount = 0;
+ hash->maxcount = maxcount;
+ hash->compare = compfun ? compfun : hash_comp_default;
+ hash->function = hashfun ? hashfun : hash_fun_default;
+ hash->dynamic = 0; /* 3 */
+ hash->mask = compute_mask(nchains); /* 4 */
+ clear_table(hash); /* 5 */
+
+ assert (kl_hash_verify(hash));
+
+ return hash;
+}
+
+/*
+ * Reset the hash scanner so that the next element retrieved by
+ * hash_scan_next() shall be the first element on the first non-empty chain.
+ * Notes:
+ * 1. Locate the first non empty chain.
+ * 2. If an empty chain is found, remember which one it is and set the next
+ * pointer to refer to its first element.
+ * 3. Otherwise if a chain is not found, set the next pointer to NULL
+ * so that hash_scan_next() shall indicate failure.
+ */
+
+void kl_hash_scan_begin(hscan_t *scan, hash_t *hash)
+{
+ hash_val_t nchains = hash->nchains;
+ hash_val_t chain;
+
+ scan->table = hash;
+
+ /* 1 */
+
+ for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++)
+ ;
+
+ if (chain < nchains) { /* 2 */
+ scan->chain = chain;
+ scan->next = hash->table[chain];
+ } else { /* 3 */
+ scan->next = NULL;
+ }
+}
+
+/*
+ * Retrieve the next node from the hash table, and update the pointer
+ * for the next invocation of hash_scan_next().
+ * Notes:
+ * 1. Remember the next pointer in a temporary value so that it can be
+ * returned.
+ * 2. This assertion essentially checks whether the module has been properly
+ * initialized. The first point of interaction with the module should be
+ * either hash_create() or hash_init(), both of which set hash_val_t_bit to
+ * a non zero value.
+ * 3. If the next pointer we are returning is not NULL, then the user is
+ * allowed to call hash_scan_next() again. We prepare the new next pointer
+ * for that call right now. That way the user is allowed to delete the node
+ * we are about to return, since we will no longer be needing it to locate
+ * the next node.
+ * 4. If there is a next node in the chain (next->next), then that becomes the
+ * new next node, otherwise ...
+ * 5. We have exhausted the current chain, and must locate the next subsequent
+ * non-empty chain in the table.
+ * 6. If a non-empty chain is found, the first element of that chain becomes
+ * the new next node. Otherwise there is no new next node and we set the
+ * pointer to NULL so that the next time hash_scan_next() is called, a null
+ * pointer shall be immediately returned.
+ */
+
+
+hnode_t *kl_hash_scan_next(hscan_t *scan)
+{
+ hnode_t *next = scan->next; /* 1 */
+ hash_t *hash = scan->table;
+ hash_val_t chain = scan->chain + 1;
+ hash_val_t nchains = hash->nchains;
+
+ assert (hash_val_t_bit != 0); /* 2 */
+
+ if (next) { /* 3 */
+ if (next->next) { /* 4 */
+ scan->next = next->next;
+ } else {
+ while (chain < nchains && hash->table[chain] == 0) /* 5 */
+ chain++;
+ if (chain < nchains) { /* 6 */
+ scan->chain = chain;
+ scan->next = hash->table[chain];
+ } else {
+ scan->next = NULL;
+ }
+ }
+ }
+ return next;
+}
+
+/*
+ * Insert a node into the hash table.
+ * Notes:
+ * 1. It's illegal to insert more than the maximum number of nodes. The client
+ * should verify that the hash table is not full before attempting an
+ * insertion.
+ * 2. The same key may not be inserted into a table twice.
+ * 3. If the table is dynamic and the load factor is already at >= 2,
+ * grow the table.
+ * 4. We take the bottom N bits of the hash value to derive the chain index,
+ * where N is the base 2 logarithm of the size of the hash table.
+ */
+
+void kl_hash_insert(hash_t *hash, hnode_t *node, const void *key)
+{
+ hash_val_t hkey, chain;
+
+ assert (hash_val_t_bit != 0);
+ assert (node->next == NULL);
+ assert (hash->nodecount < hash->maxcount); /* 1 */
+ assert (kl_hash_lookup(hash, key) == NULL); /* 2 */
+
+ if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */
+ grow_table(hash);
+
+ hkey = hash->function(key);
+ chain = hkey & hash->mask; /* 4 */
+
+ node->key = key;
+ node->hkey = hkey;
+ node->next = hash->table[chain];
+ hash->table[chain] = node;
+ hash->nodecount++;
+
+ assert (kl_hash_verify(hash));
+}
+
+/*
+ * Find a node in the hash table and return a pointer to it.
+ * Notes:
+ * 1. We hash the key and keep the entire hash value. As an optimization, when
+ * we descend down the chain, we can compare hash values first and only if
+ * hash values match do we perform a full key comparison.
+ * 2. To locate the chain from among 2^N chains, we look at the lower N bits of
+ * the hash value by anding them with the current mask.
+ * 3. Looping through the chain, we compare the stored hash value inside each
+ * node against our computed hash. If they match, then we do a full
+ * comparison between the unhashed keys. If these match, we have located the
+ * entry.
+ */
+
+hnode_t *kl_hash_lookup(hash_t *hash, const void *key)
+{
+ hash_val_t hkey, chain;
+ hnode_t *nptr;
+
+ hkey = hash->function(key); /* 1 */
+ chain = hkey & hash->mask; /* 2 */
+
+ for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */
+ if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0)
+ return nptr;
+ }
+
+ return NULL;
+}
+
+/*
+ * Delete the given node from the hash table. Since the chains
+ * are singly linked, we must locate the start of the node's chain
+ * and traverse.
+ * Notes:
+ * 1. The node must belong to this hash table, and its key must not have
+ * been tampered with.
+ * 2. If this deletion will take the node count below the low mark, we
+ * shrink the table now.
+ * 3. Determine which chain the node belongs to, and fetch the pointer
+ * to the first node in this chain.
+ * 4. If the node being deleted is the first node in the chain, then
+ * simply update the chain head pointer.
+ * 5. Otherwise advance to the node's predecessor, and splice out
+ * by updating the predecessor's next pointer.
+ * 6. Indicate that the node is no longer in a hash table.
+ */
+
+hnode_t *kl_hash_delete(hash_t *hash, hnode_t *node)
+{
+ hash_val_t chain;
+ hnode_t *hptr;
+
+ assert (kl_hash_lookup(hash, node->key) == node); /* 1 */
+ assert (hash_val_t_bit != 0);
+
+ if (hash->dynamic && hash->nodecount <= hash->lowmark
+ && hash->nodecount > INIT_SIZE)
+ shrink_table(hash); /* 2 */
+
+ chain = node->hkey & hash->mask; /* 3 */
+ hptr = hash->table[chain];
+
+ if (hptr == node) { /* 4 */
+ hash->table[chain] = node->next;
+ } else {
+ while (hptr->next != node) { /* 5 */
+ assert (hptr != 0);
+ hptr = hptr->next;
+ }
+ assert (hptr->next == node);
+ hptr->next = node->next;
+ }
+
+ hash->nodecount--;
+ assert (kl_hash_verify(hash));
+
+ node->next = NULL; /* 6 */
+ return node;
+}
+
+int kl_hash_alloc_insert(hash_t *hash, const void *key, void *data)
+{
+ hnode_t *node = hash->allocnode(hash->context);
+
+ if (node) {
+ kl_hnode_init(node, data);
+ kl_hash_insert(hash, node, key);
+ return 1;
+ }
+ return 0;
+}
+
+void kl_hash_delete_free(hash_t *hash, hnode_t *node)
+{
+ kl_hash_delete(hash, node);
+ hash->freenode(node, hash->context);
+}
+
+/*
+ * Exactly like hash_delete, except does not trigger table shrinkage. This is to be
+ * used from within a hash table scan operation. See notes for hash_delete.
+ */
+
+hnode_t *kl_hash_scan_delete(hash_t *hash, hnode_t *node)
+{
+ hash_val_t chain;
+ hnode_t *hptr;
+
+ assert (kl_hash_lookup(hash, node->key) == node);
+ assert (hash_val_t_bit != 0);
+
+ chain = node->hkey & hash->mask;
+ hptr = hash->table[chain];
+
+ if (hptr == node) {
+ hash->table[chain] = node->next;
+ } else {
+ while (hptr->next != node)
+ hptr = hptr->next;
+ hptr->next = node->next;
+ }
+
+ hash->nodecount--;
+ assert (kl_hash_verify(hash));
+ node->next = NULL;
+
+ return node;
+}
+
+/*
+ * Like hash_delete_free but based on hash_scan_delete.
+ */
+
+void kl_hash_scan_delfree(hash_t *hash, hnode_t *node)
+{
+ kl_hash_scan_delete(hash, node);
+ hash->freenode(node, hash->context);
+}
+
+/*
+ * Verify whether the given object is a valid hash table. This means
+ * Notes:
+ * 1. If the hash table is dynamic, verify whether the high and
+ * low expansion/shrinkage thresholds are powers of two.
+ * 2. Count all nodes in the table, and test each hash value
+ * to see whether it is correct for the node's chain.
+ */
+
+int kl_hash_verify(hash_t *hash)
+{
+ hashcount_t count = 0;
+ hash_val_t chain;
+ hnode_t *hptr;
+
+ if (hash->dynamic) { /* 1 */
+ if (hash->lowmark >= hash->highmark)
+ return 0;
+ if (!is_power_of_two(hash->highmark))
+ return 0;
+ if (!is_power_of_two(hash->lowmark))
+ return 0;
+ }
+
+ for (chain = 0; chain < hash->nchains; chain++) { /* 2 */
+ for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) {
+ if ((hptr->hkey & hash->mask) != chain)
+ return 0;
+ count++;
+ }
+ }
+
+ if (count != hash->nodecount)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Test whether the hash table is full and return 1 if this is true,
+ * 0 if it is false.
+ */
+
+#undef kl_hash_isfull
+int kl_hash_isfull(hash_t *hash)
+{
+ return hash->nodecount == hash->maxcount;
+}
+
+/*
+ * Test whether the hash table is empty and return 1 if this is true,
+ * 0 if it is false.
+ */
+
+#undef kl_hash_isempty
+int kl_hash_isempty(hash_t *hash)
+{
+ return hash->nodecount == 0;
+}
+
+static hnode_t *kl_hnode_alloc(void *context)
+{
+ return malloc(sizeof *kl_hnode_alloc(NULL));
+}
+
+static void kl_hnode_free(hnode_t *node, void *context)
+{
+ free(node);
+}
+
+
+/*
+ * Create a hash table node dynamically and assign it the given data.
+ */
+
+hnode_t *kl_hnode_create(void *data)
+{
+ hnode_t *node = malloc(sizeof *node);
+ if (node) {
+ node->data = data;
+ node->next = NULL;
+ }
+ return node;
+}
+
+/*
+ * Initialize a client-supplied node
+ */
+
+hnode_t *kl_hnode_init(hnode_t *hnode, void *data)
+{
+ hnode->data = data;
+ hnode->next = NULL;
+ return hnode;
+}
+
+/*
+ * Destroy a dynamically allocated node.
+ */
+
+void kl_hnode_destroy(hnode_t *hnode)
+{
+ free(hnode);
+}
+
+#undef kl_hnode_put
+void kl_hnode_put(hnode_t *node, void *data)
+{
+ node->data = data;
+}
+
+#undef kl_hnode_get
+void *kl_hnode_get(hnode_t *node)
+{
+ return node->data;
+}
+
+#undef kl_hnode_getkey
+const void *kl_hnode_getkey(hnode_t *node)
+{
+ return node->key;
+}
+
+#undef kl_hash_count
+hashcount_t kl_hash_count(hash_t *hash)
+{
+ return hash->nodecount;
+}
+
+#undef kl_hash_size
+hashcount_t kl_hash_size(hash_t *hash)
+{
+ return hash->nchains;
+}
+
+static hash_val_t hash_fun_default(const void *key)
+{
+ static unsigned long randbox[] = {
+ 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
+ 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
+ 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
+ 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
+ };
+
+ const unsigned char *str = key;
+ hash_val_t acc = 0;
+
+ while (*str) {
+ acc ^= randbox[(*str + acc) & 0xf];
+ acc = (acc << 1) | (acc >> 31);
+ acc &= 0xffffffffU;
+ acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
+ acc = (acc << 2) | (acc >> 30);
+ acc &= 0xffffffffU;
+ }
+ return acc;
+}
+
+static int hash_comp_default(const void *key1, const void *key2)
+{
+ return strcmp(key1, key2);
+}
diff --git a/c_src/hash.h b/c_src/hash.h
new file mode 100644
index 0000000..0c75ed0
--- /dev/null
+++ b/c_src/hash.h
@@ -0,0 +1,240 @@
+/*
+ * Hash Table Data Type
+ * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+ *
+ * Free Software License:
+ *
+ * All rights are reserved by the author, with the following exceptions:
+ * Permission is granted to freely reproduce and distribute this software,
+ * possibly in exchange for a fee, provided that this copyright notice appears
+ * intact. Permission is also granted to adapt this software to produce
+ * derivative works, as long as the modified versions carry this copyright
+ * notice and additional notices stating that the work has been modified.
+ * This source code may be translated into executable form and incorporated
+ * into proprietary software; there is no requirement for such software to
+ * contain a copyright notice related to this source.
+ *
+ * $Id: hash.h,v 1.22.2.7 2000/11/13 01:36:45 kaz Exp $
+ * $Name: kazlib_1_20 $
+ */
+
+#ifndef HASH_H
+#define HASH_H
+
+#include <limits.h>
+#ifdef KAZLIB_SIDEEFFECT_DEBUG
+#include "sfx.h"
+#endif
+
+/*
+ * Blurb for inclusion into C++ translation units
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned long hashcount_t;
+#define HASHCOUNT_T_MAX ULONG_MAX
+
+typedef unsigned long hash_val_t;
+#define HASH_VAL_T_MAX ULONG_MAX
+
+extern int hash_val_t_bit;
+
+#ifndef HASH_VAL_T_BIT
+#define HASH_VAL_T_BIT ((int) hash_val_t_bit)
+#endif
+
+/*
+ * Hash chain node structure.
+ * Notes:
+ * 1. This preprocessing directive is for debugging purposes. The effect is
+ * that if the preprocessor symbol KAZLIB_OPAQUE_DEBUG is defined prior to the
+ * inclusion of this header, then the structure shall be declared as having
+ * the single member int __OPAQUE__. This way, any attempts by the
+ * client code to violate the principles of information hiding (by accessing
+ * the structure directly) can be diagnosed at translation time. However,
+ * note the resulting compiled unit is not suitable for linking.
+ * 2. This is a pointer to the next node in the chain. In the last node of a
+ * chain, this pointer is null.
+ * 3. The key is a pointer to some user supplied data that contains a unique
+ * identifier for each hash node in a given table. The interpretation of
+ * the data is up to the user. When creating or initializing a hash table,
+ * the user must supply a pointer to a function for comparing two keys,
+ * and a pointer to a function for hashing a key into a numeric value.
+ * 4. The value is a user-supplied pointer to void which may refer to
+ * any data object. It is not interpreted in any way by the hashing
+ * module.
+ * 5. The hashed key is stored in each node so that we don't have to rehash
+ * each key when the table must grow or shrink.
+ */
+
+typedef struct hnode_t {
+ #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) /* 1 */
+ struct hnode_t *hash_next; /* 2 */
+ const void *hash_key; /* 3 */
+ void *hash_data; /* 4 */
+ hash_val_t hash_hkey; /* 5 */
+ #else
+ int hash_dummy;
+ #endif
+} hnode_t;
+
+/*
+ * The comparison function pointer type. A comparison function takes two keys
+ * and produces a value of -1 if the left key is less than the right key, a
+ * value of 0 if the keys are equal, and a value of 1 if the left key is
+ * greater than the right key.
+ */
+
+typedef int (*hash_comp_t)(const void *, const void *);
+
+/*
+ * The hashing function performs some computation on a key and produces an
+ * integral value of type hash_val_t based on that key. For best results, the
+ * function should have a good randomness properties in *all* significant bits
+ * over the set of keys that are being inserted into a given hash table. In
+ * particular, the most significant bits of hash_val_t are most significant to
+ * the hash module. Only as the hash table expands are less significant bits
+ * examined. Thus a function that has good distribution in its upper bits but
+ * not lower is preferrable to one that has poor distribution in the upper bits
+ * but not the lower ones.
+ */
+
+typedef hash_val_t (*hash_fun_t)(const void *);
+
+/*
+ * allocator functions
+ */
+
+typedef hnode_t *(*hnode_alloc_t)(void *);
+typedef void (*hnode_free_t)(hnode_t *, void *);
+
+/*
+ * This is the hash table control structure. It keeps track of information
+ * about a hash table, as well as the hash table itself.
+ * Notes:
+ * 1. Pointer to the hash table proper. The table is an array of pointers to
+ * hash nodes (of type hnode_t). If the table is empty, every element of
+ * this table is a null pointer. A non-null entry points to the first
+ * element of a chain of nodes.
+ * 2. This member keeps track of the size of the hash table---that is, the
+ * number of chain pointers.
+ * 3. The count member maintains the number of elements that are presently
+ * in the hash table.
+ * 4. The maximum count is the greatest number of nodes that can populate this
+ * table. If the table contains this many nodes, no more can be inserted,
+ * and the hash_isfull() function returns true.
+ * 5. The high mark is a population threshold, measured as a number of nodes,
+ * which, if exceeded, will trigger a table expansion. Only dynamic hash
+ * tables are subject to this expansion.
+ * 6. The low mark is a minimum population threshold, measured as a number of
+ * nodes. If the table population drops below this value, a table shrinkage
+ * will occur. Only dynamic tables are subject to this reduction. No table
+ * will shrink beneath a certain absolute minimum number of nodes.
+ * 7. This is the a pointer to the hash table's comparison function. The
+ * function is set once at initialization or creation time.
+ * 8. Pointer to the table's hashing function, set once at creation or
+ * initialization time.
+ * 9. The current hash table mask. If the size of the hash table is 2^N,
+ * this value has its low N bits set to 1, and the others clear. It is used
+ * to select bits from the result of the hashing function to compute an
+ * index into the table.
+ * 10. A flag which indicates whether the table is to be dynamically resized. It
+ * is set to 1 in dynamically allocated tables, 0 in tables that are
+ * statically allocated.
+ */
+
+typedef struct hash_t {
+ #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG)
+ struct hnode_t **hash_table; /* 1 */
+ hashcount_t hash_nchains; /* 2 */
+ hashcount_t hash_nodecount; /* 3 */
+ hashcount_t hash_maxcount; /* 4 */
+ hashcount_t hash_highmark; /* 5 */
+ hashcount_t hash_lowmark; /* 6 */
+ hash_comp_t hash_compare; /* 7 */
+ hash_fun_t hash_function; /* 8 */
+ hnode_alloc_t hash_allocnode;
+ hnode_free_t hash_freenode;
+ void *hash_context;
+ hash_val_t hash_mask; /* 9 */
+ int hash_dynamic; /* 10 */
+ #else
+ int hash_dummy;
+ #endif
+} hash_t;
+
+/*
+ * Hash scanner structure, used for traversals of the data structure.
+ * Notes:
+ * 1. Pointer to the hash table that is being traversed.
+ * 2. Reference to the current chain in the table being traversed (the chain
+ * that contains the next node that shall be retrieved).
+ * 3. Pointer to the node that will be retrieved by the subsequent call to
+ * hash_scan_next().
+ */
+
+typedef struct hscan_t {
+ #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG)
+ hash_t *hash_table; /* 1 */
+ hash_val_t hash_chain; /* 2 */
+ hnode_t *hash_next; /* 3 */
+ #else
+ int hash_dummy;
+ #endif
+} hscan_t;
+
+extern hash_t *kl_hash_create(hashcount_t, hash_comp_t, hash_fun_t);
+extern void kl_hash_set_allocator(hash_t *, hnode_alloc_t, hnode_free_t, void *);
+extern void kl_hash_destroy(hash_t *);
+extern void kl_hash_free_nodes(hash_t *);
+extern void kl_hash_free(hash_t *);
+extern hash_t *kl_hash_init(hash_t *, hashcount_t, hash_comp_t,
+ hash_fun_t, hnode_t **, hashcount_t);
+extern void kl_hash_insert(hash_t *, hnode_t *, const void *);
+extern hnode_t *kl_hash_lookup(hash_t *, const void *);
+extern hnode_t *kl_hash_delete(hash_t *, hnode_t *);
+extern int kl_hash_alloc_insert(hash_t *, const void *, void *);
+extern void kl_hash_delete_free(hash_t *, hnode_t *);
+
+extern void kl_hnode_put(hnode_t *, void *);
+extern void *kl_hnode_get(hnode_t *);
+extern const void *kl_hnode_getkey(hnode_t *);
+extern hashcount_t kl_hash_count(hash_t *);
+extern hashcount_t kl_hash_size(hash_t *);
+
+extern int kl_hash_isfull(hash_t *);
+extern int kl_hash_isempty(hash_t *);
+
+extern void kl_hash_scan_begin(hscan_t *, hash_t *);
+extern hnode_t *kl_hash_scan_next(hscan_t *);
+extern hnode_t *kl_hash_scan_delete(hash_t *, hnode_t *);
+extern void kl_hash_scan_delfree(hash_t *, hnode_t *);
+
+extern int kl_hash_verify(hash_t *);
+
+extern hnode_t *kl_hnode_create(void *);
+extern hnode_t *kl_hnode_init(hnode_t *, void *);
+extern void kl_hnode_destroy(hnode_t *);
+
+#if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG)
+#ifdef KAZLIB_SIDEEFFECT_DEBUG
+#define kl_hash_isfull(H) (SFX_CHECK(H)->hash_nodecount == (H)->hash_maxcount)
+#else
+#define kl_hash_isfull(H) ((H)->hash_nodecount == (H)->hash_maxcount)
+#endif
+#define kl_hash_isempty(H) ((H)->hash_nodecount == 0)
+#define kl_hash_count(H) ((H)->hash_nodecount)
+#define kl_hash_size(H) ((H)->hash_nchains)
+#define kl_hnode_get(N) ((N)->hash_data)
+#define kl_hnode_getkey(N) ((N)->hash_key)
+#define kl_hnode_put(N, V) ((N)->hash_data = (V))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c_src/khash.c b/c_src/khash.c
new file mode 100644
index 0000000..8107029
--- /dev/null
+++ b/c_src/khash.c
@@ -0,0 +1,582 @@
+// This file is part of khash released under the MIT license.
+// See the LICENSE file for more information.
+// Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+#include <assert.h>
+#include <string.h>
+
+#include "erl_nif.h"
+#include "hash.h"
+
+
+#define KHASH_VERSION 0
+
+
+typedef struct
+{
+ ERL_NIF_TERM atom_ok;
+ ERL_NIF_TERM atom_error;
+ ERL_NIF_TERM atom_shared;
+ ERL_NIF_TERM atom_value;
+ ERL_NIF_TERM atom_not_found;
+ ErlNifResourceType* res_hash;
+} khash_priv;
+
+
+typedef struct
+{
+ ErlNifEnv* env;
+ ERL_NIF_TERM key;
+ ERL_NIF_TERM val;
+} khnode_t;
+
+
+typedef struct
+{
+ int version;
+ hash_t* h;
+ ErlNifPid p;
+ ErlNifMutex* l;
+} khash_t;
+
+
+// This is actually an internal Erlang VM function that
+// we're being a bit hacky to get access to. There's a
+// pending patch to expose this in the NIF API in newer
+// Erlangs.
+unsigned int make_hash2(ERL_NIF_TERM term);
+
+
+static inline ERL_NIF_TERM
+make_atom(ErlNifEnv* env, const char* name)
+{
+ ERL_NIF_TERM ret;
+ if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) {
+ return ret;
+ }
+ return enif_make_atom(env, name);
+}
+
+
+static inline ERL_NIF_TERM
+make_ok(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM value)
+{
+ return enif_make_tuple2(env, priv->atom_ok, value);
+}
+
+
+static inline ERL_NIF_TERM
+make_error(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM reason)
+{
+ return enif_make_tuple2(env, priv->atom_error, reason);
+}
+
+
+static inline int
+has_option(ErlNifEnv* env, ERL_NIF_TERM opts, ERL_NIF_TERM opt)
+{
+ ERL_NIF_TERM head;
+ ERL_NIF_TERM tail = opts;
+ const ERL_NIF_TERM* elems;
+ int arity;
+
+ if(!enif_is_list(env, opts)) {
+ return 0;
+ }
+
+ while(enif_get_list_cell(env, tail, &head, &tail)) {
+ if(enif_compare(head, opt) == 0) {
+ return 1;
+ }
+
+ if(!enif_is_tuple(env, head)) {
+ continue;
+ }
+
+ if(!enif_get_tuple(env, head, &arity, &elems)) {
+ continue;
+ }
+
+ if(arity == 0) {
+ continue;
+ }
+
+ if(enif_compare(elems[0], opt)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+
+static inline int
+lock_hash(ErlNifEnv* env, khash_t* khash)
+{
+ ErlNifPid pid;
+
+ // shared hash
+ if(khash->l != NULL) {
+ enif_mutex_lock(khash->l);
+ return 1;
+ }
+
+ // Else check that we're being called from the pid
+ // that created the hash.
+ enif_self(env, &pid);
+ if(enif_compare(pid.pid, khash->p.pid) == 0) {
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static inline void
+unlock_hash(ErlNifEnv* env, khash_t* khash)
+{
+ if(khash->l != NULL) {
+ enif_mutex_unlock(khash->l);
+ }
+}
+
+
+hnode_t*
+khnode_alloc(void* ctx)
+{
+ hnode_t* ret = (hnode_t*) enif_alloc(sizeof(hnode_t));
+ khnode_t* node = (khnode_t*) enif_alloc(sizeof(khnode_t));
+
+ memset(ret, '\0', sizeof(hnode_t));
+ memset(node, '\0', sizeof(khnode_t));
+
+ node->env = enif_alloc_env();
+ ret->hash_key = node;
+
+ return ret;
+}
+
+
+void
+khnode_free(hnode_t* obj, void* ctx)
+{
+ khnode_t* node = (khnode_t*) kl_hnode_getkey(obj);
+ enif_free_env(node->env);
+ enif_free(node);
+ enif_free(obj);
+ return;
+}
+
+
+int
+khash_cmp_fun(const void* l, const void* r)
+{
+ khnode_t* left = (khnode_t*) l;
+ khnode_t* right = (khnode_t*) r;
+ int cmp = enif_compare(left->key, right->key);
+
+ if(cmp < 0) {
+ return -1;
+ } else if(cmp == 0) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+
+hash_val_t
+khash_hash_fun(const void* obj)
+{
+ khnode_t* node = (khnode_t*) obj;
+ return (hash_val_t) make_hash2(node->key);
+}
+
+
+static inline khash_t*
+khash_create_int(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM opts)
+{
+ khash_t* khash = NULL;
+
+ assert(priv != NULL && "missing private data member");
+
+ khash = (khash_t*) enif_alloc_resource(priv->res_hash, sizeof(khash_t));
+ memset(khash, '\0', sizeof(khash_t));
+ khash->version = KHASH_VERSION;
+
+ khash->h = kl_hash_create(HASHCOUNT_T_MAX, khash_cmp_fun, khash_hash_fun);
+
+ if(khash->h == NULL ) {
+ enif_release_resource(khash);
+ return NULL;
+ }
+
+ kl_hash_set_allocator(khash->h, khnode_alloc, khnode_free, NULL);
+ enif_self(env, &(khash->p));
+
+ if(has_option(env, opts, priv->atom_shared)) {
+ khash->l = enif_mutex_create(NULL);
+ } else {
+ khash->l = NULL;
+ }
+
+ return khash;
+}
+
+
+static ERL_NIF_TERM
+khash_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+ ERL_NIF_TERM ret;
+
+ if(argc != 1) {
+ return enif_make_badarg(env);
+ }
+
+ khash = khash_create_int(env, priv, argv[0]);
+ if(khash == NULL) {
+ return enif_make_badarg(env);
+ }
+
+ ret = enif_make_resource(env, khash);
+ enif_release_resource(khash);
+
+ return make_ok(env, priv, ret);
+}
+
+
+static void
+khash_free(ErlNifEnv* env, void* obj)
+{
+ khash_t* khash = (khash_t*) obj;
+
+ if(khash->h != NULL) {
+ kl_hash_free_nodes(khash->h);
+ kl_hash_destroy(khash->h);
+ }
+
+ if(khash->l != NULL) {
+ enif_mutex_destroy(khash->l);
+ }
+
+ return;
+}
+
+
+static ERL_NIF_TERM
+khash_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = (khash_priv*) enif_priv_data(env);
+ ERL_NIF_TERM ret = enif_make_list(env, 0);
+ khash_t* khash;
+ hscan_t scan;
+ hnode_t* entry;
+ khnode_t* node;
+ ERL_NIF_TERM key;
+ ERL_NIF_TERM val;
+ ERL_NIF_TERM tuple;
+
+ if(argc != 1) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ kl_hash_scan_begin(&scan, khash->h);
+
+ while((entry = kl_hash_scan_next(&scan)) != NULL) {
+ node = (khnode_t*) kl_hnode_getkey(entry);
+ key = enif_make_copy(env, node->key);
+ val = enif_make_copy(env, node->val);
+ tuple = enif_make_tuple2(env, key, val);
+ ret = enif_make_list_cell(env, tuple, ret);
+ }
+
+ unlock_hash(env, khash);
+
+ return ret;
+}
+
+
+static ERL_NIF_TERM
+khash_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+
+ if(argc != 1) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ kl_hash_free_nodes(khash->h);
+
+ unlock_hash(env, khash);
+
+ return priv->atom_ok;
+}
+
+
+static inline hnode_t*
+khash_lookup_int(ErlNifEnv* env, ERL_NIF_TERM key, khash_t* khash)
+{
+ khnode_t node;
+ node.env = env;
+ node.key = key;
+ return kl_hash_lookup(khash->h, &node);
+}
+
+
+static ERL_NIF_TERM
+khash_lookup(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+ hnode_t* entry;
+ khnode_t* node;
+ ERL_NIF_TERM ret;
+
+ if(argc != 2) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ entry = khash_lookup_int(env, argv[1], khash);
+ if(entry == NULL) {
+ ret = priv->atom_not_found;
+ } else {
+ node = (khnode_t*) kl_hnode_getkey(entry);
+ ret = enif_make_copy(env, node->val);
+ ret = enif_make_tuple2(env, priv->atom_value, ret);
+ }
+
+ unlock_hash(env, khash);
+
+ return ret;
+}
+
+
+static ERL_NIF_TERM
+khash_get(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+ hnode_t* entry;
+ khnode_t* node;
+ ERL_NIF_TERM ret;
+
+ if(argc != 3) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ entry = khash_lookup_int(env, argv[1], khash);
+ if(entry == NULL) {
+ ret = argv[2];
+ } else {
+ node = (khnode_t*) kl_hnode_getkey(entry);
+ ret = enif_make_copy(env, node->val);
+ }
+
+ unlock_hash(env, khash);
+
+ return ret;
+}
+
+
+static ERL_NIF_TERM
+khash_put(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+ hnode_t* entry;
+ khnode_t* node;
+
+ if(argc != 3) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ entry = khash_lookup_int(env, argv[1], khash);
+ if(entry == NULL) {
+ entry = khnode_alloc(NULL);
+ node = (khnode_t*) kl_hnode_getkey(entry);
+ node->key = enif_make_copy(node->env, argv[1]);
+ node->val = enif_make_copy(node->env, argv[2]);
+ kl_hash_insert(khash->h, entry, node);
+ } else {
+ node = (khnode_t*) kl_hnode_getkey(entry);
+ enif_clear_env(node->env);
+ node->key = enif_make_copy(node->env, argv[1]);
+ node->val = enif_make_copy(node->env, argv[2]);
+ }
+
+ unlock_hash(env, khash);
+
+ return priv->atom_ok;
+}
+
+
+static ERL_NIF_TERM
+khash_del(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+ hnode_t* entry;
+ ERL_NIF_TERM ret;
+
+ if(argc != 2) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ entry = khash_lookup_int(env, argv[1], khash);
+ if(entry == NULL) {
+ ret = priv->atom_not_found;
+ } else {
+ kl_hash_delete_free(khash->h, entry);
+ ret = priv->atom_ok;
+ }
+
+ unlock_hash(env, khash);
+
+ return ret;
+}
+
+
+static ERL_NIF_TERM
+khash_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ khash_priv* priv = enif_priv_data(env);
+ khash_t* khash;
+ ERL_NIF_TERM ret;
+
+ if(argc != 1) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[0], priv->res_hash, (void**) &khash)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!lock_hash(env, khash)) {
+ return enif_make_badarg(env);
+ }
+
+ ret = enif_make_uint64(env, kl_hash_count(khash->h));
+
+ unlock_hash(env, khash);
+
+ return ret;
+}
+
+
+static int
+load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
+{
+ const char* mod = "khash";
+ int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER;
+ ErlNifResourceType* res;
+
+ khash_priv* new_priv = (khash_priv*) enif_alloc(sizeof(khash_priv));
+ if(new_priv == NULL) {
+ return 1;
+ }
+
+ res = enif_open_resource_type(env, mod, mod, khash_free, flags, NULL);
+ if(res == NULL) {
+ return 1;
+ }
+
+ new_priv->res_hash = res;
+
+ new_priv->atom_ok = make_atom(env, "ok");
+ new_priv->atom_error = make_atom(env, "error");
+ new_priv->atom_shared = make_atom(env, "shared");
+ new_priv->atom_value = make_atom(env, "value");
+ new_priv->atom_not_found = make_atom(env, "not_found");
+
+ *priv = (void*) new_priv;
+
+ return 0;
+}
+
+
+static int
+reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
+{
+ return 0;
+}
+
+
+static int
+upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info)
+{
+ return load(env, priv, info);
+}
+
+
+static void
+unload(ErlNifEnv* env, void* priv)
+{
+ enif_free(priv);
+ return;
+}
+
+
+static ErlNifFunc funcs[] = {
+ {"new", 1, khash_new},
+ {"to_list", 1, khash_to_list},
+ {"clear", 1, khash_clear},
+ {"lookup", 2, khash_lookup},
+ {"get", 3, khash_get},
+ {"put", 3, khash_put},
+ {"del", 2, khash_del},
+ {"size", 1, khash_size},
+};
+
+
+ERL_NIF_INIT(khash, funcs, &load, &reload, &upgrade, &unload);
diff --git a/rebar.config b/rebar.config
new file mode 100644
index 0000000..59a612b
--- /dev/null
+++ b/rebar.config
@@ -0,0 +1,12 @@
+{port_specs, [
+ {"priv/khash.so", ["c_src/*.c"]}
+]}.
+
+{port_env, [
+ % Development compilation
+ % {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -fPIC"}
+
+ % Production compilation
+ {".*", "CFLAGS", "$CFLAGS -Wall -Werror -DNDEBUG -O3"}
+]}.
+
diff --git a/src/khash.app.src b/src/khash.app.src
new file mode 100644
index 0000000..14a45c7
--- /dev/null
+++ b/src/khash.app.src
@@ -0,0 +1,10 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+{application, khash, [
+ {description, "A NIF wrapper for Kazlib's hash structure."},
+ {vsn, git},
+ {registered, []},
+ {applications, [kernel]}
+]}.
diff --git a/src/khash.erl b/src/khash.erl
new file mode 100644
index 0000000..d3542bf
--- /dev/null
+++ b/src/khash.erl
@@ -0,0 +1,110 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-module(khash).
+-on_load(init/0).
+
+
+-export([
+ new/0,
+ new/1,
+ from_list/1,
+ from_list/2,
+ to_list/1,
+ clear/1,
+ lookup/2,
+ get/2,
+ get/3,
+ put/3,
+ del/2,
+ size/1
+]).
+
+
+-define(NOT_LOADED, not_loaded(?LINE)).
+
+
+-type kv() :: {any(), any()}.
+-type hash() :: term().
+-type option() :: [shared].
+
+
+-spec new() -> {ok, hash()}.
+new() ->
+ new([]).
+
+
+-spec new([option()]) -> {ok, hash()}.
+new(_Options) ->
+ ?NOT_LOADED.
+
+
+-spec from_list([kv()]) -> {ok, hash()}.
+from_list(KVList) ->
+ from_list(KVList, []).
+
+
+-spec from_list([kv()], [option()]) -> {ok, hash()}.
+from_list(KVList, Options) ->
+ {ok, Hash} = ?MODULE:new(Options),
+ lists:foreach(fun({Key, Val}) ->
+ ?MODULE:put(Hash, Key, Val)
+ end, KVList),
+ {ok, Hash}.
+
+
+-spec to_list(hash()) -> [kv()].
+to_list(_Hash) ->
+ ?NOT_LOADED.
+
+
+-spec clear(hash()) -> ok.
+clear(_Hash) ->
+ ?NOT_LOADED.
+
+
+-spec lookup(hash(), any()) -> {value, any()} | not_found.
+lookup(_Hash, _Key) ->
+ ?NOT_LOADED.
+
+
+-spec get(hash(), any()) -> any().
+get(Hash, Key) ->
+ get(Hash, Key, undefined).
+
+
+-spec get(hash(), any(), any()) -> any().
+get(_Hash, _Key, _Default) ->
+ ?NOT_LOADED.
+
+
+-spec put(hash(), any(), any()) -> ok.
+put(_Hash, _Key, _Value) ->
+ ?NOT_LOADED.
+
+
+-spec del(hash(), any()) -> ok.
+del(_Hash, _Key) ->
+ ?NOT_LOADED.
+
+
+-spec size(hash()) -> non_neg_integer().
+size(_Hash) ->
+ ?NOT_LOADED.
+
+
+init() ->
+ PrivDir = case code:priv_dir(?MODULE) of
+ {error, _} ->
+ EbinDir = filename:dirname(code:which(?MODULE)),
+ AppPath = filename:dirname(EbinDir),
+ filename:join(AppPath, "priv");
+ Path ->
+ Path
+ end,
+ erlang:load_nif(filename:join(PrivDir, "khash"), 0).
+
+
+not_loaded(Line) ->
+ erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).
diff --git a/test/001-load.t b/test/001-load.t
new file mode 100755
index 0000000..27b3c07
--- /dev/null
+++ b/test/001-load.t
@@ -0,0 +1,19 @@
+#! /usr/bin/env escript
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+main([]) ->
+ code:add_pathz("test"),
+ code:add_pathz("ebin"),
+
+ Modules = [
+ khash
+ ],
+
+ etap:plan(length(Modules)),
+ lists:foreach(fun(M) ->
+ etap:loaded_ok(M, "Loaded " ++ atom_to_list(M))
+ end, Modules),
+ etap:end_tests().
+
diff --git a/test/002-basics.t b/test/002-basics.t
new file mode 100755
index 0000000..49b7054
--- /dev/null
+++ b/test/002-basics.t
@@ -0,0 +1,31 @@
+#! /usr/bin/env escript
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-mode(compile).
+
+num_cycles() -> 10000.
+
+main([]) ->
+ code:add_pathz("test"),
+ util:run(12, fun() ->
+ test_basic(),
+ ok
+ end).
+
+
+test_basic() ->
+ {ok, C} = khash:new(),
+ etap:is(khash:lookup(C, <<"foo">>), not_found, "Lookup missing is ok"),
+ etap:is(khash:get(C, <<"foo">>), undefined, "Get missing is ok"),
+ etap:is(khash:del(C, <<"foo">>), not_found, "Del missing is ok"),
+ etap:is(khash:put(C, <<"foo">>, bar), ok, "Stored a key"),
+ etap:is(khash:lookup(C, <<"foo">>), {value, bar}, "Lookuped a key"),
+ etap:is(khash:get(C, <<"foo">>), bar, "Retrieved a key"),
+ etap:is(khash:put(C, <<"bar">>, foo), ok, "Stored a key"),
+ etap:is(khash:size(C), 2, "Correct size for hash"),
+ etap:is(khash:del(C, <<"foo">>), ok, "Deleted a key"),
+ etap:is(khash:size(C), 1, "Correct size after delete"),
+ etap:is(khash:clear(C), ok, "Cleared the hash"),
+ etap:is(khash:size(C), 0, "Correct size after clear").
diff --git a/test/003-randomized.t b/test/003-randomized.t
new file mode 100755
index 0000000..5695ce8
--- /dev/null
+++ b/test/003-randomized.t
@@ -0,0 +1,147 @@
+#! /usr/bin/env escript
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-mode(compile).
+
+num_cycles() -> 10000.
+
+main([]) ->
+ code:add_pathz("test"),
+ random:seed(erlang:now()),
+
+ util:run(num_cycles(), fun() ->
+ test_random(),
+ ok
+ end).
+
+
+test_random() ->
+ D = dict:new(),
+ {ok, H} = khash:new(),
+
+ Actions = [
+ {0.1, fun(S) -> run_clear(S) end},
+ {1.0, fun(S) -> run_get2(S) end},
+ {1.0, fun(S) -> run_get3(S) end},
+ {1.0, fun(S) -> run_put(S) end},
+ {1.0, fun(S) -> run_del(S) end},
+ {0.5, fun(S) -> run_size(S) end},
+ %{0.3, fun(S) -> run_keys(S) end},
+ {0.3, fun(S) -> run_to_list(S) end}
+ ],
+
+ run(Actions, num_cycles(), {D, H}).
+
+
+run(_, N, _S) when N =< 0 ->
+ ok;
+run(Actions, N, S0) ->
+ Action = weighted_choice(Actions),
+ S1 = Action(S0),
+ true = check_state(S1),
+ run(Actions, N-1, S1).
+
+
+run_clear({_D0, H}) ->
+ ok = khash:clear(H),
+ {dict:new(), H}.
+
+
+run_get2({D, H}) ->
+ K = random_key(D),
+ case dict:find(K, D) of
+ {ok, Value} ->
+ {value, Value} = khash:lookup(H, K),
+ Value = khash:get(H, K);
+ error ->
+ not_found = khash:lookup(H, K),
+ undefined = khash:get(H, K)
+ end,
+ {D, H}.
+
+
+run_get3({D, H}) ->
+ K = random_key(D),
+ case dict:find(K, D) of
+ {ok, Value} ->
+ {value, Value} = khash:lookup(H, K),
+ Value = khash:get(H, K);
+ error ->
+ Val = random_val(),
+ Val = khash:get(H, K, Val)
+ end,
+ {D, H}.
+
+
+run_put({D0, H}) ->
+ K = random_key(D0),
+ V = random_val(),
+ D1 = dict:store(K, V, D0),
+ ok = khash:put(H, K, V),
+ {D1, H}.
+
+
+run_del({D0, H}) ->
+ K = random_key(D0),
+ D1 = case dict:is_key(K, D0) of
+ true ->
+ ok = khash:del(H, K),
+ dict:erase(K, D0);
+ false ->
+ not_found = khash:del(H, K),
+ D0
+ end,
+ {D1, H}.
+
+
+run_size({D, H}) ->
+ S = dict:size(D),
+ S = khash:size(H),
+ {D, H}.
+
+
+%run_keys({D, H}) ->
+% DKeys = lists:sort(dict:fetch_keys(D)),
+% {ok, HKeys0} = khash:keys(H),
+% HKeys = lists:sort(HKeys0),
+% DKeys = HKeys,
+% {D, H}.
+
+
+run_to_list({D, H}) ->
+ DKVs = lists:sort(dict:to_list(D)),
+ HKVs = lists:sort(khash:to_list(H)),
+ DKVs = HKVs,
+ {D, H}.
+
+
+check_state({D, H}) ->
+ DKVs = lists:sort(dict:to_list(D)),
+ HKVs = lists:sort(khash:to_list(H)),
+ etap:is(DKVs, HKVs, "State matches dict implementation").
+
+
+weighted_choice(Items0) ->
+ Items = lists:sort(Items0),
+ Sum = lists:sum([W || {W, _} <- Items]),
+ Choice = random:uniform() * Sum,
+ weighted_choice(Items, 0.0, Choice).
+
+
+weighted_choice([], _, _) ->
+ throw(bad_choice);
+weighted_choice([{W, _} | Rest], S, C) when W + S < C ->
+ weighted_choice(Rest, W+S, C);
+weighted_choice([{_, I} | _], _, _) ->
+ I.
+
+
+random_key(D) ->
+ Keys = lists:usort(dict:fetch_keys(D) ++ [foo]),
+ lists:nth(random:uniform(length(Keys)), Keys).
+
+
+random_val() ->
+ gen_term:any().
diff --git a/test/004-race-dict-fetch.t b/test/004-race-dict-fetch.t
new file mode 100755
index 0000000..4c6509c
--- /dev/null
+++ b/test/004-race-dict-fetch.t
@@ -0,0 +1,59 @@
+#! /usr/bin/env escript
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-mode(compile).
+
+num_cycles() -> 10000000.
+num_kvs() -> 5000.
+
+
+main([]) ->
+ % Let the VM settle for a bit
+ receive after 1000 -> ok end,
+
+ code:add_pathz("test"),
+ util:run(1, fun() ->
+ test(),
+ ok
+ end).
+
+
+test() ->
+ {DTime, _} = timer:tc(fun() -> test_dict() end),
+ {KTime, _} = timer:tc(fun() -> test_khash() end),
+ etap:diag("Dict: ~10b", [DTime]),
+ etap:diag("KHash: ~10b", [KTime]),
+ etap:is_greater(DTime, KTime, "Dict is slower than khash").
+
+
+test_dict() ->
+ erlang:garbage_collect(),
+ D = dict:from_list(kv_data()),
+ test_dict(D, num_cycles()).
+
+
+test_dict(_D, 0) ->
+ ok;
+test_dict(D, NumCycles) ->
+ bing = dict:fetch(1, D),
+ test_dict(D, NumCycles - 1).
+
+
+test_khash() ->
+ erlang:garbage_collect(),
+ {ok, H} = khash:from_list(kv_data()),
+ test_khash(H, num_cycles()).
+
+
+test_khash(_H, 0) ->
+ ok;
+test_khash(H, NumCycles) ->
+ bing = khash:get(H, 1),
+ test_khash(H, NumCycles - 1).
+
+
+kv_data() ->
+ [{I, bing} || I <- lists:seq(1, num_kvs())].
+
diff --git a/test/005-race-dict-store.t b/test/005-race-dict-store.t
new file mode 100755
index 0000000..a390d7c
--- /dev/null
+++ b/test/005-race-dict-store.t
@@ -0,0 +1,56 @@
+#! /usr/bin/env escript
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-mode(compile).
+
+num_cycles() -> 1000000.
+num_kvs() -> 10000.
+
+main([]) ->
+ % Let the VM settle for a bit
+ receive after 1000 -> ok end,
+
+ code:add_pathz("test"),
+ util:run(1, fun() ->
+ test(),
+ ok
+ end).
+
+
+test() ->
+ {DTime, _} = timer:tc(fun() -> test_dict() end),
+ {KTime, _} = timer:tc(fun() -> test_khash() end),
+ etap:diag("Dict: ~10b", [DTime]),
+ etap:diag("KHash: ~10b", [KTime]),
+ etap:is_greater(DTime, KTime, "Dict is slower than khash").
+
+
+test_dict() ->
+ D = dict:from_list(kv_data()),
+ test_dict(D, num_cycles()).
+
+
+test_dict(_D, 0) ->
+ ok;
+test_dict(D, NumCycles) ->
+ D2 = dict:store(1, bing, D),
+ test_dict(D2, NumCycles - 1).
+
+
+test_khash() ->
+ {ok, H} = khash:from_list(kv_data()),
+ test_khash(H, num_cycles()).
+
+
+test_khash(_H, 0) ->
+ ok;
+test_khash(H, NumCycles) ->
+ khash:put(H, 1, bing),
+ test_khash(H, NumCycles - 1).
+
+
+kv_data() ->
+ [{I, bing} || I <- lists:seq(1, num_kvs())].
+
diff --git a/test/etap.erl b/test/etap.erl
new file mode 100644
index 0000000..6924d09
--- /dev/null
+++ b/test/etap.erl
@@ -0,0 +1,612 @@
+%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net>
+%%
+%% Permission is hereby granted, free of charge, to any person
+%% obtaining a copy of this software and associated documentation
+%% files (the "Software"), to deal in the Software without
+%% restriction, including without limitation the rights to use,
+%% copy, modify, merge, publish, distribute, sublicense, and/or sell
+%% copies of the Software, and to permit persons to whom the
+%% Software is furnished to do so, subject to the following
+%% conditions:
+%%
+%% The above copyright notice and this permission notice shall be
+%% included in all copies or substantial portions of the Software.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+%% OTHER DEALINGS IN THE SOFTWARE.
+%%
+%% @author Nick Gerakines <nick@gerakines.net> [http://socklabs.com/]
+%% @author Jeremy Wall <jeremy@marzhillstudios.com>
+%% @version 0.3.4
+%% @copyright 2007-2008 Jeremy Wall, 2008-2009 Nick Gerakines
+%% @reference http://testanything.org/wiki/index.php/Main_Page
+%% @reference http://en.wikipedia.org/wiki/Test_Anything_Protocol
+%% @todo Finish implementing the skip directive.
+%% @todo Document the messages handled by this receive loop.
+%% @todo Explain in documentation why we use a process to handle test input.
+%% @doc etap is a TAP testing module for Erlang components and applications.
+%% This module allows developers to test their software using the TAP method.
+%%
+%% <blockquote cite="http://en.wikipedia.org/wiki/Test_Anything_Protocol"><p>
+%% TAP, the Test Anything Protocol, is a simple text-based interface between
+%% testing modules in a test harness. TAP started life as part of the test
+%% harness for Perl but now has implementations in C/C++, Python, PHP, Perl
+%% and probably others by the time you read this.
+%% </p></blockquote>
+%%
+%% The testing process begins by defining a plan using etap:plan/1, running
+%% a number of etap tests and then calling eta:end_tests/0. Please refer to
+%% the Erlang modules in the t directory of this project for example tests.
+-module(etap).
+-vsn("0.3.4").
+
+-export([
+ ensure_test_server/0,
+ start_etap_server/0,
+ test_server/1,
+ msg/1, msg/2,
+ diag/1, diag/2,
+ expectation_mismatch_message/3,
+ plan/1,
+ end_tests/0,
+ not_ok/2, ok/2, is_ok/2, is/3, isnt/3, any/3, none/3,
+ fun_is/3, expect_fun/3, expect_fun/4,
+ is_greater/3,
+ skip/1, skip/2,
+ datetime/1,
+ skip/3,
+ bail/0, bail/1,
+ test_state/0, failure_count/0
+]).
+
+-export([
+ contains_ok/3,
+ is_before/4
+]).
+
+-export([
+ is_pid/2,
+ is_alive/2,
+ is_mfa/3
+]).
+
+-export([
+ loaded_ok/2,
+ can_ok/2, can_ok/3,
+ has_attrib/2, is_attrib/3,
+ is_behaviour/2
+]).
+
+-export([
+ dies_ok/2,
+ lives_ok/2,
+ throws_ok/3
+]).
+
+
+-record(test_state, {
+ planned = 0,
+ count = 0,
+ pass = 0,
+ fail = 0,
+ skip = 0,
+ skip_reason = ""
+}).
+
+%% @spec plan(N) -> Result
+%% N = unknown | skip | {skip, string()} | integer()
+%% Result = ok
+%% @doc Create a test plan and boot strap the test server.
+plan(unknown) ->
+ ensure_test_server(),
+ etap_server ! {self(), plan, unknown},
+ ok;
+plan(skip) ->
+ io:format("1..0 # skip~n");
+plan({skip, Reason}) ->
+ io:format("1..0 # skip ~s~n", [Reason]);
+plan(N) when is_integer(N), N > 0 ->
+ ensure_test_server(),
+ etap_server ! {self(), plan, N},
+ ok.
+
+%% @spec end_tests() -> ok
+%% @doc End the current test plan and output test results.
+%% @todo This should probably be done in the test_server process.
+end_tests() ->
+ case whereis(etap_server) of
+ undefined -> self() ! true;
+ _ -> etap_server ! {self(), state}
+ end,
+ State = receive X -> X end,
+ if
+ State#test_state.planned == -1 ->
+ io:format("1..~p~n", [State#test_state.count]);
+ true ->
+ ok
+ end,
+ case whereis(etap_server) of
+ undefined -> ok;
+ _ -> etap_server ! done, ok
+ end.
+
+bail() ->
+ bail("").
+
+bail(Reason) ->
+ etap_server ! {self(), diag, "Bail out! " ++ Reason},
+ etap_server ! done, ok,
+ ok.
+
+%% @spec test_state() -> Return
+%% Return = test_state_record() | {error, string()}
+%% @doc Return the current test state
+test_state() ->
+ etap_server ! {self(), state},
+ receive
+ X when is_record(X, test_state) -> X
+ after
+ 1000 -> {error, "Timed out waiting for etap server reply.~n"}
+ end.
+
+%% @spec failure_count() -> Return
+%% Return = integer() | {error, string()}
+%% @doc Return the current failure count
+failure_count() ->
+ case test_state() of
+ #test_state{fail=FailureCount} -> FailureCount;
+ X -> X
+ end.
+
+%% @spec msg(S) -> ok
+%% S = string()
+%% @doc Print a message in the test output.
+msg(S) -> etap_server ! {self(), diag, S}, ok.
+
+%% @spec msg(Format, Data) -> ok
+%% Format = atom() | string() | binary()
+%% Data = [term()]
+%% UnicodeList = [Unicode]
+%% Unicode = int()
+%% @doc Print a message in the test output.
+%% Function arguments are passed through io_lib:format/2.
+msg(Format, Data) -> msg(io_lib:format(Format, Data)).
+
+%% @spec diag(S) -> ok
+%% S = string()
+%% @doc Print a debug/status message related to the test suite.
+diag(S) -> msg("# " ++ S).
+
+%% @spec diag(Format, Data) -> ok
+%% Format = atom() | string() | binary()
+%% Data = [term()]
+%% UnicodeList = [Unicode]
+%% Unicode = int()
+%% @doc Print a debug/status message related to the test suite.
+%% Function arguments are passed through io_lib:format/2.
+diag(Format, Data) -> diag(io_lib:format(Format, Data)).
+
+%% @spec expectation_mismatch_message(Got, Expected, Desc) -> ok
+%% Got = any()
+%% Expected = any()
+%% Desc = string()
+%% @doc Print an expectation mismatch message in the test output.
+expectation_mismatch_message(Got, Expected, Desc) ->
+ msg(" ---"),
+ msg(" description: ~p", [Desc]),
+ msg(" found: ~p", [Got]),
+ msg(" wanted: ~p", [Expected]),
+ msg(" ..."),
+ ok.
+
+% @spec evaluate(Pass, Got, Expected, Desc) -> Result
+%% Pass = true | false
+%% Got = any()
+%% Expected = any()
+%% Desc = string()
+%% Result = true | false
+%% @doc Evaluate a test statement, printing an expectation mismatch message
+%% if the test failed.
+evaluate(Pass, Got, Expected, Desc) ->
+ case mk_tap(Pass, Desc) of
+ false ->
+ expectation_mismatch_message(Got, Expected, Desc),
+ false;
+ true ->
+ true
+ end.
+
+%% @spec ok(Expr, Desc) -> Result
+%% Expr = true | false
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that a statement is true.
+ok(Expr, Desc) -> evaluate(Expr == true, Expr, true, Desc).
+
+%% @spec not_ok(Expr, Desc) -> Result
+%% Expr = true | false
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that a statement is false.
+not_ok(Expr, Desc) -> evaluate(Expr == false, Expr, false, Desc).
+
+%% @spec is_ok(Expr, Desc) -> Result
+%% Expr = any()
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that two values are the same.
+is_ok(Expr, Desc) -> evaluate(Expr == ok, Expr, ok, Desc).
+
+%% @spec is(Got, Expected, Desc) -> Result
+%% Got = any()
+%% Expected = any()
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that two values are the same.
+is(Got, Expected, Desc) -> evaluate(Got == Expected, Got, Expected, Desc).
+
+%% @spec isnt(Got, Expected, Desc) -> Result
+%% Got = any()
+%% Expected = any()
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that two values are not the same.
+isnt(Got, Expected, Desc) -> evaluate(Got /= Expected, Got, Expected, Desc).
+
+%% @spec is_greater(ValueA, ValueB, Desc) -> Result
+%% ValueA = number()
+%% ValueB = number()
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that an integer is greater than another.
+is_greater(ValueA, ValueB, Desc) when is_integer(ValueA), is_integer(ValueB) ->
+ mk_tap(ValueA > ValueB, Desc).
+
+%% @spec any(Got, Items, Desc) -> Result
+%% Got = any()
+%% Items = [any()]
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that an item is in a list.
+any(Got, Items, Desc) when is_function(Got) ->
+ is(lists:any(Got, Items), true, Desc);
+any(Got, Items, Desc) ->
+ is(lists:member(Got, Items), true, Desc).
+
+%% @spec none(Got, Items, Desc) -> Result
+%% Got = any()
+%% Items = [any()]
+%% Desc = string()
+%% Result = true | false
+%% @doc Assert that an item is not in a list.
+none(Got, Items, Desc) when is_function(Got) ->
+ is(lists:any(Got, Items), false, Desc);
+none(Got, Items, Desc) ->
+ is(lists:member(Got, Items), false, Desc).
+
+%% @spec fun_is(Fun, Expected, Desc) -> Result
+%% Fun = function()
+%% Expected = any()
+%% Desc = string()
+%% Result = true | false
+%% @doc Use an anonymous function to assert a pattern match.
+fun_is(Fun, Expected, Desc) when is_function(Fun) ->
+ is(Fun(Expected), true, Desc).
+
+%% @spec expect_fun(ExpectFun, Got, Desc) -> Result
+%% ExpectFun = function()
+%% Got = any()
+%% Desc = string()
+%% Result = true | false
+%% @doc Use an anonymous function to assert a pattern match, using actual
+%% value as the argument to the function.
+expect_fun(ExpectFun, Got, Desc) ->
+ evaluate(ExpectFun(Got), Got, ExpectFun, Desc).
+
+%% @spec expect_fun(ExpectFun, Got, Desc, ExpectStr) -> Result
+%% ExpectFun = function()
+%% Got = any()
+%% Desc = string()
+%% ExpectStr = string()
+%% Result = true | false
+%% @doc Use an anonymous function to assert a pattern match, using actual
+%% value as the argument to the function.
+expect_fun(ExpectFun, Got, Desc, ExpectStr) ->
+ evaluate(ExpectFun(Got), Got, ExpectStr, Desc).
+
+%% @equiv skip(TestFun, "")
+skip(TestFun) when is_function(TestFun) ->
+ skip(TestFun, "").
+
+%% @spec skip(TestFun, Reason) -> ok
+%% TestFun = function()
+%% Reason = string()
+%% @doc Skip a test.
+skip(TestFun, Reason) when is_function(TestFun), is_list(Reason) ->
+ begin_skip(Reason),
+ catch TestFun(),
+ end_skip(),
+ ok.
+
+%% @spec skip(Q, TestFun, Reason) -> ok
+%% Q = true | false | function()
+%% TestFun = function()
+%% Reason = string()
+%% @doc Skips a test conditionally. The first argument to this function can
+%% either be the 'true' or 'false' atoms or a function that returns 'true' or
+%% 'false'.
+skip(QFun, TestFun, Reason) when is_function(QFun), is_function(TestFun), is_list(Reason) ->
+ case QFun() of
+ true -> begin_skip(Reason), TestFun(), end_skip();
+ _ -> TestFun()
+ end,
+ ok;
+
+skip(Q, TestFun, Reason) when is_function(TestFun), is_list(Reason), Q == true ->
+ begin_skip(Reason),
+ TestFun(),
+ end_skip(),
+ ok;
+
+skip(_, TestFun, Reason) when is_function(TestFun), is_list(Reason) ->
+ TestFun(),
+ ok.
+
+%% @private
+begin_skip(Reason) ->
+ etap_server ! {self(), begin_skip, Reason}.
+
+%% @private
+end_skip() ->
+ etap_server ! {self(), end_skip}.
+
+%% @spec contains_ok(string(), string(), string()) -> true | false
+%% @doc Assert that a string is contained in another string.
+contains_ok(Source, String, Desc) ->
+ etap:isnt(
+ string:str(Source, String),
+ 0,
+ Desc
+ ).
+
+%% @spec is_before(string(), string(), string(), string()) -> true | false
+%% @doc Assert that a string comes before another string within a larger body.
+is_before(Source, StringA, StringB, Desc) ->
+ etap:is_greater(
+ string:str(Source, StringB),
+ string:str(Source, StringA),
+ Desc
+ ).
+
+%% @doc Assert that a given variable is a pid.
+is_pid(Pid, Desc) when is_pid(Pid) -> etap:ok(true, Desc);
+is_pid(_, Desc) -> etap:ok(false, Desc).
+
+%% @doc Assert that a given process/pid is alive.
+is_alive(Pid, Desc) ->
+ etap:ok(erlang:is_process_alive(Pid), Desc).
+
+%% @doc Assert that the current function of a pid is a given {M, F, A} tuple.
+is_mfa(Pid, MFA, Desc) ->
+ etap:is({current_function, MFA}, erlang:process_info(Pid, current_function), Desc).
+
+%% @spec loaded_ok(atom(), string()) -> true | false
+%% @doc Assert that a module has been loaded successfully.
+loaded_ok(M, Desc) when is_atom(M) ->
+ etap:fun_is(fun({module, _}) -> true; (_) -> false end, code:load_file(M), Desc).
+
+%% @spec can_ok(atom(), atom()) -> true | false
+%% @doc Assert that a module exports a given function.
+can_ok(M, F) when is_atom(M), is_atom(F) ->
+ Matches = [X || {X, _} <- M:module_info(exports), X == F],
+ etap:ok(Matches > 0, lists:concat([M, " can ", F])).
+
+%% @spec can_ok(atom(), atom(), integer()) -> true | false
+%% @doc Assert that a module exports a given function with a given arity.
+can_ok(M, F, A) when is_atom(M); is_atom(F), is_number(A) ->
+ Matches = [X || X <- M:module_info(exports), X == {F, A}],
+ etap:ok(Matches > 0, lists:concat([M, " can ", F, "/", A])).
+
+%% @spec has_attrib(M, A) -> true | false
+%% M = atom()
+%% A = atom()
+%% @doc Asserts that a module has a given attribute.
+has_attrib(M, A) when is_atom(M), is_atom(A) ->
+ etap:isnt(
+ proplists:get_value(A, M:module_info(attributes), 'asdlkjasdlkads'),
+ 'asdlkjasdlkads',
+ lists:concat([M, " has attribute ", A])
+ ).
+
+%% @spec has_attrib(M, A. V) -> true | false
+%% M = atom()
+%% A = atom()
+%% V = any()
+%% @doc Asserts that a module has a given attribute with a given value.
+is_attrib(M, A, V) when is_atom(M) andalso is_atom(A) ->
+ etap:is(
+ proplists:get_value(A, M:module_info(attributes)),
+ [V],
+ lists:concat([M, "'s ", A, " is ", V])
+ ).
+
+%% @spec is_behavior(M, B) -> true | false
+%% M = atom()
+%% B = atom()
+%% @doc Asserts that a given module has a specific behavior.
+is_behaviour(M, B) when is_atom(M) andalso is_atom(B) ->
+ is_attrib(M, behaviour, B).
+
+%% @doc Assert that an exception is raised when running a given function.
+dies_ok(F, Desc) ->
+ case (catch F()) of
+ {'EXIT', _} -> etap:ok(true, Desc);
+ _ -> etap:ok(false, Desc)
+ end.
+
+%% @doc Assert that an exception is not raised when running a given function.
+lives_ok(F, Desc) ->
+ etap:is(try_this(F), success, Desc).
+
+%% @doc Assert that the exception thrown by a function matches the given exception.
+throws_ok(F, Exception, Desc) ->
+ try F() of
+ _ -> etap:ok(nok, Desc)
+ catch
+ _:E ->
+ etap:is(E, Exception, Desc)
+ end.
+
+%% @private
+%% @doc Run a function and catch any exceptions.
+try_this(F) when is_function(F, 0) ->
+ try F() of
+ _ -> success
+ catch
+ throw:E -> {throw, E};
+ error:E -> {error, E};
+ exit:E -> {exit, E}
+ end.
+
+%% @private
+%% @doc Start the etap_server process if it is not running already.
+ensure_test_server() ->
+ case whereis(etap_server) of
+ undefined ->
+ proc_lib:start(?MODULE, start_etap_server,[]);
+ _ ->
+ diag("The test server is already running.")
+ end.
+
+%% @private
+%% @doc Start the etap_server loop and register itself as the etap_server
+%% process.
+start_etap_server() ->
+ catch register(etap_server, self()),
+ proc_lib:init_ack(ok),
+ etap:test_server(#test_state{
+ planned = 0,
+ count = 0,
+ pass = 0,
+ fail = 0,
+ skip = 0,
+ skip_reason = ""
+ }).
+
+
+%% @private
+%% @doc The main etap_server receive/run loop. The etap_server receive loop
+%% responds to seven messages apperatining to failure or passing of tests.
+%% It is also used to initiate the testing process with the {_, plan, _}
+%% message that clears the current test state.
+test_server(State) ->
+ NewState = receive
+ {_From, plan, unknown} ->
+ io:format("# Current time local ~s~n", [datetime(erlang:localtime())]),
+ io:format("# Using etap version ~p~n", [ proplists:get_value(vsn, proplists:get_value(attributes, etap:module_info())) ]),
+ State#test_state{
+ planned = -1,
+ count = 0,
+ pass = 0,
+ fail = 0,
+ skip = 0,
+ skip_reason = ""
+ };
+ {_From, plan, N} ->
+ io:format("# Current time local ~s~n", [datetime(erlang:localtime())]),
+ io:format("# Using etap version ~p~n", [ proplists:get_value(vsn, proplists:get_value(attributes, etap:module_info())) ]),
+ io:format("1..~p~n", [N]),
+ State#test_state{
+ planned = N,
+ count = 0,
+ pass = 0,
+ fail = 0,
+ skip = 0,
+ skip_reason = ""
+ };
+ {_From, begin_skip, Reason} ->
+ State#test_state{
+ skip = 1,
+ skip_reason = Reason
+ };
+ {_From, end_skip} ->
+ State#test_state{
+ skip = 0,
+ skip_reason = ""
+ };
+ {_From, pass, Desc} ->
+ FullMessage = skip_diag(
+ " - " ++ Desc,
+ State#test_state.skip,
+ State#test_state.skip_reason
+ ),
+ io:format("ok ~p ~s~n", [State#test_state.count + 1, FullMessage]),
+ State#test_state{
+ count = State#test_state.count + 1,
+ pass = State#test_state.pass + 1
+ };
+
+ {_From, fail, Desc} ->
+ FullMessage = skip_diag(
+ " - " ++ Desc,
+ State#test_state.skip,
+ State#test_state.skip_reason
+ ),
+ io:format("not ok ~p ~s~n", [State#test_state.count + 1, FullMessage]),
+ State#test_state{
+ count = State#test_state.count + 1,
+ fail = State#test_state.fail + 1
+ };
+ {From, state} ->
+ From ! State,
+ State;
+ {_From, diag, Message} ->
+ io:format("~s~n", [Message]),
+ State;
+ {From, count} ->
+ From ! State#test_state.count,
+ State;
+ {From, is_skip} ->
+ From ! State#test_state.skip,
+ State;
+ done ->
+ exit(normal)
+ end,
+ test_server(NewState).
+
+%% @private
+%% @doc Process the result of a test and send it to the etap_server process.
+mk_tap(Result, Desc) ->
+ IsSkip = lib:sendw(etap_server, is_skip),
+ case [IsSkip, Result] of
+ [_, true] ->
+ etap_server ! {self(), pass, Desc},
+ true;
+ [1, _] ->
+ etap_server ! {self(), pass, Desc},
+ true;
+ _ ->
+ etap_server ! {self(), fail, Desc},
+ false
+ end.
+
+%% @private
+%% @doc Format a date/time string.
+datetime(DateTime) ->
+ {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime,
+ io_lib:format("~4.10.0B-~2.10.0B-~2.10.0B ~2.10.0B:~2.10.0B:~2.10.0B", [Year, Month, Day, Hour, Min, Sec]).
+
+%% @private
+%% @doc Craft an output message taking skip/todo into consideration.
+skip_diag(Message, 0, _) ->
+ Message;
+skip_diag(_Message, 1, "") ->
+ " # SKIP";
+skip_diag(_Message, 1, Reason) ->
+ " # SKIP : " ++ Reason.
diff --git a/test/gen_term.erl b/test/gen_term.erl
new file mode 100644
index 0000000..11a4f96
--- /dev/null
+++ b/test/gen_term.erl
@@ -0,0 +1,131 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-module(gen_term).
+
+-export([
+ any/0,
+ any/1,
+
+ gen_atom/1,
+ gen_integer/1,
+ gen_float/1,
+ gen_reference/1,
+ gen_port/1,
+ gen_pid/1,
+ gen_tuple/1,
+ gen_list/1,
+ gen_short_string/1,
+ gen_string/1,
+ gen_binary/1,
+ gen_bitstring/1,
+ gen_bignum/1,
+ gen_function/1
+]).
+
+
+any() ->
+ any(16).
+
+
+any(MaxSize) when MaxSize =< 0 ->
+ Fun = choice(value_types()),
+ ?MODULE:Fun(MaxSize);
+any(MaxSize) ->
+ Fun = choice(all_types()),
+ ?MODULE:Fun(MaxSize).
+
+
+gen_atom(MaxSize) ->
+ list_to_atom(gen_short_string(MaxSize)).
+
+
+gen_integer(_) ->
+ Value = case random:uniform() < 0.5 of
+ true -> random:uniform(127);
+ false -> random:uniform(16#FFFFFFFF)
+ end,
+ case random:uniform() < 0.5 of
+ true -> -1 * Value;
+ false -> Value
+ end.
+
+
+gen_float(_) ->
+ random:uniform() * float(16#FFFFFFFF).
+
+
+gen_reference(_) ->
+ erlang:make_ref().
+
+
+gen_port(_) ->
+ Ports = erlang:ports(),
+ lists:nth(random:uniform(length(Ports)), Ports).
+
+
+gen_pid(_) ->
+ Pids = erlang:processes(),
+ lists:nth(random:uniform(length(Pids)), Pids).
+
+
+gen_tuple(MaxSize) ->
+ list_to_tuple(gen_list(MaxSize)).
+
+
+gen_list(MaxSize) ->
+ Width = random:uniform(MaxSize),
+ [any(MaxSize-Width) || _ <- lists:seq(1, Width)].
+
+
+gen_short_string(_) ->
+ Size = random:uniform(255),
+ [random:uniform(127) || _ <- lists:seq(1, Size)].
+
+
+gen_string(_) ->
+ Size = random:uniform(4096),
+ [random:uniform(127) || _ <- lists:seq(1, Size)].
+
+
+gen_binary(MaxSize) ->
+ list_to_binary(gen_string(MaxSize)).
+
+
+gen_bitstring(MaxSize) ->
+ B = gen_binary(MaxSize),
+ <<2:4/integer, B/binary>>.
+
+
+gen_bignum(_) ->
+ 16#FFFFFFFFFFFFFFFF + random:uniform(16#FFFFFFFF).
+
+
+gen_function(_) ->
+ choice(all_types()).
+
+
+choice(Options) ->
+ lists:nth(random:uniform(length(Options)), Options).
+
+
+value_types() ->
+ [
+ gen_atom,
+ gen_integer,
+ gen_float,
+ gen_reference,
+ gen_port,
+ gen_pid,
+ gen_short_string,
+ gen_string,
+ gen_binary,
+ gen_bitstring,
+ gen_bignum,
+ gen_function
+ ].
+
+
+all_types() ->
+ value_types() ++ [gen_tuple, gen_list].
diff --git a/test/util.erl b/test/util.erl
new file mode 100644
index 0000000..4f0bbf6
--- /dev/null
+++ b/test/util.erl
@@ -0,0 +1,25 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-module(util).
+-compile(export_all).
+
+
+init_code_path() ->
+ code:add_pathz("ebin").
+
+
+run(Plan, Fun) ->
+ init_code_path(),
+ etap:plan(Plan),
+ case (catch Fun()) of
+ ok ->
+ etap:end_tests();
+ Other ->
+ etap:diag(io_lib:format("Test died abnormally:~n~p", [Other])),
+ timer:sleep(500),
+ etap:bail(Other)
+ end,
+ ok.
+