src/backend/lib/integerset.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * integerset.c
  *	  Data structure to hold a large set of 64-bit integers efficiently
  *
  * IntegerSet provides an in-memory data structure to hold a set of
  * arbitrary 64-bit integers.  Internally, the values are stored in a
  * B-tree, with a special packed representation at the leaf level using
  * the Simple-8b algorithm, which can pack clusters of nearby values
  * very tightly.
  *
  * Memory consumption depends on the number of values stored, but also
  * on how far the values are from each other.  In the best case, with
  * long runs of consecutive integers, memory consumption can be as low as
  * 0.1 bytes per integer.  In the worst case, if integers are more than
  * 2^32 apart, it uses about 8 bytes per integer.  In typical use, the
  * consumption per integer is somewhere between those extremes, depending
  * on the range of integers stored, and how "clustered" they are.
  *
  *
  * Interface
  * ---------
  *
  *	intset_create			- Create a new, empty set
  *	intset_add_member		- Add an integer to the set
  *	intset_is_member		- Test if an integer is in the set
  *	intset_begin_iterate	- Begin iterating through all integers in set
  *	intset_iterate_next		- Return next set member, if any
  *
  * intset_create() creates the set in the current memory context.  Subsequent
  * operations that add to the data structure will continue to allocate from
  * that same context, even if it's not current anymore.
  *
  * Note that there is no function to free an integer set.  If you need to do
  * that, create a dedicated memory context to hold it, and destroy the memory
  * context instead.
  *
  *
  * Limitations
  * -----------
  *
  * - Values must be added in order.  (Random insertions would require
  *   splitting nodes, which hasn't been implemented.)
  *
  * - Values cannot be added while iteration is in progress.
  *
  * - No support for removing values.
  *
  * None of these limitations are fundamental to the data structure, so they
  * could be lifted if needed, by writing some new code.  But the current
  * users of this facility don't need them.
  *
  *
  * References
  * ----------
  *
  * Simple-8b encoding is based on:
  *
  * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
  *   Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
  *   (https://doi.org/10.1002/spe.948)
  *
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
  *	  src/backend/lib/integerset.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include "access/htup_details.h"
 #include "lib/integerset.h"
 #include "port/pg_bitutils.h"
 #include "utils/memutils.h"


 /*
  * Maximum number of integers that can be encoded in a single Simple-8b
  * codeword. (Defined here before anything else, so that we can size arrays
  * using this.)
  */
 #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240

 /*
  * Parameters for shape of the in-memory B-tree.
  *
  * These set the size of each internal and leaf node.  They don't necessarily
  * need to be the same, because the tree is just an in-memory structure.
  * With the default 64, each node is about 1 kb.
  *
  * If you change these, you must recalculate MAX_TREE_LEVELS, too!
  */
 #define MAX_INTERNAL_ITEMS	64
 #define MAX_LEAF_ITEMS	64

 /*
  * Maximum height of the tree.
  *
  * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree.  The
  * theoretical maximum number of items that we can store in a set is 2^64,
  * so MAX_TREE_LEVELS should be set so that:
  *
  *   MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
  *
  * In practice, we'll need far fewer levels, because you will run out of
  * memory long before reaching that number, but let's be conservative.
  */
 #define MAX_TREE_LEVELS		11

 /*
  * Node structures, for the in-memory B-tree.
  *
  * An internal node holds a number of downlink pointers to leaf nodes, or
  * to internal nodes on a lower level.  For each downlink, the key value
  * corresponding to the lower level node is stored in a sorted array.  The
  * stored key values are low keys.  In other words, if the downlink has value
  * X, then all items stored on that child are >= X.
  *
  * Each leaf node holds a number of "items", with a varying number of
  * integers packed into each item.  Each item consists of two 64-bit words:
  * The first word holds the first integer stored in the item, in plain format.
  * The second word contains between 0 and 240 more integers, packed using
  * Simple-8b encoding.  By storing the first integer in plain, unpacked,
  * format, we can use binary search to quickly find an item that holds (or
  * would hold) a particular integer.  And by storing the rest in packed form,
  * we still get pretty good memory density, if there are clusters of integers
  * with similar values.
  *
  * Each leaf node also has a pointer to the next leaf node, so that the leaf
  * nodes can be easily walked from beginning to end when iterating.
  */
 typedef struct intset_node intset_node;
 typedef struct intset_leaf_node intset_leaf_node;
 typedef struct intset_internal_node intset_internal_node;

 /* Common structure of both leaf and internal nodes. */
 struct intset_node
 {
 	uint16		level;			/* tree level of this node */
 	uint16		num_items;		/* number of items in this node */
 };

 /* Internal node */
 struct intset_internal_node
 {
 	/* common header, must match intset_node */
 	uint16		level;			/* >= 1 on internal nodes */
 	uint16		num_items;

 	/*
 	 * 'values' is an array of key values, and 'downlinks' are pointers to
 	 * lower-level nodes, corresponding to the key values.
 	 */
 	uint64		values[MAX_INTERNAL_ITEMS];
 	intset_node *downlinks[MAX_INTERNAL_ITEMS];
 };

 /* Leaf node */
 typedef struct
 {
 	uint64		first;			/* first integer in this item */
 	uint64		codeword;		/* simple8b encoded differences from 'first' */
 } leaf_item;

 #define MAX_VALUES_PER_LEAF_ITEM	(1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)

 struct intset_leaf_node
 {
 	/* common header, must match intset_node */
 	uint16		level;			/* 0 on leafs */
 	uint16		num_items;

 	intset_leaf_node *next;		/* right sibling, if any */

 	leaf_item	items[MAX_LEAF_ITEMS];
 };

 /*
  * We buffer insertions in a simple array, before packing and inserting them
  * into the B-tree.  MAX_BUFFERED_VALUES sets the size of the buffer.  The
  * encoder assumes that it is large enough that we can always fill a leaf
  * item with buffered new items.  In other words, MAX_BUFFERED_VALUES must be
  * larger than MAX_VALUES_PER_LEAF_ITEM.  For efficiency, make it much larger.
  */
 #define MAX_BUFFERED_VALUES			(MAX_VALUES_PER_LEAF_ITEM * 2)

 /*
  * IntegerSet is the top-level object representing the set.
  *
  * The integers are stored in an in-memory B-tree structure, plus an array
  * for newly-added integers.  IntegerSet also tracks information about memory
  * usage, as well as the current position when iterating the set with
  * intset_begin_iterate / intset_iterate_next.
  */
 struct IntegerSet
 {
 	/*
 	 * 'context' is the memory context holding this integer set and all its
 	 * tree nodes.
 	 *
 	 * 'mem_used' tracks the amount of memory used.  We don't do anything with
 	 * it in integerset.c itself, but the callers can ask for it with
 	 * intset_memory_usage().
 	 */
 	MemoryContext context;
 	uint64		mem_used;

 	uint64		num_entries;	/* total # of values in the set */
 	uint64		highest_value;	/* highest value stored in this set */

 	/*
 	 * B-tree to hold the packed values.
 	 *
 	 * 'rightmost_nodes' hold pointers to the rightmost node on each level.
 	 * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
 	 * parent, and so forth, all the way up to the root. These are needed when
 	 * adding new values. (Currently, we require that new values are added at
 	 * the end.)
 	 */
 	int			num_levels;		/* height of the tree */
 	intset_node *root;			/* root node */
 	intset_node *rightmost_nodes[MAX_TREE_LEVELS];
 	intset_leaf_node *leftmost_leaf;	/* leftmost leaf node */

 	/*
 	 * Holding area for new items that haven't been inserted to the tree yet.
 	 */
 	uint64		buffered_values[MAX_BUFFERED_VALUES];
 	int			num_buffered_values;

 	/*
 	 * Iterator support.
 	 *
 	 * 'iter_values' is an array of integers ready to be returned to the
 	 * caller; 'iter_num_values' is the length of that array, and
 	 * 'iter_valueno' is the next index.  'iter_node' and 'iter_itemno' point
 	 * to the leaf node, and item within the leaf node, to get the next batch
 	 * of values from.
 	 *
 	 * Normally, 'iter_values' points to 'iter_values_buf', which holds items
 	 * decoded from a leaf item.  But after we have scanned the whole B-tree,
 	 * we iterate through all the unbuffered values, too, by pointing
 	 * iter_values to 'buffered_values'.
 	 */
 	bool		iter_active;	/* is iteration in progress? */

 	const uint64 *iter_values;
 	int			iter_num_values;	/* number of elements in 'iter_values' */
 	int			iter_valueno;	/* next index into 'iter_values' */

 	intset_leaf_node *iter_node;	/* current leaf node */
 	int			iter_itemno;	/* next item in 'iter_node' to decode */

 	uint64		iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
 };

 /*
  * Prototypes for internal functions.
  */
 static void intset_update_upper(IntegerSet *intset, int level,
 								intset_node *child, uint64 child_key);
 static void intset_flush_buffered_values(IntegerSet *intset);

 static int	intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
 								  bool nextkey);
 static int	intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
 								bool nextkey);

 static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
 static int	simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
 static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);


 /*
  * Create a new, initially empty, integer set.
  *
  * The integer set is created in the current memory context.
  * We will do all subsequent allocations in the same context, too, regardless
  * of which memory context is current when new integers are added to the set.
  */
 IntegerSet *
 intset_create(void)
 {
 	IntegerSet *intset;

 	intset = (IntegerSet *) palloc(sizeof(IntegerSet));
 	intset->context = CurrentMemoryContext;
 	intset->mem_used = GetMemoryChunkSpace(intset);

 	intset->num_entries = 0;
 	intset->highest_value = 0;

 	intset->num_levels = 0;
 	intset->root = NULL;
 	memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
 	intset->leftmost_leaf = NULL;

 	intset->num_buffered_values = 0;

 	intset->iter_active = false;
 	intset->iter_node = NULL;
 	intset->iter_itemno = 0;
 	intset->iter_valueno = 0;
 	intset->iter_num_values = 0;
 	intset->iter_values = NULL;

 	return intset;
 }

 /*
  * Allocate a new node.
  */
 static intset_internal_node *
 intset_new_internal_node(IntegerSet *intset)
 {
 	intset_internal_node *n;

 	n = (intset_internal_node *) MemoryContextAlloc(intset->context,
 													sizeof(intset_internal_node));
 	intset->mem_used += GetMemoryChunkSpace(n);

 	n->level = 0;				/* caller must set */
 	n->num_items = 0;

 	return n;
 }

 static intset_leaf_node *
 intset_new_leaf_node(IntegerSet *intset)
 {
 	intset_leaf_node *n;

 	n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
 												sizeof(intset_leaf_node));
 	intset->mem_used += GetMemoryChunkSpace(n);

 	n->level = 0;
 	n->num_items = 0;
 	n->next = NULL;

 	return n;
 }

 /*
  * Return the number of entries in the integer set.
  */
 uint64
 intset_num_entries(IntegerSet *intset)
 {
 	return intset->num_entries;
 }

 /*
  * Return the amount of memory used by the integer set.
  */
 uint64
 intset_memory_usage(IntegerSet *intset)
 {
 	return intset->mem_used;
 }

 /*
  * Add a value to the set.
  *
  * Values must be added in order.
  */
 void
 intset_add_member(IntegerSet *intset, uint64 x)
 {
 	if (intset->iter_active)
 		elog(ERROR, "cannot add new values to integer set while iteration is in progress");

 	if (x <= intset->highest_value && intset->num_entries > 0)
 		elog(ERROR, "cannot add value to integer set out of order");

 	if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
 	{
 		/* Time to flush our buffer */
 		intset_flush_buffered_values(intset);
 		Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
 	}

 	/* Add it to the buffer of newly-added values */
 	intset->buffered_values[intset->num_buffered_values] = x;
 	intset->num_buffered_values++;
 	intset->num_entries++;
 	intset->highest_value = x;
 }

 /*
  * Take a batch of buffered values, and pack them into the B-tree.
  */
 static void
 intset_flush_buffered_values(IntegerSet *intset)
 {
 	uint64	   *values = intset->buffered_values;
 	uint64		num_values = intset->num_buffered_values;
 	int			num_packed = 0;
 	intset_leaf_node *leaf;

 	leaf = (intset_leaf_node *) intset->rightmost_nodes[0];

 	/*
 	 * If the tree is completely empty, create the first leaf page, which is
 	 * also the root.
 	 */
 	if (leaf == NULL)
 	{
 		/*
 		 * This is the very first item in the set.
 		 *
 		 * Allocate root node. It's also a leaf.
 		 */
 		leaf = intset_new_leaf_node(intset);

 		intset->root = (intset_node *) leaf;
 		intset->leftmost_leaf = leaf;
 		intset->rightmost_nodes[0] = (intset_node *) leaf;
 		intset->num_levels = 1;
 	}

 	/*
 	 * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
 	 * stop.  In most cases, we cannot encode that many values in a single
 	 * value, but this way, the encoder doesn't have to worry about running
 	 * out of input.
 	 */
 	while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
 	{
 		leaf_item	item;
 		int			num_encoded;

 		/*
 		 * Construct the next leaf item, packing as many buffered values as
 		 * possible.
 		 */
 		item.first = values[num_packed];
 		item.codeword = simple8b_encode(&values[num_packed + 1],
 										&num_encoded,
 										item.first);

 		/*
 		 * Add the item to the node, allocating a new node if the old one is
 		 * full.
 		 */
 		if (leaf->num_items >= MAX_LEAF_ITEMS)
 		{
 			/* Allocate new leaf and link it to the tree */
 			intset_leaf_node *old_leaf = leaf;

 			leaf = intset_new_leaf_node(intset);
 			old_leaf->next = leaf;
 			intset->rightmost_nodes[0] = (intset_node *) leaf;
 			intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
 		}
 		leaf->items[leaf->num_items++] = item;

 		num_packed += 1 + num_encoded;
 	}

 	/*
 	 * Move any remaining buffered values to the beginning of the array.
 	 */
 	if (num_packed < intset->num_buffered_values)
 	{
 		memmove(&intset->buffered_values[0],
 				&intset->buffered_values[num_packed],
 				(intset->num_buffered_values - num_packed) * sizeof(uint64));
 	}
 	intset->num_buffered_values -= num_packed;
 }

 /*
  * Insert a downlink into parent node, after creating a new node.
  *
  * Recurses if the parent node is full, too.
  */
 static void
 intset_update_upper(IntegerSet *intset, int level, intset_node *child,
 					uint64 child_key)
 {
 	intset_internal_node *parent;

 	Assert(level > 0);

 	/*
 	 * Create a new root node, if necessary.
 	 */
 	if (level >= intset->num_levels)
 	{
 		intset_node *oldroot = intset->root;
 		uint64		downlink_key;

 		/* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
 		if (intset->num_levels == MAX_TREE_LEVELS)
 			elog(ERROR, "could not expand integer set, maximum number of levels reached");
 		intset->num_levels++;

 		/*
 		 * Get the first value on the old root page, to be used as the
 		 * downlink.
 		 */
 		if (intset->root->level == 0)
 			downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
 		else
 			downlink_key = ((intset_internal_node *) oldroot)->values[0];

 		parent = intset_new_internal_node(intset);
 		parent->level = level;
 		parent->values[0] = downlink_key;
 		parent->downlinks[0] = oldroot;
 		parent->num_items = 1;

 		intset->root = (intset_node *) parent;
 		intset->rightmost_nodes[level] = (intset_node *) parent;
 	}

 	/*
 	 * Place the downlink on the parent page.
 	 */
 	parent = (intset_internal_node *) intset->rightmost_nodes[level];

 	if (parent->num_items < MAX_INTERNAL_ITEMS)
 	{
 		parent->values[parent->num_items] = child_key;
 		parent->downlinks[parent->num_items] = child;
 		parent->num_items++;
 	}
 	else
 	{
 		/*
 		 * Doesn't fit.  Allocate new parent, with the downlink as the first
 		 * item on it, and recursively insert the downlink to the new parent
 		 * to the grandparent.
 		 */
 		parent = intset_new_internal_node(intset);
 		parent->level = level;
 		parent->values[0] = child_key;
 		parent->downlinks[0] = child;
 		parent->num_items = 1;

 		intset->rightmost_nodes[level] = (intset_node *) parent;

 		intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
 	}
 }

 /*
  * Does the set contain the given value?
  */
 bool
 intset_is_member(IntegerSet *intset, uint64 x)
 {
 	intset_node *node;
 	intset_leaf_node *leaf;
 	int			level;
 	int			itemno;
 	leaf_item  *item;

 	/*
 	 * The value might be in the buffer of newly-added values.
 	 */
 	if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
 	{
 		itemno = intset_binsrch_uint64(x,
 									   intset->buffered_values,
 									   intset->num_buffered_values,
 									   false);
 		if (itemno >= intset->num_buffered_values)
 			return false;
 		else
 			return (intset->buffered_values[itemno] == x);
 	}

 	/*
 	 * Start from the root, and walk down the B-tree to find the right leaf
 	 * node.
 	 */
 	if (!intset->root)
 		return false;
 	node = intset->root;
 	for (level = intset->num_levels - 1; level > 0; level--)
 	{
 		intset_internal_node *n = (intset_internal_node *) node;

 		Assert(node->level == level);

 		itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
 		if (itemno == 0)
 			return false;
 		node = n->downlinks[itemno - 1];
 	}
 	Assert(node->level == 0);
 	leaf = (intset_leaf_node *) node;

 	/*
 	 * Binary search to find the right item on the leaf page
 	 */
 	itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
 	if (itemno == 0)
 		return false;
 	item = &leaf->items[itemno - 1];

 	/* Is this a match to the first value on the item? */
 	if (item->first == x)
 		return true;
 	Assert(x > item->first);

 	/* Is it in the packed codeword? */
 	if (simple8b_contains(item->codeword, x, item->first))
 		return true;

 	return false;
 }

 /*
  * Begin in-order scan through all the values.
  *
  * While the iteration is in-progress, you cannot add new values to the set.
  */
 void
 intset_begin_iterate(IntegerSet *intset)
 {
 	/* Note that we allow an iteration to be abandoned midway */
 	intset->iter_active = true;
 	intset->iter_node = intset->leftmost_leaf;
 	intset->iter_itemno = 0;
 	intset->iter_valueno = 0;
 	intset->iter_num_values = 0;
 	intset->iter_values = intset->iter_values_buf;
 }

 /*
  * Returns the next integer, when iterating.
  *
  * intset_begin_iterate() must be called first.  intset_iterate_next() returns
  * the next value in the set.  Returns true, if there was another value, and
  * stores the value in *next.  Otherwise, returns false.
  */
 bool
 intset_iterate_next(IntegerSet *intset, uint64 *next)
 {
 	Assert(intset->iter_active);
 	for (;;)
 	{
 		/* Return next iter_values[] entry if any */
 		if (intset->iter_valueno < intset->iter_num_values)
 		{
 			*next = intset->iter_values[intset->iter_valueno++];
 			return true;
 		}

 		/* Decode next item in current leaf node, if any */
 		if (intset->iter_node &&
 			intset->iter_itemno < intset->iter_node->num_items)
 		{
 			leaf_item  *item;
 			int			num_decoded;

 			item = &intset->iter_node->items[intset->iter_itemno++];

 			intset->iter_values_buf[0] = item->first;
 			num_decoded = simple8b_decode(item->codeword,
 										  &intset->iter_values_buf[1],
 										  item->first);
 			intset->iter_num_values = num_decoded + 1;
 			intset->iter_valueno = 0;
 			continue;
 		}

 		/* No more items on this leaf, step to next node */
 		if (intset->iter_node)
 		{
 			intset->iter_node = intset->iter_node->next;
 			intset->iter_itemno = 0;
 			continue;
 		}

 		/*
 		 * We have reached the end of the B-tree.  But we might still have
 		 * some integers in the buffer of newly-added values.
 		 */
 		if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
 		{
 			intset->iter_values = intset->buffered_values;
 			intset->iter_num_values = intset->num_buffered_values;
 			intset->iter_valueno = 0;
 			continue;
 		}

 		break;
 	}

 	/* No more results. */
 	intset->iter_active = false;
 	*next = 0;					/* prevent uninitialized-variable warnings */
 	return false;
 }

 /*
  * intset_binsrch_uint64() -- search a sorted array of uint64s
  *
  * Returns the first position with key equal or less than the given key.
  * The returned position would be the "insert" location for the given key,
  * that is, the position where the new key should be inserted to.
  *
  * 'nextkey' affects the behavior on equal keys.  If true, and there is an
  * equal key in the array, this returns the position immediately after the
  * equal key.  If false, this returns the position of the equal key itself.
  */
 static int
 intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
 {
 	int			low,
 				high,
 				mid;

 	low = 0;
 	high = arr_elems;
 	while (high > low)
 	{
 		mid = low + (high - low) / 2;

 		if (nextkey)
 		{
 			if (item >= arr[mid])
 				low = mid + 1;
 			else
 				high = mid;
 		}
 		else
 		{
 			if (item > arr[mid])
 				low = mid + 1;
 			else
 				high = mid;
 		}
 	}

 	return low;
 }

 /* same, but for an array of leaf items */
 static int
 intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
 {
 	int			low,
 				high,
 				mid;

 	low = 0;
 	high = arr_elems;
 	while (high > low)
 	{
 		mid = low + (high - low) / 2;

 		if (nextkey)
 		{
 			if (item >= arr[mid].first)
 				low = mid + 1;
 			else
 				high = mid;
 		}
 		else
 		{
 			if (item > arr[mid].first)
 				low = mid + 1;
 			else
 				high = mid;
 		}
 	}

 	return low;
 }

 /*
  * Simple-8b encoding.
  *
  * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
  * called "codewords".  The number of integers packed into a single codeword
  * depends on the integers being packed; small integers are encoded using
  * fewer bits than large integers.  A single codeword can store a single
  * 60-bit integer, or two 30-bit integers, for example.
  *
  * Since we're storing a unique, sorted, set of integers, we actually encode
  * the *differences* between consecutive integers.  That way, clusters of
  * integers that are close to each other are packed efficiently, regardless
  * of their absolute values.
  *
  * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
  * how many integers are encoded in the codeword, and the encoded integers are
  * packed into the remaining 60 bits.  The selector allows for 16 different
  * ways of using the remaining 60 bits, called "modes".  The number of integers
  * packed into a single codeword in each mode is listed in the simple8b_modes
  * table below.  For example, consider the following codeword:
  *
  *      20-bit integer       20-bit integer       20-bit integer
  * 1101 00000000000000010010 01111010000100100000 00000000000000010100
  * ^
  * selector
  *
  * The selector 1101 is 13 in decimal.  From the modes table below, we see
  * that it means that the codeword encodes three 20-bit integers.  In decimal,
  * those integers are 18, 500000 and 20.  Because we encode deltas rather than
  * absolute values, the actual values that they represent are 18, 500018 and
  * 500038.
  *
  * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
  * (which means 240 or 120 consecutive integers, since we're encoding the
  * deltas between integers), without using the rest of the codeword bits
  * for anything.
  *
  * Simple-8b cannot encode integers larger than 60 bits.  Values larger than
  * that are always stored in the 'first' field of a leaf item, never in the
  * packed codeword.  If there is a sequence of integers that are more than
  * 2^60 apart, the codeword will go unused on those items.  To represent that,
  * we use a magic EMPTY_CODEWORD codeword value.
  */
 static const struct simple8b_mode
 {
 	uint8		bits_per_int;
 	uint8		num_ints;
 }			simple8b_modes[17] =

 {
 	{0, 240},					/* mode  0: 240 zeroes */
 	{0, 120},					/* mode  1: 120 zeroes */
 	{1, 60},					/* mode  2: sixty 1-bit integers */
 	{2, 30},					/* mode  3: thirty 2-bit integers */
 	{3, 20},					/* mode  4: twenty 3-bit integers */
 	{4, 15},					/* mode  5: fifteen 4-bit integers */
 	{5, 12},					/* mode  6: twelve 5-bit integers */
 	{6, 10},					/* mode  7: ten 6-bit integers */
 	{7, 8},						/* mode  8: eight 7-bit integers (four bits
 								 * are wasted) */
 	{8, 7},						/* mode  9: seven 8-bit integers (four bits
 								 * are wasted) */
 	{10, 6},					/* mode 10: six 10-bit integers */
 	{12, 5},					/* mode 11: five 12-bit integers */
 	{15, 4},					/* mode 12: four 15-bit integers */
 	{20, 3},					/* mode 13: three 20-bit integers */
 	{30, 2},					/* mode 14: two 30-bit integers */
 	{60, 1},					/* mode 15: one 60-bit integer */

 	{0, 0}						/* sentinel value */
 };

 /*
  * EMPTY_CODEWORD is a special value, used to indicate "no values".
  * It is used if the next value is too large to be encoded with Simple-8b.
  *
  * This value looks like a mode-0 codeword, but we can distinguish it
  * because a regular mode-0 codeword would have zeroes in the unused bits.
  */
 #define EMPTY_CODEWORD		UINT64CONST(0x0FFFFFFFFFFFFFFF)

 /*
  * Encode a number of integers into a Simple-8b codeword.
  *
  * (What we actually encode are deltas between successive integers.
  * "base" is the value before ints[0].)
  *
  * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
  * elements, ensuring that we can produce a full codeword.
  *
  * Returns the encoded codeword, and sets *num_encoded to the number of
  * input integers that were encoded.  That can be zero, if the first delta
  * is too large to be encoded.
  */
 static uint64
 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
 {
 	int			selector;
 	int			nints;
 	int			bits;
 	uint64		diff;
 	uint64		last_val;
 	uint64		codeword;
 	int			i;

 	Assert(ints[0] > base);

 	/*
 	 * Select the "mode" to use for this codeword.
 	 *
 	 * In each iteration, check if the next value can be represented in the
 	 * current mode we're considering.  If it's too large, then step up the
 	 * mode to a wider one, and repeat.  If it fits, move on to the next
 	 * integer.  Repeat until the codeword is full, given the current mode.
 	 *
 	 * Note that we don't have any way to represent unused slots in the
 	 * codeword, so we require each codeword to be "full".  It is always
 	 * possible to produce a full codeword unless the very first delta is too
 	 * large to be encoded.  For example, if the first delta is small but the
 	 * second is too large to be encoded, we'll end up using the last "mode",
 	 * which has nints == 1.
 	 */
 	selector = 0;
 	nints = simple8b_modes[0].num_ints;
 	bits = simple8b_modes[0].bits_per_int;
 	diff = ints[0] - base - 1;
 	last_val = ints[0];
 	i = 0;						/* number of deltas we have accepted */
 	for (;;)
 	{
 		if (diff >= (UINT64CONST(1) << bits))
 		{
 			/* too large, step up to next mode */
 			selector++;
 			nints = simple8b_modes[selector].num_ints;
 			bits = simple8b_modes[selector].bits_per_int;
 			/* we might already have accepted enough deltas for this mode */
 			if (i >= nints)
 				break;
 		}
 		else
 		{
 			/* accept this delta; then done if codeword is full */
 			i++;
 			if (i >= nints)
 				break;
 			/* examine next delta */
 			Assert(ints[i] > last_val);
 			diff = ints[i] - last_val - 1;
 			last_val = ints[i];
 		}
 	}

 	if (nints == 0)
 	{
 		/*
 		 * The first delta is too large to be encoded with Simple-8b.
 		 *
 		 * If there is at least one not-too-large integer in the input, we
 		 * will encode it using mode 15 (or a more compact mode).  Hence, we
 		 * can only get here if the *first* delta is >= 2^60.
 		 */
 		Assert(i == 0);
 		*num_encoded = 0;
 		return EMPTY_CODEWORD;
 	}

 	/*
 	 * Encode the integers using the selected mode.  Note that we shift them
 	 * into the codeword in reverse order, so that they will come out in the
 	 * correct order in the decoder.
 	 */
 	codeword = 0;
 	if (bits > 0)
 	{
 		for (i = nints - 1; i > 0; i--)
 		{
 			diff = ints[i] - ints[i - 1] - 1;
 			codeword |= diff;
 			codeword <<= bits;
 		}
 		diff = ints[0] - base - 1;
 		codeword |= diff;
 	}

 	/* add selector to the codeword, and return */
 	codeword |= (uint64) selector << 60;

 	*num_encoded = nints;
 	return codeword;
 }

 /*
  * Decode a codeword into an array of integers.
  * Returns the number of integers decoded.
  */
 static int
 simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
 {
 	int			selector = (codeword >> 60);
 	int			nints = simple8b_modes[selector].num_ints;
 	int			bits = simple8b_modes[selector].bits_per_int;
 	uint64		mask = (UINT64CONST(1) << bits) - 1;
 	uint64		curr_value;

 	if (codeword == EMPTY_CODEWORD)
 		return 0;

 	curr_value = base;
 	for (int i = 0; i < nints; i++)
 	{
 		uint64		diff = codeword & mask;

 		curr_value += 1 + diff;
 		decoded[i] = curr_value;
 		codeword >>= bits;
 	}
 	return nints;
 }

 /*
  * This is very similar to simple8b_decode(), but instead of decoding all
  * the values to an array, it just checks if the given "key" is part of
  * the codeword.
  */
 static bool
 simple8b_contains(uint64 codeword, uint64 key, uint64 base)
 {
 	int			selector = (codeword >> 60);
 	int			nints = simple8b_modes[selector].num_ints;
 	int			bits = simple8b_modes[selector].bits_per_int;

 	if (codeword == EMPTY_CODEWORD)
 		return false;

 	if (bits == 0)
 	{
 		/* Special handling for 0-bit cases. */
 		return (key - base) <= nints;
 	}
 	else
 	{
 		uint64		mask = (UINT64CONST(1) << bits) - 1;
 		uint64		curr_value;

 		curr_value = base;
 		for (int i = 0; i < nints; i++)
 		{
 			uint64		diff = codeword & mask;

 			curr_value += 1 + diff;

 			if (curr_value >= key)
 			{
 				if (curr_value == key)
 					return true;
 				else
 					return false;
 			}

 			codeword >>= bits;
 		}
 	}
 	return false;
 }