src/java/org/apache/cassandra/utils/MerkleTree.java - cassandra - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.cassandra.utils;

 import java.io.DataInput;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.*;

 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.PeekingIterator;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import org.apache.cassandra.db.TypeSizes;
 import org.apache.cassandra.dht.IPartitioner;
 import org.apache.cassandra.dht.IPartitionerDependentSerializer;
 import org.apache.cassandra.dht.Range;
 import org.apache.cassandra.dht.Token;
 import org.apache.cassandra.exceptions.ConfigurationException;
 import org.apache.cassandra.io.IVersionedSerializer;
 import org.apache.cassandra.io.util.DataInputPlus;
 import org.apache.cassandra.io.util.DataOutputPlus;
 import org.apache.cassandra.net.MessagingService;

 /**
  * A MerkleTree implemented as a binary tree.
  *
  * A MerkleTree is a full binary tree that represents a perfect binary tree of
  * depth 'hashdepth'. In a perfect binary tree, each leaf contains a
  * sequentially hashed range, and each inner node contains the binary hash of
  * its two children. In the MerkleTree, many ranges will not be split to the
  * full depth of the perfect binary tree: the leaves of this tree are Leaf objects,
  * which contain the computed values of the nodes that would be below them if
  * the tree were perfect.
  *
  * The hash values of the inner nodes of the MerkleTree are calculated lazily based
  * on their children when the hash of a range is requested with hash(range).
  *
  * Inputs passed to TreeRange.validate should be calculated using a very secure hash,
  * because all hashing internal to the tree is accomplished using XOR.
  *
  * If two MerkleTrees have the same hashdepth, they represent a perfect tree
  * of the same depth, and can always be compared, regardless of size or splits.
  */
 public class MerkleTree implements Serializable
 {
     private static Logger logger = LoggerFactory.getLogger(MerkleTree.class);

     public static final MerkleTreeSerializer serializer = new MerkleTreeSerializer();
     private static final long serialVersionUID = 2L;

     public static final byte RECOMMENDED_DEPTH = Byte.MAX_VALUE - 1;

     public static final int CONSISTENT = 0;
     public static final int FULLY_INCONSISTENT = 1;
     public static final int PARTIALLY_INCONSISTENT = 2;
     private static final byte[] EMPTY_HASH = new byte[0];

     public final byte hashdepth;

     /** The top level range that this MerkleTree covers. */
     public final Range<Token> fullRange;
     private final IPartitioner partitioner;

     private long maxsize;
     private long size;
     private Hashable root;

     public static class MerkleTreeSerializer implements IVersionedSerializer<MerkleTree>
     {
         public void serialize(MerkleTree mt, DataOutputPlus out, int version) throws IOException
         {
             out.writeByte(mt.hashdepth);
             out.writeLong(mt.maxsize);
             out.writeLong(mt.size);
             out.writeUTF(mt.partitioner.getClass().getCanonicalName());
             // full range
             Token.serializer.serialize(mt.fullRange.left, out, version);
             Token.serializer.serialize(mt.fullRange.right, out, version);
             Hashable.serializer.serialize(mt.root, out, version);
         }

         public MerkleTree deserialize(DataInputPlus in, int version) throws IOException
         {
             byte hashdepth = in.readByte();
             long maxsize = in.readLong();
             long size = in.readLong();
             IPartitioner partitioner;
             try
             {
                 partitioner = FBUtilities.newPartitioner(in.readUTF());
             }
             catch (ConfigurationException e)
             {
                 throw new IOException(e);
             }

             // full range
             Token left = Token.serializer.deserialize(in, partitioner, version);
             Token right = Token.serializer.deserialize(in, partitioner, version);
             Range<Token> fullRange = new Range<>(left, right);

             MerkleTree mt = new MerkleTree(partitioner, fullRange, hashdepth, maxsize);
             mt.size = size;
             mt.root = Hashable.serializer.deserialize(in, partitioner, version);
             return mt;
         }

         public long serializedSize(MerkleTree mt, int version)
         {
             long size = 1 // mt.hashdepth
                  + TypeSizes.sizeof(mt.maxsize)
                  + TypeSizes.sizeof(mt.size)
                  + TypeSizes.sizeof(mt.partitioner.getClass().getCanonicalName());

             // full range
             size += Token.serializer.serializedSize(mt.fullRange.left, version);
             size += Token.serializer.serializedSize(mt.fullRange.right, version);

             size += Hashable.serializer.serializedSize(mt.root, version);
             return size;
         }
     }

     /**
      * @param partitioner The partitioner in use.
      * @param range the range this tree covers
      * @param hashdepth The maximum depth of the tree. 100/(2^depth) is the %
      *        of the key space covered by each subrange of a fully populated tree.
      * @param maxsize The maximum number of subranges in the tree.
      */
     public MerkleTree(IPartitioner partitioner, Range<Token> range, byte hashdepth, long maxsize)
     {
         assert hashdepth < Byte.MAX_VALUE;
         this.fullRange = Preconditions.checkNotNull(range);
         this.partitioner = Preconditions.checkNotNull(partitioner);
         this.hashdepth = hashdepth;
         this.maxsize = maxsize;

         size = 1;
         root = new Leaf(null);
     }


     static byte inc(byte in)
     {
         assert in < Byte.MAX_VALUE;
         return (byte)(in + 1);
     }

     /**
      * Initializes this tree by splitting it until hashdepth is reached,
      * or until an additional level of splits would violate maxsize.
      *
      * NB: Replaces all nodes in the tree.
      */
     public void init()
     {
         // determine the depth to which we can safely split the tree
         byte sizedepth = (byte)(Math.log10(maxsize) / Math.log10(2));
         byte depth = (byte)Math.min(sizedepth, hashdepth);

         root = initHelper(fullRange.left, fullRange.right, (byte)0, depth);
         size = (long)Math.pow(2, depth);
     }

     private Hashable initHelper(Token left, Token right, byte depth, byte max)
     {
         if (depth == max)
             // we've reached the leaves
             return new Leaf();
         Token midpoint = partitioner.midpoint(left, right);

         if (midpoint.equals(left) || midpoint.equals(right))
             return new Leaf();

         Hashable lchild =  initHelper(left, midpoint, inc(depth), max);
         Hashable rchild =  initHelper(midpoint, right, inc(depth), max);
         return new Inner(midpoint, lchild, rchild);
     }

     Hashable root()
     {
         return root;
     }

     public IPartitioner partitioner()
     {
         return partitioner;
     }

     /**
      * The number of distinct ranges contained in this tree. This is a reasonable
      * measure of the memory usage of the tree (assuming 'this.order' is significant).
      */
     public long size()
     {
         return size;
     }

     public long maxsize()
     {
         return maxsize;
     }

     public void maxsize(long maxsize)
     {
         this.maxsize = maxsize;
     }

     /**
      * @param ltree First tree.
      * @param rtree Second tree.
      * @return A list of the largest contiguous ranges where the given trees disagree.
      */
     public static List<TreeRange> difference(MerkleTree ltree, MerkleTree rtree)
     {
         if (!ltree.fullRange.equals(rtree.fullRange))
             throw new IllegalArgumentException("Difference only make sense on tree covering the same range (but " + ltree.fullRange + " != " + rtree.fullRange + ")");

         List<TreeRange> diff = new ArrayList<>();
         TreeDifference active = new TreeDifference(ltree.fullRange.left, ltree.fullRange.right, (byte)0);

         Hashable lnode = ltree.find(active);
         Hashable rnode = rtree.find(active);
         byte[] lhash = lnode.hash();
         byte[] rhash = rnode.hash();
         active.setSize(lnode.sizeOfRange(), rnode.sizeOfRange());

         if (lhash != null && rhash != null && !Arrays.equals(lhash, rhash))
         {
             if(lnode instanceof  Leaf || rnode instanceof Leaf)
             {
                 logger.debug("Digest mismatch detected among leaf nodes {}, {}", lnode, rnode);
                 diff.add(active);
             }
             else
             {
                 logger.debug("Digest mismatch detected, traversing trees [{}, {}]", ltree, rtree);
                 if (FULLY_INCONSISTENT == differenceHelper(ltree, rtree, diff, active))
                 {
                     logger.debug("Range {} fully inconsistent", active);
                     diff.add(active);
                 }
             }
         }
         else if (lhash == null || rhash == null)
             diff.add(active);
         return diff;
     }

     /**
      * TODO: This function could be optimized into a depth first traversal of
      * the two trees in parallel.
      *
      * Takes two trees and a range for which they have hashes, but are inconsistent.
      * @return FULLY_INCONSISTENT if active is inconsistent, PARTIALLY_INCONSISTENT if only a subrange is inconsistent.
      */
     @VisibleForTesting
     static int differenceHelper(MerkleTree ltree, MerkleTree rtree, List<TreeRange> diff, TreeRange active)
     {
         if (active.depth == Byte.MAX_VALUE)
             return CONSISTENT;

         Token midpoint = ltree.partitioner().midpoint(active.left, active.right);
         // sanity check for midpoint calculation, see CASSANDRA-13052
         if (midpoint.equals(active.left) || midpoint.equals(active.right))
         {
             // If the midpoint equals either the left or the right, we have a range that's too small to split - we'll simply report the
             // whole range as inconsistent
             logger.debug("({}) No sane midpoint ({}) for range {} , marking whole range as inconsistent", active.depth, midpoint, active);
             return FULLY_INCONSISTENT;
         }

         TreeDifference left = new TreeDifference(active.left, midpoint, inc(active.depth));
         TreeDifference right = new TreeDifference(midpoint, active.right, inc(active.depth));
         logger.debug("({}) Hashing sub-ranges [{}, {}] for {} divided by midpoint {}", active.depth, left, right, active, midpoint);
         byte[] lhash, rhash;
         Hashable lnode, rnode;

         // see if we should recurse left
         lnode = ltree.find(left);
         rnode = rtree.find(left);
         lhash = lnode.hash();
         rhash = rnode.hash();
         left.setSize(lnode.sizeOfRange(), rnode.sizeOfRange());
         left.setRows(lnode.rowsInRange(), rnode.rowsInRange());

         int ldiff = CONSISTENT;
         boolean lreso = lhash != null && rhash != null;
         if (lreso && !Arrays.equals(lhash, rhash))
         {
             logger.debug("({}) Inconsistent digest on left sub-range {}: [{}, {}]", active.depth, left, lnode, rnode);
             if (lnode instanceof Leaf) ldiff = FULLY_INCONSISTENT;
             else ldiff = differenceHelper(ltree, rtree, diff, left);
         }
         else if (!lreso)
         {
             logger.debug("({}) Left sub-range fully inconsistent {}", active.depth, right);
             ldiff = FULLY_INCONSISTENT;
         }

         // see if we should recurse right
         lnode = ltree.find(right);
         rnode = rtree.find(right);
         lhash = lnode.hash();
         rhash = rnode.hash();
         right.setSize(lnode.sizeOfRange(), rnode.sizeOfRange());
         right.setRows(lnode.rowsInRange(), rnode.rowsInRange());

         int rdiff = CONSISTENT;
         boolean rreso = lhash != null && rhash != null;
         if (rreso && !Arrays.equals(lhash, rhash))
         {
             logger.debug("({}) Inconsistent digest on right sub-range {}: [{}, {}]", active.depth, right, lnode, rnode);
             if (rnode instanceof Leaf) rdiff = FULLY_INCONSISTENT;
             else rdiff = differenceHelper(ltree, rtree, diff, right);
         }
         else if (!rreso)
         {
             logger.debug("({}) Right sub-range fully inconsistent {}", active.depth, right);
             rdiff = FULLY_INCONSISTENT;
         }

         if (ldiff == FULLY_INCONSISTENT && rdiff == FULLY_INCONSISTENT)
         {
             // both children are fully inconsistent
             logger.debug("({}) Fully inconsistent range [{}, {}]", active.depth, left, right);
             return FULLY_INCONSISTENT;
         }
         else if (ldiff == FULLY_INCONSISTENT)
         {
             logger.debug("({}) Adding left sub-range to diff as fully inconsistent {}", active.depth, left);
             diff.add(left);
             return PARTIALLY_INCONSISTENT;
         }
         else if (rdiff == FULLY_INCONSISTENT)
         {
             logger.debug("({}) Adding right sub-range to diff as fully inconsistent {}", active.depth, right);
             diff.add(right);
             return PARTIALLY_INCONSISTENT;
         }
         logger.debug("({}) Range {} partially inconstent", active.depth, active);
         return PARTIALLY_INCONSISTENT;
     }

     /**
      * For testing purposes.
      * Gets the smallest range containing the token.
      */
     public TreeRange get(Token t)
     {
         return getHelper(root, fullRange.left, fullRange.right, (byte)0, t);
     }

     TreeRange getHelper(Hashable hashable, Token pleft, Token pright, byte depth, Token t)
     {
         while (true)
         {
             if (hashable instanceof Leaf)
             {
                 // we've reached a hash: wrap it up and deliver it
                 return new TreeRange(this, pleft, pright, depth, hashable);
             }
             // else: node.

             Inner node = (Inner) hashable;
             depth = inc(depth);
             if (Range.contains(pleft, node.token, t))
             { // left child contains token
                 hashable = node.lchild;
                 pright = node.token;
             }
             else
             { // else: right child contains token
                 hashable = node.rchild;
                 pleft = node.token;
             }
         }
     }

     /**
      * Invalidates the ranges containing the given token.
      * Useful for testing.
      */
     public void invalidate(Token t)
     {
         invalidateHelper(root, fullRange.left, t);
     }

     private void invalidateHelper(Hashable hashable, Token pleft, Token t)
     {
         hashable.hash(null);
         if (hashable instanceof Leaf)
             return;
         // else: node.

         Inner node = (Inner)hashable;
         if (Range.contains(pleft, node.token, t))
             // left child contains token
             invalidateHelper(node.lchild, pleft, t);
         else
             // right child contains token
             invalidateHelper(node.rchild, node.token, t);
     }

     /**
      * Hash the given range in the tree. The range must have been generated
      * with recursive applications of partitioner.midpoint().
      *
      * NB: Currently does not support wrapping ranges that do not end with
      * partitioner.getMinimumToken().
      *
      * @return Null if any subrange of the range is invalid, or if the exact
      *         range cannot be calculated using this tree.
      */
     public byte[] hash(Range<Token> range)
     {
         return find(range).hash();
     }

     /**
      * Find the {@link Hashable} node that matches the given {@code range}.
      *
      * @param range Range to find
      * @return {@link Hashable} found. If nothing found, return {@link Leaf} with null hash.
      */
     private Hashable find(Range<Token> range)
     {
         try
         {
             return findHelper(root, new Range<Token>(fullRange.left, fullRange.right), range);
         }
         catch (StopRecursion e)
         {
             return new Leaf();
         }
     }

     /**
      * @throws StopRecursion If no match could be found for the range.
      */
     private Hashable findHelper(Hashable current, Range<Token> activeRange, Range<Token> find) throws StopRecursion
     {
         while (true)
         {
             if (current instanceof Leaf)
             {
                 if (!find.contains(activeRange))
                     // we are not fully contained in this range!
                     throw new StopRecursion.BadRange();
                 return current;
             }
             // else: node.

             Inner node = (Inner) current;
             Range<Token> leftRange = new Range<>(activeRange.left, node.token);
             Range<Token> rightRange = new Range<>(node.token, activeRange.right);

             if (find.contains(activeRange))
                 // this node is fully contained in the range
                 return node.calc();

             // else: one of our children contains the range

             if (leftRange.contains(find))
             { // left child contains/matches the range
                 current = node.lchild;
                 activeRange = leftRange;
             }
             else if (rightRange.contains(find))
             { // right child contains/matches the range
                 current = node.rchild;
                 activeRange = rightRange;
             }
             else
             {
                 throw new StopRecursion.BadRange();
             }
         }
     }

     /**
      * Splits the range containing the given token, if no tree limits would be
      * violated. If the range would be split to a depth below hashdepth, or if
      * the tree already contains maxsize subranges, this operation will fail.
      *
      * @return True if the range was successfully split.
      */
     public boolean split(Token t)
     {
         if (!(size < maxsize))
             return false;

         try
         {
             root = splitHelper(root, fullRange.left, fullRange.right, (byte)0, t);
         }
         catch (StopRecursion.TooDeep e)
         {
             return false;
         }
         return true;
     }

     private Hashable splitHelper(Hashable hashable, Token pleft, Token pright, byte depth, Token t) throws StopRecursion.TooDeep
     {
         if (depth >= hashdepth)
             throw new StopRecursion.TooDeep();

         if (hashable instanceof Leaf)
         {
             Token midpoint = partitioner.midpoint(pleft, pright);

             // We should not create a non-sensical range where start and end are the same token (this is non-sensical because range are
             // start exclusive). Note that we shouldn't hit that unless the full range is very small or we are fairly deep
             if (midpoint.equals(pleft) || midpoint.equals(pright))
                 throw new StopRecursion.TooDeep();

             // split
             size++;
             return new Inner(midpoint, new Leaf(), new Leaf());
         }
         // else: node.

         // recurse on the matching child
         Inner node = (Inner)hashable;

         if (Range.contains(pleft, node.token, t))
             // left child contains token
             node.lchild(splitHelper(node.lchild, pleft, node.token, inc(depth), t));
         else
             // else: right child contains token
             node.rchild(splitHelper(node.rchild, node.token, pright, inc(depth), t));
         return node;
     }

     /**
      * Returns a lazy iterator of invalid TreeRanges that need to be filled
      * in order to make the given Range valid.
      */
     public TreeRangeIterator invalids()
     {
         return new TreeRangeIterator(this);
     }

     public EstimatedHistogram histogramOfRowSizePerLeaf()
     {
         HistogramBuilder histbuild = new HistogramBuilder();
         for (TreeRange range : new TreeRangeIterator(this))
         {
             histbuild.add(range.hashable.sizeOfRange);
         }
         return histbuild.buildWithStdevRangesAroundMean();
     }

     public EstimatedHistogram histogramOfRowCountPerLeaf()
     {
         HistogramBuilder histbuild = new HistogramBuilder();
         for (TreeRange range : new TreeRangeIterator(this))
         {
             histbuild.add(range.hashable.rowsInRange);
         }
         return histbuild.buildWithStdevRangesAroundMean();
     }

     public long rowCount()
     {
         long count = 0;
         for (TreeRange range : new TreeRangeIterator(this))
         {
             count += range.hashable.rowsInRange;
         }
         return count;
     }

     @Override
     public String toString()
     {
         StringBuilder buff = new StringBuilder();
         buff.append("#<MerkleTree root=");
         root.toString(buff, 8);
         buff.append(">");
         return buff.toString();
     }

     public static class TreeDifference extends TreeRange
     {
         private static final long serialVersionUID = 6363654174549968183L;

         private long sizeOnLeft;
         private long sizeOnRight;
         private long rowsOnLeft;
         private long rowsOnRight;

         void setSize(long sizeOnLeft, long sizeOnRight)
         {
             this.sizeOnLeft = sizeOnLeft;
             this.sizeOnRight = sizeOnRight;
         }

         void setRows(long rowsOnLeft, long rowsOnRight)
         {
             this.rowsOnLeft = rowsOnLeft;
             this.rowsOnRight = rowsOnRight;
         }

         public long sizeOnLeft()
         {
             return sizeOnLeft;
         }

         public long sizeOnRight()
         {
             return sizeOnRight;
         }

         public long rowsOnLeft()
         {
             return rowsOnLeft;
         }

         public long rowsOnRight()
         {
             return rowsOnRight;
         }

         public TreeDifference(Token left, Token right, byte depth)
         {
             super(null, left, right, depth, null);
         }

         public long totalRows()
         {
             return rowsOnLeft + rowsOnRight;
         }

     }

     /**
      * The public interface to a range in the tree.
      *
      * NB: A TreeRange should not be returned by a public method unless the
      * parents of the range it represents are already invalidated, since it
      * will allow someone to modify the hash. Alternatively, a TreeRange
      * may be created with a null tree, indicating that it is read only.
      */
     public static class TreeRange extends Range<Token>
     {
         public static final long serialVersionUID = 1L;
         private final MerkleTree tree;
         public final byte depth;
         private final Hashable hashable;

         TreeRange(MerkleTree tree, Token left, Token right, byte depth, Hashable hashable)
         {
             super(left, right);
             this.tree = tree;
             this.depth = depth;
             this.hashable = hashable;
         }

         public void hash(byte[] hash)
         {
             assert tree != null : "Not intended for modification!";
             hashable.hash(hash);
         }

         public byte[] hash()
         {
             return hashable.hash();
         }

         /**
          * @param entry Row to mix into the hash for this range.
          */
         public void addHash(RowHash entry)
         {
             assert tree != null : "Not intended for modification!";
             assert hashable instanceof Leaf;

             hashable.addHash(entry.hash, entry.size);
         }

         public void ensureHashInitialised()
         {
             assert tree != null : "Not intended for modification!";
             assert hashable instanceof Leaf;

             if (hashable.hash == null)
                 hashable.hash = EMPTY_HASH;
         }

         public void addAll(Iterator<RowHash> entries)
         {
             while (entries.hasNext())
                 addHash(entries.next());
         }

         @Override
         public String toString()
         {
             StringBuilder buff = new StringBuilder("#<TreeRange ");
             buff.append(super.toString()).append(" depth=").append(depth);
             return buff.append(">").toString();
         }
     }

     /**
      * Returns the leaf (range) of a given tree in increasing order.
      * If the full range covered by the tree don't wrap, then it will return the
      * ranges in increasing order.
      * If the full range wrap, the first *and* last range returned by the
      * iterator will be the wrapping range. It is the only case where the same
      * leaf will be returned twice.
      */
     public static class TreeRangeIterator extends AbstractIterator<TreeRange> implements Iterable<TreeRange>, PeekingIterator<TreeRange>
     {
         // stack of ranges to visit
         private final ArrayDeque<TreeRange> tovisit;
         // interesting range
         private final MerkleTree tree;

         TreeRangeIterator(MerkleTree tree)
         {
             tovisit = new ArrayDeque<TreeRange>();
             tovisit.add(new TreeRange(tree, tree.fullRange.left, tree.fullRange.right, (byte)0, tree.root));
             this.tree = tree;
         }

         /**
          * Find the next TreeRange.
          *
          * @return The next TreeRange.
          */
         public TreeRange computeNext()
         {
             while (!tovisit.isEmpty())
             {
                 TreeRange active = tovisit.pop();

                 if (active.hashable instanceof Leaf)
                 {
                     // found a leaf invalid range
                     if (active.isWrapAround() && !tovisit.isEmpty())
                         // put to be taken again last
                         tovisit.addLast(active);
                     return active;
                 }

                 Inner node = (Inner)active.hashable;
                 TreeRange left = new TreeRange(tree, active.left, node.token, inc(active.depth), node.lchild);
                 TreeRange right = new TreeRange(tree, node.token, active.right, inc(active.depth), node.rchild);

                 if (right.isWrapAround())
                 {
                     // whatever is on the left is 'after' everything we have seen so far (it has greater tokens)
                     tovisit.addLast(left);
                     tovisit.addFirst(right);
                 }
                 else
                 {
                     // do left first then right
                     tovisit.addFirst(right);
                     tovisit.addFirst(left);
                 }
             }
             return endOfData();
         }

         public Iterator<TreeRange> iterator()
         {
             return this;
         }
     }

     /**
      * An inner node in the MerkleTree. Inners can contain cached hash values, which
      * are the binary hash of their two children.
      */
     static class Inner extends Hashable
     {
         public static final long serialVersionUID = 1L;
         static final byte IDENT = 2;
         public final Token token;
         private Hashable lchild;
         private Hashable rchild;

         private static final InnerSerializer serializer = new InnerSerializer();

         /**
          * Constructs an Inner with the given token and children, and a null hash.
          */
         public Inner(Token token, Hashable lchild, Hashable rchild)
         {
             super(null);
             this.token = token;
             this.lchild = lchild;
             this.rchild = rchild;
         }

         public Hashable lchild()
         {
             return lchild;
         }

         public Hashable rchild()
         {
             return rchild;
         }

         public void lchild(Hashable child)
         {
             lchild = child;
         }

         public void rchild(Hashable child)
         {
             rchild = child;
         }

         Hashable calc()
         {
             if (hash == null)
             {
                 // hash and size haven't been calculated; calc children then compute
                 Hashable lnode = lchild.calc();
                 Hashable rnode = rchild.calc();
                 // cache the computed value
                 hash(lnode.hash, rnode.hash);
                 sizeOfRange = lnode.sizeOfRange + rnode.sizeOfRange;
                 rowsInRange = lnode.rowsInRange + rnode.rowsInRange;
             }
             return this;
         }

         /**
          * Recursive toString.
          */
         public void toString(StringBuilder buff, int maxdepth)
         {
             buff.append("#<").append(getClass().getSimpleName());
             buff.append(" ").append(token);
             buff.append(" hash=").append(Hashable.toString(hash()));
             buff.append(" children=[");
             if (maxdepth < 1)
             {
                 buff.append("#");
             }
             else
             {
                 if (lchild == null)
                     buff.append("null");
                 else
                     lchild.toString(buff, maxdepth-1);
                 buff.append(" ");
                 if (rchild == null)
                     buff.append("null");
                 else
                     rchild.toString(buff, maxdepth-1);
             }
             buff.append("]>");
         }

         @Override
         public String toString()
         {
             StringBuilder buff = new StringBuilder();
             toString(buff, 1);
             return buff.toString();
         }

         private static class InnerSerializer implements IPartitionerDependentSerializer<Inner>
         {
             public void serialize(Inner inner, DataOutputPlus out, int version) throws IOException
             {
                 if (version < MessagingService.VERSION_30)
                 {
                     if (inner.hash == null)
                         out.writeInt(-1);
                     else
                     {
                         out.writeInt(inner.hash.length);
                         out.write(inner.hash);
                     }
                 }
                 Token.serializer.serialize(inner.token, out, version);
                 Hashable.serializer.serialize(inner.lchild, out, version);
                 Hashable.serializer.serialize(inner.rchild, out, version);
             }

             public Inner deserialize(DataInput in, IPartitioner p, int version) throws IOException
             {
                 if (version < MessagingService.VERSION_30)
                 {
                     int hashLen = in.readInt();
                     byte[] hash = hashLen >= 0 ? new byte[hashLen] : null;
                     if (hash != null)
                         in.readFully(hash);
                 }
                 Token token = Token.serializer.deserialize(in, p, version);
                 Hashable lchild = Hashable.serializer.deserialize(in, p, version);
                 Hashable rchild = Hashable.serializer.deserialize(in, p, version);
                 return new Inner(token, lchild, rchild);
             }

             public long serializedSize(Inner inner, int version)
             {
                 long size = 0;
                 if (version < MessagingService.VERSION_30)
                 {
                     size += inner.hash == null
                                        ? TypeSizes.sizeof(-1)
                                        : TypeSizes.sizeof(inner.hash().length) + inner.hash().length;
                 }

                 size += Token.serializer.serializedSize(inner.token, version)
                 + Hashable.serializer.serializedSize(inner.lchild, version)
                 + Hashable.serializer.serializedSize(inner.rchild, version);
                 return size;
             }
         }
     }

     /**
      * A leaf node in the MerkleTree. Because the MerkleTree represents a much
      * larger perfect binary tree of depth hashdepth, a Leaf object contains
      * the value that would be contained in the perfect tree at its position.
      *
      * When rows are added to the MerkleTree using TreeRange.validate(), the
      * tree extending below the Leaf is generated in memory, but only the root
      * is stored in the Leaf.
      */
     static class Leaf extends Hashable
     {
         public static final long serialVersionUID = 1L;
         static final byte IDENT = 1;
         private static final LeafSerializer serializer = new LeafSerializer();

         /**
          * Constructs a null hash.
          */
         public Leaf()
         {
             super(null);
         }

         public Leaf(byte[] hash)
         {
             super(hash);
         }

         public void toString(StringBuilder buff, int maxdepth)
         {
             buff.append(toString());
         }

         @Override
         public String toString()
         {
             return "#<Leaf " + Hashable.toString(hash()) + ">";
         }

         private static class LeafSerializer implements IPartitionerDependentSerializer<Leaf>
         {
             public void serialize(Leaf leaf, DataOutputPlus out, int version) throws IOException
             {
                 if (leaf.hash == null)
                 {
                     if (version < MessagingService.VERSION_30)
                         out.writeInt(-1);
                     else
                         out.writeByte(-1);
                 }
                 else
                 {
                     if (version < MessagingService.VERSION_30)
                         out.writeInt(leaf.hash.length);
                     else
                         out.writeByte(leaf.hash.length);
                     out.write(leaf.hash);
                 }
             }

             public Leaf deserialize(DataInput in, IPartitioner p, int version) throws IOException
             {
                 int hashLen = version < MessagingService.VERSION_30 ? in.readInt() : in.readByte();
                 byte[] hash = hashLen < 0 ? null : new byte[hashLen];
                 if (hash != null)
                     in.readFully(hash);
                 return new Leaf(hash);
             }

             public long serializedSize(Leaf leaf, int version)
             {
                 long size = version < MessagingService.VERSION_30 ? TypeSizes.sizeof(1) : 1;
                 if (leaf.hash != null)
                 {
                     size += leaf.hash().length;
                 }
                 return size;
             }
         }
     }

     /**
      * Hash value representing a row, to be used to pass hashes to the MerkleTree.
      * The byte[] hash value should contain a digest of the key and value of the row
      * created using a very strong hash function.
      */
     public static class RowHash
     {
         public final Token token;
         public final byte[] hash;
         public final long size;
         public RowHash(Token token, byte[] hash, long size)
         {
             this.token = token;
             this.hash  = hash;
             this.size = size;
         }

         @Override
         public String toString()
         {
             return "#<RowHash " + token + " " + Hashable.toString(hash) + " @ " + size + " bytes>";
         }
     }

     /**
      * Abstract class containing hashing logic, and containing a single hash field.
      */
     static abstract class Hashable implements Serializable
     {
         private static final long serialVersionUID = 1L;
         private static final IPartitionerDependentSerializer<Hashable> serializer = new HashableSerializer();

         protected byte[] hash;
         protected long sizeOfRange;
         protected long rowsInRange;

         protected Hashable(byte[] hash)
         {
             this.hash = hash;
         }

         public byte[] hash()
         {
             return hash;
         }

         public long sizeOfRange()
         {
             return sizeOfRange;
         }

         public long rowsInRange()
         {
             return rowsInRange;
         }

         void hash(byte[] hash)
         {
             this.hash = hash;
         }

         Hashable calc()
         {
             return this;
         }

         /**
          * Sets the value of this hash to binaryHash of its children.
          * @param lefthash Hash of left child.
          * @param righthash Hash of right child.
          */
         void hash(byte[] lefthash, byte[] righthash)
         {
             hash = binaryHash(lefthash, righthash);
         }

         /**
          * Mixes the given value into our hash. If our hash is null,
          * our hash will become the given value.
          */
         void addHash(byte[] righthash, long sizeOfRow)
         {
             if (hash == null)
                 hash = righthash;
             else
                 hash = binaryHash(hash, righthash);
             this.sizeOfRange += sizeOfRow;
             this.rowsInRange += 1;
         }

         /**
          * The primitive with which all hashing should be accomplished: hashes
          * a left and right value together.
          */
         static byte[] binaryHash(final byte[] left, final byte[] right)
         {
             return FBUtilities.xor(left, right);
         }

         public abstract void toString(StringBuilder buff, int maxdepth);

         public static String toString(byte[] hash)
         {
             if (hash == null)
                 return "null";
             return "[" + Hex.bytesToHex(hash) + "]";
         }

         private static class HashableSerializer implements IPartitionerDependentSerializer<Hashable>
         {
             public void serialize(Hashable h, DataOutputPlus out, int version) throws IOException
             {
                 if (h instanceof Inner)
                 {
                     out.writeByte(Inner.IDENT);
                     Inner.serializer.serialize((Inner)h, out, version);
                 }
                 else if (h instanceof Leaf)
                 {
                     out.writeByte(Leaf.IDENT);
                     Leaf.serializer.serialize((Leaf) h, out, version);
                 }
                 else
                     throw new IOException("Unexpected Hashable: " + h.getClass().getCanonicalName());
             }

             public Hashable deserialize(DataInput in, IPartitioner p, int version) throws IOException
             {
                 byte ident = in.readByte();
                 if (Inner.IDENT == ident)
                     return Inner.serializer.deserialize(in, p, version);
                 else if (Leaf.IDENT == ident)
                     return Leaf.serializer.deserialize(in, p, version);
                 else
                     throw new IOException("Unexpected Hashable: " + ident);
             }

             public long serializedSize(Hashable h, int version)
             {
                 if (h instanceof Inner)
                     return 1 + Inner.serializer.serializedSize((Inner) h, version);
                 else if (h instanceof Leaf)
                     return 1 + Leaf.serializer.serializedSize((Leaf) h, version);
                 throw new AssertionError(h.getClass());
             }
         }
     }

     /**
      * Exceptions that stop recursion early when we are sure that no answer
      * can be found.
      */
     static abstract class StopRecursion extends Exception
     {
         static class BadRange extends StopRecursion
         {
             public BadRange(){ super(); }
         }

         static class InvalidHash extends StopRecursion
         {
             public InvalidHash(){ super(); }
         }

         static class TooDeep extends StopRecursion
         {
             public TooDeep(){ super(); }
         }
     }
 }