hbase-common/src/main/java/org/apache/omid/committable/hbase/RegionSplitter.java - phoenix-omid - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.omid.committable.hbase;

 import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;

 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.util.Bytes;

 import java.io.IOException;
 import java.util.Arrays;

 /**
  * This class contains only the required behavior of the original
  * org.apache.hadoop.hbase.util.RegionSplitter class to avoid
  * having a reference to hbase-testing-util, which transitively
  * imports hbase-server causing dependency conflicts for this module.
  */
 public class RegionSplitter {

     /**
      * A generic interface for the RegionSplitter code to use for all its functionality. Note that the original
      * authors of this code use org.apache.hadoop.hbase.util.HexStringSplit to partition their table and set it as
      * the default, but provided this interface for your custom algorithm.
      *
      * <p>The original HBase documentation refers to RegionSplitter#createPresplitTable and
      * RegionSplitter#rollingSplit(String, SplitAlgorithm, Configuration) as the entry points; neither method is
      * part of this class. To use a custom algorithm here, create a class implementing this interface and call
      * {@link RegionSplitter#newSplitAlgoInstance(Configuration, String)} with the argument splitClassName giving
      * the name of your class.
      */
     public interface SplitAlgorithm {

         /**
          * Split a pre-existing region into 2 regions.
          *
          * @param start
          *            first row (inclusive)
          * @param end
          *            last row (exclusive)
          * @return the split row to use
          */
         byte[] split(byte[] start, byte[] end);

         /**
          * Split an entire table.
          *
          * @param numRegions
          *            number of regions to split the table into
          * @return array of split keys for the initial regions of the table. The length of the returned array should be
          *         numRegions-1.
          * @throws RuntimeException
          *             user input is validated at this time. May throw a runtime exception in response to a parse
          *             failure
          */
         byte[][] split(int numRegions);

         /**
          * In HBase, the first row is represented by an empty byte array. This might cause problems with your split
          * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array.
          *
          * @return your representation of your first row
          */
         byte[] firstRow();

         /**
          * In HBase, the last row is represented by an empty byte array. This might cause problems with your split
          * algorithm or row printing. All your APIs will be passed lastRow() instead of empty array.
          *
          * @return your representation of your last row
          */
         byte[] lastRow();

         /**
          * In HBase, the first row is represented by an empty byte array. Set this value to help the split code
          * understand how to evenly divide the first region.
          *
          * @param userInput
          *            raw user input (may throw RuntimeException on parse failure)
          */
         void setFirstRow(String userInput);

         /**
          * In HBase, the last row is represented by an empty byte array. Set this value to help the split code
          * understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing
          * the same prefix.
          *
          * @param userInput raw user input (may throw RuntimeException on parse failure)
          */
         void setLastRow(String userInput);

         /**
          * Converts the textual form of a row into its byte representation.
          *
          * @param input
          *            user or file input for row
          * @return byte array representation of this row for HBase
          */
         byte[] strToRow(String input);

         /**
          * Converts a row into its textual form.
          *
          * @param row byte array representing a row in HBase
          * @return String to use for debug and file printing
          */
         String rowToStr(byte[] row);

         /**
          * @return the separator string to use when storing / printing the row
          */
         String separator();

         /**
          * Set the first row.
          *
          * @param userInput
          *            byte array of the row key.
          */
         void setFirstRow(byte[] userInput);

         /**
          * Set the last row.
          *
          * @param userInput
          *            byte array of the row key.
          */
         void setLastRow(byte[] userInput);
     }

     /**
      * Instantiates the {@link SplitAlgorithm} identified by {@code splitClassName}.
      *
      * <p>The built-in {@link UniformSplit} may be referenced by its simple class name. Any other value is
      * resolved through {@link Configuration#getClassByName(String)} and must name a class that implements
      * {@link SplitAlgorithm}. The instance is created through the class's no-argument constructor.
      *
      * @param conf Hbase conf, used to load every class other than the built-in one
      * @param splitClassName split class name to be used
      * @return an instance of SplitAlgorithm
      * @throws IOException if the specified SplitAlgorithm class couldn't be loaded, does not implement
      *                     SplitAlgorithm, or couldn't be instantiated
      */
     public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
                                                       String splitClassName) throws IOException {
         Class<?> splitClass;

         // For split algorithms builtin to RegionSplitter, the user can specify
         // their simple class name instead of a fully qualified class name.
         if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
             splitClass = UniformSplit.class;
         } else {
             try {
                 splitClass = conf.getClassByName(splitClassName);
             } catch (ClassNotFoundException e) {
                 throw new IOException("Couldn't load split class " + splitClassName, e);
             }
             if (splitClass == null) {
                 throw new IOException("Failed loading split class " + splitClassName);
             }
             if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
                 throw new IOException(
                     "Specified split class doesn't implement SplitAlgorithm");
             }
         }
         try {
             // Class#newInstance() is deprecated: it rethrows whatever the constructor throws, checked
             // exceptions included, without wrapping. Constructor#newInstance() wraps such failures in
             // InvocationTargetException instead.
             return splitClass.asSubclass(SplitAlgorithm.class)
                 .getDeclaredConstructor()
                 .newInstance();
         } catch (Exception e) {
             // Deliberately broad: every instantiation failure, reflective or runtime, is reported to the
             // caller as an IOException carrying the original cause.
             throw new IOException("Problem loading split algorithm: " + splitClassName, e);
         }
     }

     /**
      * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform
      * random bytes (e.g. hashes). Rows are raw byte values in the range [00..FF] and are right-padded with zeros
      * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space,
      * but is not necessarily the easiest for readability.
      *
      * <p>The first and last row are kept as plain mutable fields: the setters store the array they are given and
      * the getters return the stored array, in both cases without copying.
      */
     public static class UniformSplit implements SplitAlgorithm {

         static final byte xFF = (byte) 0xFF;
         // Defaults: an empty first row and a last row of eight 0xFF bytes.
         byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
         byte[] lastRowBytes =
             new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};

         /**
          * Split a pre-existing region into 2 regions.
          *
          * @param start first row (inclusive)
          * @param end last row (exclusive)
          * @return the split row to use
          * @throws IllegalArgumentException if no split row could be computed between the two rows
          */
         @Override
         public byte[] split(byte[] start, byte[] end) {
             byte[][] splits = Bytes.split(start, end, 1);
             // Same guard as split(int): fail with a descriptive message instead of a bare
             // NullPointerException when Bytes.split yields no result.
             if (splits == null) {
                 throw new IllegalArgumentException("Could not split region between rows ("
                     + Bytes.toStringBinary(start) + ") and (" + Bytes.toStringBinary(end) + ")");
             }
             // Index 0 is the start row and index 2 the end row; index 1 is the single split point.
             return splits[1];
         }

         /**
          * Split the configured key range into {@code numRegions} regions.
          *
          * @param numRegions number of regions to split the table into; must be positive
          * @return the {@code numRegions - 1} split keys, which is an empty array for a single region
          * @throws IllegalArgumentException if the last row is not greater than the first row, or if
          *                                  {@code numRegions} is not positive
          * @throws IllegalStateException if the configured range could not be split
          */
         @Override
         public byte[][] split(int numRegions) {
             Preconditions.checkArgument(
                 Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
                 "last row (%s) is configured less than first row (%s)",
                 Bytes.toStringBinary(lastRowBytes),
                 Bytes.toStringBinary(firstRowBytes));
             Preconditions.checkArgument(numRegions > 0,
                 "number of regions (%s) must be positive", numRegions);

             if (numRegions == 1) {
                 // A single region needs no split keys. Asking Bytes.split for zero split points is
                 // expected to be rejected (see the HBase Bytes documentation), so answer directly.
                 return new byte[0][];
             }

             byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
                                           numRegions - 1);
             Preconditions.checkState(splits != null,
                                      "Could not split region with given user input: %s", this);

             // remove endpoints, which are included in the splits list
             return Arrays.copyOfRange(splits, 1, splits.length - 1);
         }

         /**
          * @return the configured first row (the internal array, not a copy)
          */
         @Override
         public byte[] firstRow() {
             return firstRowBytes;
         }

         /**
          * @return the configured last row (the internal array, not a copy)
          */
         @Override
         public byte[] lastRow() {
             return lastRowBytes;
         }

         /**
          * Set the first row from its textual form, parsed with {@link Bytes#toBytesBinary(String)}.
          *
          * @param userInput raw user input
          */
         @Override
         public void setFirstRow(String userInput) {
             firstRowBytes = Bytes.toBytesBinary(userInput);
         }

         /**
          * Set the last row from its textual form, parsed with {@link Bytes#toBytesBinary(String)}.
          *
          * @param userInput raw user input
          */
         @Override
         public void setLastRow(String userInput) {
             lastRowBytes = Bytes.toBytesBinary(userInput);
         }

         /**
          * Set the first row.
          *
          * @param userInput byte array of the row key; stored as is, without copying
          */
         @Override
         public void setFirstRow(byte[] userInput) {
             firstRowBytes = userInput;
         }

         /**
          * Set the last row.
          *
          * @param userInput byte array of the row key; stored as is, without copying
          */
         @Override
         public void setLastRow(byte[] userInput) {
             lastRowBytes = userInput;
         }

         /**
          * @param input user or file input for row
          * @return byte array representation of this row, as produced by {@link Bytes#toBytesBinary(String)}
          */
         @Override
         public byte[] strToRow(String input) {
             return Bytes.toBytesBinary(input);
         }

         /**
          * @param row byte array representing a row in HBase
          * @return String form of the row, as produced by {@link Bytes#toStringBinary(byte[])}
          */
         @Override
         public String rowToStr(byte[] row) {
             return Bytes.toStringBinary(row);
         }

         /**
          * @return the separator to use when storing / printing the row, a comma
          */
         @Override
         public String separator() {
             return ",";
         }

         /**
          * @return the simple class name followed by the first and last row in their string form
          */
         @Override
         public String toString() {
             return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
                    + "," + rowToStr(lastRow()) + "]";
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.omid.committable.hbase;

	import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;

	import org.apache.commons.lang3.ArrayUtils;
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.util.Bytes;

	import java.io.IOException;
	import java.util.Arrays;

	/**
	* This class contains only the required behavior of the original
	* org.apache.hadoop.hbase.util.RegionSplitter class to avoid
	* having a reference to hbase-testing-util, which transitively
	* imports hbase-server causing dependency conflicts for this module.
	*/
	public class RegionSplitter {

	/**
	* A generic interface for the RegionSplitter code to use for all its functionality. Note that the original
	* authors of this code use org.apache.hadoop.hbase.util.HexStringSplit to partition their table and set it as
	* the default, but provided this interface for your custom algorithm.
	*
	* <p>The original HBase documentation refers to RegionSplitter#createPresplitTable and
	* RegionSplitter#rollingSplit(String, SplitAlgorithm, Configuration) as the entry points; neither method is
	* part of this class. To use a custom algorithm here, create a class implementing this interface and call
	* {@link RegionSplitter#newSplitAlgoInstance(Configuration, String)} with the argument splitClassName giving
	* the name of your class.
	*/
	public interface SplitAlgorithm {

	/**
	* Split a pre-existing region into 2 regions.
	*
	* @param start
	* first row (inclusive)
	* @param end
	* last row (exclusive)
	* @return the split row to use
	*/
	byte[] split(byte[] start, byte[] end);

	/**
	* Split an entire table.
	*
	* @param numRegions
	* number of regions to split the table into
	* @return array of split keys for the initial regions of the table. The length of the returned array should be
	* numRegions-1.
	* @throws RuntimeException
	* user input is validated at this time. May throw a runtime exception in response to a parse
	* failure
	*/
	byte[][] split(int numRegions);

	/**
	* In HBase, the first row is represented by an empty byte array. This might cause problems with your split
	* algorithm or row printing. All your APIs will be passed firstRow() instead of empty array.
	*
	* @return your representation of your first row
	*/
	byte[] firstRow();

	/**
	* In HBase, the last row is represented by an empty byte array. This might cause problems with your split
	* algorithm or row printing. All your APIs will be passed lastRow() instead of empty array.
	*
	* @return your representation of your last row
	*/
	byte[] lastRow();

	/**
	* In HBase, the first row is represented by an empty byte array. Set this value to help the split code
	* understand how to evenly divide the first region.
	*
	* @param userInput
	* raw user input (may throw RuntimeException on parse failure)
	*/
	void setFirstRow(String userInput);

	/**
	* In HBase, the last row is represented by an empty byte array. Set this value to help the split code
	* understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing
	* the same prefix.
	*
	* @param userInput raw user input (may throw RuntimeException on parse failure)
	*/
	void setLastRow(String userInput);

	/**
	* Converts the textual form of a row into its byte representation.
	*
	* @param input
	* user or file input for row
	* @return byte array representation of this row for HBase
	*/
	byte[] strToRow(String input);

	/**
	* Converts a row into its textual form.
	*
	* @param row byte array representing a row in HBase
	* @return String to use for debug and file printing
	*/
	String rowToStr(byte[] row);

	/**
	* @return the separator string to use when storing / printing the row
	*/
	String separator();

	/**
	* Set the first row.
	*
	* @param userInput
	* byte array of the row key.
	*/
	void setFirstRow(byte[] userInput);

	/**
	* Set the last row.
	*
	* @param userInput
	* byte array of the row key.
	*/
	void setLastRow(byte[] userInput);
	}

	/**
	 * Instantiates the {@link SplitAlgorithm} identified by {@code splitClassName}.
	 *
	 * <p>The built-in {@link UniformSplit} may be referenced by its simple class name. Any other value is
	 * resolved through {@link Configuration#getClassByName(String)} and must name a class that implements
	 * {@link SplitAlgorithm}. The instance is created through the class's no-argument constructor.
	 *
	 * @param conf Hbase conf, used to load every class other than the built-in one
	 * @param splitClassName split class name to be used
	 * @return an instance of SplitAlgorithm
	 * @throws IOException if the specified SplitAlgorithm class couldn't be loaded, does not implement
	 *                     SplitAlgorithm, or couldn't be instantiated
	 */
	public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
			String splitClassName) throws IOException {
		Class<?> splitClass;

		// For split algorithms builtin to RegionSplitter, the user can specify
		// their simple class name instead of a fully qualified class name.
		if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
			splitClass = UniformSplit.class;
		} else {
			try {
				splitClass = conf.getClassByName(splitClassName);
			} catch (ClassNotFoundException e) {
				throw new IOException("Couldn't load split class " + splitClassName, e);
			}
			if (splitClass == null) {
				throw new IOException("Failed loading split class " + splitClassName);
			}
			if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
				throw new IOException(
					"Specified split class doesn't implement SplitAlgorithm");
			}
		}
		try {
			// Class#newInstance() is deprecated: it rethrows whatever the constructor throws, checked
			// exceptions included, without wrapping. Constructor#newInstance() wraps such failures in
			// InvocationTargetException instead.
			return splitClass.asSubclass(SplitAlgorithm.class)
				.getDeclaredConstructor()
				.newInstance();
		} catch (Exception e) {
			// Deliberately broad: every instantiation failure, reflective or runtime, is reported to the
			// caller as an IOException carrying the original cause.
			throw new IOException("Problem loading split algorithm: " + splitClassName, e);
		}
	}

	/**
	 * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform
	 * random bytes (e.g. hashes). Rows are raw byte values in the range [00..FF] and are right-padded with zeros
	 * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space,
	 * but is not necessarily the easiest for readability.
	 *
	 * <p>The first and last row are kept as plain mutable fields: the setters store the array they are given and
	 * the getters return the stored array, in both cases without copying.
	 */
	public static class UniformSplit implements SplitAlgorithm {

		static final byte xFF = (byte) 0xFF;
		// Defaults: an empty first row and a last row of eight 0xFF bytes.
		byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
		byte[] lastRowBytes =
			new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};

		/**
		 * Split a pre-existing region into 2 regions.
		 *
		 * @param start first row (inclusive)
		 * @param end last row (exclusive)
		 * @return the split row to use
		 * @throws IllegalArgumentException if no split row could be computed between the two rows
		 */
		@Override
		public byte[] split(byte[] start, byte[] end) {
			byte[][] splits = Bytes.split(start, end, 1);
			// Same guard as split(int): fail with a descriptive message instead of a bare
			// NullPointerException when Bytes.split yields no result.
			if (splits == null) {
				throw new IllegalArgumentException("Could not split region between rows ("
					+ Bytes.toStringBinary(start) + ") and (" + Bytes.toStringBinary(end) + ")");
			}
			// Index 0 is the start row and index 2 the end row; index 1 is the single split point.
			return splits[1];
		}

		/**
		 * Split the configured key range into {@code numRegions} regions.
		 *
		 * @param numRegions number of regions to split the table into; must be positive
		 * @return the {@code numRegions - 1} split keys, which is an empty array for a single region
		 * @throws IllegalArgumentException if the last row is not greater than the first row, or if
		 *                                  {@code numRegions} is not positive
		 * @throws IllegalStateException if the configured range could not be split
		 */
		@Override
		public byte[][] split(int numRegions) {
			Preconditions.checkArgument(
				Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
				"last row (%s) is configured less than first row (%s)",
				Bytes.toStringBinary(lastRowBytes),
				Bytes.toStringBinary(firstRowBytes));
			Preconditions.checkArgument(numRegions > 0,
				"number of regions (%s) must be positive", numRegions);

			if (numRegions == 1) {
				// A single region needs no split keys. Asking Bytes.split for zero split points is
				// expected to be rejected (see the HBase Bytes documentation), so answer directly.
				return new byte[0][];
			}

			byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
				numRegions - 1);
			Preconditions.checkState(splits != null,
				"Could not split region with given user input: %s", this);

			// remove endpoints, which are included in the splits list
			return Arrays.copyOfRange(splits, 1, splits.length - 1);
		}

		/**
		 * @return the configured first row (the internal array, not a copy)
		 */
		@Override
		public byte[] firstRow() {
			return firstRowBytes;
		}

		/**
		 * @return the configured last row (the internal array, not a copy)
		 */
		@Override
		public byte[] lastRow() {
			return lastRowBytes;
		}

		/**
		 * Set the first row from its textual form, parsed with {@link Bytes#toBytesBinary(String)}.
		 *
		 * @param userInput raw user input
		 */
		@Override
		public void setFirstRow(String userInput) {
			firstRowBytes = Bytes.toBytesBinary(userInput);
		}

		/**
		 * Set the last row from its textual form, parsed with {@link Bytes#toBytesBinary(String)}.
		 *
		 * @param userInput raw user input
		 */
		@Override
		public void setLastRow(String userInput) {
			lastRowBytes = Bytes.toBytesBinary(userInput);
		}

		/**
		 * Set the first row.
		 *
		 * @param userInput byte array of the row key; stored as is, without copying
		 */
		@Override
		public void setFirstRow(byte[] userInput) {
			firstRowBytes = userInput;
		}

		/**
		 * Set the last row.
		 *
		 * @param userInput byte array of the row key; stored as is, without copying
		 */
		@Override
		public void setLastRow(byte[] userInput) {
			lastRowBytes = userInput;
		}

		/**
		 * @param input user or file input for row
		 * @return byte array representation of this row, as produced by {@link Bytes#toBytesBinary(String)}
		 */
		@Override
		public byte[] strToRow(String input) {
			return Bytes.toBytesBinary(input);
		}

		/**
		 * @param row byte array representing a row in HBase
		 * @return String form of the row, as produced by {@link Bytes#toStringBinary(byte[])}
		 */
		@Override
		public String rowToStr(byte[] row) {
			return Bytes.toStringBinary(row);
		}

		/**
		 * @return the separator to use when storing / printing the row, a comma
		 */
		@Override
		public String separator() {
			return ",";
		}

		/**
		 * @return the simple class name followed by the first and last row in their string form
		 */
		@Override
		public String toString() {
			return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
				+ "," + rowToStr(lastRow()) + "]";
		}
	}
	}