| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (17) --> |
| <title>Source code</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.util, class: RegionSplitter, interface: SplitAlgorithm"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.util;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import java.io.IOException;</span> |
| <span class="source-line-no">021</span><span id="line-21">import java.math.BigInteger;</span> |
| <span class="source-line-no">022</span><span id="line-22">import java.util.Arrays;</span> |
| <span class="source-line-no">023</span><span id="line-23">import java.util.Collection;</span> |
| <span class="source-line-no">024</span><span id="line-24">import java.util.Comparator;</span> |
| <span class="source-line-no">025</span><span id="line-25">import java.util.LinkedList;</span> |
| <span class="source-line-no">026</span><span id="line-26">import java.util.List;</span> |
| <span class="source-line-no">027</span><span id="line-27">import java.util.Set;</span> |
| <span class="source-line-no">028</span><span id="line-28">import java.util.TreeMap;</span> |
| <span class="source-line-no">029</span><span id="line-29">import org.apache.commons.lang3.ArrayUtils;</span> |
| <span class="source-line-no">030</span><span id="line-30">import org.apache.commons.lang3.StringUtils;</span> |
| <span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.fs.FSDataInputStream;</span> |
| <span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.fs.FSDataOutputStream;</span> |
| <span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.fs.FileSystem;</span> |
| <span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.fs.Path;</span> |
| <span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.HBaseConfiguration;</span> |
| <span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.HConstants;</span> |
| <span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.HRegionLocation;</span> |
| <span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.ServerName;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.TableName;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.client.Admin;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.client.Connection;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.client.ConnectionFactory;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.client.NoServerForRegionException;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.client.RegionInfo;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.client.RegionLocator;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.hbase.client.Table;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.hbase.client.TableDescriptor;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.hbase.client.TableDescriptorBuilder;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;</span> |
| <span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;</span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;</span> |
| <span class="source-line-no">055</span><span id="line-55">import org.apache.yetus.audience.InterfaceAudience;</span> |
| <span class="source-line-no">056</span><span id="line-56">import org.slf4j.Logger;</span> |
| <span class="source-line-no">057</span><span id="line-57">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">058</span><span id="line-58"></span> |
| <span class="source-line-no">059</span><span id="line-59">import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;</span> |
| <span class="source-line-no">060</span><span id="line-60">import org.apache.hbase.thirdparty.com.google.common.collect.Lists;</span> |
| <span class="source-line-no">061</span><span id="line-61">import org.apache.hbase.thirdparty.com.google.common.collect.Maps;</span> |
| <span class="source-line-no">062</span><span id="line-62">import org.apache.hbase.thirdparty.com.google.common.collect.Sets;</span> |
| <span class="source-line-no">063</span><span id="line-63">import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;</span> |
| <span class="source-line-no">064</span><span id="line-64">import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;</span> |
| <span class="source-line-no">065</span><span id="line-65">import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;</span> |
| <span class="source-line-no">066</span><span id="line-66">import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionBuilder;</span> |
| <span class="source-line-no">067</span><span id="line-67">import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;</span> |
| <span class="source-line-no">068</span><span id="line-68">import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;</span> |
| <span class="source-line-no">069</span><span id="line-69"></span> |
| <span class="source-line-no">070</span><span id="line-70">/**</span> |
| <span class="source-line-no">071</span><span id="line-71"> * The {@link RegionSplitter} class provides several utilities to help in the administration</span> |
| <span class="source-line-no">072</span><span id="line-72"> * lifecycle for developers who choose to manually split regions instead of having HBase handle that</span> |
| <span class="source-line-no">073</span><span id="line-73"> * automatically. The most useful utilities are:</span> |
| <span class="source-line-no">074</span><span id="line-74"> * <p></span> |
| <span class="source-line-no">075</span><span id="line-75"> * <ul></span> |
| <span class="source-line-no">076</span><span id="line-76"> * <li>Create a table with a specified number of pre-split regions</span> |
| <span class="source-line-no">077</span><span id="line-77"> * <li>Execute a rolling split of all regions on an existing table</span> |
| <span class="source-line-no">078</span><span id="line-78"> * </ul></span> |
| <span class="source-line-no">079</span><span id="line-79"> * <p></span> |
| <span class="source-line-no">080</span><span id="line-80"> * Both operations can be safely done on a live server.</span> |
| <span class="source-line-no">081</span><span id="line-81"> * <p></span> |
| <span class="source-line-no">082</span><span id="line-82"> * <b>Question:</b> How do I turn off automatic splitting? <br></span> |
| <span class="source-line-no">083</span><span id="line-83"> * <b>Answer:</b> Automatic splitting is determined by the configuration value</span> |
| <span class="source-line-no">084</span><span id="line-84"> * <i>HConstants.HREGION_MAX_FILESIZE</i>. It is not recommended that you set this to Long.MAX_VALUE</span> |
| <span class="source-line-no">085</span><span id="line-85"> * in case you forget about manual splits. A suggested setting is 100GB, which would result in &gt;</span> |
| <span class="source-line-no">086</span><span id="line-86"> * 1hr major compactions if reached.</span> |
| <span class="source-line-no">087</span><span id="line-87"> * <p></span> |
| <span class="source-line-no">088</span><span id="line-88"> * <b>Question:</b> Why did the original authors decide to manually split? <br></span> |
| <span class="source-line-no">089</span><span id="line-89"> * <b>Answer:</b> Specific workload characteristics of our use case allowed us to benefit from a</span> |
| <span class="source-line-no">090</span><span id="line-90"> * manual split system.</span> |
| <span class="source-line-no">091</span><span id="line-91"> * <p></span> |
| <span class="source-line-no">092</span><span id="line-92"> * <ul></span> |
| <span class="source-line-no">093</span><span id="line-93"> * <li>Data (~1k) that would grow instead of being replaced</span> |
| <span class="source-line-no">094</span><span id="line-94"> * <li>Data growth was roughly uniform across all regions</span> |
| <span class="source-line-no">095</span><span id="line-95"> * <li>OLTP workload. Data loss is a big deal.</span> |
| <span class="source-line-no">096</span><span id="line-96"> * </ul></span> |
| <span class="source-line-no">097</span><span id="line-97"> * <p></span> |
| <span class="source-line-no">098</span><span id="line-98"> * <b>Question:</b> Why is manual splitting good for this workload? <br></span> |
| <span class="source-line-no">099</span><span id="line-99"> * <b>Answer:</b> Although automated splitting is not a bad option, there are benefits to manual</span> |
| <span class="source-line-no">100</span><span id="line-100"> * splitting.</span> |
| <span class="source-line-no">101</span><span id="line-101"> * <p></span> |
| <span class="source-line-no">102</span><span id="line-102"> * <ul></span> |
| <span class="source-line-no">103</span><span id="line-103"> * <li>With growing amounts of data, splits will continually be needed. Since you always know</span> |
| <span class="source-line-no">104</span><span id="line-104"> * exactly what regions you have, long-term debugging and profiling is much easier with manual</span> |
| <span class="source-line-no">105</span><span id="line-105"> * splits. It is hard to trace the logs to understand region level problems if it keeps splitting</span> |
| <span class="source-line-no">106</span><span id="line-106"> * and getting renamed.</span> |
| <span class="source-line-no">107</span><span id="line-107"> * <li>Data offlining bugs + unknown number of split regions == oh crap! If a WAL or StoreFile was</span> |
| <span class="source-line-no">108</span><span id="line-108"> * mistakenly unprocessed by HBase due to a weird bug and you notice it a day or so later, you can</span> |
| <span class="source-line-no">109</span><span id="line-109"> * be assured that the regions specified in these files are the same as the current regions and you</span> |
| <span class="source-line-no">110</span><span id="line-110"> * have fewer headaches trying to restore/replay your data.</span> |
| <span class="source-line-no">111</span><span id="line-111"> * <li>You can finely tune your compaction algorithm. With roughly uniform data growth, it's easy to</span> |
| <span class="source-line-no">112</span><span id="line-112"> * cause split / compaction storms as the regions all roughly hit the same data size at the same</span> |
| <span class="source-line-no">113</span><span id="line-113"> * time. With manual splits, you can let staggered, time-based major compactions spread out your</span> |
| <span class="source-line-no">114</span><span id="line-114"> * network IO load.</span> |
| <span class="source-line-no">115</span><span id="line-115"> * </ul></span> |
| <span class="source-line-no">116</span><span id="line-116"> * <p></span> |
| <span class="source-line-no">117</span><span id="line-117"> * <b>Question:</b> What's the optimal number of pre-split regions to create? <br></span> |
| <span class="source-line-no">118</span><span id="line-118"> * <b>Answer:</b> Mileage will vary depending upon your application.</span> |
| <span class="source-line-no">119</span><span id="line-119"> * <p></span> |
| <span class="source-line-no">120</span><span id="line-120"> * The short answer for our application is that we started with 10 pre-split regions / server and</span> |
| <span class="source-line-no">121</span><span id="line-121"> * watched our data growth over time. It's better to err on the side of too few regions and</span> |
| <span class="source-line-no">122</span><span id="line-122"> * rolling split later.</span> |
| <span class="source-line-no">123</span><span id="line-123"> * <p></span> |
| <span class="source-line-no">124</span><span id="line-124"> * The more complicated answer is that this depends upon the largest storefile in your region. With</span> |
| <span class="source-line-no">125</span><span id="line-125"> * a growing data size, this will get larger over time. You want the largest region to be just big</span> |
| <span class="source-line-no">126</span><span id="line-126"> * enough that the {@link org.apache.hadoop.hbase.regionserver.HStore} compact selection algorithm</span> |
| <span class="source-line-no">127</span><span id="line-127"> * only compacts it due to a timed major. If you don't, your cluster can be prone to compaction</span> |
| <span class="source-line-no">128</span><span id="line-128"> * storms as the algorithm decides to run major compactions on a large series of regions all at</span> |
| <span class="source-line-no">129</span><span id="line-129"> * once. Note that compaction storms are due to the uniform data growth, not the manual split</span> |
| <span class="source-line-no">130</span><span id="line-130"> * decision.</span> |
| <span class="source-line-no">131</span><span id="line-131"> * <p></span> |
| <span class="source-line-no">132</span><span id="line-132"> * If you pre-split your regions too thin, you can increase the major compaction interval by</span> |
| <span class="source-line-no">133</span><span id="line-133"> * configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size grows too large, use this</span> |
| <span class="source-line-no">134</span><span id="line-134"> * script to perform a network IO safe rolling split of all regions.</span> |
| <span class="source-line-no">135</span><span id="line-135"> */</span> |
| <span class="source-line-no">136</span><span id="line-136">@InterfaceAudience.Private</span> |
| <span class="source-line-no">137</span><span id="line-137">public class RegionSplitter {</span> |
| <span class="source-line-no">138</span><span id="line-138"> private static final Logger LOG = LoggerFactory.getLogger(RegionSplitter.class);</span> |
| <span class="source-line-no">139</span><span id="line-139"></span> |
| <span class="source-line-no">140</span><span id="line-140"> /**</span> |
| <span class="source-line-no">141</span><span id="line-141"> * A generic interface for the RegionSplitter code to use for all its functionality. Note that</span> |
| <span class="source-line-no">142</span><span id="line-142"> * the original authors of this code use {@link HexStringSplit} to partition their table and set</span> |
| <span class="source-line-no">143</span><span id="line-143"> * it as default, but provided this for your custom algorithm. To use, create a new derived class</span> |
| <span class="source-line-no">144</span><span id="line-144"> * from this interface and call {@link RegionSplitter#createPresplitTable} or</span> |
| <span class="source-line-no">145</span><span id="line-145"> * RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the argument</span> |
| <span class="source-line-no">146</span><span id="line-146"> * splitClassName giving the name of your class.</span> |
| <span class="source-line-no">147</span><span id="line-147"> */</span> |
| <span class="source-line-no">148</span><span id="line-148"> public interface SplitAlgorithm {</span> |
| <span class="source-line-no">149</span><span id="line-149"> /**</span> |
| <span class="source-line-no">150</span><span id="line-150"> * Split a pre-existing region into 2 regions. {@code start} is the first row (inclusive) and {@code end} is the last row (exclusive).</span> |
| <span class="source-line-no">151</span><span id="line-151"> * @return the split row to use</span> |
| <span class="source-line-no">152</span><span id="line-152"> */</span> |
| <span class="source-line-no">153</span><span id="line-153"> byte[] split(byte[] start, byte[] end);</span> |
| <span class="source-line-no">154</span><span id="line-154"></span> |
| <span class="source-line-no">155</span><span id="line-155"> /**</span> |
| <span class="source-line-no">156</span><span id="line-156"> * Split an entire table into {@code numRegions} regions. User input is validated at</span> |
| <span class="source-line-no">157</span><span id="line-157"> * this time; may throw a runtime exception in response to a parse failure.</span> |
| <span class="source-line-no">158</span><span id="line-158"> * @return array of split keys for the initial regions of the table. The length of the returned</span> |
| <span class="source-line-no">159</span><span id="line-159"> * array should be numRegions-1.</span> |
| <span class="source-line-no">160</span><span id="line-160"> */</span> |
| <span class="source-line-no">161</span><span id="line-161"> byte[][] split(int numRegions);</span> |
| <span class="source-line-no">162</span><span id="line-162"></span> |
| <span class="source-line-no">163</span><span id="line-163"> /**</span> |
| <span class="source-line-no">164</span><span id="line-164"> * Some MapReduce jobs may want to run multiple mappers per region, this is intended for such</span> |
| <span class="source-line-no">165</span><span id="line-165"> * use case.</span> |
| <span class="source-line-no">166</span><span id="line-166"> * @param start first row (inclusive)</span> |
| <span class="source-line-no">167</span><span id="line-167"> * @param end last row (exclusive)</span> |
| <span class="source-line-no">168</span><span id="line-168"> * @param numSplits number of splits to generate</span> |
| <span class="source-line-no">169</span><span id="line-169"> * @param inclusive whether start and end are returned as split points</span> |
| <span class="source-line-no">170</span><span id="line-170"> */</span> |
| <span class="source-line-no">171</span><span id="line-171"> byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);</span> |
| <span class="source-line-no">172</span><span id="line-172"></span> |
| <span class="source-line-no">173</span><span id="line-173"> /**</span> |
| <span class="source-line-no">174</span><span id="line-174"> * In HBase, the first row is represented by an empty byte array. This might cause problems with</span> |
| <span class="source-line-no">175</span><span id="line-175"> * your split algorithm or row printing. All your APIs will be passed firstRow() instead of</span> |
| <span class="source-line-no">176</span><span id="line-176"> * empty array.</span> |
| <span class="source-line-no">177</span><span id="line-177"> * @return your representation of your first row</span> |
| <span class="source-line-no">178</span><span id="line-178"> */</span> |
| <span class="source-line-no">179</span><span id="line-179"> byte[] firstRow();</span> |
| <span class="source-line-no">180</span><span id="line-180"></span> |
| <span class="source-line-no">181</span><span id="line-181"> /**</span> |
| <span class="source-line-no">182</span><span id="line-182"> * In HBase, the last row is represented by an empty byte array. This might cause problems with</span> |
| <span class="source-line-no">183</span><span id="line-183"> * your split algorithm or row printing. All your APIs will be passed firstRow() instead of</span> |
| <span class="source-line-no">184</span><span id="line-184"> * empty array. NOTE(review): "firstRow()" here looks copied from the firstRow() documentation; presumably lastRow() is meant - confirm.</span> |
| <span class="source-line-no">185</span><span id="line-185"> * @return your representation of your last row</span> |
| <span class="source-line-no">186</span><span id="line-186"> */</span> |
| <span class="source-line-no">187</span><span id="line-187"> byte[] lastRow();</span> |
| <span class="source-line-no">188</span><span id="line-188"></span> |
| <span class="source-line-no">189</span><span id="line-189"> /**</span> |
| <span class="source-line-no">190</span><span id="line-190"> * In HBase, the first row is represented by an empty byte array. Set this value to help the</span> |
| <span class="source-line-no">191</span><span id="line-191"> * split code understand how to evenly divide the first region. {@code userInput} is raw user input (may throw</span> |
| <span class="source-line-no">192</span><span id="line-192"> * RuntimeException on parse failure).</span> |
| <span class="source-line-no">193</span><span id="line-193"> */</span> |
| <span class="source-line-no">194</span><span id="line-194"> void setFirstRow(String userInput);</span> |
| <span class="source-line-no">195</span><span id="line-195"></span> |
| <span class="source-line-no">196</span><span id="line-196"> /**</span> |
| <span class="source-line-no">197</span><span id="line-197"> * In HBase, the last row is represented by an empty byte array. Set this value to help the</span> |
| <span class="source-line-no">198</span><span id="line-198"> * split code understand how to evenly divide the last region. Note that this last row is</span> |
| <span class="source-line-no">199</span><span id="line-199"> * inclusive for all rows sharing the same prefix. {@code userInput} is raw user input (may throw RuntimeException on</span> |
| <span class="source-line-no">200</span><span id="line-200"> * parse failure).</span> |
| <span class="source-line-no">201</span><span id="line-201"> */</span> |
| <span class="source-line-no">202</span><span id="line-202"> void setLastRow(String userInput);</span> |
| <span class="source-line-no">203</span><span id="line-203"></span> |
| <span class="source-line-no">204</span><span id="line-204"> /**</span> |
| <span class="source-line-no">205</span><span id="line-205"> * {@code input} is the user or file input for a row.</span> |
| <span class="source-line-no">206</span><span id="line-206"> * @return byte array representation of this row for HBase</span> |
| <span class="source-line-no">207</span><span id="line-207"> */</span> |
| <span class="source-line-no">208</span><span id="line-208"> byte[] strToRow(String input);</span> |
| <span class="source-line-no">209</span><span id="line-209"></span> |
| <span class="source-line-no">210</span><span id="line-210"> /**</span> |
| <span class="source-line-no">211</span><span id="line-211"> * {@code row} is a byte array representing a row in HBase.</span> |
| <span class="source-line-no">212</span><span id="line-212"> * @return String to use for debug &amp; file printing</span> |
| <span class="source-line-no">213</span><span id="line-213"> */</span> |
| <span class="source-line-no">214</span><span id="line-214"> String rowToStr(byte[] row);</span> |
| <span class="source-line-no">215</span><span id="line-215"></span> |
| <span class="source-line-no">216</span><span id="line-216"> /** Returns the separator character to use when storing / printing the row */</span> |
| <span class="source-line-no">217</span><span id="line-217"> String separator();</span> |
| <span class="source-line-no">218</span><span id="line-218"></span> |
| <span class="source-line-no">219</span><span id="line-219"> /**</span> |
| <span class="source-line-no">220</span><span id="line-220"> * Set the first row</span> |
| <span class="source-line-no">221</span><span id="line-221"> * @param userInput byte array of the row key.</span> |
| <span class="source-line-no">222</span><span id="line-222"> */</span> |
| <span class="source-line-no">223</span><span id="line-223"> void setFirstRow(byte[] userInput);</span> |
| <span class="source-line-no">224</span><span id="line-224"></span> |
| <span class="source-line-no">225</span><span id="line-225"> /**</span> |
| <span class="source-line-no">226</span><span id="line-226"> * Set the last row</span> |
| <span class="source-line-no">227</span><span id="line-227"> * @param userInput byte array of the row key.</span> |
| <span class="source-line-no">228</span><span id="line-228"> */</span> |
| <span class="source-line-no">229</span><span id="line-229"> void setLastRow(byte[] userInput);</span> |
| <span class="source-line-no">230</span><span id="line-230"> }</span> |
| <span class="source-line-no">231</span><span id="line-231"></span> |
| <span class="source-line-no">232</span><span id="line-232"> /**</span> |
| <span class="source-line-no">233</span><span id="line-233"> * The main function for the RegionSplitter application. Common uses:</span> |
| <span class="source-line-no">234</span><span id="line-234"> * <p></span> |
| <span class="source-line-no">235</span><span id="line-235"> * <ul></span> |
| <span class="source-line-no">236</span><span id="line-236"> * <li>create a table named 'myTable' with 60 pre-split regions containing 2 column families</span> |
| <span class="source-line-no">237</span><span id="line-237"> * 'test' &amp; 'rs', assuming the keys are hex-encoded ASCII:</span> |
| <span class="source-line-no">238</span><span id="line-238"> * <ul></span> |
| <span class="source-line-no">239</span><span id="line-239"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs myTable</span> |
| <span class="source-line-no">240</span><span id="line-240"> * HexStringSplit</span> |
| <span class="source-line-no">241</span><span id="line-241"> * </ul></span> |
| <span class="source-line-no">242</span><span id="line-242"> * <li>create a table named 'myTable' with 50 pre-split regions, assuming the keys are</span> |
| <span class="source-line-no">243</span><span id="line-243"> * decimal-encoded ASCII:</span> |
| <span class="source-line-no">244</span><span id="line-244"> * <ul></span> |
| <span class="source-line-no">245</span><span id="line-245"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50 myTable DecimalStringSplit</span> |
| <span class="source-line-no">246</span><span id="line-246"> * </ul></span> |
| <span class="source-line-no">247</span><span id="line-247"> * <li>perform a rolling split of 'myTable' (i.e. 60 =&gt; 120 regions), # 2 outstanding splits at</span> |
| <span class="source-line-no">248</span><span id="line-248"> * a time, assuming keys are uniformly distributed bytes:</span> |
| <span class="source-line-no">249</span><span id="line-249"> * <ul></span> |
| <span class="source-line-no">250</span><span id="line-250"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable UniformSplit</span> |
| <span class="source-line-no">251</span><span id="line-251"> * </ul></span> |
| <span class="source-line-no">252</span><span id="line-252"> * </ul></span> |
| <span class="source-line-no">253</span><span id="line-253"> * There are three SplitAlgorithms built into RegionSplitter, HexStringSplit, DecimalStringSplit,</span> |
| <span class="source-line-no">254</span><span id="line-254"> * and UniformSplit. These are different strategies for choosing region boundaries. See their</span> |
| <span class="source-line-no">255</span><span id="line-255"> * source code for details. Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt; &lt;-c</span> |
| <span class="source-line-no">256</span><span id="line-256"> * &lt;# regions&gt; -f &lt;family:family:...&gt; | -r [-o &lt;# outstanding splits&gt;]&gt; [-D</span> |
| <span class="source-line-no">257</span><span id="line-257"> * &lt;conf.param=value&gt;]. Fails on an HBase IO problem, a user-requested exit, or a problem parsing user input.</span> |
| <span class="source-line-no">258</span><span id="line-258"> */</span> |
| <span class="source-line-no">259</span><span id="line-259">  @SuppressWarnings("static-access")</span> |
| <span class="source-line-no">260</span><span id="line-260">  public static void main(String[] args) throws IOException, InterruptedException, ParseException {</span> |
| <span class="source-line-no">261</span><span id="line-261">    Configuration conf = HBaseConfiguration.create();</span> |
| <span class="source-line-no">262</span><span id="line-262"></span> |
| <span class="source-line-no">263</span><span id="line-263">    // Declare every option the tool understands, then parse the command line.</span> |
| <span class="source-line-no">264</span><span id="line-264">    Options options = new Options();</span> |
| <span class="source-line-no">265</span><span id="line-265">    options.addOption(OptionBuilder.withArgName("property=value").hasArg()</span> |
| <span class="source-line-no">266</span><span id="line-266">      .withDescription("Override HBase Configuration Settings").create("D"));</span> |
| <span class="source-line-no">267</span><span id="line-267">    options.addOption(OptionBuilder.withArgName("region count").hasArg()</span> |
| <span class="source-line-no">268</span><span id="line-268">      .withDescription("Create a new table with a pre-split number of regions").create("c"));</span> |
| <span class="source-line-no">269</span><span id="line-269">    options.addOption(OptionBuilder.withArgName("family:family:...").hasArg()</span> |
| <span class="source-line-no">270</span><span id="line-270">      .withDescription("Column Families to create with new table. Required with -c").create("f"));</span> |
| <span class="source-line-no">271</span><span id="line-271">    options.addOption("h", false, "Print this usage help");</span> |
| <span class="source-line-no">272</span><span id="line-272">    options.addOption("r", false, "Perform a rolling split of an existing region");</span> |
| <span class="source-line-no">273</span><span id="line-273">    options.addOption(OptionBuilder.withArgName("count").hasArg()</span> |
| <span class="source-line-no">274</span><span id="line-274">      .withDescription("Max outstanding splits that have unfinished major compactions")</span> |
| <span class="source-line-no">275</span><span id="line-275">      .create("o"));</span> |
| <span class="source-line-no">276</span><span id="line-276">    options.addOption(null, "firstrow", true, "First Row in Table for Split Algorithm");</span> |
| <span class="source-line-no">277</span><span id="line-277">    options.addOption(null, "lastrow", true, "Last Row in Table for Split Algorithm");</span> |
| <span class="source-line-no">278</span><span id="line-278">    options.addOption(null, "risky", false, "Skip verification steps to complete quickly. "</span> |
| <span class="source-line-no">279</span><span id="line-279">      + "STRONGLY DISCOURAGED for production systems. ");</span> |
| <span class="source-line-no">280</span><span id="line-280">    CommandLine parsed = new GnuParser().parse(options, args);</span> |
| <span class="source-line-no">281</span><span id="line-281"></span> |
| <span class="source-line-no">282</span><span id="line-282">    // Copy each -D key=value pair into the configuration; a value without '=' is rejected.</span> |
| <span class="source-line-no">283</span><span id="line-283">    if (parsed.hasOption("D")) {</span> |
| <span class="source-line-no">284</span><span id="line-284">      for (String override : parsed.getOptionValues("D")) {</span> |
| <span class="source-line-no">285</span><span id="line-285">        String[] pair = override.split("=", 2);</span> |
| <span class="source-line-no">286</span><span id="line-286">        if (pair.length != 2) {</span> |
| <span class="source-line-no">287</span><span id="line-287">          throw new ParseException("-D option format invalid: " + override);</span> |
| <span class="source-line-no">288</span><span id="line-288">        }</span> |
| <span class="source-line-no">289</span><span id="line-289">        conf.set(pair[0], pair[1]);</span> |
| <span class="source-line-no">290</span><span id="line-290">        LOG.debug("-D configuration override: " + pair[0] + "=" + pair[1]);</span> |
| <span class="source-line-no">291</span><span id="line-291">      }</span> |
| <span class="source-line-no">292</span><span id="line-292">    }</span> |
| <span class="source-line-no">293</span><span id="line-293"></span> |
| <span class="source-line-no">294</span><span id="line-294">    if (parsed.hasOption("risky")) {</span> |
| <span class="source-line-no">295</span><span id="line-295">      conf.setBoolean("split.verify", false);</span> |
| <span class="source-line-no">296</span><span id="line-296">    }</span> |
| <span class="source-line-no">297</span><span id="line-297"></span> |
| <span class="source-line-no">298</span><span id="line-298">    // Exactly one operation must be requested: table creation (-c with -f) or a rolling split (-r).</span> |
| <span class="source-line-no">299</span><span id="line-299">    boolean createTable = parsed.hasOption("c") && parsed.hasOption("f");</span> |
| <span class="source-line-no">300</span><span id="line-300">    boolean rollingSplit = parsed.hasOption("r");</span> |
| <span class="source-line-no">301</span><span id="line-301">    if (parsed.getArgList().size() != 2 || createTable == rollingSplit || parsed.hasOption("h")) {</span> |
| <span class="source-line-no">302</span><span id="line-302">      String usage = "bin/hbase regionsplitter <TABLE> <SPLITALGORITHM>\n"</span> |
| <span class="source-line-no">303</span><span id="line-303">        + "SPLITALGORITHM is the java class name of a class implementing "</span> |
| <span class="source-line-no">304</span><span id="line-304">        + "SplitAlgorithm, or one of the special strings HexStringSplit or "</span> |
| <span class="source-line-no">305</span><span id="line-305">        + "DecimalStringSplit or UniformSplit, which are built-in split algorithms. "</span> |
| <span class="source-line-no">306</span><span id="line-306">        + "HexStringSplit treats keys as hexadecimal ASCII, and "</span> |
| <span class="source-line-no">307</span><span id="line-307">        + "DecimalStringSplit treats keys as decimal ASCII, and "</span> |
| <span class="source-line-no">308</span><span id="line-308">        + "UniformSplit treats keys as arbitrary bytes.";</span> |
| <span class="source-line-no">309</span><span id="line-309">      new HelpFormatter().printHelp(usage, options);</span> |
| <span class="source-line-no">310</span><span id="line-310">      return;</span> |
| <span class="source-line-no">311</span><span id="line-311">    }</span> |
| <span class="source-line-no">312</span><span id="line-312"></span> |
| <span class="source-line-no">313</span><span id="line-313">    String[] positional = parsed.getArgs();</span> |
| <span class="source-line-no">314</span><span id="line-314">    TableName tableName = TableName.valueOf(positional[0]);</span> |
| <span class="source-line-no">315</span><span id="line-315">    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, positional[1]);</span> |
| <span class="source-line-no">316</span><span id="line-316">    if (parsed.hasOption("firstrow")) {</span> |
| <span class="source-line-no">317</span><span id="line-317">      splitAlgo.setFirstRow(parsed.getOptionValue("firstrow"));</span> |
| <span class="source-line-no">318</span><span id="line-318">    }</span> |
| <span class="source-line-no">319</span><span id="line-319">    if (parsed.hasOption("lastrow")) {</span> |
| <span class="source-line-no">320</span><span id="line-320">      splitAlgo.setLastRow(parsed.getOptionValue("lastrow"));</span> |
| <span class="source-line-no">321</span><span id="line-321">    }</span> |
| <span class="source-line-no">322</span><span id="line-322"></span> |
| <span class="source-line-no">323</span><span id="line-323">    if (createTable) {</span> |
| <span class="source-line-no">324</span><span id="line-324">      conf.set("split.count", parsed.getOptionValue("c"));</span> |
| <span class="source-line-no">325</span><span id="line-325">      String[] families = parsed.getOptionValue("f").split(":");</span> |
| <span class="source-line-no">326</span><span id="line-326">      createPresplitTable(tableName, splitAlgo, families, conf);</span> |
| <span class="source-line-no">327</span><span id="line-327">    } else {</span> |
| <span class="source-line-no">328</span><span id="line-328">      // createTable != rollingSplit was enforced above, so this branch is the rolling split.</span> |
| <span class="source-line-no">329</span><span id="line-329">      if (parsed.hasOption("o")) {</span> |
| <span class="source-line-no">330</span><span id="line-330">        conf.set("split.outstanding", parsed.getOptionValue("o"));</span> |
| <span class="source-line-no">331</span><span id="line-331">      }</span> |
| <span class="source-line-no">332</span><span id="line-332">      rollingSplit(tableName, splitAlgo, conf);</span> |
| <span class="source-line-no">333</span><span id="line-333">    }</span> |
| <span class="source-line-no">334</span><span id="line-334">  }</span> |
| <span class="source-line-no">335</span><span id="line-335"></span> |
| <span class="source-line-no">336</span><span id="line-336">  /**</span> |
| <span class="source-line-no">337</span><span id="line-337">   * Creates {@code tableName} pre-split into {@code split.count} regions (must be > 1). Unless</span> |
| <span class="source-line-no">338</span><span id="line-338">   * {@code split.verify} is false, polls every 10 s until the locator reports that many regions.</span> |
| <span class="source-line-no">339</span><span id="line-339">   */</span> |
| <span class="source-line-no">340</span><span id="line-340">  static void createPresplitTable(TableName tableName, SplitAlgorithm splitAlgo,</span> |
| <span class="source-line-no">341</span><span id="line-341">    String[] columnFamilies, Configuration conf) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">342</span><span id="line-342">    final int splitCount = conf.getInt("split.count", 0);</span> |
| <span class="source-line-no">343</span><span id="line-343">    Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");</span> |
| <span class="source-line-no">344</span><span id="line-344">    Preconditions.checkArgument(columnFamilies.length > 0,</span> |
| <span class="source-line-no">345</span><span id="line-345">      "Must specify at least one column family. ");</span> |
| <span class="source-line-no">346</span><span id="line-346">    LOG.debug("Creating table " + tableName + " with " + columnFamilies.length</span> |
| <span class="source-line-no">347</span><span id="line-347">      + " column families. Presplitting to " + splitCount + " regions");</span> |
| <span class="source-line-no">348</span><span id="line-348">    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);</span> |
| <span class="source-line-no">349</span><span id="line-349">    for (String cf : columnFamilies) {</span> |
| <span class="source-line-no">350</span><span id="line-350">      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));</span> |
| <span class="source-line-no">351</span><span id="line-351">    }</span> |
| <span class="source-line-no">352</span><span id="line-352">    try (Connection connection = ConnectionFactory.createConnection(conf)) {</span> |
| <span class="source-line-no">353</span><span id="line-353">      try (Admin admin = connection.getAdmin()) {</span> |
| <span class="source-line-no">354</span><span id="line-354">        Preconditions.checkArgument(!admin.tableExists(tableName),</span> |
| <span class="source-line-no">355</span><span id="line-355">          "Table already exists: " + tableName);</span> |
| <span class="source-line-no">356</span><span id="line-356">        admin.createTable(builder.build(), splitAlgo.split(splitCount));</span> |
| <span class="source-line-no">357</span><span id="line-357">      }</span> |
| <span class="source-line-no">358</span><span id="line-358">      LOG.debug("Table created! Waiting for regions to show online in META...");</span> |
| <span class="source-line-no">359</span><span id="line-359">      // Verification is on by default and is switched off only by split.verify=false (--risky).</span> |
| <span class="source-line-no">360</span><span id="line-360">      if (conf.getBoolean("split.verify", true)) {</span> |
| <span class="source-line-no">361</span><span id="line-361">        // NOTE: createTable is synchronous on the table, but not on the regions</span> |
| <span class="source-line-no">362</span><span id="line-362">        int onlineRegions = 0;</span> |
| <span class="source-line-no">363</span><span id="line-363">        try (RegionLocator locator = connection.getRegionLocator(tableName)) {</span> |
| <span class="source-line-no">364</span><span id="line-364">          while (onlineRegions < splitCount) {</span> |
| <span class="source-line-no">365</span><span id="line-365">            onlineRegions = locator.getAllRegionLocations().size();</span> |
| <span class="source-line-no">366</span><span id="line-366">            LOG.debug(onlineRegions + " of " + splitCount + " regions online...");</span> |
| <span class="source-line-no">367</span><span id="line-367">            if (onlineRegions < splitCount) {</span> |
| <span class="source-line-no">368</span><span id="line-368">              Thread.sleep(10 * 1000); // pause between polls</span> |
| <span class="source-line-no">369</span><span id="line-369">            }</span> |
| <span class="source-line-no">370</span><span id="line-370">          }</span> |
| <span class="source-line-no">371</span><span id="line-371">        }</span> |
| <span class="source-line-no">372</span><span id="line-372">      }</span> |
| <span class="source-line-no">373</span><span id="line-373">      LOG.debug("Finished creating table with " + splitCount + " regions");</span> |
| <span class="source-line-no">374</span><span id="line-374">    }</span> |
| <span class="source-line-no">375</span><span id="line-375">  }</span> |
| <span class="source-line-no">376</span><span id="line-376"></span> |
| <span class="source-line-no">377</span><span id="line-377">  /**</span> |
| <span class="source-line-no">378</span><span id="line-378">   * Stand-in for the no-longer-available getCurrentNrHRS: counts the region servers Admin reports.</span> |
| <span class="source-line-no">379</span><span id="line-379">   * @return number of region servers reported, or 0 when the collection is null or empty</span> |
| <span class="source-line-no">380</span><span id="line-380">   * @throws IOException if a remote or network exception occurs</span> |
| <span class="source-line-no">381</span><span id="line-381">   */</span> |
| <span class="source-line-no">382</span><span id="line-382">  private static int getRegionServerCount(final Connection connection) throws IOException {</span> |
| <span class="source-line-no">383</span><span id="line-383">    try (Admin clusterAdmin = connection.getAdmin()) {</span> |
| <span class="source-line-no">384</span><span id="line-384">      final Collection<ServerName> regionServers = clusterAdmin.getRegionServers();</span> |
| <span class="source-line-no">385</span><span id="line-385">      return regionServers == null ? 0 : regionServers.size(); // an empty collection has size 0</span> |
| <span class="source-line-no">386</span><span id="line-386">    }</span> |
| <span class="source-line-no">387</span><span id="line-387">  }</span> |
| <span class="source-line-no">388</span><span id="line-388"></span> |
| <span class="source-line-no">389</span><span id="line-389">  /** Reads all of {@code path} into memory; the buffer is sized from the file length because available() is only an estimate. */</span> |
| <span class="source-line-no">390</span><span id="line-390">  private static byte[] readFile(final FileSystem fs, final Path path) throws IOException {</span> |
| <span class="source-line-no">391</span><span id="line-391">    final long length = fs.getFileStatus(path).getLen();</span> |
| <span class="source-line-no">392</span><span id="line-392">    if (length > Integer.MAX_VALUE) throw new IOException("File too large to read into memory: " + path);</span> |
| <span class="source-line-no">393</span><span id="line-393">    final byte[] rawData = new byte[(int) length];</span> |
| <span class="source-line-no">394</span><span id="line-394">    try (FSDataInputStream tmpIn = fs.open(path)) {</span> |
| <span class="source-line-no">395</span><span id="line-395">      tmpIn.readFully(rawData);</span> |
| <span class="source-line-no">396</span><span id="line-396">      return rawData;</span> |
| <span class="source-line-no">397</span><span id="line-397">    }</span> |
| <span class="source-line-no">398</span><span id="line-398">  }</span> |
| <span class="source-line-no">399</span><span id="line-399"></span> |
| <span class="source-line-no">400</span><span id="line-400"> static void rollingSplit(TableName tableName, SplitAlgorithm splitAlgo, Configuration conf)</span> |
| <span class="source-line-no">401</span><span id="line-401"> throws IOException, InterruptedException {</span> |
| <span class="source-line-no">402</span><span id="line-402"> final int minOS = conf.getInt("split.outstanding", 2);</span> |
| <span class="source-line-no">403</span><span id="line-403"> try (Connection connection = ConnectionFactory.createConnection(conf)) {</span> |
| <span class="source-line-no">404</span><span id="line-404"> // Max outstanding splits. default == 50% of servers</span> |
| <span class="source-line-no">405</span><span id="line-405"> final int MAX_OUTSTANDING = Math.max(getRegionServerCount(connection) / 2, minOS);</span> |
| <span class="source-line-no">406</span><span id="line-406"></span> |
| <span class="source-line-no">407</span><span id="line-407"> Path hbDir = CommonFSUtils.getRootDir(conf);</span> |
| <span class="source-line-no">408</span><span id="line-408"> Path tableDir = CommonFSUtils.getTableDir(hbDir, tableName);</span> |
| <span class="source-line-no">409</span><span id="line-409"> Path splitFile = new Path(tableDir, "_balancedSplit");</span> |
| <span class="source-line-no">410</span><span id="line-410"> FileSystem fs = FileSystem.get(conf);</span> |
| <span class="source-line-no">411</span><span id="line-411"></span> |
| <span class="source-line-no">412</span><span id="line-412"> // Get a list of daughter regions to create</span> |
| <span class="source-line-no">413</span><span id="line-413"> LinkedList<Pair<byte[], byte[]>> tmpRegionSet = null;</span> |
| <span class="source-line-no">414</span><span id="line-414"> try (Table table = connection.getTable(tableName)) {</span> |
| <span class="source-line-no">415</span><span id="line-415"> tmpRegionSet = getSplits(connection, tableName, splitAlgo);</span> |
| <span class="source-line-no">416</span><span id="line-416"> }</span> |
| <span class="source-line-no">417</span><span id="line-417"> LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();</span> |
| <span class="source-line-no">418</span><span id="line-418"> int splitCount = 0;</span> |
| <span class="source-line-no">419</span><span id="line-419"> final int origCount = tmpRegionSet.size();</span> |
| <span class="source-line-no">420</span><span id="line-420"></span> |
| <span class="source-line-no">421</span><span id="line-421"> // all splits must compact & we have 1 compact thread, so 2 split</span> |
| <span class="source-line-no">422</span><span id="line-422"> // requests to the same RS can stall the outstanding split queue.</span> |
| <span class="source-line-no">423</span><span id="line-423"> // To fix, group the regions into an RS pool and round-robin through it</span> |
| <span class="source-line-no">424</span><span id="line-424"> LOG.debug("Bucketing regions by regionserver...");</span> |
| <span class="source-line-no">425</span><span id="line-425"> TreeMap<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegions = Maps.newTreeMap();</span> |
| <span class="source-line-no">426</span><span id="line-426"> // Get a regionLocator. Need it in below.</span> |
| <span class="source-line-no">427</span><span id="line-427"> try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {</span> |
| <span class="source-line-no">428</span><span id="line-428"> for (Pair<byte[], byte[]> dr : tmpRegionSet) {</span> |
| <span class="source-line-no">429</span><span id="line-429"> ServerName rsLocation = regionLocator.getRegionLocation(dr.getSecond()).getServerName();</span> |
| <span class="source-line-no">430</span><span id="line-430"> if (!daughterRegions.containsKey(rsLocation)) {</span> |
| <span class="source-line-no">431</span><span id="line-431"> LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();</span> |
| <span class="source-line-no">432</span><span id="line-432"> daughterRegions.put(rsLocation, entry);</span> |
| <span class="source-line-no">433</span><span id="line-433"> }</span> |
| <span class="source-line-no">434</span><span id="line-434"> daughterRegions.get(rsLocation).add(dr);</span> |
| <span class="source-line-no">435</span><span id="line-435"> }</span> |
| <span class="source-line-no">436</span><span id="line-436"> LOG.debug("Done with bucketing. Split time!");</span> |
| <span class="source-line-no">437</span><span id="line-437"> long startTime = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">438</span><span id="line-438"></span> |
| <span class="source-line-no">439</span><span id="line-439"> // Open the split file and modify it as splits finish</span> |
| <span class="source-line-no">440</span><span id="line-440"> byte[] rawData = readFile(fs, splitFile);</span> |
| <span class="source-line-no">441</span><span id="line-441"></span> |
| <span class="source-line-no">442</span><span id="line-442"> FSDataOutputStream splitOut = fs.create(splitFile);</span> |
| <span class="source-line-no">443</span><span id="line-443"> try {</span> |
| <span class="source-line-no">444</span><span id="line-444"> splitOut.write(rawData);</span> |
| <span class="source-line-no">445</span><span id="line-445"></span> |
| <span class="source-line-no">446</span><span id="line-446"> try {</span> |
| <span class="source-line-no">447</span><span id="line-447"> // *** split code ***</span> |
| <span class="source-line-no">448</span><span id="line-448"> while (!daughterRegions.isEmpty()) {</span> |
| <span class="source-line-no">449</span><span id="line-449"> LOG.debug(daughterRegions.size() + " RS have regions to splt.");</span> |
| <span class="source-line-no">450</span><span id="line-450"></span> |
| <span class="source-line-no">451</span><span id="line-451"> // Get ServerName to region count mapping</span> |
| <span class="source-line-no">452</span><span id="line-452"> final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();</span> |
| <span class="source-line-no">453</span><span id="line-453"> List<HRegionLocation> hrls = regionLocator.getAllRegionLocations();</span> |
| <span class="source-line-no">454</span><span id="line-454"> for (HRegionLocation hrl : hrls) {</span> |
| <span class="source-line-no">455</span><span id="line-455"> ServerName sn = hrl.getServerName();</span> |
| <span class="source-line-no">456</span><span id="line-456"> if (rsSizes.containsKey(sn)) {</span> |
| <span class="source-line-no">457</span><span id="line-457"> rsSizes.put(sn, rsSizes.get(sn) + 1);</span> |
| <span class="source-line-no">458</span><span id="line-458"> } else {</span> |
| <span class="source-line-no">459</span><span id="line-459"> rsSizes.put(sn, 1);</span> |
| <span class="source-line-no">460</span><span id="line-460"> }</span> |
| <span class="source-line-no">461</span><span id="line-461"> }</span> |
| <span class="source-line-no">462</span><span id="line-462"></span> |
| <span class="source-line-no">463</span><span id="line-463"> // Sort the ServerNames by the number of regions they have</span> |
| <span class="source-line-no">464</span><span id="line-464"> final List<ServerName> serversLeft = Lists.newArrayList(daughterRegions.keySet());</span> |
| <span class="source-line-no">465</span><span id="line-465"> serversLeft.sort(Comparator.comparing(rsSizes::get));</span> |
| <span class="source-line-no">466</span><span id="line-466"></span> |
| <span class="source-line-no">467</span><span id="line-467"> // Round-robin through the ServerName list. Choose the lightest-loaded servers</span> |
| <span class="source-line-no">468</span><span id="line-468"> // first to keep the master from load-balancing regions as we split.</span> |
| <span class="source-line-no">469</span><span id="line-469"> for (final ServerName rsLoc : serversLeft) {</span> |
| <span class="source-line-no">470</span><span id="line-470"> Pair<byte[], byte[]> dr = null;</span> |
| <span class="source-line-no">471</span><span id="line-471"> final LinkedList<Pair<byte[], byte[]>> regionList = daughterRegions.get(rsLoc);</span> |
| <span class="source-line-no">472</span><span id="line-472"></span> |
| <span class="source-line-no">473</span><span id="line-473"> // Find a region in the ServerName list that hasn't been moved</span> |
| <span class="source-line-no">474</span><span id="line-474"> LOG.debug("Finding a region on " + rsLoc);</span> |
| <span class="source-line-no">475</span><span id="line-475"> while (!regionList.isEmpty()) {</span> |
| <span class="source-line-no">476</span><span id="line-476"> dr = regionList.pop();</span> |
| <span class="source-line-no">477</span><span id="line-477"></span> |
| <span class="source-line-no">478</span><span id="line-478"> // get current region info</span> |
| <span class="source-line-no">479</span><span id="line-479"> byte[] split = dr.getSecond();</span> |
| <span class="source-line-no">480</span><span id="line-480"> HRegionLocation regionLoc = regionLocator.getRegionLocation(split);</span> |
| <span class="source-line-no">481</span><span id="line-481"></span> |
| <span class="source-line-no">482</span><span id="line-482"> // if this region moved locations</span> |
| <span class="source-line-no">483</span><span id="line-483"> ServerName newRs = regionLoc.getServerName();</span> |
| <span class="source-line-no">484</span><span id="line-484"> if (newRs.compareTo(rsLoc) != 0) {</span> |
| <span class="source-line-no">485</span><span id="line-485"> LOG.debug("Region with " + splitAlgo.rowToStr(split) + " moved to " + newRs</span> |
| <span class="source-line-no">486</span><span id="line-486"> + ". Relocating...");</span> |
| <span class="source-line-no">487</span><span id="line-487"> // relocate it, don't use it right now</span> |
| <span class="source-line-no">488</span><span id="line-488"> if (!daughterRegions.containsKey(newRs)) {</span> |
| <span class="source-line-no">489</span><span id="line-489"> LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();</span> |
| <span class="source-line-no">490</span><span id="line-490"> daughterRegions.put(newRs, entry);</span> |
| <span class="source-line-no">491</span><span id="line-491"> }</span> |
| <span class="source-line-no">492</span><span id="line-492"> daughterRegions.get(newRs).add(dr);</span> |
| <span class="source-line-no">493</span><span id="line-493"> dr = null;</span> |
| <span class="source-line-no">494</span><span id="line-494"> continue;</span> |
| <span class="source-line-no">495</span><span id="line-495"> }</span> |
| <span class="source-line-no">496</span><span id="line-496"></span> |
| <span class="source-line-no">497</span><span id="line-497"> // make sure this region wasn't already split</span> |
| <span class="source-line-no">498</span><span id="line-498"> byte[] sk = regionLoc.getRegion().getStartKey();</span> |
| <span class="source-line-no">499</span><span id="line-499"> if (sk.length != 0) {</span> |
| <span class="source-line-no">500</span><span id="line-500"> if (Bytes.equals(split, sk)) {</span> |
| <span class="source-line-no">501</span><span id="line-501"> LOG.debug("Region already split on " + splitAlgo.rowToStr(split)</span> |
| <span class="source-line-no">502</span><span id="line-502"> + ". Skipping this region...");</span> |
| <span class="source-line-no">503</span><span id="line-503"> ++splitCount;</span> |
| <span class="source-line-no">504</span><span id="line-504"> dr = null;</span> |
| <span class="source-line-no">505</span><span id="line-505"> continue;</span> |
| <span class="source-line-no">506</span><span id="line-506"> }</span> |
| <span class="source-line-no">507</span><span id="line-507"> byte[] start = dr.getFirst();</span> |
| <span class="source-line-no">508</span><span id="line-508"> Preconditions.checkArgument(Bytes.equals(start, sk),</span> |
| <span class="source-line-no">509</span><span id="line-509"> splitAlgo.rowToStr(start) + " != " + splitAlgo.rowToStr(sk));</span> |
| <span class="source-line-no">510</span><span id="line-510"> }</span> |
| <span class="source-line-no">511</span><span id="line-511"></span> |
| <span class="source-line-no">512</span><span id="line-512"> // passed all checks! found a good region</span> |
| <span class="source-line-no">513</span><span id="line-513"> break;</span> |
| <span class="source-line-no">514</span><span id="line-514"> }</span> |
| <span class="source-line-no">515</span><span id="line-515"> if (regionList.isEmpty()) {</span> |
| <span class="source-line-no">516</span><span id="line-516"> daughterRegions.remove(rsLoc);</span> |
| <span class="source-line-no">517</span><span id="line-517"> }</span> |
| <span class="source-line-no">518</span><span id="line-518"> if (dr == null) continue;</span> |
| <span class="source-line-no">519</span><span id="line-519"></span> |
| <span class="source-line-no">520</span><span id="line-520"> // we have a good region, time to split!</span> |
| <span class="source-line-no">521</span><span id="line-521"> byte[] split = dr.getSecond();</span> |
| <span class="source-line-no">522</span><span id="line-522"> LOG.debug("Splitting at " + splitAlgo.rowToStr(split));</span> |
| <span class="source-line-no">523</span><span id="line-523"> try (Admin admin = connection.getAdmin()) {</span> |
| <span class="source-line-no">524</span><span id="line-524"> admin.split(tableName, split);</span> |
| <span class="source-line-no">525</span><span id="line-525"> }</span> |
| <span class="source-line-no">526</span><span id="line-526"></span> |
| <span class="source-line-no">527</span><span id="line-527"> LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();</span> |
| <span class="source-line-no">528</span><span id="line-528"> LinkedList<Pair<byte[], byte[]>> local_finished = Lists.newLinkedList();</span> |
| <span class="source-line-no">529</span><span id="line-529"> if (conf.getBoolean("split.verify", true)) {</span> |
| <span class="source-line-no">530</span><span id="line-530"> // we need to verify and rate-limit our splits</span> |
| <span class="source-line-no">531</span><span id="line-531"> outstanding.addLast(dr);</span> |
| <span class="source-line-no">532</span><span id="line-532"> // with too many outstanding splits, wait for some to finish</span> |
| <span class="source-line-no">533</span><span id="line-533"> while (outstanding.size() >= MAX_OUTSTANDING) {</span> |
| <span class="source-line-no">534</span><span id="line-534"> LOG.debug("Wait for outstanding splits " + outstanding.size());</span> |
| <span class="source-line-no">535</span><span id="line-535"> local_finished = splitScan(outstanding, connection, tableName, splitAlgo);</span> |
| <span class="source-line-no">536</span><span id="line-536"> if (local_finished.isEmpty()) {</span> |
| <span class="source-line-no">537</span><span id="line-537"> Thread.sleep(30 * 1000);</span> |
| <span class="source-line-no">538</span><span id="line-538"> } else {</span> |
| <span class="source-line-no">539</span><span id="line-539"> finished.addAll(local_finished);</span> |
| <span class="source-line-no">540</span><span id="line-540"> outstanding.removeAll(local_finished);</span> |
| <span class="source-line-no">541</span><span id="line-541"> LOG.debug(local_finished.size() + " outstanding splits finished");</span> |
| <span class="source-line-no">542</span><span id="line-542"> }</span> |
| <span class="source-line-no">543</span><span id="line-543"> }</span> |
| <span class="source-line-no">544</span><span id="line-544"> } else {</span> |
| <span class="source-line-no">545</span><span id="line-545"> finished.add(dr);</span> |
| <span class="source-line-no">546</span><span id="line-546"> }</span> |
| <span class="source-line-no">547</span><span id="line-547"></span> |
| <span class="source-line-no">548</span><span id="line-548"> // mark each finished region as successfully split.</span> |
| <span class="source-line-no">549</span><span id="line-549"> for (Pair<byte[], byte[]> region : finished) {</span> |
| <span class="source-line-no">550</span><span id="line-550"> splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "</span> |
| <span class="source-line-no">551</span><span id="line-551"> + splitAlgo.rowToStr(region.getSecond()) + "\n");</span> |
| <span class="source-line-no">552</span><span id="line-552"> splitCount++;</span> |
| <span class="source-line-no">553</span><span id="line-553"> if (splitCount % 10 == 0) {</span> |
| <span class="source-line-no">554</span><span id="line-554"> long tDiff = (EnvironmentEdgeManager.currentTime() - startTime) / splitCount;</span> |
| <span class="source-line-no">555</span><span id="line-555"> LOG.debug(</span> |
| <span class="source-line-no">556</span><span id="line-556"> "STATUS UPDATE: " + splitCount + " / " + origCount + ". Avg Time / Split = "</span> |
| <span class="source-line-no">557</span><span id="line-557"> + org.apache.hadoop.util.StringUtils.formatTime(tDiff));</span> |
| <span class="source-line-no">558</span><span id="line-558"> }</span> |
| <span class="source-line-no">559</span><span id="line-559"> }</span> |
| <span class="source-line-no">560</span><span id="line-560"> }</span> |
| <span class="source-line-no">561</span><span id="line-561"> }</span> |
| <span class="source-line-no">562</span><span id="line-562"> if (conf.getBoolean("split.verify", true)) {</span> |
| <span class="source-line-no">563</span><span id="line-563"> while (!outstanding.isEmpty()) {</span> |
| <span class="source-line-no">564</span><span id="line-564"> LOG.debug("Finally Wait for outstanding splits " + outstanding.size());</span> |
| <span class="source-line-no">565</span><span id="line-565"> LinkedList<Pair<byte[], byte[]>> finished =</span> |
| <span class="source-line-no">566</span><span id="line-566"> splitScan(outstanding, connection, tableName, splitAlgo);</span> |
| <span class="source-line-no">567</span><span id="line-567"> if (finished.isEmpty()) {</span> |
| <span class="source-line-no">568</span><span id="line-568"> Thread.sleep(30 * 1000);</span> |
| <span class="source-line-no">569</span><span id="line-569"> } else {</span> |
| <span class="source-line-no">570</span><span id="line-570"> outstanding.removeAll(finished);</span> |
| <span class="source-line-no">571</span><span id="line-571"> for (Pair<byte[], byte[]> region : finished) {</span> |
| <span class="source-line-no">572</span><span id="line-572"> splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "</span> |
| <span class="source-line-no">573</span><span id="line-573"> + splitAlgo.rowToStr(region.getSecond()) + "\n");</span> |
| <span class="source-line-no">574</span><span id="line-574"> splitCount++;</span> |
| <span class="source-line-no">575</span><span id="line-575"> }</span> |
| <span class="source-line-no">576</span><span id="line-576"> LOG.debug("Finally " + finished.size() + " outstanding splits finished");</span> |
| <span class="source-line-no">577</span><span id="line-577"> }</span> |
| <span class="source-line-no">578</span><span id="line-578"> }</span> |
| <span class="source-line-no">579</span><span id="line-579"> }</span> |
| <span class="source-line-no">580</span><span id="line-580"> LOG.debug("All regions have been successfully split!");</span> |
| <span class="source-line-no">581</span><span id="line-581"> } finally {</span> |
| <span class="source-line-no">582</span><span id="line-582"> long tDiff = EnvironmentEdgeManager.currentTime() - startTime;</span> |
| <span class="source-line-no">583</span><span id="line-583"> LOG.debug("TOTAL TIME = " + org.apache.hadoop.util.StringUtils.formatTime(tDiff));</span> |
| <span class="source-line-no">584</span><span id="line-584"> LOG.debug("Splits = " + splitCount);</span> |
| <span class="source-line-no">585</span><span id="line-585"> if (0 < splitCount) {</span> |
| <span class="source-line-no">586</span><span id="line-586"> LOG.debug("Avg Time / Split = "</span> |
| <span class="source-line-no">587</span><span id="line-587"> + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));</span> |
| <span class="source-line-no">588</span><span id="line-588"> }</span> |
| <span class="source-line-no">589</span><span id="line-589"> }</span> |
| <span class="source-line-no">590</span><span id="line-590"> } finally {</span> |
| <span class="source-line-no">591</span><span id="line-591"> splitOut.close();</span> |
| <span class="source-line-no">592</span><span id="line-592"> fs.delete(splitFile, false);</span> |
| <span class="source-line-no">593</span><span id="line-593"> }</span> |
| <span class="source-line-no">594</span><span id="line-594"> }</span> |
| <span class="source-line-no">595</span><span id="line-595"> }</span> |
| <span class="source-line-no">596</span><span id="line-596"> }</span> |
| <span class="source-line-no">597</span><span id="line-597"></span> |
| <span class="source-line-no">598</span><span id="line-598">  /**</span> |
| <span class="source-line-no">599</span><span id="line-599">   * Creates the named split algorithm; names other than the built-in simple names are loaded</span> |
| <span class="source-line-no">600</span><span id="line-600">   * through {@code conf} and must be assignable to {@link SplitAlgorithm}.</span> |
| <span class="source-line-no">601</span><span id="line-601">   * @throws IOException if the specified SplitAlgorithm class couldn't be instantiated</span> |
| <span class="source-line-no">602</span><span id="line-602">   */</span> |
| <span class="source-line-no">603</span><span id="line-603">  public static SplitAlgorithm newSplitAlgoInstance(Configuration conf, String splitClassName)</span> |
| <span class="source-line-no">604</span><span id="line-604">    throws IOException {</span> |
| <span class="source-line-no">605</span><span id="line-605">    Class<?> algoClass = null;</span> |
| <span class="source-line-no">606</span><span id="line-606">    Class<?>[] builtins = { HexStringSplit.class, DecimalStringSplit.class, UniformSplit.class };</span> |
| <span class="source-line-no">607</span><span id="line-607">    for (Class<?> builtin : builtins) {</span> |
| <span class="source-line-no">608</span><span id="line-608">      if (splitClassName.equals(builtin.getSimpleName())) {</span> |
| <span class="source-line-no">609</span><span id="line-609">        algoClass = builtin;</span> |
| <span class="source-line-no">610</span><span id="line-610">        break;</span> |
| <span class="source-line-no">611</span><span id="line-611">      }</span> |
| <span class="source-line-no">612</span><span id="line-612">    }</span> |
| <span class="source-line-no">613</span><span id="line-613">    if (algoClass == null) {</span> |
| <span class="source-line-no">614</span><span id="line-614">      try {</span> |
| <span class="source-line-no">615</span><span id="line-615">        algoClass = conf.getClassByName(splitClassName);</span> |
| <span class="source-line-no">616</span><span id="line-616">      } catch (ClassNotFoundException e) {</span> |
| <span class="source-line-no">617</span><span id="line-617">        throw new IOException("Couldn't load split class " + splitClassName, e);</span> |
| <span class="source-line-no">618</span><span id="line-618">      }</span> |
| <span class="source-line-no">619</span><span id="line-619">      if (algoClass == null) {</span> |
| <span class="source-line-no">620</span><span id="line-620">        throw new IOException("Failed loading split class " + splitClassName);</span> |
| <span class="source-line-no">621</span><span id="line-621">      }</span> |
| <span class="source-line-no">622</span><span id="line-622">      if (!SplitAlgorithm.class.isAssignableFrom(algoClass)) {</span> |
| <span class="source-line-no">623</span><span id="line-623">        throw new IOException("Specified split class doesn't implement SplitAlgorithm");</span> |
| <span class="source-line-no">624</span><span id="line-624">      }</span> |
| <span class="source-line-no">625</span><span id="line-625">    }</span> |
| <span class="source-line-no">626</span><span id="line-626">    try {</span> |
| <span class="source-line-no">627</span><span id="line-627">      return algoClass.asSubclass(SplitAlgorithm.class).getDeclaredConstructor().newInstance();</span> |
| <span class="source-line-no">628</span><span id="line-628">    } catch (Exception e) {</span> |
| <span class="source-line-no">629</span><span id="line-629">      throw new IOException("Problem loading split algorithm: ", e);</span> |
| <span class="source-line-no">630</span><span id="line-630">    }</span> |
| <span class="source-line-no">631</span><span id="line-631">  }</span> |
| <span class="source-line-no">632</span><span id="line-632"></span> |
| <span class="source-line-no">633</span><span id="line-633">  /**</span> |
| <span class="source-line-no">634</span><span id="line-634">   * Classifies every pending split in {@code regionList}. A split is reported as finished once</span> |
| <span class="source-line-no">635</span><span id="line-635">   * the daughter starting at the split point is online and neither daughter has reference files</span> |
| <span class="source-line-no">636</span><span id="line-636">   * left in any column family, i.e. its post-split compaction is done.</span> |
| <span class="source-line-no">637</span><span id="line-637">   * @param regionList (region start key, split point) pairs not yet verified as finished</span> |
| <span class="source-line-no">638</span><span id="line-638">   * @return the entries of {@code regionList} whose splits have completed</span> |
| <span class="source-line-no">639</span><span id="line-639">   */</span> |
| <span class="source-line-no">640</span><span id="line-640">  static LinkedList<Pair<byte[], byte[]>> splitScan(LinkedList<Pair<byte[], byte[]>> regionList,</span> |
| <span class="source-line-no">641</span><span id="line-641">    final Connection connection, final TableName tableName, SplitAlgorithm splitAlgo)</span> |
| <span class="source-line-no">642</span><span id="line-642">    throws IOException, InterruptedException {</span> |
| <span class="source-line-no">643</span><span id="line-643">    LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();</span> |
| <span class="source-line-no">644</span><span id="line-644">    LinkedList<Pair<byte[], byte[]>> logicalSplitting = Lists.newLinkedList();</span> |
| <span class="source-line-no">645</span><span id="line-645">    LinkedList<Pair<byte[], byte[]>> physicalSplitting = Lists.newLinkedList();</span> |
| <span class="source-line-no">646</span><span id="line-646"></span> |
| <span class="source-line-no">647</span><span id="line-647">    // Get table info</span> |
| <span class="source-line-no">648</span><span id="line-648">    Path tableDir = getTableDirAndSplitFile(connection.getConfiguration(), tableName).getFirst();</span> |
| <span class="source-line-no">649</span><span id="line-649">    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());</span> |
| <span class="source-line-no">650</span><span id="line-650">    // Clear the cache to forcibly refresh region information</span> |
| <span class="source-line-no">651</span><span id="line-651">    connection.clearRegionLocationCache();</span> |
| <span class="source-line-no">652</span><span id="line-652">    TableDescriptor htd = null;</span> |
| <span class="source-line-no">653</span><span id="line-653">    try (Table table = connection.getTable(tableName)) {</span> |
| <span class="source-line-no">654</span><span id="line-654">      htd = table.getDescriptor();</span> |
| <span class="source-line-no">655</span><span id="line-655">    }</span> |
| <span class="source-line-no">656</span><span id="line-656">    try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {</span> |
| <span class="source-line-no">657</span><span id="line-657">      // for every region that hasn't been verified as a finished split</span> |
| <span class="source-line-no">658</span><span id="line-658">      for (Pair<byte[], byte[]> region : regionList) {</span> |
| <span class="source-line-no">659</span><span id="line-659">        byte[] start = region.getFirst();</span> |
| <span class="source-line-no">660</span><span id="line-660">        byte[] split = region.getSecond();</span> |
| <span class="source-line-no">661</span><span id="line-661"></span> |
| <span class="source-line-no">662</span><span id="line-662">        // see if the new split daughter region has come online</span> |
| <span class="source-line-no">663</span><span id="line-663">        try {</span> |
| <span class="source-line-no">664</span><span id="line-664">          RegionInfo dri = regionLocator.getRegionLocation(split, true).getRegion();</span> |
| <span class="source-line-no">665</span><span id="line-665">          if (dri.isOffline() || !Bytes.equals(dri.getStartKey(), split)) {</span> |
| <span class="source-line-no">666</span><span id="line-666">            logicalSplitting.add(region);</span> |
| <span class="source-line-no">667</span><span id="line-667">            continue;</span> |
| <span class="source-line-no">668</span><span id="line-668">          }</span> |
| <span class="source-line-no">669</span><span id="line-669">        } catch (NoServerForRegionException nsfre) {</span> |
| <span class="source-line-no">670</span><span id="line-670">          // NSFRE will occur if the old hbase:meta entry has no server assigned</span> |
| <span class="source-line-no">671</span><span id="line-671">          LOG.info(nsfre.toString(), nsfre);</span> |
| <span class="source-line-no">672</span><span id="line-672">          logicalSplitting.add(region);</span> |
| <span class="source-line-no">673</span><span id="line-673">          continue;</span> |
| <span class="source-line-no">674</span><span id="line-674">        }</span> |
| <span class="source-line-no">675</span><span id="line-675"></span> |
| <span class="source-line-no">676</span><span id="line-676">        try {</span> |
| <span class="source-line-no">677</span><span id="line-677">          // when a daughter region is opened, a compaction is triggered</span> |
| <span class="source-line-no">678</span><span id="line-678">          // wait until compaction completes for both daughter regions</span> |
| <span class="source-line-no">679</span><span id="line-679">          LinkedList<RegionInfo> check = Lists.newLinkedList();</span> |
| <span class="source-line-no">680</span><span id="line-680">          check.add(regionLocator.getRegionLocation(start).getRegion());</span> |
| <span class="source-line-no">681</span><span id="line-681">          check.add(regionLocator.getRegionLocation(split).getRegion());</span> |
| <span class="source-line-no">682</span><span id="line-682">          // iterate over a snapshot so entries can be removed from check inside the loop</span> |
| <span class="source-line-no">683</span><span id="line-683">          for (RegionInfo hri : check.toArray(new RegionInfo[check.size()])) {</span> |
| <span class="source-line-no">684</span><span id="line-684">            HRegionFileSystem regionFs = HRegionFileSystem</span> |
| <span class="source-line-no">685</span><span id="line-685">              .openRegionFromFileSystem(connection.getConfiguration(), fs, tableDir, hri, true);</span> |
| <span class="source-line-no">686</span><span id="line-686">            // Check every Column Family for that region -- check does not have references.</span> |
| <span class="source-line-no">687</span><span id="line-687">            boolean refFound = false;</span> |
| <span class="source-line-no">688</span><span id="line-688">            for (ColumnFamilyDescriptor c : htd.getColumnFamilies()) {</span> |
| <span class="source-line-no">689</span><span id="line-689">              StoreFileTracker sft = StoreFileTrackerFactory</span> |
| <span class="source-line-no">690</span><span id="line-690">                .create(regionFs.getFileSystem().getConf(), htd, c, regionFs);</span> |
| <span class="source-line-no">691</span><span id="line-691">              if ((refFound = sft.hasReferences())) {</span> |
| <span class="source-line-no">692</span><span id="line-692">                break;</span> |
| <span class="source-line-no">693</span><span id="line-693">              }</span> |
| <span class="source-line-no">694</span><span id="line-694">            }</span> |
| <span class="source-line-no">695</span><span id="line-695">            // compaction is completed when all reference files are gone</span> |
| <span class="source-line-no">696</span><span id="line-696">            if (!refFound) {</span> |
| <span class="source-line-no">697</span><span id="line-697">              check.remove(hri);</span> |
| <span class="source-line-no">698</span><span id="line-698">            }</span> |
| <span class="source-line-no">699</span><span id="line-699">          }</span> |
| <span class="source-line-no">700</span><span id="line-700">          if (check.isEmpty()) {</span> |
| <span class="source-line-no">701</span><span id="line-701">            finished.add(region);</span> |
| <span class="source-line-no">702</span><span id="line-702">          } else {</span> |
| <span class="source-line-no">703</span><span id="line-703">            physicalSplitting.add(region);</span> |
| <span class="source-line-no">704</span><span id="line-704">          }</span> |
| <span class="source-line-no">705</span><span id="line-705">        } catch (NoServerForRegionException nsfre) {</span> |
| <span class="source-line-no">706</span><span id="line-706">          LOG.debug("No Server Exception thrown for: " + splitAlgo.rowToStr(start));</span> |
| <span class="source-line-no">707</span><span id="line-707">          physicalSplitting.add(region);</span> |
| <span class="source-line-no">708</span><span id="line-708">          connection.clearRegionLocationCache();</span> |
| <span class="source-line-no">709</span><span id="line-709">        }</span> |
| <span class="source-line-no">710</span><span id="line-710">      }</span> |
| <span class="source-line-no">711</span><span id="line-711"></span> |
| <span class="source-line-no">712</span><span id="line-712">      LOG.debug("Split Scan: " + finished.size() + " finished / " + logicalSplitting.size()</span> |
| <span class="source-line-no">713</span><span id="line-713">        + " split wait / " + physicalSplitting.size() + " reference wait");</span> |
| <span class="source-line-no">714</span><span id="line-714">      return finished;</span> |
| <span class="source-line-no">715</span><span id="line-715">    }</span> |
| <span class="source-line-no">716</span><span id="line-716">  }</span> |
| <span class="source-line-no">717</span><span id="line-717"></span> |
| <span class="source-line-no">718</span><span id="line-718">  /**</span> |
| <span class="source-line-no">719</span><span id="line-719">   * Derives the table dir from {@code CommonFSUtils.getRootDir(conf)} and the split file in it.</span> |
| <span class="source-line-no">720</span><span id="line-720">   * @return A Pair where first item is table dir and second is the split file.</span> |
| <span class="source-line-no">721</span><span id="line-721">   * @throws IOException if a remote or network exception occurs</span> |
| <span class="source-line-no">722</span><span id="line-722">   */</span> |
| <span class="source-line-no">723</span><span id="line-723">  private static Pair<Path, Path> getTableDirAndSplitFile(final Configuration conf,</span> |
| <span class="source-line-no">724</span><span id="line-724">    final TableName tableName) throws IOException {</span> |
| <span class="source-line-no">725</span><span id="line-725">    final Path dir = CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), tableName);</span> |
| <span class="source-line-no">726</span><span id="line-726">    // The split file sits directly inside the table directory.</span> |
| <span class="source-line-no">727</span><span id="line-727">    return new Pair<>(dir, new Path(dir, "_balancedSplit"));</span> |
| <span class="source-line-no">728</span><span id="line-728">  }</span> |
| <span class="source-line-no">729</span><span id="line-729"></span> |
| <span class="source-line-no">730</span><span id="line-730">  /**</span> |
| <span class="source-line-no">731</span><span id="line-731">   * Returns the splits that are still outstanding. With no split file present they are computed</span> |
| <span class="source-line-no">732</span><span id="line-732">   * from the table's current regions and recorded in a new split file; otherwise the existing</span> |
| <span class="source-line-no">733</span><span id="line-733">   * file is replayed, where "+" lines add a split and "-" lines remove one.</span> |
| <span class="source-line-no">734</span><span id="line-734">   * @return (region start key, split point) pairs that remain to be split</span> |
| <span class="source-line-no">735</span><span id="line-735">   * @throws IOException if a filesystem or region lookup call fails</span> |
| <span class="source-line-no">736</span><span id="line-736">   */</span> |
| <span class="source-line-no">737</span><span id="line-737">  static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection,</span> |
| <span class="source-line-no">738</span><span id="line-738">    TableName tableName, SplitAlgorithm splitAlgo) throws IOException {</span> |
| <span class="source-line-no">739</span><span id="line-739">    Pair<Path, Path> paths = getTableDirAndSplitFile(connection.getConfiguration(), tableName);</span> |
| <span class="source-line-no">740</span><span id="line-740">    Path tableDir = paths.getFirst();</span> |
| <span class="source-line-no">741</span><span id="line-741">    Path splitFile = paths.getSecond();</span> |
| <span class="source-line-no">742</span><span id="line-742">    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());</span> |
| <span class="source-line-no">743</span><span id="line-743">    // Using strings because (new byte[]{0}).equals(new byte[]{0}) == false</span> |
| <span class="source-line-no">744</span><span id="line-744">    Set<Pair<String, String>> daughterRegions = Sets.newHashSet();</span> |
| <span class="source-line-no">745</span><span id="line-745">    // Does a split file exist?</span> |
| <span class="source-line-no">746</span><span id="line-746">    if (!fs.exists(splitFile)) {</span> |
| <span class="source-line-no">747</span><span id="line-747">      // NO = fresh start. calculate splits to make</span> |
| <span class="source-line-no">748</span><span id="line-748">      LOG.debug("No " + splitFile.getName() + " file. Calculating splits ");</span> |
| <span class="source-line-no">749</span><span id="line-749">      // Query meta for all regions in the table</span> |
| <span class="source-line-no">750</span><span id="line-750">      Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();</span> |
| <span class="source-line-no">751</span><span id="line-751">      Pair<byte[][], byte[][]> tmp = null;</span> |
| <span class="source-line-no">752</span><span id="line-752">      try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {</span> |
| <span class="source-line-no">753</span><span id="line-753">        tmp = regionLocator.getStartEndKeys();</span> |
| <span class="source-line-no">754</span><span id="line-754">      }</span> |
| <span class="source-line-no">755</span><span id="line-755">      Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length,</span> |
| <span class="source-line-no">756</span><span id="line-756">        "Start and End rows should be equivalent");</span> |
| <span class="source-line-no">757</span><span id="line-757">      for (int i = 0; i < tmp.getFirst().length; ++i) {</span> |
| <span class="source-line-no">758</span><span id="line-758">        byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];</span> |
| <span class="source-line-no">759</span><span id="line-759">        if (start.length == 0) start = splitAlgo.firstRow();</span> |
| <span class="source-line-no">760</span><span id="line-760">        if (end.length == 0) end = splitAlgo.lastRow();</span> |
| <span class="source-line-no">761</span><span id="line-761">        rows.add(Pair.newPair(start, end));</span> |
| <span class="source-line-no">762</span><span id="line-762">      }</span> |
| <span class="source-line-no">763</span><span id="line-763">      LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");</span> |
| <span class="source-line-no">764</span><span id="line-764">      // prepare the split file; try-with-resources closes the stream even if a write fails</span> |
| <span class="source-line-no">765</span><span id="line-765">      Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");</span> |
| <span class="source-line-no">766</span><span id="line-766">      try (FSDataOutputStream tmpOut = fs.create(tmpFile)) {</span> |
| <span class="source-line-no">767</span><span id="line-767">        // calculate all the splits == [daughterRegions] = [(start, splitPoint)]</span> |
| <span class="source-line-no">768</span><span id="line-768">        for (Pair<byte[], byte[]> r : rows) {</span> |
| <span class="source-line-no">769</span><span id="line-769">          byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());</span> |
| <span class="source-line-no">770</span><span id="line-770">          String startStr = splitAlgo.rowToStr(r.getFirst());</span> |
| <span class="source-line-no">771</span><span id="line-771">          String splitStr = splitAlgo.rowToStr(splitPoint);</span> |
| <span class="source-line-no">772</span><span id="line-772">          daughterRegions.add(Pair.newPair(startStr, splitStr));</span> |
| <span class="source-line-no">773</span><span id="line-773">          LOG.debug("Will Split [" + startStr + " , " + splitAlgo.rowToStr(r.getSecond())</span> |
| <span class="source-line-no">774</span><span id="line-774">            + ") at " + splitStr);</span> |
| <span class="source-line-no">775</span><span id="line-775">          tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr + "\n");</span> |
| <span class="source-line-no">776</span><span id="line-776">        }</span> |
| <span class="source-line-no">777</span><span id="line-777">      }</span> |
| <span class="source-line-no">778</span><span id="line-778">      fs.rename(tmpFile, splitFile);</span> |
| <span class="source-line-no">779</span><span id="line-779">    } else {</span> |
| <span class="source-line-no">780</span><span id="line-780">      LOG.debug("_balancedSplit file found. Replay log to restore state...");</span> |
| <span class="source-line-no">781</span><span id="line-781">      RecoverLeaseFSUtils.recoverFileLease(fs, splitFile, connection.getConfiguration(), null);</span> |
| <span class="source-line-no">782</span><span id="line-782">      // parse split file and process remaining splits; the stream is closed even if a read fails</span> |
| <span class="source-line-no">783</span><span id="line-783">      StringBuilder sb;</span> |
| <span class="source-line-no">784</span><span id="line-784">      try (FSDataInputStream tmpIn = fs.open(splitFile)) {</span> |
| <span class="source-line-no">785</span><span id="line-785">        sb = new StringBuilder(tmpIn.available());</span> |
| <span class="source-line-no">786</span><span id="line-786">        while (tmpIn.available() > 0) {</span> |
| <span class="source-line-no">787</span><span id="line-787">          sb.append(tmpIn.readChar());</span> |
| <span class="source-line-no">788</span><span id="line-788">        }</span> |
| <span class="source-line-no">789</span><span id="line-789">      }</span> |
| <span class="source-line-no">790</span><span id="line-790">      for (String line : sb.toString().split("\n")) {</span> |
| <span class="source-line-no">791</span><span id="line-791">        String[] cmd = line.split(splitAlgo.separator());</span> |
| <span class="source-line-no">792</span><span id="line-792">        Preconditions.checkArgument(3 == cmd.length);</span> |
| <span class="source-line-no">793</span><span id="line-793">        byte[] start = splitAlgo.strToRow(cmd[1]);</span> |
| <span class="source-line-no">794</span><span id="line-794">        String startStr = splitAlgo.rowToStr(start);</span> |
| <span class="source-line-no">795</span><span id="line-795">        byte[] splitPoint = splitAlgo.strToRow(cmd[2]);</span> |
| <span class="source-line-no">796</span><span id="line-796">        String splitStr = splitAlgo.rowToStr(splitPoint);</span> |
| <span class="source-line-no">797</span><span id="line-797">        Pair<String, String> r = Pair.newPair(startStr, splitStr);</span> |
| <span class="source-line-no">798</span><span id="line-798">        if (cmd[0].equals("+")) {</span> |
| <span class="source-line-no">799</span><span id="line-799">          LOG.debug("Adding: " + r);</span> |
| <span class="source-line-no">800</span><span id="line-800">          daughterRegions.add(r);</span> |
| <span class="source-line-no">801</span><span id="line-801">        } else {</span> |
| <span class="source-line-no">802</span><span id="line-802">          LOG.debug("Removing: " + r);</span> |
| <span class="source-line-no">803</span><span id="line-803">          Preconditions.checkArgument(cmd[0].equals("-"), "Unknown option: " + cmd[0]);</span> |
| <span class="source-line-no">804</span><span id="line-804">          Preconditions.checkState(daughterRegions.contains(r), "Missing row: " + r);</span> |
| <span class="source-line-no">805</span><span id="line-805">          daughterRegions.remove(r);</span> |
| <span class="source-line-no">806</span><span id="line-806">        }</span> |
| <span class="source-line-no">807</span><span id="line-807">      }</span> |
| <span class="source-line-no">808</span><span id="line-808">      LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");</span> |
| <span class="source-line-no">809</span><span id="line-809">    }</span> |
| <span class="source-line-no">810</span><span id="line-810">    LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();</span> |
| <span class="source-line-no">811</span><span id="line-811">    for (Pair<String, String> r : daughterRegions) {</span> |
| <span class="source-line-no">812</span><span id="line-812">      ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()), splitAlgo.strToRow(r.getSecond())));</span> |
| <span class="source-line-no">813</span><span id="line-813">    }</span> |
| <span class="source-line-no">814</span><span id="line-814">    return ret;</span> |
| <span class="source-line-no">815</span><span id="line-815">  }</span> |
| <span class="source-line-no">816</span><span id="line-816"></span> |
| <span class="source-line-no">817</span><span id="line-817">  /**</span> |
| <span class="source-line-no">818</span><span id="line-818">   * HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region boundaries. A</span> |
| <span class="source-line-no">819</span><span id="line-819">   * HexStringSplit region boundary is the ASCII representation of an MD5 checksum, or of any</span> |
| <span class="source-line-no">820</span><span id="line-820">   * other uniformly distributed hexadecimal value. Rows are hex-encoded long values in the range</span> |
| <span class="source-line-no">821</span><span id="line-821">   * <b>"00000000" =&gt; "FFFFFFFF"</b>, left-padded with zeros so that they sort lexicographically</span> |
| <span class="source-line-no">822</span><span id="line-822">   * in the same order as their binary form. Hex string keys are easy to read &amp; write in the</span> |
| <span class="source-line-no">823</span><span id="line-823">   * shell, but take up more space and may be non-intuitive.</span> |
| <span class="source-line-no">824</span><span id="line-824">   */</span> |
| <span class="source-line-no">825</span><span id="line-825">  public static class HexStringSplit extends NumberStringSplit {</span> |
| <span class="source-line-no">826</span><span id="line-826">    // Default key range bounds and the radix handed to the NumberStringSplit constructor.</span> |
| <span class="source-line-no">827</span><span id="line-827">    static final String DEFAULT_MIN_HEX = "00000000";</span> |
| <span class="source-line-no">828</span><span id="line-828">    static final String DEFAULT_MAX_HEX = "FFFFFFFF";</span> |
| <span class="source-line-no">829</span><span id="line-829">    static final int RADIX_HEX = 16;</span> |
| <span class="source-line-no">830</span><span id="line-830"></span> |
| <span class="source-line-no">831</span><span id="line-831">    public HexStringSplit() {</span> |
| <span class="source-line-no">832</span><span id="line-832">      super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);</span> |
| <span class="source-line-no">833</span><span id="line-833">    }</span> |
| <span class="source-line-no">834</span><span id="line-834">  }</span> |
| <span class="source-line-no">835</span><span id="line-835"></span> |
| <span class="source-line-no">836</span><span id="line-836">  /**</span> |
| <span class="source-line-no">837</span><span id="line-837">   * A DecimalStringSplit region boundary is the ASCII representation of a reversed sequential</span> |
| <span class="source-line-no">838</span><span id="line-838">   * number, or of any other uniformly distributed decimal value. Rows are decimal-encoded long</span> |
| <span class="source-line-no">839</span><span id="line-839">   * values in the range <b>"00000000" =&gt; "99999999"</b>, left-padded with zeros so that they</span> |
| <span class="source-line-no">840</span><span id="line-840">   * sort lexicographically in the same order as their binary form.</span> |
| <span class="source-line-no">841</span><span id="line-841">   */</span> |
| <span class="source-line-no">842</span><span id="line-842">  public static class DecimalStringSplit extends NumberStringSplit {</span> |
| <span class="source-line-no">843</span><span id="line-843">    // Default key range bounds and the radix handed to the NumberStringSplit constructor.</span> |
| <span class="source-line-no">844</span><span id="line-844">    static final String DEFAULT_MIN_DEC = "00000000";</span> |
| <span class="source-line-no">845</span><span id="line-845">    static final String DEFAULT_MAX_DEC = "99999999";</span> |
| <span class="source-line-no">846</span><span id="line-846">    static final int RADIX_DEC = 10;</span> |
| <span class="source-line-no">847</span><span id="line-847"></span> |
| <span class="source-line-no">848</span><span id="line-848">    public DecimalStringSplit() {</span> |
| <span class="source-line-no">849</span><span id="line-849">      super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);</span> |
| <span class="source-line-no">850</span><span id="line-850">    }</span> |
| <span class="source-line-no">851</span><span id="line-851">  }</span> |
| <span class="source-line-no">852</span><span id="line-852"></span> |
| <span class="source-line-no">853</span><span id="line-853"> public abstract static class NumberStringSplit implements SplitAlgorithm {</span> |
| <span class="source-line-no">854</span><span id="line-854"></span> |
| <span class="source-line-no">855</span><span id="line-855"> String firstRow;</span> |
| <span class="source-line-no">856</span><span id="line-856"> BigInteger firstRowInt;</span> |
| <span class="source-line-no">857</span><span id="line-857"> String lastRow;</span> |
| <span class="source-line-no">858</span><span id="line-858"> BigInteger lastRowInt;</span> |
| <span class="source-line-no">859</span><span id="line-859"> int rowComparisonLength;</span> |
| <span class="source-line-no">860</span><span id="line-860"> int radix;</span> |
| <span class="source-line-no">861</span><span id="line-861"></span> |
| <span class="source-line-no">862</span><span id="line-862"> NumberStringSplit(String minRow, String maxRow, int radix) {</span> |
| <span class="source-line-no">863</span><span id="line-863"> this.firstRow = minRow;</span> |
| <span class="source-line-no">864</span><span id="line-864"> this.lastRow = maxRow;</span> |
| <span class="source-line-no">865</span><span id="line-865"> this.radix = radix;</span> |
| <span class="source-line-no">866</span><span id="line-866"> this.firstRowInt = BigInteger.ZERO;</span> |
| <span class="source-line-no">867</span><span id="line-867"> this.lastRowInt = new BigInteger(lastRow, this.radix);</span> |
| <span class="source-line-no">868</span><span id="line-868"> this.rowComparisonLength = lastRow.length();</span> |
| <span class="source-line-no">869</span><span id="line-869"> }</span> |
| <span class="source-line-no">870</span><span id="line-870"></span> |
| <span class="source-line-no">871</span><span id="line-871"> @Override</span> |
| <span class="source-line-no">872</span><span id="line-872"> public byte[] split(byte[] start, byte[] end) {</span> |
| <span class="source-line-no">873</span><span id="line-873"> BigInteger s = convertToBigInteger(start);</span> |
| <span class="source-line-no">874</span><span id="line-874"> BigInteger e = convertToBigInteger(end);</span> |
| <span class="source-line-no">875</span><span id="line-875"> Preconditions.checkArgument(!e.equals(BigInteger.ZERO));</span> |
| <span class="source-line-no">876</span><span id="line-876"> return convertToByte(split2(s, e));</span> |
| <span class="source-line-no">877</span><span id="line-877"> }</span> |
| <span class="source-line-no">878</span><span id="line-878"></span> |
| <span class="source-line-no">879</span><span id="line-879"> @Override</span> |
| <span class="source-line-no">880</span><span id="line-880"> public byte[][] split(int n) {</span> |
| <span class="source-line-no">881</span><span id="line-881"> Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,</span> |
| <span class="source-line-no">882</span><span id="line-882"> "last row (%s) is configured less than first row (%s)", lastRow, firstRow);</span> |
| <span class="source-line-no">883</span><span id="line-883"> // +1 to range because the last row is inclusive</span> |
| <span class="source-line-no">884</span><span id="line-884"> BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);</span> |
| <span class="source-line-no">885</span><span id="line-885"> Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,</span> |
| <span class="source-line-no">886</span><span id="line-886"> "split granularity (%s) is greater than the range (%s)", n, range);</span> |
| <span class="source-line-no">887</span><span id="line-887"></span> |
| <span class="source-line-no">888</span><span id="line-888"> BigInteger[] splits = new BigInteger[n - 1];</span> |
| <span class="source-line-no">889</span><span id="line-889"> BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));</span> |
| <span class="source-line-no">890</span><span id="line-890"> for (int i = 1; i < n; i++) {</span> |
| <span class="source-line-no">891</span><span id="line-891"> // NOTE: this means the last region gets all the slop.</span> |
| <span class="source-line-no">892</span><span id="line-892"> // This is not a big deal if we're assuming n << MAXHEX</span> |
| <span class="source-line-no">893</span><span id="line-893"> splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger.valueOf(i)));</span> |
| <span class="source-line-no">894</span><span id="line-894"> }</span> |
| <span class="source-line-no">895</span><span id="line-895"> return convertToBytes(splits);</span> |
| <span class="source-line-no">896</span><span id="line-896"> }</span> |
| <span class="source-line-no">897</span><span id="line-897"></span> |
| <span class="source-line-no">898</span><span id="line-898"> @Override</span> |
| <span class="source-line-no">899</span><span id="line-899"> public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {</span> |
| <span class="source-line-no">900</span><span id="line-900"> BigInteger s = convertToBigInteger(start);</span> |
| <span class="source-line-no">901</span><span id="line-901"> BigInteger e = convertToBigInteger(end);</span> |
| <span class="source-line-no">902</span><span id="line-902"></span> |
| <span class="source-line-no">903</span><span id="line-903"> Preconditions.checkArgument(e.compareTo(s) > 0,</span> |
| <span class="source-line-no">904</span><span id="line-904"> "last row (%s) is configured less than first row (%s)", rowToStr(end), end);</span> |
| <span class="source-line-no">905</span><span id="line-905"> // +1 to range because the last row is inclusive</span> |
| <span class="source-line-no">906</span><span id="line-906"> BigInteger range = e.subtract(s).add(BigInteger.ONE);</span> |
| <span class="source-line-no">907</span><span id="line-907"> Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,</span> |
| <span class="source-line-no">908</span><span id="line-908"> "split granularity (%s) is greater than the range (%s)", numSplits, range);</span> |
| <span class="source-line-no">909</span><span id="line-909"></span> |
| <span class="source-line-no">910</span><span id="line-910"> BigInteger[] splits = new BigInteger[numSplits - 1];</span> |
| <span class="source-line-no">911</span><span id="line-911"> BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));</span> |
| <span class="source-line-no">912</span><span id="line-912"> for (int i = 1; i < numSplits; i++) {</span> |
| <span class="source-line-no">913</span><span id="line-913"> // NOTE: this means the last region gets all the slop.</span> |
| <span class="source-line-no">914</span><span id="line-914"> // This is not a big deal if we're assuming n << MAXHEX</span> |
| <span class="source-line-no">915</span><span id="line-915"> splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger.valueOf(i)));</span> |
| <span class="source-line-no">916</span><span id="line-916"> }</span> |
| <span class="source-line-no">917</span><span id="line-917"></span> |
| <span class="source-line-no">918</span><span id="line-918"> if (inclusive) {</span> |
| <span class="source-line-no">919</span><span id="line-919"> BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];</span> |
| <span class="source-line-no">920</span><span id="line-920"> inclusiveSplitPoints[0] = convertToBigInteger(start);</span> |
| <span class="source-line-no">921</span><span id="line-921"> inclusiveSplitPoints[numSplits] = convertToBigInteger(end);</span> |
| <span class="source-line-no">922</span><span id="line-922"> System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);</span> |
| <span class="source-line-no">923</span><span id="line-923"> return convertToBytes(inclusiveSplitPoints);</span> |
| <span class="source-line-no">924</span><span id="line-924"> } else {</span> |
| <span class="source-line-no">925</span><span id="line-925"> return convertToBytes(splits);</span> |
| <span class="source-line-no">926</span><span id="line-926"> }</span> |
| <span class="source-line-no">927</span><span id="line-927"> }</span> |
| <span class="source-line-no">928</span><span id="line-928"></span> |
| <span class="source-line-no">929</span><span id="line-929"> @Override</span> |
| <span class="source-line-no">930</span><span id="line-930"> public byte[] firstRow() {</span> |
| <span class="source-line-no">931</span><span id="line-931"> return convertToByte(firstRowInt);</span> |
| <span class="source-line-no">932</span><span id="line-932"> }</span> |
| <span class="source-line-no">933</span><span id="line-933"></span> |
| <span class="source-line-no">934</span><span id="line-934"> @Override</span> |
| <span class="source-line-no">935</span><span id="line-935"> public byte[] lastRow() {</span> |
| <span class="source-line-no">936</span><span id="line-936"> return convertToByte(lastRowInt);</span> |
| <span class="source-line-no">937</span><span id="line-937"> }</span> |
| <span class="source-line-no">938</span><span id="line-938"></span> |
| <span class="source-line-no">939</span><span id="line-939"> @Override</span> |
| <span class="source-line-no">940</span><span id="line-940"> public void setFirstRow(String userInput) {</span> |
| <span class="source-line-no">941</span><span id="line-941"> firstRow = userInput;</span> |
| <span class="source-line-no">942</span><span id="line-942"> firstRowInt = new BigInteger(firstRow, radix);</span> |
| <span class="source-line-no">943</span><span id="line-943"> }</span> |
| <span class="source-line-no">944</span><span id="line-944"></span> |
| <span class="source-line-no">945</span><span id="line-945"> @Override</span> |
| <span class="source-line-no">946</span><span id="line-946"> public void setLastRow(String userInput) {</span> |
| <span class="source-line-no">947</span><span id="line-947"> lastRow = userInput;</span> |
| <span class="source-line-no">948</span><span id="line-948"> lastRowInt = new BigInteger(lastRow, radix);</span> |
| <span class="source-line-no">949</span><span id="line-949"> // Precondition: lastRow > firstRow, so last's length is the greater</span> |
| <span class="source-line-no">950</span><span id="line-950"> rowComparisonLength = lastRow.length();</span> |
| <span class="source-line-no">951</span><span id="line-951"> }</span> |
| <span class="source-line-no">952</span><span id="line-952"></span> |
| <span class="source-line-no">953</span><span id="line-953"> @Override</span> |
| <span class="source-line-no">954</span><span id="line-954"> public byte[] strToRow(String in) {</span> |
| <span class="source-line-no">955</span><span id="line-955"> return convertToByte(new BigInteger(in, radix));</span> |
| <span class="source-line-no">956</span><span id="line-956"> }</span> |
| <span class="source-line-no">957</span><span id="line-957"></span> |
| <span class="source-line-no">958</span><span id="line-958"> @Override</span> |
| <span class="source-line-no">959</span><span id="line-959"> public String rowToStr(byte[] row) {</span> |
| <span class="source-line-no">960</span><span id="line-960"> return Bytes.toStringBinary(row);</span> |
| <span class="source-line-no">961</span><span id="line-961"> }</span> |
| <span class="source-line-no">962</span><span id="line-962"></span> |
| <span class="source-line-no">963</span><span id="line-963"> @Override</span> |
| <span class="source-line-no">964</span><span id="line-964"> public String separator() {</span> |
| <span class="source-line-no">965</span><span id="line-965"> return " ";</span> |
| <span class="source-line-no">966</span><span id="line-966"> }</span> |
| <span class="source-line-no">967</span><span id="line-967"></span> |
| <span class="source-line-no">968</span><span id="line-968"> @Override</span> |
| <span class="source-line-no">969</span><span id="line-969"> public void setFirstRow(byte[] userInput) {</span> |
| <span class="source-line-no">970</span><span id="line-970"> firstRow = Bytes.toString(userInput);</span> |
| <span class="source-line-no">971</span><span id="line-971"> }</span> |
| <span class="source-line-no">972</span><span id="line-972"></span> |
| <span class="source-line-no">973</span><span id="line-973"> @Override</span> |
| <span class="source-line-no">974</span><span id="line-974"> public void setLastRow(byte[] userInput) {</span> |
| <span class="source-line-no">975</span><span id="line-975"> lastRow = Bytes.toString(userInput);</span> |
| <span class="source-line-no">976</span><span id="line-976"> }</span> |
| <span class="source-line-no">977</span><span id="line-977"></span> |
| <span class="source-line-no">978</span><span id="line-978"> /**</span> |
| <span class="source-line-no">979</span><span id="line-979"> * Divide 2 numbers in half (for split algorithm)</span> |
| <span class="source-line-no">980</span><span id="line-980"> * @param a number #1</span> |
| <span class="source-line-no">981</span><span id="line-981"> * @param b number #2</span> |
| <span class="source-line-no">982</span><span id="line-982"> * @return the midpoint of the 2 numbers</span> |
| <span class="source-line-no">983</span><span id="line-983"> */</span> |
| <span class="source-line-no">984</span><span id="line-984"> public BigInteger split2(BigInteger a, BigInteger b) {</span> |
| <span class="source-line-no">985</span><span id="line-985"> if (b.equals(lastRowInt)) {</span> |
| <span class="source-line-no">986</span><span id="line-986"> b = b.add(BigInteger.ONE);</span> |
| <span class="source-line-no">987</span><span id="line-987"> }</span> |
| <span class="source-line-no">988</span><span id="line-988"> return a.add(b).divide(BigInteger.valueOf(2)).abs();</span> |
| <span class="source-line-no">989</span><span id="line-989"> }</span> |
| <span class="source-line-no">990</span><span id="line-990"></span> |
| <span class="source-line-no">991</span><span id="line-991"> /**</span> |
| <span class="source-line-no">992</span><span id="line-992"> * Returns an array of bytes corresponding to an array of BigIntegers</span> |
| <span class="source-line-no">993</span><span id="line-993"> * @param bigIntegers numbers to convert</span> |
| <span class="source-line-no">994</span><span id="line-994"> * @return bytes corresponding to the bigIntegers</span> |
| <span class="source-line-no">995</span><span id="line-995"> */</span> |
| <span class="source-line-no">996</span><span id="line-996"> public byte[][] convertToBytes(BigInteger[] bigIntegers) {</span> |
| <span class="source-line-no">997</span><span id="line-997"> byte[][] returnBytes = new byte[bigIntegers.length][];</span> |
| <span class="source-line-no">998</span><span id="line-998"> for (int i = 0; i < bigIntegers.length; i++) {</span> |
| <span class="source-line-no">999</span><span id="line-999"> returnBytes[i] = convertToByte(bigIntegers[i]);</span> |
| <span class="source-line-no">1000</span><span id="line-1000"> }</span> |
| <span class="source-line-no">1001</span><span id="line-1001"> return returnBytes;</span> |
| <span class="source-line-no">1002</span><span id="line-1002"> }</span> |
| <span class="source-line-no">1003</span><span id="line-1003"></span> |
| <span class="source-line-no">1004</span><span id="line-1004"> /**</span> |
| <span class="source-line-no">1005</span><span id="line-1005"> * Returns the bytes corresponding to the BigInteger</span> |
| <span class="source-line-no">1006</span><span id="line-1006"> * @param bigInteger number to convert</span> |
| <span class="source-line-no">1007</span><span id="line-1007"> * @param pad padding length</span> |
| <span class="source-line-no">1008</span><span id="line-1008"> * @return byte corresponding to input BigInteger</span> |
| <span class="source-line-no">1009</span><span id="line-1009"> */</span> |
| <span class="source-line-no">1010</span><span id="line-1010"> public byte[] convertToByte(BigInteger bigInteger, int pad) {</span> |
| <span class="source-line-no">1011</span><span id="line-1011"> String bigIntegerString = bigInteger.toString(radix);</span> |
| <span class="source-line-no">1012</span><span id="line-1012"> bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');</span> |
| <span class="source-line-no">1013</span><span id="line-1013"> return Bytes.toBytes(bigIntegerString);</span> |
| <span class="source-line-no">1014</span><span id="line-1014"> }</span> |
| <span class="source-line-no">1015</span><span id="line-1015"></span> |
| <span class="source-line-no">1016</span><span id="line-1016"> /**</span> |
| <span class="source-line-no">1017</span><span id="line-1017"> * Returns the bytes corresponding to the BigInteger</span> |
| <span class="source-line-no">1018</span><span id="line-1018"> * @param bigInteger number to convert</span> |
| <span class="source-line-no">1019</span><span id="line-1019"> * @return corresponding bytes</span> |
| <span class="source-line-no">1020</span><span id="line-1020"> */</span> |
| <span class="source-line-no">1021</span><span id="line-1021"> public byte[] convertToByte(BigInteger bigInteger) {</span> |
| <span class="source-line-no">1022</span><span id="line-1022"> return convertToByte(bigInteger, rowComparisonLength);</span> |
| <span class="source-line-no">1023</span><span id="line-1023"> }</span> |
| <span class="source-line-no">1024</span><span id="line-1024"></span> |
| <span class="source-line-no">1025</span><span id="line-1025"> /**</span> |
| <span class="source-line-no">1026</span><span id="line-1026"> * Returns the BigInteger represented by the byte array</span> |
| <span class="source-line-no">1027</span><span id="line-1027"> * @param row byte array representing row</span> |
| <span class="source-line-no">1028</span><span id="line-1028"> * @return the corresponding BigInteger</span> |
| <span class="source-line-no">1029</span><span id="line-1029"> */</span> |
| <span class="source-line-no">1030</span><span id="line-1030"> public BigInteger convertToBigInteger(byte[] row) {</span> |
| <span class="source-line-no">1031</span><span id="line-1031"> return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix) : BigInteger.ZERO;</span> |
| <span class="source-line-no">1032</span><span id="line-1032"> }</span> |
| <span class="source-line-no">1033</span><span id="line-1033"></span> |
| <span class="source-line-no">1034</span><span id="line-1034"> @Override</span> |
| <span class="source-line-no">1035</span><span id="line-1035"> public String toString() {</span> |
| <span class="source-line-no">1036</span><span id="line-1036"> return this.getClass().getSimpleName() + " [" + rowToStr(firstRow()) + ","</span> |
| <span class="source-line-no">1037</span><span id="line-1037"> + rowToStr(lastRow()) + "]";</span> |
| <span class="source-line-no">1038</span><span id="line-1038"> }</span> |
| <span class="source-line-no">1039</span><span id="line-1039"> }</span> |
| <span class="source-line-no">1040</span><span id="line-1040"></span> |
| <span class="source-line-no">1041</span><span id="line-1041"> /**</span> |
| <span class="source-line-no">1042</span><span id="line-1042"> * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are</span> |
| <span class="source-line-no">1043</span><span id="line-1043"> * approximately uniform random bytes (e.g. hashes). Rows are raw byte values in the range <b>00</span> |
| <span class="source-line-no">1044</span><span id="line-1044"> * =&gt; FF</b> and are right-padded with zeros to keep the same memcmp() order. This is the</span> |
| <span class="source-line-no">1045</span><span id="line-1045"> * natural algorithm to use for a byte[] environment and saves space, but is not necessarily the</span> |
| <span class="source-line-no">1046</span><span id="line-1046"> * easiest for readability.</span> |
| <span class="source-line-no">1047</span><span id="line-1047"> */</span> |
| <span class="source-line-no">1048</span><span id="line-1048"> public static class UniformSplit implements SplitAlgorithm {</span> |
| <span class="source-line-no">1049</span><span id="line-1049"> static final byte xFF = (byte) 0xFF;</span> |
| <span class="source-line-no">1050</span><span id="line-1050"> byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;</span> |
| <span class="source-line-no">1051</span><span id="line-1051"> byte[] lastRowBytes = new byte[] { xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF };</span> |
| <span class="source-line-no">1052</span><span id="line-1052"></span> |
| <span class="source-line-no">1053</span><span id="line-1053"> @Override</span> |
| <span class="source-line-no">1054</span><span id="line-1054"> public byte[] split(byte[] start, byte[] end) {</span> |
| <span class="source-line-no">1055</span><span id="line-1055"> return Bytes.split(start, end, 1)[1];</span> |
| <span class="source-line-no">1056</span><span id="line-1056"> }</span> |
| <span class="source-line-no">1057</span><span id="line-1057"></span> |
| <span class="source-line-no">1058</span><span id="line-1058"> @Override</span> |
| <span class="source-line-no">1059</span><span id="line-1059"> public byte[][] split(int numRegions) {</span> |
| <span class="source-line-no">1060</span><span id="line-1060"> Preconditions.checkArgument(Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,</span> |
| <span class="source-line-no">1061</span><span id="line-1061"> "last row (%s) is configured less than first row (%s)", Bytes.toStringBinary(lastRowBytes),</span> |
| <span class="source-line-no">1062</span><span id="line-1062"> Bytes.toStringBinary(firstRowBytes));</span> |
| <span class="source-line-no">1063</span><span id="line-1063"></span> |
| <span class="source-line-no">1064</span><span id="line-1064"> byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true, numRegions - 1);</span> |
| <span class="source-line-no">1065</span><span id="line-1065"> Preconditions.checkState(splits != null,</span> |
| <span class="source-line-no">1066</span><span id="line-1066"> "Could not split region with given user input: " + this);</span> |
| <span class="source-line-no">1067</span><span id="line-1067"></span> |
| <span class="source-line-no">1068</span><span id="line-1068"> // remove endpoints, which are included in the splits list</span> |
| <span class="source-line-no">1069</span><span id="line-1069"></span> |
| <span class="source-line-no">1070</span><span id="line-1070"> return splits == null ? null : Arrays.copyOfRange(splits, 1, splits.length - 1);</span> |
| <span class="source-line-no">1071</span><span id="line-1071"> }</span> |
| <span class="source-line-no">1072</span><span id="line-1072"></span> |
| <span class="source-line-no">1073</span><span id="line-1073"> @Override</span> |
| <span class="source-line-no">1074</span><span id="line-1074"> public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {</span> |
| <span class="source-line-no">1075</span><span id="line-1075"> if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) {</span> |
| <span class="source-line-no">1076</span><span id="line-1076"> start = firstRowBytes;</span> |
| <span class="source-line-no">1077</span><span id="line-1077"> }</span> |
| <span class="source-line-no">1078</span><span id="line-1078"> if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) {</span> |
| <span class="source-line-no">1079</span><span id="line-1079"> end = lastRowBytes;</span> |
| <span class="source-line-no">1080</span><span id="line-1080"> }</span> |
| <span class="source-line-no">1081</span><span id="line-1081"> Preconditions.checkArgument(Bytes.compareTo(end, start) > 0,</span> |
| <span class="source-line-no">1082</span><span id="line-1082"> "last row (%s) is configured less than first row (%s)", Bytes.toStringBinary(end),</span> |
| <span class="source-line-no">1083</span><span id="line-1083"> Bytes.toStringBinary(start));</span> |
| <span class="source-line-no">1084</span><span id="line-1084"></span> |
| <span class="source-line-no">1085</span><span id="line-1085"> byte[][] splits = Bytes.split(start, end, true, numSplits - 1);</span> |
| <span class="source-line-no">1086</span><span id="line-1086"> Preconditions.checkState(splits != null,</span> |
| <span class="source-line-no">1087</span><span id="line-1087"> "Could not calculate input splits with given user input: " + this);</span> |
| <span class="source-line-no">1088</span><span id="line-1088"> if (inclusive) {</span> |
| <span class="source-line-no">1089</span><span id="line-1089"> return splits;</span> |
| <span class="source-line-no">1090</span><span id="line-1090"> } else {</span> |
| <span class="source-line-no">1091</span><span id="line-1091"> // remove endpoints, which are included in the splits list</span> |
| <span class="source-line-no">1092</span><span id="line-1092"> return Arrays.copyOfRange(splits, 1, splits.length - 1);</span> |
| <span class="source-line-no">1093</span><span id="line-1093"> }</span> |
| <span class="source-line-no">1094</span><span id="line-1094"> }</span> |
| <span class="source-line-no">1095</span><span id="line-1095"></span> |
| <span class="source-line-no">1096</span><span id="line-1096"> @Override</span> |
| <span class="source-line-no">1097</span><span id="line-1097"> public byte[] firstRow() {</span> |
| <span class="source-line-no">1098</span><span id="line-1098"> return firstRowBytes;</span> |
| <span class="source-line-no">1099</span><span id="line-1099"> }</span> |
| <span class="source-line-no">1100</span><span id="line-1100"></span> |
| <span class="source-line-no">1101</span><span id="line-1101"> @Override</span> |
| <span class="source-line-no">1102</span><span id="line-1102"> public byte[] lastRow() {</span> |
| <span class="source-line-no">1103</span><span id="line-1103"> return lastRowBytes;</span> |
| <span class="source-line-no">1104</span><span id="line-1104"> }</span> |
| <span class="source-line-no">1105</span><span id="line-1105"></span> |
| <span class="source-line-no">1106</span><span id="line-1106"> @Override</span> |
| <span class="source-line-no">1107</span><span id="line-1107"> public void setFirstRow(String userInput) {</span> |
| <span class="source-line-no">1108</span><span id="line-1108"> firstRowBytes = Bytes.toBytesBinary(userInput);</span> |
| <span class="source-line-no">1109</span><span id="line-1109"> }</span> |
| <span class="source-line-no">1110</span><span id="line-1110"></span> |
| <span class="source-line-no">1111</span><span id="line-1111"> @Override</span> |
| <span class="source-line-no">1112</span><span id="line-1112"> public void setLastRow(String userInput) {</span> |
| <span class="source-line-no">1113</span><span id="line-1113"> lastRowBytes = Bytes.toBytesBinary(userInput);</span> |
| <span class="source-line-no">1114</span><span id="line-1114"> }</span> |
| <span class="source-line-no">1115</span><span id="line-1115"></span> |
| <span class="source-line-no">1116</span><span id="line-1116"> @Override</span> |
| <span class="source-line-no">1117</span><span id="line-1117"> public void setFirstRow(byte[] userInput) {</span> |
| <span class="source-line-no">1118</span><span id="line-1118"> firstRowBytes = userInput;</span> |
| <span class="source-line-no">1119</span><span id="line-1119"> }</span> |
| <span class="source-line-no">1120</span><span id="line-1120"></span> |
| <span class="source-line-no">1121</span><span id="line-1121"> @Override</span> |
| <span class="source-line-no">1122</span><span id="line-1122"> public void setLastRow(byte[] userInput) {</span> |
| <span class="source-line-no">1123</span><span id="line-1123"> lastRowBytes = userInput;</span> |
| <span class="source-line-no">1124</span><span id="line-1124"> }</span> |
| <span class="source-line-no">1125</span><span id="line-1125"></span> |
| <span class="source-line-no">1126</span><span id="line-1126"> @Override</span> |
| <span class="source-line-no">1127</span><span id="line-1127"> public byte[] strToRow(String input) {</span> |
| <span class="source-line-no">1128</span><span id="line-1128"> return Bytes.toBytesBinary(input);</span> |
| <span class="source-line-no">1129</span><span id="line-1129"> }</span> |
| <span class="source-line-no">1130</span><span id="line-1130"></span> |
| <span class="source-line-no">1131</span><span id="line-1131"> @Override</span> |
| <span class="source-line-no">1132</span><span id="line-1132"> public String rowToStr(byte[] row) {</span> |
| <span class="source-line-no">1133</span><span id="line-1133"> return Bytes.toStringBinary(row);</span> |
| <span class="source-line-no">1134</span><span id="line-1134"> }</span> |
| <span class="source-line-no">1135</span><span id="line-1135"></span> |
| <span class="source-line-no">1136</span><span id="line-1136"> @Override</span> |
| <span class="source-line-no">1137</span><span id="line-1137"> public String separator() {</span> |
| <span class="source-line-no">1138</span><span id="line-1138"> return ",";</span> |
| <span class="source-line-no">1139</span><span id="line-1139"> }</span> |
| <span class="source-line-no">1140</span><span id="line-1140"></span> |
| <span class="source-line-no">1141</span><span id="line-1141"> @Override</span> |
| <span class="source-line-no">1142</span><span id="line-1142"> public String toString() {</span> |
| <span class="source-line-no">1143</span><span id="line-1143"> return this.getClass().getSimpleName() + " [" + rowToStr(firstRow()) + ","</span> |
| <span class="source-line-no">1144</span><span id="line-1144"> + rowToStr(lastRow()) + "]";</span> |
| <span class="source-line-no">1145</span><span id="line-1145"> }</span> |
| <span class="source-line-no">1146</span><span id="line-1146"> }</span> |
| <span class="source-line-no">1147</span><span id="line-1147">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |