| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (17) --> |
| <title>Source code</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.mapreduce, class: HashTable, class: ResultHasher"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.mapreduce;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import java.io.IOException;</span> |
| <span class="source-line-no">021</span><span id="line-21">import java.io.InputStreamReader;</span> |
| <span class="source-line-no">022</span><span id="line-22">import java.io.OutputStreamWriter;</span> |
| <span class="source-line-no">023</span><span id="line-23">import java.security.MessageDigest;</span> |
| <span class="source-line-no">024</span><span id="line-24">import java.security.NoSuchAlgorithmException;</span> |
| <span class="source-line-no">025</span><span id="line-25">import java.util.ArrayList;</span> |
| <span class="source-line-no">026</span><span id="line-26">import java.util.Collections;</span> |
| <span class="source-line-no">027</span><span id="line-27">import java.util.List;</span> |
| <span class="source-line-no">028</span><span id="line-28">import java.util.Properties;</span> |
| <span class="source-line-no">029</span><span id="line-29">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">030</span><span id="line-30">import org.apache.hadoop.conf.Configured;</span> |
| <span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.fs.FSDataInputStream;</span> |
| <span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.fs.FileSystem;</span> |
| <span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.fs.Path;</span> |
| <span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.hbase.Cell;</span> |
| <span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.hbase.HBaseConfiguration;</span> |
| <span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.HConstants;</span> |
| <span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.TableName;</span> |
| <span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.client.Connection;</span> |
| <span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.client.ConnectionFactory;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.client.Result;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.client.Scan;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.io.ImmutableBytesWritable;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.util.Bytes;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.util.Pair;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.io.MapFile;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.io.NullWritable;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.io.SequenceFile;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.mapreduce.Job;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.mapreduce.Reducer;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;</span> |
| <span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.util.GenericOptionsParser;</span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.util.Tool;</span> |
| <span class="source-line-no">055</span><span id="line-55">import org.apache.hadoop.util.ToolRunner;</span> |
| <span class="source-line-no">056</span><span id="line-56">import org.apache.yetus.audience.InterfaceAudience;</span> |
| <span class="source-line-no">057</span><span id="line-57">import org.slf4j.Logger;</span> |
| <span class="source-line-no">058</span><span id="line-58">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">059</span><span id="line-59"></span> |
| <span class="source-line-no">060</span><span id="line-60">import org.apache.hbase.thirdparty.com.google.common.base.Charsets;</span> |
| <span class="source-line-no">061</span><span id="line-61">import org.apache.hbase.thirdparty.com.google.common.base.Throwables;</span> |
| <span class="source-line-no">062</span><span id="line-62">import org.apache.hbase.thirdparty.com.google.common.collect.Ordering;</span> |
| <span class="source-line-no">063</span><span id="line-63"></span> |
| <span class="source-line-no">064</span><span id="line-64">@InterfaceAudience.Private</span> |
| <span class="source-line-no">065</span><span id="line-65">public class HashTable extends Configured implements Tool {</span> |
| <span class="source-line-no">066</span><span id="line-66"></span> |
/** Logger for this class. */
| <span class="source-line-no">067</span><span id="line-67"> private static final Logger LOG = LoggerFactory.getLogger(HashTable.class);</span> |
| <span class="source-line-no">068</span><span id="line-68"></span> |
/** Initial value of {@code TableHash.batchSize} when nothing else is configured. */
| <span class="source-line-no">069</span><span id="line-69"> private static final int DEFAULT_BATCH_SIZE = 8000;</span> |
| <span class="source-line-no">070</span><span id="line-70"></span> |
// NOTE(review): presumably the Configuration key that carries the batch size to the job;
// its use is outside this view - confirm.
| <span class="source-line-no">071</span><span id="line-71"> private final static String HASH_BATCH_SIZE_CONF_KEY = "hash.batch.size";</span> |
// File names resolved against the hash directory by TableHash.read().
| <span class="source-line-no">072</span><span id="line-72"> final static String PARTITIONS_FILE_NAME = "partitions";</span> |
| <span class="source-line-no">073</span><span id="line-73"> final static String MANIFEST_FILE_NAME = "manifest";</span> |
// NOTE(review): the next four names are not referenced in the visible part of the file;
// they look like output-layout names and a configuration flag - verify against their users.
| <span class="source-line-no">074</span><span id="line-74"> final static String HASH_DATA_DIR = "hashes";</span> |
| <span class="source-line-no">075</span><span id="line-75"> final static String OUTPUT_DATA_FILE_PREFIX = "part-r-";</span> |
| <span class="source-line-no">076</span><span id="line-76"> final static String IGNORE_TIMESTAMPS = "ignoreTimestamps";</span> |
| <span class="source-line-no">077</span><span id="line-77"> private final static String TMP_MANIFEST_FILE_NAME = "manifest.tmp";</span> |
| <span class="source-line-no">078</span><span id="line-78"></span> |
/** Hash settings held by this tool instance; starts out with the TableHash defaults. */
| <span class="source-line-no">079</span><span id="line-79"> TableHash tableHash = new TableHash();</span> |
// NOTE(review): presumably the output location of the job; assigned outside this view.
| <span class="source-line-no">080</span><span id="line-80"> Path destPath;</span> |
| <span class="source-line-no">081</span><span id="line-81"></span> |
/**
 * Creates the tool and hands {@code conf} to the {@code Configured} superclass.
 * @param conf configuration passed to {@code Configured}
 */
| <span class="source-line-no">082</span><span id="line-82"> public HashTable(Configuration conf) {</span> |
| <span class="source-line-no">083</span><span id="line-83"> super(conf);</span> |
| <span class="source-line-no">084</span><span id="line-84"> }</span> |
| <span class="source-line-no">085</span><span id="line-85"></span> |
| <span class="source-line-no">086</span><span id="line-86"> public static class TableHash {</span> |
| <span class="source-line-no">087</span><span id="line-87"></span> |
/** Directory the manifest and partition files were read from; set by {@code read}. */
| <span class="source-line-no">088</span><span id="line-88"> Path hashDir;</span> |
| <span class="source-line-no">089</span><span id="line-89"></span> |
/** Stored in the manifest under the "table" key. */
| <span class="source-line-no">090</span><span id="line-90"> String tableName;</span> |
/** Comma-separated column families ({@code initScan} splits on ","); null means no family filter. */
| <span class="source-line-no">091</span><span id="line-91"> String families = null;</span> |
/** Stored in the manifest under the "targetBatchSize" key. */
| <span class="source-line-no">092</span><span id="line-92"> long batchSize = DEFAULT_BATCH_SIZE;</span> |
/** 0 means {@code selectPartitions} chooses the value from the number of regions. */
| <span class="source-line-no">093</span><span id="line-93"> int numHashFiles = 0;</span> |
// Scan bounds; the empty defaults leave the scan unbounded on that side.
| <span class="source-line-no">094</span><span id="line-94"> byte[] startRow = HConstants.EMPTY_START_ROW;</span> |
| <span class="source-line-no">095</span><span id="line-95"> byte[] stopRow = HConstants.EMPTY_END_ROW;</span> |
/** Applied to the scan only when greater than 0. */
| <span class="source-line-no">096</span><span id="line-96"> int scanBatch = 0;</span> |
/** Applied to the scan only when 0 or greater; -1 leaves the scan's own setting in place. */
| <span class="source-line-no">097</span><span id="line-97"> int versions = -1;</span> |
// Time range bounds; 0 means "not set" (initScan substitutes LATEST_TIMESTAMP for a 0 end).
| <span class="source-line-no">098</span><span id="line-98"> long startTime = 0;</span> |
| <span class="source-line-no">099</span><span id="line-99"> long endTime = 0;</span> |
// NOTE(review): not written to or read from the manifest in the visible code - confirm
// where this flag is propagated.
| <span class="source-line-no">100</span><span id="line-100"> boolean ignoreTimestamps;</span> |
/** Passed to {@code Scan.setRaw} by {@code initScan}. */
| <span class="source-line-no">101</span><span id="line-101"> boolean rawScan;</span> |
| <span class="source-line-no">102</span><span id="line-102"></span> |
/** Ordered split keys between hash files; filled by selectPartitions or readPartitionFile. */
| <span class="source-line-no">103</span><span id="line-103"> List<ImmutableBytesWritable> partitions;</span> |
| <span class="source-line-no">104</span><span id="line-104"></span> |
| <span class="source-line-no">105</span><span id="line-105"> public static TableHash read(Configuration conf, Path hashDir) throws IOException {</span> |
| <span class="source-line-no">106</span><span id="line-106"> TableHash tableHash = new TableHash();</span> |
| <span class="source-line-no">107</span><span id="line-107"> FileSystem fs = hashDir.getFileSystem(conf);</span> |
| <span class="source-line-no">108</span><span id="line-108"> tableHash.hashDir = hashDir;</span> |
| <span class="source-line-no">109</span><span id="line-109"> tableHash.readPropertiesFile(fs, new Path(hashDir, MANIFEST_FILE_NAME));</span> |
| <span class="source-line-no">110</span><span id="line-110"> tableHash.readPartitionFile(fs, conf, new Path(hashDir, PARTITIONS_FILE_NAME));</span> |
| <span class="source-line-no">111</span><span id="line-111"> return tableHash;</span> |
| <span class="source-line-no">112</span><span id="line-112"> }</span> |
| <span class="source-line-no">113</span><span id="line-113"></span> |
| <span class="source-line-no">114</span><span id="line-114"> void writePropertiesFile(FileSystem fs, Path path) throws IOException {</span> |
| <span class="source-line-no">115</span><span id="line-115"> Properties p = new Properties();</span> |
| <span class="source-line-no">116</span><span id="line-116"> p.setProperty("table", tableName);</span> |
| <span class="source-line-no">117</span><span id="line-117"> if (families != null) {</span> |
| <span class="source-line-no">118</span><span id="line-118"> p.setProperty("columnFamilies", families);</span> |
| <span class="source-line-no">119</span><span id="line-119"> }</span> |
| <span class="source-line-no">120</span><span id="line-120"> p.setProperty("targetBatchSize", Long.toString(batchSize));</span> |
| <span class="source-line-no">121</span><span id="line-121"> p.setProperty("numHashFiles", Integer.toString(numHashFiles));</span> |
| <span class="source-line-no">122</span><span id="line-122"> if (!isTableStartRow(startRow)) {</span> |
| <span class="source-line-no">123</span><span id="line-123"> p.setProperty("startRowHex", Bytes.toHex(startRow));</span> |
| <span class="source-line-no">124</span><span id="line-124"> }</span> |
| <span class="source-line-no">125</span><span id="line-125"> if (!isTableEndRow(stopRow)) {</span> |
| <span class="source-line-no">126</span><span id="line-126"> p.setProperty("stopRowHex", Bytes.toHex(stopRow));</span> |
| <span class="source-line-no">127</span><span id="line-127"> }</span> |
| <span class="source-line-no">128</span><span id="line-128"> if (scanBatch > 0) {</span> |
| <span class="source-line-no">129</span><span id="line-129"> p.setProperty("scanBatch", Integer.toString(scanBatch));</span> |
| <span class="source-line-no">130</span><span id="line-130"> }</span> |
| <span class="source-line-no">131</span><span id="line-131"> if (versions >= 0) {</span> |
| <span class="source-line-no">132</span><span id="line-132"> p.setProperty("versions", Integer.toString(versions));</span> |
| <span class="source-line-no">133</span><span id="line-133"> }</span> |
| <span class="source-line-no">134</span><span id="line-134"> if (startTime != 0) {</span> |
| <span class="source-line-no">135</span><span id="line-135"> p.setProperty("startTimestamp", Long.toString(startTime));</span> |
| <span class="source-line-no">136</span><span id="line-136"> }</span> |
| <span class="source-line-no">137</span><span id="line-137"> if (endTime != 0) {</span> |
| <span class="source-line-no">138</span><span id="line-138"> p.setProperty("endTimestamp", Long.toString(endTime));</span> |
| <span class="source-line-no">139</span><span id="line-139"> }</span> |
| <span class="source-line-no">140</span><span id="line-140"> p.setProperty("rawScan", Boolean.toString(rawScan));</span> |
| <span class="source-line-no">141</span><span id="line-141"></span> |
| <span class="source-line-no">142</span><span id="line-142"> try (OutputStreamWriter osw = new OutputStreamWriter(fs.create(path), Charsets.UTF_8)) {</span> |
| <span class="source-line-no">143</span><span id="line-143"> p.store(osw, null);</span> |
| <span class="source-line-no">144</span><span id="line-144"> }</span> |
| <span class="source-line-no">145</span><span id="line-145"> }</span> |
| <span class="source-line-no">146</span><span id="line-146"></span> |
| <span class="source-line-no">147</span><span id="line-147"> void readPropertiesFile(FileSystem fs, Path path) throws IOException {</span> |
| <span class="source-line-no">148</span><span id="line-148"> Properties p = new Properties();</span> |
| <span class="source-line-no">149</span><span id="line-149"> try (FSDataInputStream in = fs.open(path)) {</span> |
| <span class="source-line-no">150</span><span id="line-150"> try (InputStreamReader isr = new InputStreamReader(in, Charsets.UTF_8)) {</span> |
| <span class="source-line-no">151</span><span id="line-151"> p.load(isr);</span> |
| <span class="source-line-no">152</span><span id="line-152"> }</span> |
| <span class="source-line-no">153</span><span id="line-153"> }</span> |
| <span class="source-line-no">154</span><span id="line-154"> tableName = p.getProperty("table");</span> |
| <span class="source-line-no">155</span><span id="line-155"> families = p.getProperty("columnFamilies");</span> |
| <span class="source-line-no">156</span><span id="line-156"> batchSize = Long.parseLong(p.getProperty("targetBatchSize"));</span> |
| <span class="source-line-no">157</span><span id="line-157"> numHashFiles = Integer.parseInt(p.getProperty("numHashFiles"));</span> |
| <span class="source-line-no">158</span><span id="line-158"></span> |
| <span class="source-line-no">159</span><span id="line-159"> String startRowHex = p.getProperty("startRowHex");</span> |
| <span class="source-line-no">160</span><span id="line-160"> if (startRowHex != null) {</span> |
| <span class="source-line-no">161</span><span id="line-161"> startRow = Bytes.fromHex(startRowHex);</span> |
| <span class="source-line-no">162</span><span id="line-162"> }</span> |
| <span class="source-line-no">163</span><span id="line-163"> String stopRowHex = p.getProperty("stopRowHex");</span> |
| <span class="source-line-no">164</span><span id="line-164"> if (stopRowHex != null) {</span> |
| <span class="source-line-no">165</span><span id="line-165"> stopRow = Bytes.fromHex(stopRowHex);</span> |
| <span class="source-line-no">166</span><span id="line-166"> }</span> |
| <span class="source-line-no">167</span><span id="line-167"></span> |
| <span class="source-line-no">168</span><span id="line-168"> String scanBatchString = p.getProperty("scanBatch");</span> |
| <span class="source-line-no">169</span><span id="line-169"> if (scanBatchString != null) {</span> |
| <span class="source-line-no">170</span><span id="line-170"> scanBatch = Integer.parseInt(scanBatchString);</span> |
| <span class="source-line-no">171</span><span id="line-171"> }</span> |
| <span class="source-line-no">172</span><span id="line-172"></span> |
| <span class="source-line-no">173</span><span id="line-173"> String versionString = p.getProperty("versions");</span> |
| <span class="source-line-no">174</span><span id="line-174"> if (versionString != null) {</span> |
| <span class="source-line-no">175</span><span id="line-175"> versions = Integer.parseInt(versionString);</span> |
| <span class="source-line-no">176</span><span id="line-176"> }</span> |
| <span class="source-line-no">177</span><span id="line-177"></span> |
| <span class="source-line-no">178</span><span id="line-178"> String rawScanString = p.getProperty("rawScan");</span> |
| <span class="source-line-no">179</span><span id="line-179"> if (rawScanString != null) {</span> |
| <span class="source-line-no">180</span><span id="line-180"> rawScan = Boolean.parseBoolean(rawScanString);</span> |
| <span class="source-line-no">181</span><span id="line-181"> }</span> |
| <span class="source-line-no">182</span><span id="line-182"></span> |
| <span class="source-line-no">183</span><span id="line-183"> String startTimeString = p.getProperty("startTimestamp");</span> |
| <span class="source-line-no">184</span><span id="line-184"> if (startTimeString != null) {</span> |
| <span class="source-line-no">185</span><span id="line-185"> startTime = Long.parseLong(startTimeString);</span> |
| <span class="source-line-no">186</span><span id="line-186"> }</span> |
| <span class="source-line-no">187</span><span id="line-187"></span> |
| <span class="source-line-no">188</span><span id="line-188"> String endTimeString = p.getProperty("endTimestamp");</span> |
| <span class="source-line-no">189</span><span id="line-189"> if (endTimeString != null) {</span> |
| <span class="source-line-no">190</span><span id="line-190"> endTime = Long.parseLong(endTimeString);</span> |
| <span class="source-line-no">191</span><span id="line-191"> }</span> |
| <span class="source-line-no">192</span><span id="line-192"> }</span> |
| <span class="source-line-no">193</span><span id="line-193"></span> |
| <span class="source-line-no">194</span><span id="line-194"> Scan initScan() throws IOException {</span> |
| <span class="source-line-no">195</span><span id="line-195"> Scan scan = new Scan();</span> |
| <span class="source-line-no">196</span><span id="line-196"> scan.setCacheBlocks(false);</span> |
| <span class="source-line-no">197</span><span id="line-197"> if (startTime != 0 || endTime != 0) {</span> |
| <span class="source-line-no">198</span><span id="line-198"> scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);</span> |
| <span class="source-line-no">199</span><span id="line-199"> }</span> |
| <span class="source-line-no">200</span><span id="line-200"> if (scanBatch > 0) {</span> |
| <span class="source-line-no">201</span><span id="line-201"> scan.setBatch(scanBatch);</span> |
| <span class="source-line-no">202</span><span id="line-202"> }</span> |
| <span class="source-line-no">203</span><span id="line-203"> if (versions >= 0) {</span> |
| <span class="source-line-no">204</span><span id="line-204"> scan.readVersions(versions);</span> |
| <span class="source-line-no">205</span><span id="line-205"> }</span> |
| <span class="source-line-no">206</span><span id="line-206"> if (!isTableStartRow(startRow)) {</span> |
| <span class="source-line-no">207</span><span id="line-207"> scan.withStartRow(startRow);</span> |
| <span class="source-line-no">208</span><span id="line-208"> }</span> |
| <span class="source-line-no">209</span><span id="line-209"> if (!isTableEndRow(stopRow)) {</span> |
| <span class="source-line-no">210</span><span id="line-210"> scan.withStopRow(stopRow);</span> |
| <span class="source-line-no">211</span><span id="line-211"> }</span> |
| <span class="source-line-no">212</span><span id="line-212"> if (families != null) {</span> |
| <span class="source-line-no">213</span><span id="line-213"> for (String fam : families.split(",")) {</span> |
| <span class="source-line-no">214</span><span id="line-214"> scan.addFamily(Bytes.toBytes(fam));</span> |
| <span class="source-line-no">215</span><span id="line-215"> }</span> |
| <span class="source-line-no">216</span><span id="line-216"> }</span> |
| <span class="source-line-no">217</span><span id="line-217"> scan.setRaw(rawScan);</span> |
| <span class="source-line-no">218</span><span id="line-218"></span> |
| <span class="source-line-no">219</span><span id="line-219"> return scan;</span> |
| <span class="source-line-no">220</span><span id="line-220"> }</span> |
| <span class="source-line-no">221</span><span id="line-221"></span> |
| <span class="source-line-no">222</span><span id="line-222"> /**</span> |
| <span class="source-line-no">223</span><span id="line-223"> * Choose partitions between row ranges to hash to a single output file Selects region</span> |
| <span class="source-line-no">224</span><span id="line-224"> * boundaries that fall within the scan range, and groups them into the desired number of</span> |
| <span class="source-line-no">225</span><span id="line-225"> * partitions.</span> |
| <span class="source-line-no">226</span><span id="line-226"> */</span> |
| <span class="source-line-no">227</span><span id="line-227"> void selectPartitions(Pair<byte[][], byte[][]> regionStartEndKeys) {</span> |
| <span class="source-line-no">228</span><span id="line-228"> List<byte[]> startKeys = new ArrayList<>();</span> |
| <span class="source-line-no">229</span><span id="line-229"> for (int i = 0; i < regionStartEndKeys.getFirst().length; i++) {</span> |
| <span class="source-line-no">230</span><span id="line-230"> byte[] regionStartKey = regionStartEndKeys.getFirst()[i];</span> |
| <span class="source-line-no">231</span><span id="line-231"> byte[] regionEndKey = regionStartEndKeys.getSecond()[i];</span> |
| <span class="source-line-no">232</span><span id="line-232"></span> |
| <span class="source-line-no">233</span><span id="line-233"> // if scan begins after this region, or starts before this region, then drop this region</span> |
| <span class="source-line-no">234</span><span id="line-234"> // in other words:</span> |
| <span class="source-line-no">235</span><span id="line-235"> // IF (scan begins before the end of this region</span> |
| <span class="source-line-no">236</span><span id="line-236"> // AND scan ends before the start of this region)</span> |
| <span class="source-line-no">237</span><span id="line-237"> // THEN include this region</span> |
| <span class="source-line-no">238</span><span id="line-238"> if (</span> |
| <span class="source-line-no">239</span><span id="line-239"> (isTableStartRow(startRow) || isTableEndRow(regionEndKey)</span> |
| <span class="source-line-no">240</span><span id="line-240"> || Bytes.compareTo(startRow, regionEndKey) < 0)</span> |
| <span class="source-line-no">241</span><span id="line-241"> && (isTableEndRow(stopRow) || isTableStartRow(regionStartKey)</span> |
| <span class="source-line-no">242</span><span id="line-242"> || Bytes.compareTo(stopRow, regionStartKey) > 0)</span> |
| <span class="source-line-no">243</span><span id="line-243"> ) {</span> |
| <span class="source-line-no">244</span><span id="line-244"> startKeys.add(regionStartKey);</span> |
| <span class="source-line-no">245</span><span id="line-245"> }</span> |
| <span class="source-line-no">246</span><span id="line-246"> }</span> |
| <span class="source-line-no">247</span><span id="line-247"></span> |
| <span class="source-line-no">248</span><span id="line-248"> int numRegions = startKeys.size();</span> |
| <span class="source-line-no">249</span><span id="line-249"> if (numHashFiles == 0) {</span> |
| <span class="source-line-no">250</span><span id="line-250"> numHashFiles = numRegions / 100;</span> |
| <span class="source-line-no">251</span><span id="line-251"> }</span> |
| <span class="source-line-no">252</span><span id="line-252"> if (numHashFiles == 0) {</span> |
| <span class="source-line-no">253</span><span id="line-253"> numHashFiles = 1;</span> |
| <span class="source-line-no">254</span><span id="line-254"> }</span> |
| <span class="source-line-no">255</span><span id="line-255"> if (numHashFiles > numRegions) {</span> |
| <span class="source-line-no">256</span><span id="line-256"> // can't partition within regions</span> |
| <span class="source-line-no">257</span><span id="line-257"> numHashFiles = numRegions;</span> |
| <span class="source-line-no">258</span><span id="line-258"> }</span> |
| <span class="source-line-no">259</span><span id="line-259"></span> |
| <span class="source-line-no">260</span><span id="line-260"> // choose a subset of start keys to group regions into ranges</span> |
| <span class="source-line-no">261</span><span id="line-261"> partitions = new ArrayList<>(numHashFiles - 1);</span> |
| <span class="source-line-no">262</span><span id="line-262"> // skip the first start key as it is not a partition between ranges.</span> |
| <span class="source-line-no">263</span><span id="line-263"> for (long i = 1; i < numHashFiles; i++) {</span> |
| <span class="source-line-no">264</span><span id="line-264"> int splitIndex = (int) (numRegions * i / numHashFiles);</span> |
| <span class="source-line-no">265</span><span id="line-265"> partitions.add(new ImmutableBytesWritable(startKeys.get(splitIndex)));</span> |
| <span class="source-line-no">266</span><span id="line-266"> }</span> |
| <span class="source-line-no">267</span><span id="line-267"> }</span> |
| <span class="source-line-no">268</span><span id="line-268"></span> |
| <span class="source-line-no">269</span><span id="line-269"> void writePartitionFile(Configuration conf, Path path) throws IOException {</span> |
| <span class="source-line-no">270</span><span id="line-270"> FileSystem fs = path.getFileSystem(conf);</span> |
| <span class="source-line-no">271</span><span id="line-271"> @SuppressWarnings("deprecation")</span> |
| <span class="source-line-no">272</span><span id="line-272"> SequenceFile.Writer writer =</span> |
| <span class="source-line-no">273</span><span id="line-273"> SequenceFile.createWriter(fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);</span> |
| <span class="source-line-no">274</span><span id="line-274"></span> |
| <span class="source-line-no">275</span><span id="line-275"> for (int i = 0; i < partitions.size(); i++) {</span> |
| <span class="source-line-no">276</span><span id="line-276"> writer.append(partitions.get(i), NullWritable.get());</span> |
| <span class="source-line-no">277</span><span id="line-277"> }</span> |
| <span class="source-line-no">278</span><span id="line-278"> writer.close();</span> |
| <span class="source-line-no">279</span><span id="line-279"> }</span> |
| <span class="source-line-no">280</span><span id="line-280"></span> |
| <span class="source-line-no">281</span><span id="line-281"> private void readPartitionFile(FileSystem fs, Configuration conf, Path path)</span> |
| <span class="source-line-no">282</span><span id="line-282"> throws IOException {</span> |
| <span class="source-line-no">283</span><span id="line-283"> @SuppressWarnings("deprecation")</span> |
| <span class="source-line-no">284</span><span id="line-284"> SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);</span> |
| <span class="source-line-no">285</span><span id="line-285"> ImmutableBytesWritable key = new ImmutableBytesWritable();</span> |
| <span class="source-line-no">286</span><span id="line-286"> partitions = new ArrayList<>();</span> |
| <span class="source-line-no">287</span><span id="line-287"> while (reader.next(key)) {</span> |
| <span class="source-line-no">288</span><span id="line-288"> partitions.add(new ImmutableBytesWritable(key.copyBytes()));</span> |
| <span class="source-line-no">289</span><span id="line-289"> }</span> |
| <span class="source-line-no">290</span><span id="line-290"> reader.close();</span> |
| <span class="source-line-no">291</span><span id="line-291"></span> |
| <span class="source-line-no">292</span><span id="line-292"> if (!Ordering.natural().isOrdered(partitions)) {</span> |
| <span class="source-line-no">293</span><span id="line-293"> throw new IOException("Partitions are not ordered!");</span> |
| <span class="source-line-no">294</span><span id="line-294"> }</span> |
| <span class="source-line-no">295</span><span id="line-295"> }</span> |
| <span class="source-line-no">296</span><span id="line-296"></span> |
| <span class="source-line-no">297</span><span id="line-297"> @Override</span> |
| <span class="source-line-no">298</span><span id="line-298"> public String toString() { // one-line summary of the hash settings, entries separated by ", "</span> |
| <span class="source-line-no">299</span><span id="line-299"> StringBuilder sb = new StringBuilder();</span> |
| <span class="source-line-no">300</span><span id="line-300"> sb.append("tableName=").append(tableName);</span> |
| <span class="source-line-no">301</span><span id="line-301"> if (families != null) {</span> |
| <span class="source-line-no">302</span><span id="line-302"> sb.append(", families=").append(families);</span> |
| <span class="source-line-no">303</span><span id="line-303"> }</span> |
| <span class="source-line-no">304</span><span id="line-304"> sb.append(", batchSize=").append(batchSize);</span> |
| <span class="source-line-no">305</span><span id="line-305"> sb.append(", numHashFiles=").append(numHashFiles);</span> |
| <span class="source-line-no">306</span><span id="line-306"> if (!isTableStartRow(startRow)) { // row bounds are printed in hex, and only when non-empty</span> |
| <span class="source-line-no">307</span><span id="line-307"> sb.append(", startRowHex=").append(Bytes.toHex(startRow));</span> |
| <span class="source-line-no">308</span><span id="line-308"> }</span> |
| <span class="source-line-no">309</span><span id="line-309"> if (!isTableEndRow(stopRow)) {</span> |
| <span class="source-line-no">310</span><span id="line-310"> sb.append(", stopRowHex=").append(Bytes.toHex(stopRow));</span> |
| <span class="source-line-no">311</span><span id="line-311"> }</span> |
| <span class="source-line-no">312</span><span id="line-312"> if (scanBatch >= 0) {</span> |
| <span class="source-line-no">313</span><span id="line-313"> sb.append(", scanBatch=").append(scanBatch);</span> |
| <span class="source-line-no">314</span><span id="line-314"> }</span> |
| <span class="source-line-no">315</span><span id="line-315"> if (versions >= 0) {</span> |
| <span class="source-line-no">316</span><span id="line-316"> sb.append(", versions=").append(versions);</span> |
| <span class="source-line-no">317</span><span id="line-317"> }</span> |
| <span class="source-line-no">318</span><span id="line-318"> sb.append(", rawScan=").append(rawScan);</span> |
| <span class="source-line-no">319</span><span id="line-319"> if (startTime != 0) { // omitted when 0</span> |
| <span class="source-line-no">320</span><span id="line-320"> sb.append(", startTime=").append(startTime); // ", " keeps it apart from the rawScan value</span> |
| <span class="source-line-no">321</span><span id="line-321"> }</span> |
| <span class="source-line-no">322</span><span id="line-322"> if (endTime != 0) { // omitted when 0</span> |
| <span class="source-line-no">323</span><span id="line-323"> sb.append(", endTime=").append(endTime);</span> |
| <span class="source-line-no">324</span><span id="line-324"> }</span> |
| <span class="source-line-no">325</span><span id="line-325"> return sb.toString();</span> |
| <span class="source-line-no">326</span><span id="line-326"> }</span> |
| <span class="source-line-no">327</span><span id="line-327"></span> |
| <span class="source-line-no">328</span><span id="line-328"> static String getDataFileName(int hashFileIndex) { // file prefix + index zero-padded to 5 digits</span> |
| <span class="source-line-no">329</span><span id="line-329"> return String.format(HashTable.OUTPUT_DATA_FILE_PREFIX + "%05d", Integer.valueOf(hashFileIndex));</span> |
| <span class="source-line-no">330</span><span id="line-330"> }</span> |
| <span class="source-line-no">331</span><span id="line-331"></span> |
| <span class="source-line-no">332</span><span id="line-332"> /**</span> |
| <span class="source-line-no">333</span><span id="line-333"> * Creates a {@link Reader} that begins at the first hash whose key is at or after startKey.</span> |
| <span class="source-line-no">334</span><span id="line-334"> */</span> |
| <span class="source-line-no">335</span><span id="line-335"> public Reader newReader(Configuration conf, ImmutableBytesWritable startKey)</span> |
| <span class="source-line-no">336</span><span id="line-336"> throws IOException {</span> |
| <span class="source-line-no">337</span><span id="line-337"> return this.new Reader(conf, startKey);</span> |
| <span class="source-line-no">338</span><span id="line-338"> }</span> |
| <span class="source-line-no">339</span><span id="line-339"></span> |
| <span class="source-line-no">340</span><span id="line-340"> public class Reader implements java.io.Closeable {</span> |
| <span class="source-line-no">341</span><span id="line-341"> private final Configuration conf;</span> |
| <span class="source-line-no">342</span><span id="line-342"> // index of the hash data file that mapFileReader currently has open</span> |
| <span class="source-line-no">343</span><span id="line-343"> private int hashFileIndex;</span> |
| <span class="source-line-no">344</span><span id="line-344"> private MapFile.Reader mapFileReader;</span> |
| <span class="source-line-no">345</span><span id="line-345"> // true while the pair found by the constructor is still waiting to be returned by next()</span> |
| <span class="source-line-no">346</span><span id="line-346"> private boolean cachedNext;</span> |
| <span class="source-line-no">347</span><span id="line-347"> private ImmutableBytesWritable key;</span> |
| <span class="source-line-no">348</span><span id="line-348"> private ImmutableBytesWritable hash;</span> |
| <span class="source-line-no">349</span><span id="line-349"></span> |
| <span class="source-line-no">350</span><span id="line-350"> Reader(Configuration conf, ImmutableBytesWritable startKey) throws IOException {</span> |
| <span class="source-line-no">351</span><span id="line-351"> this.conf = conf;</span> |
| <span class="source-line-no">352</span><span id="line-352"> int partitionIndex = Collections.binarySearch(partitions, startKey);</span> |
| <span class="source-line-no">353</span><span id="line-353"> if (partitionIndex >= 0) {</span> |
| <span class="source-line-no">354</span><span id="line-354"> // if the key is equal to a partition, then go to the file after that partition</span> |
| <span class="source-line-no">355</span><span id="line-355"> hashFileIndex = partitionIndex + 1;</span> |
| <span class="source-line-no">356</span><span id="line-356"> } else {</span> |
| <span class="source-line-no">357</span><span id="line-357"> // if the key is between partitions, then go to the file between those partitions</span> |
| <span class="source-line-no">358</span><span id="line-358"> hashFileIndex = -1 - partitionIndex;</span> |
| <span class="source-line-no">359</span><span id="line-359"> }</span> |
| <span class="source-line-no">360</span><span id="line-360"> openHashFile();</span> |
| <span class="source-line-no">361</span><span id="line-361"></span> |
| <span class="source-line-no">362</span><span id="line-362"> // MapFiles don't make it easy to seek() so that the subsequent next() returns</span> |
| <span class="source-line-no">363</span><span id="line-363"> // the desired key/value pair. So we cache it for the first call of next().</span> |
| <span class="source-line-no">364</span><span id="line-364"> hash = new ImmutableBytesWritable();</span> |
| <span class="source-line-no">365</span><span id="line-365"> key = (ImmutableBytesWritable) mapFileReader.getClosest(startKey, hash);</span> |
| <span class="source-line-no">366</span><span id="line-366"> if (key == null) {</span> |
| <span class="source-line-no">367</span><span id="line-367"> cachedNext = false;</span> |
| <span class="source-line-no">368</span><span id="line-368"> hash = null;</span> |
| <span class="source-line-no">369</span><span id="line-369"> } else {</span> |
| <span class="source-line-no">370</span><span id="line-370"> cachedNext = true;</span> |
| <span class="source-line-no">371</span><span id="line-371"> }</span> |
| <span class="source-line-no">372</span><span id="line-372"> }</span> |
| <span class="source-line-no">373</span><span id="line-373"></span> |
| <span class="source-line-no">374</span><span id="line-374"> /**</span> |
| <span class="source-line-no">375</span><span id="line-375"> * Read the next key/hash pair. Returns true if such a pair exists and false when at the end</span> |
| <span class="source-line-no">376</span><span id="line-376"> * of the data.</span> |
| <span class="source-line-no">377</span><span id="line-377"> */</span> |
| <span class="source-line-no">378</span><span id="line-378"> public boolean next() throws IOException {</span> |
| <span class="source-line-no">379</span><span id="line-379"> if (cachedNext) {</span> |
| <span class="source-line-no">380</span><span id="line-380"> cachedNext = false;</span> |
| <span class="source-line-no">381</span><span id="line-381"> return true;</span> |
| <span class="source-line-no">382</span><span id="line-382"> }</span> |
| <span class="source-line-no">383</span><span id="line-383"> key = new ImmutableBytesWritable();</span> |
| <span class="source-line-no">384</span><span id="line-384"> hash = new ImmutableBytesWritable();</span> |
| <span class="source-line-no">385</span><span id="line-385"> while (true) {</span> |
| <span class="source-line-no">386</span><span id="line-386"> boolean hasNext = mapFileReader.next(key, hash);</span> |
| <span class="source-line-no">387</span><span id="line-387"> if (hasNext) {</span> |
| <span class="source-line-no">388</span><span id="line-388"> return true;</span> |
| <span class="source-line-no">389</span><span id="line-389"> }</span> |
| <span class="source-line-no">390</span><span id="line-390"> hashFileIndex++;</span> |
| <span class="source-line-no">391</span><span id="line-391"> if (hashFileIndex < TableHash.this.numHashFiles) {</span> |
| <span class="source-line-no">392</span><span id="line-392"> // openHashFile() closes the exhausted reader itself before opening the next file</span> |
| <span class="source-line-no">393</span><span id="line-393"> openHashFile();</span> |
| <span class="source-line-no">394</span><span id="line-394"> } else {</span> |
| <span class="source-line-no">395</span><span id="line-395"> key = null;</span> |
| <span class="source-line-no">396</span><span id="line-396"> hash = null;</span> |
| <span class="source-line-no">397</span><span id="line-397"> return false;</span> |
| <span class="source-line-no">398</span><span id="line-398"> }</span> |
| <span class="source-line-no">399</span><span id="line-399"> }</span> |
| <span class="source-line-no">400</span><span id="line-400"> }</span> |
| <span class="source-line-no">401</span><span id="line-401"></span> |
| <span class="source-line-no">402</span><span id="line-402"> /**</span> |
| <span class="source-line-no">403</span><span id="line-403"> * Get the current key</span> |
| <span class="source-line-no">404</span><span id="line-404"> * @return the current key or null if there is no current key</span> |
| <span class="source-line-no">405</span><span id="line-405"> */</span> |
| <span class="source-line-no">406</span><span id="line-406"> public ImmutableBytesWritable getCurrentKey() {</span> |
| <span class="source-line-no">407</span><span id="line-407"> return key;</span> |
| <span class="source-line-no">408</span><span id="line-408"> }</span> |
| <span class="source-line-no">409</span><span id="line-409"></span> |
| <span class="source-line-no">410</span><span id="line-410"> /**</span> |
| <span class="source-line-no">411</span><span id="line-411"> * Get the current hash</span> |
| <span class="source-line-no">412</span><span id="line-412"> * @return the current hash or null if there is no current hash</span> |
| <span class="source-line-no">413</span><span id="line-413"> */</span> |
| <span class="source-line-no">414</span><span id="line-414"> public ImmutableBytesWritable getCurrentHash() {</span> |
| <span class="source-line-no">415</span><span id="line-415"> return hash;</span> |
| <span class="source-line-no">416</span><span id="line-416"> }</span> |
| <span class="source-line-no">417</span><span id="line-417"> /** Closes any reader that is open, then opens the data file numbered hashFileIndex. */</span> |
| <span class="source-line-no">418</span><span id="line-418"> private void openHashFile() throws IOException {</span> |
| <span class="source-line-no">419</span><span id="line-419"> if (mapFileReader != null) {</span> |
| <span class="source-line-no">420</span><span id="line-420"> mapFileReader.close();</span> |
| <span class="source-line-no">421</span><span id="line-421"> }</span> |
| <span class="source-line-no">422</span><span id="line-422"> Path dataDir = new Path(TableHash.this.hashDir, HASH_DATA_DIR);</span> |
| <span class="source-line-no">423</span><span id="line-423"> Path dataFile = new Path(dataDir, getDataFileName(hashFileIndex));</span> |
| <span class="source-line-no">424</span><span id="line-424"> mapFileReader = new MapFile.Reader(dataFile, conf);</span> |
| <span class="source-line-no">425</span><span id="line-425"> }</span> |
| <span class="source-line-no">426</span><span id="line-426"> /** Closes the reader of the hash file that is currently open. */</span> |
| <span class="source-line-no">427</span><span id="line-427"> @Override</span> |
| <span class="source-line-no">428</span><span id="line-428"> public void close() throws IOException {</span> |
| <span class="source-line-no">429</span><span id="line-429"> mapFileReader.close();</span> |
| <span class="source-line-no">430</span><span id="line-430"> }</span> |
| <span class="source-line-no">431</span><span id="line-431"> }</span> |
| <span class="source-line-no">432</span><span id="line-432"> }</span> |
| <span class="source-line-no">433</span><span id="line-433"></span> |
| <span class="source-line-no">434</span><span id="line-434"> static boolean isTableStartRow(byte[] row) { // true when row equals the empty start-row marker</span> |
| <span class="source-line-no">435</span><span id="line-435"> return Bytes.equals(row, HConstants.EMPTY_START_ROW);</span> |
| <span class="source-line-no">436</span><span id="line-436"> }</span> |
| <span class="source-line-no">437</span><span id="line-437"></span> |
| <span class="source-line-no">438</span><span id="line-438"> static boolean isTableEndRow(byte[] row) { // true when row equals the empty end-row marker</span> |
| <span class="source-line-no">439</span><span id="line-439"> return Bytes.equals(row, HConstants.EMPTY_END_ROW);</span> |
| <span class="source-line-no">440</span><span id="line-440"> }</span> |
| <span class="source-line-no">441</span><span id="line-441"></span> |
| <span class="source-line-no">442</span><span id="line-442"> public Job createSubmittableJob(String[] args) throws IOException {</span> |
| <span class="source-line-no">443</span><span id="line-443"> Path partitionFile = new Path(destPath, PARTITIONS_FILE_NAME);</span> |
| <span class="source-line-no">444</span><span id="line-444"> generatePartitions(partitionFile);</span> |
| <span class="source-line-no">445</span><span id="line-445"> // an explicit mapreduce.job.name wins over the generated default name</span> |
| <span class="source-line-no">446</span><span id="line-446"> String jobName = getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName);</span> |
| <span class="source-line-no">447</span><span id="line-447"> Job job = Job.getInstance(getConf(), jobName);</span> |
| <span class="source-line-no">448</span><span id="line-448"> Configuration jobConf = job.getConfiguration();</span> |
| <span class="source-line-no">449</span><span id="line-449"> jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);</span> |
| <span class="source-line-no">450</span><span id="line-450"> jobConf.setBoolean(IGNORE_TIMESTAMPS, tableHash.ignoreTimestamps);</span> |
| <span class="source-line-no">451</span><span id="line-451"> job.setJarByClass(HashTable.class);</span> |
| <span class="source-line-no">452</span><span id="line-452"> // map side: scan the table with HashMapper, emitting (batch start key, batch hash) pairs</span> |
| <span class="source-line-no">453</span><span id="line-453"> TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),</span> |
| <span class="source-line-no">454</span><span id="line-454"> HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);</span> |
| <span class="source-line-no">455</span><span id="line-455"></span> |
| <span class="source-line-no">456</span><span id="line-456"> // reduce side: a TotalOrderPartitioner plus reducers group region output into hash files</span> |
| <span class="source-line-no">457</span><span id="line-457"> job.setPartitionerClass(TotalOrderPartitioner.class);</span> |
| <span class="source-line-no">458</span><span id="line-458"> TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);</span> |
| <span class="source-line-no">459</span><span id="line-459"> job.setReducerClass(Reducer.class); // identity reducer</span> |
| <span class="source-line-no">460</span><span id="line-460"> job.setNumReduceTasks(tableHash.numHashFiles);</span> |
| <span class="source-line-no">461</span><span id="line-461"> job.setOutputKeyClass(ImmutableBytesWritable.class);</span> |
| <span class="source-line-no">462</span><span id="line-462"> job.setOutputValueClass(ImmutableBytesWritable.class);</span> |
| <span class="source-line-no">463</span><span id="line-463"> job.setOutputFormatClass(MapFileOutputFormat.class);</span> |
| <span class="source-line-no">464</span><span id="line-464"> FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));</span> |
| <span class="source-line-no">465</span><span id="line-465"></span> |
| <span class="source-line-no">466</span><span id="line-466"> return job;</span> |
| <span class="source-line-no">467</span><span id="line-467"> }</span> |
| <span class="source-line-no">468</span><span id="line-468"></span> |
| <span class="source-line-no">469</span><span id="line-469"> private void generatePartitions(Path partitionsPath) throws IOException {</span> |
| <span class="source-line-no">470</span><span id="line-470"> Pair<byte[][], byte[][]> regionKeys;</span> |
| <span class="source-line-no">471</span><span id="line-471"> // try-with-resources: the connection is closed even when the region lookup throws</span> |
| <span class="source-line-no">472</span><span id="line-472"> try (Connection connection = ConnectionFactory.createConnection(getConf())) {</span> |
| <span class="source-line-no">473</span><span id="line-473"> regionKeys =</span> |
| <span class="source-line-no">474</span><span id="line-474"> connection.getRegionLocator(TableName.valueOf(tableHash.tableName)).getStartEndKeys();</span> |
| <span class="source-line-no">475</span><span id="line-475"> }</span> |
| <span class="source-line-no">476</span><span id="line-476"> tableHash.selectPartitions(regionKeys);</span> |
| <span class="source-line-no">477</span><span id="line-477"> LOG.info("Writing " + tableHash.partitions.size() + " partition keys to " + partitionsPath);</span> |
| <span class="source-line-no">478</span><span id="line-478"> tableHash.writePartitionFile(getConf(), partitionsPath);</span> |
| <span class="source-line-no">479</span><span id="line-479"> }</span> |
| <span class="source-line-no">480</span><span id="line-480"></span> |
| <span class="source-line-no">481</span><span id="line-481"> static class ResultHasher {</span> |
| <span class="source-line-no">482</span><span id="line-482"> private MessageDigest digest;</span> |
| <span class="source-line-no">483</span><span id="line-483"> // state of the batch being accumulated; batchHash stays null until finishBatch() has run</span> |
| <span class="source-line-no">484</span><span id="line-484"> private boolean batchStarted = false;</span> |
| <span class="source-line-no">485</span><span id="line-485"> private ImmutableBytesWritable batchStartKey;</span> |
| <span class="source-line-no">486</span><span id="line-486"> private ImmutableBytesWritable batchHash;</span> |
| <span class="source-line-no">487</span><span id="line-487"> private long batchSize = 0;</span> |
| <span class="source-line-no">488</span><span id="line-488"> boolean ignoreTimestamps;</span> |
| <span class="source-line-no">489</span><span id="line-489"></span> |
| <span class="source-line-no">490</span><span id="line-490"> public ResultHasher() {</span> |
| <span class="source-line-no">491</span><span id="line-491"> try {</span> |
| <span class="source-line-no">492</span><span id="line-492"> digest = MessageDigest.getInstance("MD5");</span> |
| <span class="source-line-no">493</span><span id="line-493"> } catch (NoSuchAlgorithmException e) {</span> |
| <span class="source-line-no">494</span><span id="line-494"> throw new RuntimeException(e); // explicit throw: construction never ends with a null digest</span> |
| <span class="source-line-no">495</span><span id="line-495"> }</span> |
| <span class="source-line-no">496</span><span id="line-496"> }</span> |
| <span class="source-line-no">497</span><span id="line-497"> /** Opens a new batch starting at {@code row}; any previous batch must already be finished. */</span> |
| <span class="source-line-no">498</span><span id="line-498"> public void startBatch(ImmutableBytesWritable row) {</span> |
| <span class="source-line-no">499</span><span id="line-499"> if (batchStarted) {</span> |
| <span class="source-line-no">500</span><span id="line-500"> throw new RuntimeException("Cannot start new batch without finishing existing one.");</span> |
| <span class="source-line-no">501</span><span id="line-501"> }</span> |
| <span class="source-line-no">502</span><span id="line-502"> batchStarted = true;</span> |
| <span class="source-line-no">503</span><span id="line-503"> batchSize = 0;</span> |
| <span class="source-line-no">504</span><span id="line-504"> batchStartKey = row;</span> |
| <span class="source-line-no">505</span><span id="line-505"> batchHash = null;</span> |
| <span class="source-line-no">506</span><span id="line-506"> }</span> |
| <span class="source-line-no">507</span><span id="line-507"> /** Feeds every cell of {@code result} into the digest and adds its size to the batch size. */</span> |
| <span class="source-line-no">508</span><span id="line-508"> public void hashResult(Result result) {</span> |
| <span class="source-line-no">509</span><span id="line-509"> if (!batchStarted) {</span> |
| <span class="source-line-no">510</span><span id="line-510"> throw new RuntimeException("Cannot add to batch that has not been started.");</span> |
| <span class="source-line-no">511</span><span id="line-511"> }</span> |
| <span class="source-line-no">512</span><span id="line-512"> for (Cell cell : result.rawCells()) {</span> |
| <span class="source-line-no">513</span><span id="line-513"> int rowLength = cell.getRowLength();</span> |
| <span class="source-line-no">514</span><span id="line-514"> int familyLength = cell.getFamilyLength();</span> |
| <span class="source-line-no">515</span><span id="line-515"> int qualifierLength = cell.getQualifierLength();</span> |
| <span class="source-line-no">516</span><span id="line-516"> int valueLength = cell.getValueLength();</span> |
| <span class="source-line-no">517</span><span id="line-517"> digest.update(cell.getRowArray(), cell.getRowOffset(), rowLength);</span> |
| <span class="source-line-no">518</span><span id="line-518"> digest.update(cell.getFamilyArray(), cell.getFamilyOffset(), familyLength);</span> |
| <span class="source-line-no">519</span><span id="line-519"> digest.update(cell.getQualifierArray(), cell.getQualifierOffset(), qualifierLength);</span> |
| <span class="source-line-no">520</span><span id="line-520"></span> |
| <span class="source-line-no">521</span><span id="line-521"> if (!ignoreTimestamps) {</span> |
| <span class="source-line-no">522</span><span id="line-522"> long ts = cell.getTimestamp();</span> |
| <span class="source-line-no">523</span><span id="line-523"> for (int i = 8; i > 0; i--) { // the 8 timestamp bytes, lowest-order byte first</span> |
| <span class="source-line-no">524</span><span id="line-524"> digest.update((byte) ts);</span> |
| <span class="source-line-no">525</span><span id="line-525"> ts >>>= 8;</span> |
| <span class="source-line-no">526</span><span id="line-526"> }</span> |
| <span class="source-line-no">527</span><span id="line-527"> }</span> |
| <span class="source-line-no">528</span><span id="line-528"> digest.update(cell.getValueArray(), cell.getValueOffset(), valueLength);</span> |
| <span class="source-line-no">529</span><span id="line-529"></span> |
| <span class="source-line-no">530</span><span id="line-530"> batchSize += rowLength + familyLength + qualifierLength + 8 + valueLength; // 8 always counted</span> |
| <span class="source-line-no">531</span><span id="line-531"> }</span> |
| <span class="source-line-no">532</span><span id="line-532"> }</span> |
| <span class="source-line-no">533</span><span id="line-533"> /** Closes the open batch; its hash is then available from {@link #getBatchHash()}. */</span> |
| <span class="source-line-no">534</span><span id="line-534"> public void finishBatch() {</span> |
| <span class="source-line-no">535</span><span id="line-535"> if (!batchStarted) {</span> |
| <span class="source-line-no">536</span><span id="line-536"> throw new RuntimeException("Cannot finish batch that has not started.");</span> |
| <span class="source-line-no">537</span><span id="line-537"> }</span> |
| <span class="source-line-no">538</span><span id="line-538"> batchStarted = false;</span> |
| <span class="source-line-no">539</span><span id="line-539"> batchHash = new ImmutableBytesWritable(digest.digest());</span> |
| <span class="source-line-no">540</span><span id="line-540"> }</span> |
| <span class="source-line-no">541</span><span id="line-541"></span> |
| <span class="source-line-no">542</span><span id="line-542"> public boolean isBatchStarted() {</span> |
| <span class="source-line-no">543</span><span id="line-543"> return batchStarted;</span> |
| <span class="source-line-no">544</span><span id="line-544"> }</span> |
| <span class="source-line-no">545</span><span id="line-545"></span> |
| <span class="source-line-no">546</span><span id="line-546"> public ImmutableBytesWritable getBatchStartKey() {</span> |
| <span class="source-line-no">547</span><span id="line-547"> return batchStartKey;</span> |
| <span class="source-line-no">548</span><span id="line-548"> }</span> |
| <span class="source-line-no">549</span><span id="line-549"></span> |
| <span class="source-line-no">550</span><span id="line-550"> public ImmutableBytesWritable getBatchHash() {</span> |
| <span class="source-line-no">551</span><span id="line-551"> return batchHash;</span> |
| <span class="source-line-no">552</span><span id="line-552"> }</span> |
| <span class="source-line-no">553</span><span id="line-553"></span> |
| <span class="source-line-no">554</span><span id="line-554"> public long getBatchSize() {</span> |
| <span class="source-line-no">555</span><span id="line-555"> return batchSize;</span> |
| <span class="source-line-no">556</span><span id="line-556"> }</span> |
| <span class="source-line-no">557</span><span id="line-557"> }</span> |
| <span class="source-line-no">558</span><span id="line-558"></span> |
| <span class="source-line-no">559</span><span id="line-559"> public static class HashMapper</span> |
| <span class="source-line-no">560</span><span id="line-560"> extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {</span> |
| <span class="source-line-no">561</span><span id="line-561"> // accumulates one batch at a time; targetBatchSize is the size at which a batch may be cut</span> |
| <span class="source-line-no">562</span><span id="line-562"> private ResultHasher hasher;</span> |
| <span class="source-line-no">563</span><span id="line-563"> private long targetBatchSize;</span> |
| <span class="source-line-no">564</span><span id="line-564"> // row of the most recently mapped Result, used to detect row boundaries</span> |
| <span class="source-line-no">565</span><span id="line-565"> private ImmutableBytesWritable currentRow;</span> |
| <span class="source-line-no">566</span><span id="line-566"></span> |
| <span class="source-line-no">567</span><span id="line-567"> @Override</span> |
| <span class="source-line-no">568</span><span id="line-568"> protected void setup(Context context) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">569</span><span id="line-569"> Configuration conf = context.getConfiguration();</span> |
| <span class="source-line-no">570</span><span id="line-570"> targetBatchSize = conf.getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);</span> |
| <span class="source-line-no">571</span><span id="line-571"> hasher = new ResultHasher();</span> |
| <span class="source-line-no">572</span><span id="line-572"> hasher.ignoreTimestamps = conf.getBoolean(IGNORE_TIMESTAMPS, false);</span> |
| <span class="source-line-no">573</span><span id="line-573"> TableSplit tableSplit = (TableSplit) context.getInputSplit();</span> |
| <span class="source-line-no">574</span><span id="line-574"> hasher.startBatch(new ImmutableBytesWritable(tableSplit.getStartRow()));</span> |
| <span class="source-line-no">575</span><span id="line-575"> }</span> |
| <span class="source-line-no">576</span><span id="line-576"></span> |
| <span class="source-line-no">577</span><span id="line-577"> @Override</span> |
| <span class="source-line-no">578</span><span id="line-578"> protected void map(ImmutableBytesWritable key, Result value, Context context)</span> |
| <span class="source-line-no">579</span><span id="line-579"> throws IOException, InterruptedException {</span> |
| <span class="source-line-no">580</span><span id="line-580"> boolean sameRow = currentRow != null && currentRow.equals(key);</span> |
| <span class="source-line-no">581</span><span id="line-581"> if (!sameRow) {</span> |
| <span class="source-line-no">582</span><span id="line-582"> currentRow = new ImmutableBytesWritable(key); // copy, since key is not immutable</span> |
| <span class="source-line-no">583</span><span id="line-583"> // a batch is only cut on a row boundary, once it has reached the target size</span> |
| <span class="source-line-no">584</span><span id="line-584"> if (hasher.getBatchSize() >= targetBatchSize) {</span> |
| <span class="source-line-no">585</span><span id="line-585"> hasher.finishBatch();</span> |
| <span class="source-line-no">586</span><span id="line-586"> context.write(hasher.getBatchStartKey(), hasher.getBatchHash());</span> |
| <span class="source-line-no">587</span><span id="line-587"> hasher.startBatch(currentRow);</span> |
| <span class="source-line-no">588</span><span id="line-588"> }</span> |
| <span class="source-line-no">589</span><span id="line-589"> }</span> |
| <span class="source-line-no">590</span><span id="line-590"></span> |
| <span class="source-line-no">591</span><span id="line-591"> hasher.hashResult(value);</span> |
| <span class="source-line-no">592</span><span id="line-592"> }</span> |
| <span class="source-line-no">593</span><span id="line-593"></span> |
| <span class="source-line-no">594</span><span id="line-594"> @Override</span> |
| <span class="source-line-no">595</span><span id="line-595"> protected void cleanup(Context context) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">596</span><span id="line-596"> hasher.finishBatch(); // emit the batch that is still open when the mapper finishes</span> |
| <span class="source-line-no">597</span><span id="line-597"> context.write(hasher.getBatchStartKey(), hasher.getBatchHash());</span> |
| <span class="source-line-no">598</span><span id="line-598"> }</span> |
| <span class="source-line-no">599</span><span id="line-599"> }</span> |
| <span class="source-line-no">600</span><span id="line-600"></span> |
| <span class="source-line-no">601</span><span id="line-601"> private void writeTempManifestFile() throws IOException {</span> |
| <span class="source-line-no">602</span><span id="line-602"> Path tmpManifest = new Path(destPath, TMP_MANIFEST_FILE_NAME);</span> |
| <span class="source-line-no">603</span><span id="line-603"> // the file system is resolved from the manifest path and this tool's configuration</span> |
| <span class="source-line-no">604</span><span id="line-604"> tableHash.writePropertiesFile(tmpManifest.getFileSystem(getConf()), tmpManifest);</span> |
| <span class="source-line-no">605</span><span id="line-605"> }</span> |
| <span class="source-line-no">606</span><span id="line-606"></span> |
| <span class="source-line-no">607</span><span id="line-607"> private void completeManifest() throws IOException { // renames the temp manifest to its final name</span> |
| <span class="source-line-no">608</span><span id="line-608"> Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);</span> |
| <span class="source-line-no">609</span><span id="line-609"> Path manifestPath = new Path(destPath, MANIFEST_FILE_NAME);</span> |
| <span class="source-line-no">610</span><span id="line-610"> if (!tempManifestPath.getFileSystem(getConf()).rename(tempManifestPath, manifestPath)) {</span> |
| <span class="source-line-no">611</span><span id="line-611"> throw new IOException("Failed to rename " + tempManifestPath + " to " + manifestPath);</span> |
| <span class="source-line-no">612</span><span id="line-612"> }</span> |
| <span class="source-line-no">613</span><span id="line-613"> }</span> |
| <span class="source-line-no">614</span><span id="line-614"> private static final int NUM_ARGS = 2;</span> |
| <span class="source-line-no">615</span><span id="line-615"></span> |
| <span class="source-line-no">616</span><span id="line-616"> private static void printUsage(final String errorMsg) {</span> |
| <span class="source-line-no">617</span><span id="line-617"> if (errorMsg != null && errorMsg.length() > 0) {</span> |
| <span class="source-line-no">618</span><span id="line-618"> System.err.println("ERROR: " + errorMsg);</span> |
| <span class="source-line-no">619</span><span id="line-619"> System.err.println();</span> |
| <span class="source-line-no">620</span><span id="line-620"> }</span> |
| <span class="source-line-no">621</span><span id="line-621"> System.err.println("Usage: HashTable [options] <tablename> <outputpath>");</span> |
| <span class="source-line-no">622</span><span id="line-622"> System.err.println();</span> |
| <span class="source-line-no">623</span><span id="line-623"> System.err.println("Options:");</span> |
| <span class="source-line-no">624</span><span id="line-624"> System.err.println(" batchsize the target amount of bytes to hash in each batch");</span> |
| <span class="source-line-no">625</span><span id="line-625"> System.err.println(" rows are added to the batch until this size is reached");</span> |
| <span class="source-line-no">626</span><span id="line-626"> System.err.println(" (defaults to " + DEFAULT_BATCH_SIZE + " bytes)");</span> |
| <span class="source-line-no">627</span><span id="line-627"> System.err.println(" numhashfiles the number of hash files to create");</span> |
| <span class="source-line-no">628</span><span id="line-628"> System.err.println(" if set to fewer than number of regions then");</span> |
| <span class="source-line-no">629</span><span id="line-629"> System.err.println(" the job will create this number of reducers");</span> |
| <span class="source-line-no">630</span><span id="line-630"> System.err.println(" (defaults to 1/100 of regions -- at least 1)");</span> |
| <span class="source-line-no">631</span><span id="line-631"> System.err.println(" startrow the start row");</span> |
| <span class="source-line-no">632</span><span id="line-632"> System.err.println(" stoprow the stop row");</span> |
| <span class="source-line-no">633</span><span id="line-633"> System.err.println(" starttime beginning of the time range (unixtime in millis)");</span> |
| <span class="source-line-no">634</span><span id="line-634"> System.err.println(" without endtime means from starttime to forever");</span> |
| <span class="source-line-no">635</span><span id="line-635"> System.err.println(" endtime end of the time range.");</span> |
| <span class="source-line-no">636</span><span id="line-636"> System.err.println(" Ignored if no starttime specified.");</span> |
| <span class="source-line-no">637</span><span id="line-637"> System.err.println(" scanbatch scanner batch size to support intra row scans");</span> |
| <span class="source-line-no">638</span><span id="line-638"> System.err.println(" versions number of cell versions to include");</span> |
| <span class="source-line-no">639</span><span id="line-639"> System.err.println(" rawScan performs a raw scan (false if omitted)");</span> |
| <span class="source-line-no">640</span><span id="line-640"> System.err.println(" families comma-separated list of families to include");</span> |
| <span class="source-line-no">641</span><span id="line-641"> System.err.println(" ignoreTimestamps if true, ignores cell timestamps");</span> |
| <span class="source-line-no">642</span><span id="line-642"> System.err.println(" when calculating hashes");</span> |
| <span class="source-line-no">643</span><span id="line-643"> System.err.println();</span> |
| <span class="source-line-no">644</span><span id="line-644"> System.err.println("Args:");</span> |
| <span class="source-line-no">645</span><span id="line-645"> System.err.println(" tablename Name of the table to hash");</span> |
| <span class="source-line-no">646</span><span id="line-646"> System.err.println(" outputpath Filesystem path to put the output data");</span> |
| <span class="source-line-no">647</span><span id="line-647"> System.err.println();</span> |
| <span class="source-line-no">648</span><span id="line-648"> System.err.println("Examples:");</span> |
| <span class="source-line-no">649</span><span id="line-649"> System.err.println(" To hash 'TestTable' in 32kB batches for a 1 hour window into 50 files:");</span> |
| <span class="source-line-no">650</span><span id="line-650"> System.err.println(" $ hbase "</span> |
| <span class="source-line-no">651</span><span id="line-651"> + "org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=32000 --numhashfiles=50"</span> |
| <span class="source-line-no">652</span><span id="line-652"> + " --starttime=1265875194289 --endtime=1265878794289 --families=cf2,cf3"</span> |
| <span class="source-line-no">653</span><span id="line-653"> + " TestTable /hashes/testTable");</span> |
| <span class="source-line-no">654</span><span id="line-654"> }</span> |
| <span class="source-line-no">655</span><span id="line-655"> /** Parses args: any number of --key=value options followed by the table name and the output path. Fills in tableHash and destPath; prints usage and returns false when arguments are missing, unknown, unparsable, or describe an invalid time range. */</span> |
| <span class="source-line-no">656</span><span id="line-656"> private boolean doCommandLine(final String[] args) {</span> |
| <span class="source-line-no">657</span><span id="line-657"> if (args.length < NUM_ARGS) {</span> |
| <span class="source-line-no">658</span><span id="line-658"> printUsage(null);</span> |
| <span class="source-line-no">659</span><span id="line-659"> return false;</span> |
| <span class="source-line-no">660</span><span id="line-660"> }</span> |
| <span class="source-line-no">661</span><span id="line-661"> try {</span> |
| <span class="source-line-no">662</span><span id="line-662"> /* The two positional arguments are always the last two entries. */</span> |
| <span class="source-line-no">663</span><span id="line-663"> tableHash.tableName = args[args.length - 2];</span> |
| <span class="source-line-no">664</span><span id="line-664"> destPath = new Path(args[args.length - 1]);</span> |
| <span class="source-line-no">665</span><span id="line-665"> /* Every entry before the positionals must be a recognised option; the first unknown one aborts parsing. */</span> |
| <span class="source-line-no">666</span><span id="line-666"> for (int i = 0; i < args.length - NUM_ARGS; i++) {</span> |
| <span class="source-line-no">667</span><span id="line-667"> String cmd = args[i];</span> |
| <span class="source-line-no">668</span><span id="line-668"> if (cmd.equals("-h") || cmd.startsWith("--h")) {</span> |
| <span class="source-line-no">669</span><span id="line-669"> printUsage(null);</span> |
| <span class="source-line-no">670</span><span id="line-670"> return false;</span> |
| <span class="source-line-no">671</span><span id="line-671"> }</span> |
| <span class="source-line-no">672</span><span id="line-672"></span> |
| <span class="source-line-no">673</span><span id="line-673"> final String batchSizeArgKey = "--batchsize=";</span> |
| <span class="source-line-no">674</span><span id="line-674"> if (cmd.startsWith(batchSizeArgKey)) {</span> |
| <span class="source-line-no">675</span><span id="line-675"> tableHash.batchSize = Long.parseLong(cmd.substring(batchSizeArgKey.length()));</span> |
| <span class="source-line-no">676</span><span id="line-676"> continue;</span> |
| <span class="source-line-no">677</span><span id="line-677"> }</span> |
| <span class="source-line-no">678</span><span id="line-678"></span> |
| <span class="source-line-no">679</span><span id="line-679"> final String numHashFilesArgKey = "--numhashfiles=";</span> |
| <span class="source-line-no">680</span><span id="line-680"> if (cmd.startsWith(numHashFilesArgKey)) {</span> |
| <span class="source-line-no">681</span><span id="line-681"> tableHash.numHashFiles = Integer.parseInt(cmd.substring(numHashFilesArgKey.length()));</span> |
| <span class="source-line-no">682</span><span id="line-682"> continue;</span> |
| <span class="source-line-no">683</span><span id="line-683"> }</span> |
| <span class="source-line-no">684</span><span id="line-684"></span> |
| <span class="source-line-no">685</span><span id="line-685"> final String startRowArgKey = "--startrow=";</span> |
| <span class="source-line-no">686</span><span id="line-686"> if (cmd.startsWith(startRowArgKey)) {</span> |
| <span class="source-line-no">687</span><span id="line-687"> tableHash.startRow = Bytes.fromHex(cmd.substring(startRowArgKey.length()));</span> |
| <span class="source-line-no">688</span><span id="line-688"> continue;</span> |
| <span class="source-line-no">689</span><span id="line-689"> }</span> |
| <span class="source-line-no">690</span><span id="line-690"></span> |
| <span class="source-line-no">691</span><span id="line-691"> final String stopRowArgKey = "--stoprow=";</span> |
| <span class="source-line-no">692</span><span id="line-692"> if (cmd.startsWith(stopRowArgKey)) {</span> |
| <span class="source-line-no">693</span><span id="line-693"> tableHash.stopRow = Bytes.fromHex(cmd.substring(stopRowArgKey.length()));</span> |
| <span class="source-line-no">694</span><span id="line-694"> continue;</span> |
| <span class="source-line-no">695</span><span id="line-695"> }</span> |
| <span class="source-line-no">696</span><span id="line-696"></span> |
| <span class="source-line-no">697</span><span id="line-697"> final String startTimeArgKey = "--starttime=";</span> |
| <span class="source-line-no">698</span><span id="line-698"> if (cmd.startsWith(startTimeArgKey)) {</span> |
| <span class="source-line-no">699</span><span id="line-699"> tableHash.startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));</span> |
| <span class="source-line-no">700</span><span id="line-700"> continue;</span> |
| <span class="source-line-no">701</span><span id="line-701"> }</span> |
| <span class="source-line-no">702</span><span id="line-702"></span> |
| <span class="source-line-no">703</span><span id="line-703"> final String endTimeArgKey = "--endtime=";</span> |
| <span class="source-line-no">704</span><span id="line-704"> if (cmd.startsWith(endTimeArgKey)) {</span> |
| <span class="source-line-no">705</span><span id="line-705"> tableHash.endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));</span> |
| <span class="source-line-no">706</span><span id="line-706"> continue;</span> |
| <span class="source-line-no">707</span><span id="line-707"> }</span> |
| <span class="source-line-no">708</span><span id="line-708"></span> |
| <span class="source-line-no">709</span><span id="line-709"> final String scanBatchArgKey = "--scanbatch=";</span> |
| <span class="source-line-no">710</span><span id="line-710"> if (cmd.startsWith(scanBatchArgKey)) {</span> |
| <span class="source-line-no">711</span><span id="line-711"> tableHash.scanBatch = Integer.parseInt(cmd.substring(scanBatchArgKey.length()));</span> |
| <span class="source-line-no">712</span><span id="line-712"> continue;</span> |
| <span class="source-line-no">713</span><span id="line-713"> }</span> |
| <span class="source-line-no">714</span><span id="line-714"></span> |
| <span class="source-line-no">715</span><span id="line-715"> final String versionsArgKey = "--versions=";</span> |
| <span class="source-line-no">716</span><span id="line-716"> if (cmd.startsWith(versionsArgKey)) {</span> |
| <span class="source-line-no">717</span><span id="line-717"> tableHash.versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));</span> |
| <span class="source-line-no">718</span><span id="line-718"> continue;</span> |
| <span class="source-line-no">719</span><span id="line-719"> }</span> |
| <span class="source-line-no">720</span><span id="line-720"></span> |
| <span class="source-line-no">721</span><span id="line-721"> final String rawScanArgKey = "--rawScan=";</span> |
| <span class="source-line-no">722</span><span id="line-722"> if (cmd.startsWith(rawScanArgKey)) {</span> |
| <span class="source-line-no">723</span><span id="line-723"> tableHash.rawScan = Boolean.parseBoolean(cmd.substring(rawScanArgKey.length()));</span> |
| <span class="source-line-no">724</span><span id="line-724"> continue;</span> |
| <span class="source-line-no">725</span><span id="line-725"> }</span> |
| <span class="source-line-no">726</span><span id="line-726"></span> |
| <span class="source-line-no">727</span><span id="line-727"> final String familiesArgKey = "--families=";</span> |
| <span class="source-line-no">728</span><span id="line-728"> if (cmd.startsWith(familiesArgKey)) {</span> |
| <span class="source-line-no">729</span><span id="line-729"> tableHash.families = cmd.substring(familiesArgKey.length());</span> |
| <span class="source-line-no">730</span><span id="line-730"> continue;</span> |
| <span class="source-line-no">731</span><span id="line-731"> }</span> |
| <span class="source-line-no">732</span><span id="line-732"></span> |
| <span class="source-line-no">733</span><span id="line-733"> final String ignoreTimestampsKey = "--ignoreTimestamps=";</span> |
| <span class="source-line-no">734</span><span id="line-734"> if (cmd.startsWith(ignoreTimestampsKey)) {</span> |
| <span class="source-line-no">735</span><span id="line-735"> tableHash.ignoreTimestamps =</span> |
| <span class="source-line-no">736</span><span id="line-736"> Boolean.parseBoolean(cmd.substring(ignoreTimestampsKey.length()));</span> |
| <span class="source-line-no">737</span><span id="line-737"> continue;</span> |
| <span class="source-line-no">738</span><span id="line-738"> }</span> |
| <span class="source-line-no">739</span><span id="line-739"></span> |
| <span class="source-line-no">740</span><span id="line-740"> printUsage("Invalid argument '" + cmd + "'");</span> |
| <span class="source-line-no">741</span><span id="line-741"> return false;</span> |
| <span class="source-line-no">742</span><span id="line-742"> }</span> |
| <span class="source-line-no">743</span><span id="line-743"> /* An endTime of 0 is treated as "not given": the usage text promises that starttime</span> |
| <span class="source-line-no">744</span><span id="line-744"> * without endtime means from starttime to forever, so the range is only validated</span> |
| <span class="source-line-no">745</span><span id="line-745"> * when an explicit endtime is present. NOTE(review): confirm the scan setup maps 0 to "no upper bound". */</span> |
| <span class="source-line-no">746</span><span id="line-746"> if (tableHash.endTime != 0 && tableHash.startTime >= tableHash.endTime) {</span> |
| <span class="source-line-no">747</span><span id="line-747"> printUsage("Invalid time range filter: starttime=" + tableHash.startTime + " >= endtime="</span> |
| <span class="source-line-no">748</span><span id="line-748"> + tableHash.endTime);</span> |
| <span class="source-line-no">749</span><span id="line-749"> return false;</span> |
| <span class="source-line-no">750</span><span id="line-750"> }</span> |
| <span class="source-line-no">751</span><span id="line-751"></span> |
| <span class="source-line-no">752</span><span id="line-752"> } catch (Exception e) {</span> |
| <span class="source-line-no">753</span><span id="line-753"> LOG.error("Failed to parse commandLine arguments", e);</span> |
| <span class="source-line-no">754</span><span id="line-754"> printUsage("Can't start because " + e.getMessage());</span> |
| <span class="source-line-no">755</span><span id="line-755"> return false;</span> |
| <span class="source-line-no">756</span><span id="line-756"> }</span> |
| <span class="source-line-no">757</span><span id="line-757"> return true;</span> |
| <span class="source-line-no">758</span><span id="line-758"> }</span> |
| <span class="source-line-no">759</span><span id="line-759"></span> |
| <span class="source-line-no">760</span><span id="line-760"> /**</span> |
| <span class="source-line-no">761</span><span id="line-761"> * Command-line entry point: runs this tool through ToolRunner and exits with the status it returns.</span> |
| <span class="source-line-no">762</span><span id="line-762"> */</span> |
| <span class="source-line-no">763</span><span id="line-763"> public static void main(String[] args) throws Exception {</span> |
| <span class="source-line-no">764</span><span id="line-764"> final HashTable tool = new HashTable(HBaseConfiguration.create());</span> |
| <span class="source-line-no">765</span><span id="line-765"> System.exit(ToolRunner.run(tool, args));</span> |
| <span class="source-line-no">766</span><span id="line-766"> }</span> |
| <span class="source-line-no">767</span><span id="line-767"></span> |
| <span class="source-line-no">768</span><span id="line-768"> @Override</span> |
| <span class="source-line-no">769</span><span id="line-769"> public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">770</span><span id="line-770"> String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();</span> |
| <span class="source-line-no">771</span><span id="line-771"> if (!doCommandLine(otherArgs)) {</span> |
| <span class="source-line-no">772</span><span id="line-772"> return 1;</span> |
| <span class="source-line-no">773</span><span id="line-773"> }</span> |
| <span class="source-line-no">774</span><span id="line-774"></span> |
| <span class="source-line-no">775</span><span id="line-775"> Job job = createSubmittableJob(otherArgs);</span> |
| <span class="source-line-no">776</span><span id="line-776"> writeTempManifestFile();</span> |
| <span class="source-line-no">777</span><span id="line-777"> if (!job.waitForCompletion(true)) {</span> |
| <span class="source-line-no">778</span><span id="line-778"> LOG.info("Map-reduce job failed!");</span> |
| <span class="source-line-no">779</span><span id="line-779"> return 1;</span> |
| <span class="source-line-no">780</span><span id="line-780"> }</span> |
| <span class="source-line-no">781</span><span id="line-781"> completeManifest();</span> |
| <span class="source-line-no">782</span><span id="line-782"> return 0;</span> |
| <span class="source-line-no">783</span><span id="line-783"> }</span> |
| <span class="source-line-no">784</span><span id="line-784"></span> |
| <span class="source-line-no">785</span><span id="line-785">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |