| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <!-- Encoding is declared first so the parser fixes on UTF-8 before reading any text. --> |
| <meta charset="utf-8"> |
| <!-- Generated by javadoc (17) --> |
| <!-- Page-specific title: names the class whose source is listed below. --> |
| <title>Source code: org.apache.hadoop.hbase.mapreduce.SyncTable</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.mapreduce, class: SyncTable, class: SyncMapper"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <!-- title="Style" is kept so the sheet stays the named preferred stylesheet; type is omitted because text/css is the default. --> |
| <link rel="stylesheet" href="../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.mapreduce;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import java.io.IOException;</span> |
| <span class="source-line-no">021</span><span id="line-21">import java.net.URI;</span> |
| <span class="source-line-no">022</span><span id="line-22">import java.net.URISyntaxException;</span> |
| <span class="source-line-no">023</span><span id="line-23">import java.util.Collections;</span> |
| <span class="source-line-no">024</span><span id="line-24">import java.util.Iterator;</span> |
| <span class="source-line-no">025</span><span id="line-25">import org.apache.commons.lang3.StringUtils;</span> |
| <span class="source-line-no">026</span><span id="line-26">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">027</span><span id="line-27">import org.apache.hadoop.conf.Configured;</span> |
| <span class="source-line-no">028</span><span id="line-28">import org.apache.hadoop.fs.FileStatus;</span> |
| <span class="source-line-no">029</span><span id="line-29">import org.apache.hadoop.fs.FileSystem;</span> |
| <span class="source-line-no">030</span><span id="line-30">import org.apache.hadoop.fs.Path;</span> |
| <span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.hbase.Cell;</span> |
| <span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.hbase.CellBuilderFactory;</span> |
| <span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.hbase.CellBuilderType;</span> |
| <span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.hbase.CellComparator;</span> |
| <span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.hbase.CellUtil;</span> |
| <span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.HBaseConfiguration;</span> |
| <span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.TableName;</span> |
| <span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.client.Connection;</span> |
| <span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.client.ConnectionFactory;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.client.Delete;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.client.Mutation;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.client.Put;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.client.Result;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.client.ResultScanner;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.client.Scan;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.client.Table;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.io.ImmutableBytesWritable;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.util.Bytes;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.mapreduce.Counters;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.mapreduce.Job;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;</span> |
| <span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.mapreduce.security.TokenCache;</span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.util.GenericOptionsParser;</span> |
| <span class="source-line-no">055</span><span id="line-55">import org.apache.hadoop.util.Tool;</span> |
| <span class="source-line-no">056</span><span id="line-56">import org.apache.hadoop.util.ToolRunner;</span> |
| <span class="source-line-no">057</span><span id="line-57">import org.apache.yetus.audience.InterfaceAudience;</span> |
| <span class="source-line-no">058</span><span id="line-58">import org.slf4j.Logger;</span> |
| <span class="source-line-no">059</span><span id="line-59">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">060</span><span id="line-60"></span> |
| <span class="source-line-no">061</span><span id="line-61">import org.apache.hbase.thirdparty.com.google.common.base.Throwables;</span> |
| <span class="source-line-no">062</span><span id="line-62"></span> |
| <span class="source-line-no">063</span><span id="line-63">@InterfaceAudience.Private</span> |
| <span class="source-line-no">064</span><span id="line-64">public class SyncTable extends Configured implements Tool {</span> |
| <span class="source-line-no">065</span><span id="line-65"></span> |
| <span class="source-line-no">066</span><span id="line-66"> private static final Logger LOG = LoggerFactory.getLogger(SyncTable.class);</span> |
| <span class="source-line-no">067</span><span id="line-67"></span> |
| <span class="source-line-no">068</span><span id="line-68"> static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";</span> |
| <span class="source-line-no">069</span><span id="line-69"> static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";</span> |
| <span class="source-line-no">070</span><span id="line-70"> static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";</span> |
| <span class="source-line-no">071</span><span id="line-71"> static final String SOURCE_URI_CONF_KEY = "sync.table.source.uri";</span> |
| <span class="source-line-no">072</span><span id="line-72"> /**</span> |
| <span class="source-line-no">073</span><span id="line-73"> * @deprecated Since 3.0.0, will be removed in 4.0.0 Use {@link #SOURCE_URI_CONF_KEY} instead.</span> |
| <span class="source-line-no">074</span><span id="line-74"> */</span> |
| <span class="source-line-no">075</span><span id="line-75"> @Deprecated</span> |
| <span class="source-line-no">076</span><span id="line-76"> static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";</span> |
| <span class="source-line-no">077</span><span id="line-77"> static final String TARGET_URI_CONF_KEY = "sync.table.target.uri";</span> |
| <span class="source-line-no">078</span><span id="line-78"> /**</span> |
| <span class="source-line-no">079</span><span id="line-79"> * @deprecated Since 3.0.0, will be removed in 4.0.0 Use {@link #TARGET_URI_CONF_KEY} instead.</span> |
| <span class="source-line-no">080</span><span id="line-80"> */</span> |
| <span class="source-line-no">081</span><span id="line-81"> @Deprecated</span> |
| <span class="source-line-no">082</span><span id="line-82"> static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";</span> |
| <span class="source-line-no">083</span><span id="line-83"> static final String DRY_RUN_CONF_KEY = "sync.table.dry.run";</span> |
| <span class="source-line-no">084</span><span id="line-84"> static final String DO_DELETES_CONF_KEY = "sync.table.do.deletes";</span> |
| <span class="source-line-no">085</span><span id="line-85"> static final String DO_PUTS_CONF_KEY = "sync.table.do.puts";</span> |
| <span class="source-line-no">086</span><span id="line-86"> static final String IGNORE_TIMESTAMPS = "sync.table.ignore.timestamps";</span> |
| <span class="source-line-no">087</span><span id="line-87"></span> |
| <span class="source-line-no">088</span><span id="line-88"> Path sourceHashDir;</span> |
| <span class="source-line-no">089</span><span id="line-89"> String sourceTableName;</span> |
| <span class="source-line-no">090</span><span id="line-90"> String targetTableName;</span> |
| <span class="source-line-no">091</span><span id="line-91"></span> |
| <span class="source-line-no">092</span><span id="line-92"> URI sourceUri;</span> |
| <span class="source-line-no">093</span><span id="line-93"> /**</span> |
| <span class="source-line-no">094</span><span id="line-94"> * @deprecated Since 3.0.0, will be removed in 4.0.0 Use {@link #sourceUri} instead.</span> |
| <span class="source-line-no">095</span><span id="line-95"> */</span> |
| <span class="source-line-no">096</span><span id="line-96"> @Deprecated</span> |
| <span class="source-line-no">097</span><span id="line-97"> String sourceZkCluster;</span> |
| <span class="source-line-no">098</span><span id="line-98"> URI targetUri;</span> |
| <span class="source-line-no">099</span><span id="line-99"> /**</span> |
| <span class="source-line-no">100</span><span id="line-100"> * @deprecated Since 3.0.0, will be removed in 4.0.0 Use {@link #targetUri} instead.</span> |
| <span class="source-line-no">101</span><span id="line-101"> */</span> |
| <span class="source-line-no">102</span><span id="line-102"> @Deprecated</span> |
| <span class="source-line-no">103</span><span id="line-103"> String targetZkCluster;</span> |
| <span class="source-line-no">104</span><span id="line-104"> boolean dryRun;</span> |
| <span class="source-line-no">105</span><span id="line-105"> boolean doDeletes = true;</span> |
| <span class="source-line-no">106</span><span id="line-106"> boolean doPuts = true;</span> |
| <span class="source-line-no">107</span><span id="line-107"> boolean ignoreTimestamps;</span> |
| <span class="source-line-no">108</span><span id="line-108"></span> |
| <span class="source-line-no">109</span><span id="line-109"> Counters counters;</span> |
| <span class="source-line-no">110</span><span id="line-110"></span> |
| <span class="source-line-no">111</span><span id="line-111"> public SyncTable(Configuration conf) {</span> |
| <span class="source-line-no">112</span><span id="line-112"> super(conf);</span> |
| <span class="source-line-no">113</span><span id="line-113"> }</span> |
| <span class="source-line-no">114</span><span id="line-114"></span> |
| <span class="source-line-no">115</span><span id="line-115"> private void initCredentialsForHBase(String clusterKey, Job job) throws IOException {</span> |
| <span class="source-line-no">116</span><span id="line-116"> Configuration peerConf =</span> |
| <span class="source-line-no">117</span><span id="line-117"> HBaseConfiguration.createClusterConf(job.getConfiguration(), clusterKey);</span> |
| <span class="source-line-no">118</span><span id="line-118"> TableMapReduceUtil.initCredentialsForCluster(job, peerConf);</span> |
| <span class="source-line-no">119</span><span id="line-119"> }</span> |
| <span class="source-line-no">120</span><span id="line-120"></span> |
| <span class="source-line-no">121</span><span id="line-121"> public Job createSubmittableJob(String[] args) throws IOException {</span> |
| <span class="source-line-no">122</span><span id="line-122"> FileSystem fs = sourceHashDir.getFileSystem(getConf());</span> |
| <span class="source-line-no">123</span><span id="line-123"> if (!fs.exists(sourceHashDir)) {</span> |
| <span class="source-line-no">124</span><span id="line-124"> throw new IOException("Source hash dir not found: " + sourceHashDir);</span> |
| <span class="source-line-no">125</span><span id="line-125"> }</span> |
| <span class="source-line-no">126</span><span id="line-126"></span> |
| <span class="source-line-no">127</span><span id="line-127"> Job job = Job.getInstance(getConf(),</span> |
| <span class="source-line-no">128</span><span id="line-128"> getConf().get("mapreduce.job.name", "syncTable_" + sourceTableName + "-" + targetTableName));</span> |
| <span class="source-line-no">129</span><span id="line-129"> Configuration jobConf = job.getConfiguration();</span> |
| <span class="source-line-no">130</span><span id="line-130"> if ("kerberos".equalsIgnoreCase(jobConf.get("hadoop.security.authentication"))) {</span> |
| <span class="source-line-no">131</span><span id="line-131"> TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { sourceHashDir },</span> |
| <span class="source-line-no">132</span><span id="line-132"> getConf());</span> |
| <span class="source-line-no">133</span><span id="line-133"> }</span> |
| <span class="source-line-no">134</span><span id="line-134"></span> |
| <span class="source-line-no">135</span><span id="line-135"> HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);</span> |
| <span class="source-line-no">136</span><span id="line-136"> LOG.info("Read source hash manifest: " + tableHash);</span> |
| <span class="source-line-no">137</span><span id="line-137"> LOG.info("Read " + tableHash.partitions.size() + " partition keys");</span> |
| <span class="source-line-no">138</span><span id="line-138"> if (!tableHash.tableName.equals(sourceTableName)) {</span> |
| <span class="source-line-no">139</span><span id="line-139"> LOG.warn("Table name mismatch - manifest indicates hash was taken from: "</span> |
| <span class="source-line-no">140</span><span id="line-140"> + tableHash.tableName + " but job is reading from: " + sourceTableName);</span> |
| <span class="source-line-no">141</span><span id="line-141"> }</span> |
| <span class="source-line-no">142</span><span id="line-142"> if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {</span> |
| <span class="source-line-no">143</span><span id="line-143"> throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"</span> |
| <span class="source-line-no">144</span><span id="line-144"> + " should be 1 more than the number of partition keys. However, the manifest file "</span> |
| <span class="source-line-no">145</span><span id="line-145"> + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"</span> |
| <span class="source-line-no">146</span><span id="line-146"> + " found in the partitions file is " + tableHash.partitions.size());</span> |
| <span class="source-line-no">147</span><span id="line-147"> }</span> |
| <span class="source-line-no">148</span><span id="line-148"></span> |
| <span class="source-line-no">149</span><span id="line-149"> Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);</span> |
| <span class="source-line-no">150</span><span id="line-150"> int dataSubdirCount = 0;</span> |
| <span class="source-line-no">151</span><span id="line-151"> for (FileStatus file : fs.listStatus(dataDir)) {</span> |
| <span class="source-line-no">152</span><span id="line-152"> if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {</span> |
| <span class="source-line-no">153</span><span id="line-153"> dataSubdirCount++;</span> |
| <span class="source-line-no">154</span><span id="line-154"> }</span> |
| <span class="source-line-no">155</span><span id="line-155"> }</span> |
| <span class="source-line-no">156</span><span id="line-156"></span> |
| <span class="source-line-no">157</span><span id="line-157"> if (dataSubdirCount != tableHash.numHashFiles) {</span> |
| <span class="source-line-no">158</span><span id="line-158"> throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"</span> |
| <span class="source-line-no">159</span><span id="line-159"> + " should be 1 more than the number of partition keys. However, the number of data dirs"</span> |
| <span class="source-line-no">160</span><span id="line-160"> + " found is " + dataSubdirCount + " but the number of partition keys"</span> |
| <span class="source-line-no">161</span><span id="line-161"> + " found in the partitions file is " + tableHash.partitions.size());</span> |
| <span class="source-line-no">162</span><span id="line-162"> }</span> |
| <span class="source-line-no">163</span><span id="line-163"></span> |
| <span class="source-line-no">164</span><span id="line-164"> job.setJarByClass(HashTable.class);</span> |
| <span class="source-line-no">165</span><span id="line-165"> jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());</span> |
| <span class="source-line-no">166</span><span id="line-166"> jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);</span> |
| <span class="source-line-no">167</span><span id="line-167"> jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);</span> |
| <span class="source-line-no">168</span><span id="line-168"> if (sourceUri != null) {</span> |
| <span class="source-line-no">169</span><span id="line-169"> jobConf.set(SOURCE_URI_CONF_KEY, sourceUri.toString());</span> |
| <span class="source-line-no">170</span><span id="line-170"> TableMapReduceUtil.initCredentialsForCluster(job, jobConf, sourceUri);</span> |
| <span class="source-line-no">171</span><span id="line-171"> } else if (sourceZkCluster != null) {</span> |
| <span class="source-line-no">172</span><span id="line-172"> jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);</span> |
| <span class="source-line-no">173</span><span id="line-173"> initCredentialsForHBase(sourceZkCluster, job);</span> |
| <span class="source-line-no">174</span><span id="line-174"> }</span> |
| <span class="source-line-no">175</span><span id="line-175"> if (targetUri != null) {</span> |
| <span class="source-line-no">176</span><span id="line-176"> jobConf.set(TARGET_URI_CONF_KEY, targetUri.toString());</span> |
| <span class="source-line-no">177</span><span id="line-177"> TableMapReduceUtil.initCredentialsForCluster(job, jobConf, targetUri);</span> |
| <span class="source-line-no">178</span><span id="line-178"> } else if (targetZkCluster != null) {</span> |
| <span class="source-line-no">179</span><span id="line-179"> jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);</span> |
| <span class="source-line-no">180</span><span id="line-180"> initCredentialsForHBase(targetZkCluster, job);</span> |
| <span class="source-line-no">181</span><span id="line-181"> }</span> |
| <span class="source-line-no">182</span><span id="line-182"> jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);</span> |
| <span class="source-line-no">183</span><span id="line-183"> jobConf.setBoolean(DO_DELETES_CONF_KEY, doDeletes);</span> |
| <span class="source-line-no">184</span><span id="line-184"> jobConf.setBoolean(DO_PUTS_CONF_KEY, doPuts);</span> |
| <span class="source-line-no">185</span><span id="line-185"> jobConf.setBoolean(IGNORE_TIMESTAMPS, ignoreTimestamps);</span> |
| <span class="source-line-no">186</span><span id="line-186"></span> |
| <span class="source-line-no">187</span><span id="line-187"> TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(), SyncMapper.class,</span> |
| <span class="source-line-no">188</span><span id="line-188"> null, null, job);</span> |
| <span class="source-line-no">189</span><span id="line-189"></span> |
| <span class="source-line-no">190</span><span id="line-190"> job.setNumReduceTasks(0);</span> |
| <span class="source-line-no">191</span><span id="line-191"></span> |
| <span class="source-line-no">192</span><span id="line-192"> if (dryRun) {</span> |
| <span class="source-line-no">193</span><span id="line-193"> job.setOutputFormatClass(NullOutputFormat.class);</span> |
| <span class="source-line-no">194</span><span id="line-194"> } else {</span> |
| <span class="source-line-no">195</span><span id="line-195"> // No reducers. Just write straight to table. Call initTableReducerJob</span> |
| <span class="source-line-no">196</span><span id="line-196"> // because it sets up the TableOutputFormat.</span> |
| <span class="source-line-no">197</span><span id="line-197"> if (targetUri != null) {</span> |
| <span class="source-line-no">198</span><span id="line-198"> TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null, targetUri);</span> |
| <span class="source-line-no">199</span><span id="line-199"> } else {</span> |
| <span class="source-line-no">200</span><span id="line-200"> TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null, targetZkCluster);</span> |
| <span class="source-line-no">201</span><span id="line-201"> }</span> |
| <span class="source-line-no">202</span><span id="line-202"> // would be nice to add an option for bulk load instead</span> |
| <span class="source-line-no">203</span><span id="line-203"> }</span> |
| <span class="source-line-no">204</span><span id="line-204"></span> |
| <span class="source-line-no">205</span><span id="line-205"> return job;</span> |
| <span class="source-line-no">206</span><span id="line-206"> }</span> |
| <span class="source-line-no">207</span><span id="line-207"></span> |
| <span class="source-line-no">208</span><span id="line-208"> public static class SyncMapper extends TableMapper&lt;ImmutableBytesWritable, Mutation&gt; {</span> |
| <span class="source-line-no">209</span><span id="line-209"> Path sourceHashDir;</span> |
| <span class="source-line-no">210</span><span id="line-210"></span> |
| <span class="source-line-no">211</span><span id="line-211"> Connection sourceConnection;</span> |
| <span class="source-line-no">212</span><span id="line-212"> Connection targetConnection;</span> |
| <span class="source-line-no">213</span><span id="line-213"> Table sourceTable;</span> |
| <span class="source-line-no">214</span><span id="line-214"> Table targetTable;</span> |
| <span class="source-line-no">215</span><span id="line-215"> boolean dryRun;</span> |
| <span class="source-line-no">216</span><span id="line-216"> boolean doDeletes = true;</span> |
| <span class="source-line-no">217</span><span id="line-217"> boolean doPuts = true;</span> |
| <span class="source-line-no">218</span><span id="line-218"> boolean ignoreTimestamp;</span> |
| <span class="source-line-no">219</span><span id="line-219"></span> |
| <span class="source-line-no">220</span><span id="line-220"> HashTable.TableHash sourceTableHash;</span> |
| <span class="source-line-no">221</span><span id="line-221"> HashTable.TableHash.Reader sourceHashReader;</span> |
| <span class="source-line-no">222</span><span id="line-222"> ImmutableBytesWritable currentSourceHash;</span> |
| <span class="source-line-no">223</span><span id="line-223"> ImmutableBytesWritable nextSourceKey;</span> |
| <span class="source-line-no">224</span><span id="line-224"> HashTable.ResultHasher targetHasher;</span> |
| <span class="source-line-no">225</span><span id="line-225"></span> |
| <span class="source-line-no">226</span><span id="line-226"> Throwable mapperException;</span> |
| <span class="source-line-no">227</span><span id="line-227"></span> |
| <span class="source-line-no">228</span><span id="line-228"> public static enum Counter {</span> |
| <span class="source-line-no">229</span><span id="line-229"> BATCHES,</span> |
| <span class="source-line-no">230</span><span id="line-230"> HASHES_MATCHED,</span> |
| <span class="source-line-no">231</span><span id="line-231"> HASHES_NOT_MATCHED,</span> |
| <span class="source-line-no">232</span><span id="line-232"> SOURCEMISSINGROWS,</span> |
| <span class="source-line-no">233</span><span id="line-233"> SOURCEMISSINGCELLS,</span> |
| <span class="source-line-no">234</span><span id="line-234"> TARGETMISSINGROWS,</span> |
| <span class="source-line-no">235</span><span id="line-235"> TARGETMISSINGCELLS,</span> |
| <span class="source-line-no">236</span><span id="line-236"> ROWSWITHDIFFS,</span> |
| <span class="source-line-no">237</span><span id="line-237"> DIFFERENTCELLVALUES,</span> |
| <span class="source-line-no">238</span><span id="line-238"> MATCHINGROWS,</span> |
| <span class="source-line-no">239</span><span id="line-239"> MATCHINGCELLS,</span> |
| <span class="source-line-no">240</span><span id="line-240"> EMPTY_BATCHES,</span> |
| <span class="source-line-no">241</span><span id="line-241"> RANGESMATCHED,</span> |
| <span class="source-line-no">242</span><span id="line-242"> RANGESNOTMATCHED</span> |
| <span class="source-line-no">243</span><span id="line-243"> }</span> |
| <span class="source-line-no">244</span><span id="line-244"></span> |
| <span class="source-line-no">245</span><span id="line-245"> @Override</span> |
| <span class="source-line-no">246</span><span id="line-246"> protected void setup(Context context) throws IOException {</span> |
| <span class="source-line-no">247</span><span id="line-247"> Configuration conf = context.getConfiguration();</span> |
| <span class="source-line-no">248</span><span id="line-248"> sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));</span> |
| <span class="source-line-no">249</span><span id="line-249"> sourceConnection =</span> |
| <span class="source-line-no">250</span><span id="line-250"> openConnection(conf, SOURCE_URI_CONF_KEY, SOURCE_ZK_CLUSTER_CONF_KEY, null);</span> |
| <span class="source-line-no">251</span><span id="line-251"> targetConnection = openConnection(conf, TARGET_URI_CONF_KEY, TARGET_ZK_CLUSTER_CONF_KEY,</span> |
| <span class="source-line-no">252</span><span id="line-252"> TableOutputFormat.OUTPUT_CONF_PREFIX);</span> |
| <span class="source-line-no">253</span><span id="line-253"> sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);</span> |
| <span class="source-line-no">254</span><span id="line-254"> targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);</span> |
| <span class="source-line-no">255</span><span id="line-255"> dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);</span> |
| <span class="source-line-no">256</span><span id="line-256"> doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);</span> |
| <span class="source-line-no">257</span><span id="line-257"> doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);</span> |
| <span class="source-line-no">258</span><span id="line-258"> ignoreTimestamp = conf.getBoolean(IGNORE_TIMESTAMPS, false);</span> |
| <span class="source-line-no">259</span><span id="line-259"></span> |
| <span class="source-line-no">260</span><span id="line-260"> sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);</span> |
| <span class="source-line-no">261</span><span id="line-261"> LOG.info("Read source hash manifest: " + sourceTableHash);</span> |
| <span class="source-line-no">262</span><span id="line-262"> LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");</span> |
| <span class="source-line-no">263</span><span id="line-263"></span> |
| <span class="source-line-no">264</span><span id="line-264"> TableSplit split = (TableSplit) context.getInputSplit();</span> |
| <span class="source-line-no">265</span><span id="line-265"> ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());</span> |
| <span class="source-line-no">266</span><span id="line-266"></span> |
| <span class="source-line-no">267</span><span id="line-267"> sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);</span> |
| <span class="source-line-no">268</span><span id="line-268"> findNextKeyHashPair();</span> |
| <span class="source-line-no">269</span><span id="line-269"></span> |
| <span class="source-line-no">270</span><span id="line-270"> // create a hasher, but don't start it right away</span> |
| <span class="source-line-no">271</span><span id="line-271"> // instead, find the first hash batch at or after the start row</span> |
| <span class="source-line-no">272</span><span id="line-272"> // and skip any rows that come before. they will be caught by the previous task</span> |
| <span class="source-line-no">273</span><span id="line-273"> targetHasher = new HashTable.ResultHasher();</span> |
| <span class="source-line-no">274</span><span id="line-274"> targetHasher.ignoreTimestamps = ignoreTimestamp;</span> |
| <span class="source-line-no">275</span><span id="line-275"> }</span> |
| <span class="source-line-no">276</span><span id="line-276"></span> |
| <span class="source-line-no">277</span><span id="line-277"> private static Connection openConnection(Configuration conf, String uriConfKey,</span> |
| <span class="source-line-no">278</span><span id="line-278"> String zkClusterConfKey, String configPrefix) throws IOException {</span> |
| <span class="source-line-no">279</span><span id="line-279"> String uri = conf.get(uriConfKey);</span> |
| <span class="source-line-no">280</span><span id="line-280"> if (StringUtils.isBlank(uri)) {</span> |
| <span class="source-line-no">281</span><span id="line-281"> // no connection uri configured, so build the connection from the cluster key instead</span> |
| <span class="source-line-no">282</span><span id="line-282"> String zkCluster = conf.get(zkClusterConfKey);</span> |
| <span class="source-line-no">283</span><span id="line-283"> Configuration clusterConf = HBaseConfiguration.createClusterConf(conf, zkCluster, configPrefix);</span> |
| <span class="source-line-no">284</span><span id="line-284"> return ConnectionFactory.createConnection(clusterConf);</span> |
| <span class="source-line-no">285</span><span id="line-285"> }</span> |
| <span class="source-line-no">286</span><span id="line-286"> try {</span> |
| <span class="source-line-no">287</span><span id="line-287"> return ConnectionFactory.createConnection(new URI(uri), conf);</span> |
| <span class="source-line-no">288</span><span id="line-288"> } catch (URISyntaxException e) {</span> |
| <span class="source-line-no">289</span><span id="line-289"> // report both the offending value and the config key it was read from</span> |
| <span class="source-line-no">290</span><span id="line-290"> String message = "malformed connection uri: " + uri + ", please check config " + uriConfKey;</span> |
| <span class="source-line-no">291</span><span id="line-291"> throw new IOException(message, e);</span> |
| <span class="source-line-no">292</span><span id="line-292"> }</span> |
| <span class="source-line-no">293</span><span id="line-293"> }</span> |
| <span class="source-line-no">294</span><span id="line-294"> /** Opens, on the given connection, the table whose name conf holds under tableNameConfKey. */</span> |
| <span class="source-line-no">295</span><span id="line-295"> private static Table openTable(Connection connection, Configuration conf,</span> |
| <span class="source-line-no">296</span><span id="line-296"> String tableNameConfKey) throws IOException {</span> |
| <span class="source-line-no">297</span><span id="line-297"> return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));</span> |
| <span class="source-line-no">298</span><span id="line-298"> }</span> |
| <span class="source-line-no">299</span><span id="line-299"></span> |
| <span class="source-line-no">300</span><span id="line-300"> /**</span> |
| <span class="source-line-no">301</span><span id="line-301"> * Advance the source hash reader by one entry and store the key it lands on in nextSourceKey.</span> |
| <span class="source-line-no">302</span><span id="line-302"> * When the reader has no further entry, nextSourceKey is set to null instead.</span> |
| <span class="source-line-no">303</span><span id="line-303"> */</span> |
| <span class="source-line-no">304</span><span id="line-304"> private void findNextKeyHashPair() throws IOException {</span> |
| <span class="source-line-no">305</span><span id="line-305"> if (!sourceHashReader.next()) {</span> |
| <span class="source-line-no">306</span><span id="line-306"> // no more keys - last hash goes to the end</span> |
| <span class="source-line-no">307</span><span id="line-307"> nextSourceKey = null;</span> |
| <span class="source-line-no">308</span><span id="line-308"> return;</span> |
| <span class="source-line-no">309</span><span id="line-309"> }</span> |
| <span class="source-line-no">310</span><span id="line-310"> // the reader moved to another entry, remember its key</span> |
| <span class="source-line-no">311</span><span id="line-311"> nextSourceKey = sourceHashReader.getCurrentKey();</span> |
| <span class="source-line-no">312</span><span id="line-312"> }</span> |
| <span class="source-line-no">313</span><span id="line-313"></span> |
| <span class="source-line-no">314</span><span id="line-314"> @Override</span> |
| <span class="source-line-no">315</span><span id="line-315"> protected void map(ImmutableBytesWritable key, Result value, Context context)</span> |
| <span class="source-line-no">316</span><span id="line-316"> throws IOException, InterruptedException {</span> |
| <span class="source-line-no">317</span><span id="line-317"> try {</span> |
| <span class="source-line-no">318</span><span id="line-318"> // first, finish every hash batch whose end key (nextSourceKey) is at or before the scanned row</span> |
| <span class="source-line-no">319</span><span id="line-319"> while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {</span> |
| <span class="source-line-no">320</span><span id="line-320"> moveToNextBatch(context);</span> |
| <span class="source-line-no">321</span><span id="line-321"> }</span> |
| <span class="source-line-no">322</span><span id="line-322"></span> |
| <span class="source-line-no">323</span><span id="line-323"> // next, add the scanned row to the open batch (rows seen before the first batch are skipped)</span> |
| <span class="source-line-no">324</span><span id="line-324"> if (targetHasher.isBatchStarted()) {</span> |
| <span class="source-line-no">325</span><span id="line-325"> targetHasher.hashResult(value);</span> |
| <span class="source-line-no">326</span><span id="line-326"> }</span> |
| <span class="source-line-no">327</span><span id="line-327"> } catch (Throwable t) {</span> |
| <span class="source-line-no">328</span><span id="line-328"> mapperException = t;</span> |
| <span class="source-line-no">329</span><span id="line-329"> throw t;</span> |
| <span class="source-line-no">330</span><span id="line-330"> }</span> |
| <span class="source-line-no">331</span><span id="line-331"> }</span> |
| <span class="source-line-no">332</span><span id="line-332"></span> |
| <span class="source-line-no">333</span><span id="line-333"> /**</span> |
| <span class="source-line-no">334</span><span id="line-334"> * If there is an open hash batch, complete it and sync if there are diffs. Start a new batch</span> |
| <span class="source-line-no">335</span><span id="line-335"> * at nextSourceKey, record its source hash, then read the next source key/hash pair.</span> |
| <span class="source-line-no">336</span><span id="line-336"> */</span> |
| <span class="source-line-no">337</span><span id="line-337"> private void moveToNextBatch(Context context) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">338</span><span id="line-338"> if (targetHasher.isBatchStarted()) {</span> |
| <span class="source-line-no">339</span><span id="line-339"> finishBatchAndCompareHashes(context);</span> |
| <span class="source-line-no">340</span><span id="line-340"> }</span> |
| <span class="source-line-no">341</span><span id="line-341"> targetHasher.startBatch(nextSourceKey);</span> |
| <span class="source-line-no">342</span><span id="line-342"> currentSourceHash = sourceHashReader.getCurrentHash();</span> |
| <span class="source-line-no">343</span><span id="line-343"></span> |
| <span class="source-line-no">344</span><span id="line-344"> findNextKeyHashPair();</span> |
| <span class="source-line-no">345</span><span id="line-345"> }</span> |
| <span class="source-line-no">346</span><span id="line-346"></span> |
| <span class="source-line-no">347</span><span id="line-347"> /**</span> |
| <span class="source-line-no">348</span><span id="line-348"> * Finish the currently open hash batch. Compare the target hash to the given source hash. If</span> |
| <span class="source-line-no">349</span><span id="line-349"> * they do not match, then sync the covered key range.</span> |
| <span class="source-line-no">350</span><span id="line-350"> */</span> |
| <span class="source-line-no">351</span><span id="line-351"> private void finishBatchAndCompareHashes(Context context)</span> |
| <span class="source-line-no">352</span><span id="line-352"> throws IOException, InterruptedException {</span> |
| <span class="source-line-no">353</span><span id="line-353"> targetHasher.finishBatch();</span> |
| <span class="source-line-no">354</span><span id="line-354"> context.getCounter(Counter.BATCHES).increment(1);</span> |
| <span class="source-line-no">355</span><span id="line-355"> if (targetHasher.getBatchSize() == 0) {</span> |
| <span class="source-line-no">356</span><span id="line-356"> context.getCounter(Counter.EMPTY_BATCHES).increment(1);</span> |
| <span class="source-line-no">357</span><span id="line-357"> }</span> |
| <span class="source-line-no">358</span><span id="line-358"> ImmutableBytesWritable targetHash = targetHasher.getBatchHash();</span> |
| <span class="source-line-no">359</span><span id="line-359"> if (targetHash.equals(currentSourceHash)) {</span> |
| <span class="source-line-no">360</span><span id="line-360"> context.getCounter(Counter.HASHES_MATCHED).increment(1);</span> |
| <span class="source-line-no">361</span><span id="line-361"> } else {</span> |
| <span class="source-line-no">362</span><span id="line-362"> context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);</span> |
| <span class="source-line-no">363</span><span id="line-363"></span> |
| <span class="source-line-no">364</span><span id="line-364"> ImmutableBytesWritable stopRow = nextSourceKey == null</span> |
| <span class="source-line-no">365</span><span id="line-365"> ? new ImmutableBytesWritable(sourceTableHash.stopRow)</span> |
| <span class="source-line-no">366</span><span id="line-366"> : nextSourceKey;</span> |
| <span class="source-line-no">367</span><span id="line-367"></span> |
| <span class="source-line-no">368</span><span id="line-368"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">369</span><span id="line-369"> LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey()) + " to "</span> |
| <span class="source-line-no">370</span><span id="line-370"> + toHex(stopRow) + " sourceHash: " + toHex(currentSourceHash) + " targetHash: "</span> |
| <span class="source-line-no">371</span><span id="line-371"> + toHex(targetHash));</span> |
| <span class="source-line-no">372</span><span id="line-372"> }</span> |
| <span class="source-line-no">373</span><span id="line-373"></span> |
| <span class="source-line-no">374</span><span id="line-374"> syncRange(context, targetHasher.getBatchStartKey(), stopRow);</span> |
| <span class="source-line-no">375</span><span id="line-375"> }</span> |
| <span class="source-line-no">376</span><span id="line-376"> }</span> |
| <span class="source-line-no">377</span><span id="line-377"> /** Hex-encodes the getLength() bytes of the writable's backing array starting at getOffset(). */</span> |
| <span class="source-line-no">378</span><span id="line-378"> private static String toHex(ImmutableBytesWritable bytes) {</span> |
| <span class="source-line-no">379</span><span id="line-379"> return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());</span> |
| <span class="source-line-no">380</span><span id="line-380"> }</span> |
| <span class="source-line-no">381</span><span id="line-381"> /** Scanner over an empty iterator; passed to syncRowCells for the side that lacks the row. */</span> |
| <span class="source-line-no">382</span><span id="line-382"> private static final CellScanner EMPTY_CELL_SCANNER =</span> |
| <span class="source-line-no">383</span><span id="line-383"> new CellScanner(Collections.<Result> emptyIterator());</span> |
| <span class="source-line-no">384</span><span id="line-384"></span> |
| <span class="source-line-no">385</span><span id="line-385"> /**</span> |
| <span class="source-line-no">386</span><span id="line-386"> * Rescan the given range directly from the source and target tables. Count and log differences,</span> |
| <span class="source-line-no">387</span><span id="line-387"> * and if this is not a dry run, output Puts and Deletes to make the target table match the</span> |
| <span class="source-line-no">388</span><span id="line-388"> * source table for this range</span> |
| <span class="source-line-no">389</span><span id="line-389"> */</span> |
| <span class="source-line-no">390</span><span id="line-390"> private void syncRange(Context context, ImmutableBytesWritable startRow,</span> |
| <span class="source-line-no">391</span><span id="line-391"> ImmutableBytesWritable stopRow) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">392</span><span id="line-392"> Scan scan = sourceTableHash.initScan();</span> |
| <span class="source-line-no">393</span><span id="line-393"> scan.withStartRow(startRow.copyBytes());</span> |
| <span class="source-line-no">394</span><span id="line-394"> scan.withStopRow(stopRow.copyBytes());</span> |
| <span class="source-line-no">395</span><span id="line-395"></span> |
| <span class="source-line-no">396</span><span id="line-396"> ResultScanner sourceScanner = sourceTable.getScanner(scan);</span> |
| <span class="source-line-no">397</span><span id="line-397"> CellScanner sourceCells = new CellScanner(sourceScanner.iterator());</span> |
| <span class="source-line-no">398</span><span id="line-398"></span> |
| <span class="source-line-no">399</span><span id="line-399"> ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));</span> |
| <span class="source-line-no">400</span><span id="line-400"> CellScanner targetCells = new CellScanner(targetScanner.iterator());</span> |
| <span class="source-line-no">401</span><span id="line-401"></span> |
| <span class="source-line-no">402</span><span id="line-402"> boolean rangeMatched = true;</span> |
| <span class="source-line-no">403</span><span id="line-403"> byte[] nextSourceRow = sourceCells.nextRow();</span> |
| <span class="source-line-no">404</span><span id="line-404"> byte[] nextTargetRow = targetCells.nextRow();</span> |
| <span class="source-line-no">405</span><span id="line-405"> while (nextSourceRow != null || nextTargetRow != null) {</span> |
| <span class="source-line-no">406</span><span id="line-406"> boolean rowMatched;</span> |
| <span class="source-line-no">407</span><span id="line-407"> int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);</span> |
| <span class="source-line-no">408</span><span id="line-408"> if (rowComparison < 0) {</span> |
| <span class="source-line-no">409</span><span id="line-409"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">410</span><span id="line-410"> LOG.debug("Target missing row: " + Bytes.toString(nextSourceRow));</span> |
| <span class="source-line-no">411</span><span id="line-411"> }</span> |
| <span class="source-line-no">412</span><span id="line-412"> context.getCounter(Counter.TARGETMISSINGROWS).increment(1);</span> |
| <span class="source-line-no">413</span><span id="line-413"></span> |
| <span class="source-line-no">414</span><span id="line-414"> rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);</span> |
| <span class="source-line-no">415</span><span id="line-415"> nextSourceRow = sourceCells.nextRow(); // advance only source to next row</span> |
| <span class="source-line-no">416</span><span id="line-416"> } else if (rowComparison > 0) {</span> |
| <span class="source-line-no">417</span><span id="line-417"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">418</span><span id="line-418"> LOG.debug("Source missing row: " + Bytes.toString(nextTargetRow));</span> |
| <span class="source-line-no">419</span><span id="line-419"> }</span> |
| <span class="source-line-no">420</span><span id="line-420"> context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);</span> |
| <span class="source-line-no">421</span><span id="line-421"></span> |
| <span class="source-line-no">422</span><span id="line-422"> rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);</span> |
| <span class="source-line-no">423</span><span id="line-423"> nextTargetRow = targetCells.nextRow(); // advance only target to next row</span> |
| <span class="source-line-no">424</span><span id="line-424"> } else {</span> |
| <span class="source-line-no">425</span><span id="line-425"> // current row is the same on both sides, compare cell by cell</span> |
| <span class="source-line-no">426</span><span id="line-426"> rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);</span> |
| <span class="source-line-no">427</span><span id="line-427"> nextSourceRow = sourceCells.nextRow();</span> |
| <span class="source-line-no">428</span><span id="line-428"> nextTargetRow = targetCells.nextRow();</span> |
| <span class="source-line-no">429</span><span id="line-429"> }</span> |
| <span class="source-line-no">430</span><span id="line-430"></span> |
| <span class="source-line-no">431</span><span id="line-431"> if (!rowMatched) {</span> |
| <span class="source-line-no">432</span><span id="line-432"> rangeMatched = false;</span> |
| <span class="source-line-no">433</span><span id="line-433"> }</span> |
| <span class="source-line-no">434</span><span id="line-434"> }</span> |
| <span class="source-line-no">435</span><span id="line-435"></span> |
| <span class="source-line-no">436</span><span id="line-436"> sourceScanner.close();</span> |
| <span class="source-line-no">437</span><span id="line-437"> targetScanner.close();</span> |
| <span class="source-line-no">438</span><span id="line-438"></span> |
| <span class="source-line-no">439</span><span id="line-439"> context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)</span> |
| <span class="source-line-no">440</span><span id="line-440"> .increment(1);</span> |
| <span class="source-line-no">441</span><span id="line-441"> }</span> |
| <span class="source-line-no">442</span><span id="line-442"> /** Reads an Iterator of Results row by row; a row may span several consecutive Results. */</span> |
| <span class="source-line-no">443</span><span id="line-443"> private static class CellScanner {</span> |
| <span class="source-line-no">444</span><span id="line-444"> private final Iterator<Result> results;</span> |
| <span class="source-line-no">445</span><span id="line-445"> /** Key and Result of the row being read, and the index of the next cell to return from it. */</span> |
| <span class="source-line-no">446</span><span id="line-446"> private byte[] currentRow;</span> |
| <span class="source-line-no">447</span><span id="line-447"> private Result currentRowResult;</span> |
| <span class="source-line-no">448</span><span id="line-448"> private int nextCellInRow;</span> |
| <span class="source-line-no">449</span><span id="line-449"> /** First Result of the following row when nextCellInRow() already read it, otherwise null. */</span> |
| <span class="source-line-no">450</span><span id="line-450"> private Result nextRowResult;</span> |
| <span class="source-line-no">451</span><span id="line-451"></span> |
| <span class="source-line-no">452</span><span id="line-452"> public CellScanner(Iterator<Result> results) {</span> |
| <span class="source-line-no">453</span><span id="line-453"> this.results = results;</span> |
| <span class="source-line-no">454</span><span id="line-454"> }</span> |
| <span class="source-line-no">455</span><span id="line-455"></span> |
| <span class="source-line-no">456</span><span id="line-456"> /**</span> |
| <span class="source-line-no">457</span><span id="line-457"> * Skip to the start of the next row and return its row key, or null iff there are no more rows.</span> |
| <span class="source-line-no">458</span><span id="line-458"> */</span> |
| <span class="source-line-no">459</span><span id="line-459"> public byte[] nextRow() {</span> |
| <span class="source-line-no">460</span><span id="line-460"> if (nextRowResult == null) {</span> |
| <span class="source-line-no">461</span><span id="line-461"> // no row cached by nextCellInRow() - pull results until one has a different row key</span> |
| <span class="source-line-no">462</span><span id="line-462"> while (results.hasNext()) {</span> |
| <span class="source-line-no">463</span><span id="line-463"> nextRowResult = results.next();</span> |
| <span class="source-line-no">464</span><span id="line-464"> Cell nextCell = nextRowResult.rawCells()[0];</span> |
| <span class="source-line-no">465</span><span id="line-465"> if (</span> |
| <span class="source-line-no">466</span><span id="line-466"> currentRow == null || !Bytes.equals(currentRow, 0, currentRow.length,</span> |
| <span class="source-line-no">467</span><span id="line-467"> nextCell.getRowArray(), nextCell.getRowOffset(), nextCell.getRowLength())</span> |
| <span class="source-line-no">468</span><span id="line-468"> ) {</span> |
| <span class="source-line-no">469</span><span id="line-469"> // no current row yet, or the row key changed: found next row</span> |
| <span class="source-line-no">470</span><span id="line-470"> break;</span> |
| <span class="source-line-no">471</span><span id="line-471"> } else {</span> |
| <span class="source-line-no">472</span><span id="line-472"> // unread result that still belongs to the current row, discard it and keep scanning</span> |
| <span class="source-line-no">473</span><span id="line-473"> nextRowResult = null;</span> |
| <span class="source-line-no">474</span><span id="line-474"> }</span> |
| <span class="source-line-no">475</span><span id="line-475"> }</span> |
| <span class="source-line-no">476</span><span id="line-476"></span> |
| <span class="source-line-no">477</span><span id="line-477"> if (nextRowResult == null) {</span> |
| <span class="source-line-no">478</span><span id="line-478"> // end of data, no more rows - clear the current row state as well</span> |
| <span class="source-line-no">479</span><span id="line-479"> currentRowResult = null;</span> |
| <span class="source-line-no">480</span><span id="line-480"> currentRow = null;</span> |
| <span class="source-line-no">481</span><span id="line-481"> return null;</span> |
| <span class="source-line-no">482</span><span id="line-482"> }</span> |
| <span class="source-line-no">483</span><span id="line-483"> }</span> |
| <span class="source-line-no">484</span><span id="line-484"></span> |
| <span class="source-line-no">485</span><span id="line-485"> // make the cached result the current row and restart at its first cell</span> |
| <span class="source-line-no">486</span><span id="line-486"> currentRowResult = nextRowResult;</span> |
| <span class="source-line-no">487</span><span id="line-487"> nextCellInRow = 0;</span> |
| <span class="source-line-no">488</span><span id="line-488"> currentRow = currentRowResult.getRow();</span> |
| <span class="source-line-no">489</span><span id="line-489"> nextRowResult = null;</span> |
| <span class="source-line-no">490</span><span id="line-490"> return currentRow;</span> |
| <span class="source-line-no">491</span><span id="line-491"> }</span> |
| <span class="source-line-no">492</span><span id="line-492"></span> |
| <span class="source-line-no">493</span><span id="line-493"> /**</span> |
| <span class="source-line-no">494</span><span id="line-494"> * Returns the next Cell in the current row, following it across Results, or null iff none remain.</span> |
| <span class="source-line-no">495</span><span id="line-495"> */</span> |
| <span class="source-line-no">496</span><span id="line-496"> public Cell nextCellInRow() {</span> |
| <span class="source-line-no">497</span><span id="line-497"> if (currentRowResult == null) {</span> |
| <span class="source-line-no">498</span><span id="line-498"> // nothing left in current row, or nextRow() has not positioned the scanner on a row</span> |
| <span class="source-line-no">499</span><span id="line-499"> return null;</span> |
| <span class="source-line-no">500</span><span id="line-500"> }</span> |
| <span class="source-line-no">501</span><span id="line-501"></span> |
| <span class="source-line-no">502</span><span id="line-502"> Cell nextCell = currentRowResult.rawCells()[nextCellInRow];</span> |
| <span class="source-line-no">503</span><span id="line-503"> nextCellInRow++;</span> |
| <span class="source-line-no">504</span><span id="line-504"> if (nextCellInRow == currentRowResult.size()) {</span> |
| <span class="source-line-no">505</span><span id="line-505"> if (results.hasNext()) {</span> |
| <span class="source-line-no">506</span><span id="line-506"> Result result = results.next();</span> |
| <span class="source-line-no">507</span><span id="line-507"> Cell cell = result.rawCells()[0];</span> |
| <span class="source-line-no">508</span><span id="line-508"> if (</span> |
| <span class="source-line-no">509</span><span id="line-509"> Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),</span> |
| <span class="source-line-no">510</span><span id="line-510"> cell.getRowOffset(), cell.getRowLength())</span> |
| <span class="source-line-no">511</span><span id="line-511"> ) {</span> |
| <span class="source-line-no">512</span><span id="line-512"> // result is part of current row, continue reading cells from it</span> |
| <span class="source-line-no">513</span><span id="line-513"> currentRowResult = result;</span> |
| <span class="source-line-no">514</span><span id="line-514"> nextCellInRow = 0;</span> |
| <span class="source-line-no">515</span><span id="line-515"> } else {</span> |
| <span class="source-line-no">516</span><span id="line-516"> // result is part of next row, cache it for the next call to nextRow()</span> |
| <span class="source-line-no">517</span><span id="line-517"> nextRowResult = result;</span> |
| <span class="source-line-no">518</span><span id="line-518"> // current row is complete</span> |
| <span class="source-line-no">519</span><span id="line-519"> currentRowResult = null;</span> |
| <span class="source-line-no">520</span><span id="line-520"> }</span> |
| <span class="source-line-no">521</span><span id="line-521"> } else {</span> |
| <span class="source-line-no">522</span><span id="line-522"> // end of data, the cell returned below is the last one</span> |
| <span class="source-line-no">523</span><span id="line-523"> currentRowResult = null;</span> |
| <span class="source-line-no">524</span><span id="line-524"> }</span> |
| <span class="source-line-no">525</span><span id="line-525"> }</span> |
| <span class="source-line-no">526</span><span id="line-526"> return nextCell;</span> |
| <span class="source-line-no">527</span><span id="line-527"> }</span> |
| <span class="source-line-no">528</span><span id="line-528"> }</span> |
| <span class="source-line-no">529</span><span id="line-529"></span> |
| <span class="source-line-no">530</span><span id="line-530"> private Cell checkAndResetTimestamp(Cell sourceCell) {</span> |
| <span class="source-line-no">531</span><span id="line-531"> if (!ignoreTimestamp) {</span> |
| <span class="source-line-no">532</span><span id="line-532"> return sourceCell;</span> |
| <span class="source-line-no">533</span><span id="line-533"> }</span> |
| <span class="source-line-no">534</span><span id="line-534"> // rebuild the cell stamped with the current time; type, row, family, qualifier, value are kept</span> |
| <span class="source-line-no">535</span><span id="line-535"> return CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setType(sourceCell.getType())</span> |
| <span class="source-line-no">536</span><span id="line-536"> .setRow(sourceCell.getRowArray(), sourceCell.getRowOffset(), sourceCell.getRowLength())</span> |
| <span class="source-line-no">537</span><span id="line-537"> .setFamily(sourceCell.getFamilyArray(), sourceCell.getFamilyOffset(),</span> |
| <span class="source-line-no">538</span><span id="line-538"> sourceCell.getFamilyLength())</span> |
| <span class="source-line-no">539</span><span id="line-539"> .setQualifier(sourceCell.getQualifierArray(), sourceCell.getQualifierOffset(),</span> |
| <span class="source-line-no">540</span><span id="line-540"> sourceCell.getQualifierLength())</span> |
| <span class="source-line-no">541</span><span id="line-541"> .setTimestamp(EnvironmentEdgeManager.currentTime())</span> |
| <span class="source-line-no">542</span><span id="line-542"> .setValue(sourceCell.getValueArray(), sourceCell.getValueOffset(), sourceCell.getValueLength())</span> |
| <span class="source-line-no">543</span><span id="line-543"> .build();</span> |
| <span class="source-line-no">544</span><span id="line-544"> }</span> |
| <span class="source-line-no">545</span><span id="line-545"></span> |
| <span class="source-line-no">546</span><span id="line-546"> /**</span> |
| <span class="source-line-no">547</span><span id="line-547"> * Compare the cells for the given row from the source and target tables. Count and log any</span> |
| <span class="source-line-no">548</span><span id="line-548"> * differences. If not a dry run, output a Put and/or Delete needed to sync the target table to</span> |
| <span class="source-line-no">549</span><span id="line-549"> * match the source table.</span> |
| <span class="source-line-no">550</span><span id="line-550"> */</span> |
| <span class="source-line-no">551</span><span id="line-551"> private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,</span> |
| <span class="source-line-no">552</span><span id="line-552"> CellScanner targetCells) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">553</span><span id="line-553"> Put put = null;</span> |
| <span class="source-line-no">554</span><span id="line-554"> Delete delete = null;</span> |
| <span class="source-line-no">555</span><span id="line-555"> long matchingCells = 0;</span> |
| <span class="source-line-no">556</span><span id="line-556"> boolean matchingRow = true;</span> |
| <span class="source-line-no">557</span><span id="line-557"> Cell sourceCell = sourceCells.nextCellInRow();</span> |
| <span class="source-line-no">558</span><span id="line-558"> Cell targetCell = targetCells.nextCellInRow();</span> |
| <span class="source-line-no">559</span><span id="line-559"> while (sourceCell != null || targetCell != null) {</span> |
| <span class="source-line-no">560</span><span id="line-560"></span> |
| <span class="source-line-no">561</span><span id="line-561"> int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);</span> |
| <span class="source-line-no">562</span><span id="line-562"> if (cellKeyComparison < 0) {</span> |
| <span class="source-line-no">563</span><span id="line-563"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">564</span><span id="line-564"> LOG.debug("Target missing cell: " + sourceCell);</span> |
| <span class="source-line-no">565</span><span id="line-565"> }</span> |
| <span class="source-line-no">566</span><span id="line-566"> context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);</span> |
| <span class="source-line-no">567</span><span id="line-567"> matchingRow = false;</span> |
| <span class="source-line-no">568</span><span id="line-568"></span> |
| <span class="source-line-no">569</span><span id="line-569"> if (!dryRun && doPuts) {</span> |
| <span class="source-line-no">570</span><span id="line-570"> if (put == null) {</span> |
| <span class="source-line-no">571</span><span id="line-571"> put = new Put(rowKey);</span> |
| <span class="source-line-no">572</span><span id="line-572"> }</span> |
| <span class="source-line-no">573</span><span id="line-573"> sourceCell = checkAndResetTimestamp(sourceCell);</span> |
| <span class="source-line-no">574</span><span id="line-574"> put.add(sourceCell);</span> |
| <span class="source-line-no">575</span><span id="line-575"> }</span> |
| <span class="source-line-no">576</span><span id="line-576"></span> |
| <span class="source-line-no">577</span><span id="line-577"> sourceCell = sourceCells.nextCellInRow();</span> |
| <span class="source-line-no">578</span><span id="line-578"> } else if (cellKeyComparison > 0) {</span> |
| <span class="source-line-no">579</span><span id="line-579"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">580</span><span id="line-580"> LOG.debug("Source missing cell: " + targetCell);</span> |
| <span class="source-line-no">581</span><span id="line-581"> }</span> |
| <span class="source-line-no">582</span><span id="line-582"> context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);</span> |
| <span class="source-line-no">583</span><span id="line-583"> matchingRow = false;</span> |
| <span class="source-line-no">584</span><span id="line-584"></span> |
| <span class="source-line-no">585</span><span id="line-585"> if (!dryRun && doDeletes) {</span> |
| <span class="source-line-no">586</span><span id="line-586"> if (delete == null) {</span> |
| <span class="source-line-no">587</span><span id="line-587"> delete = new Delete(rowKey);</span> |
| <span class="source-line-no">588</span><span id="line-588"> }</span> |
| <span class="source-line-no">589</span><span id="line-589"> // add a tombstone to exactly match the target cell that is missing on the source</span> |
| <span class="source-line-no">590</span><span id="line-590"> delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell),</span> |
| <span class="source-line-no">591</span><span id="line-591"> targetCell.getTimestamp());</span> |
| <span class="source-line-no">592</span><span id="line-592"> }</span> |
| <span class="source-line-no">593</span><span id="line-593"></span> |
| <span class="source-line-no">594</span><span id="line-594"> targetCell = targetCells.nextCellInRow();</span> |
| <span class="source-line-no">595</span><span id="line-595"> } else {</span> |
| <span class="source-line-no">596</span><span id="line-596"> // the cell keys are equal, now check values</span> |
| <span class="source-line-no">597</span><span id="line-597"> if (CellUtil.matchingValue(sourceCell, targetCell)) {</span> |
| <span class="source-line-no">598</span><span id="line-598"> matchingCells++;</span> |
| <span class="source-line-no">599</span><span id="line-599"> } else {</span> |
| <span class="source-line-no">600</span><span id="line-600"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">601</span><span id="line-601"> LOG.debug("Different values: ");</span> |
| <span class="source-line-no">602</span><span id="line-602"> LOG.debug(" source cell: " + sourceCell + " value: "</span> |
| <span class="source-line-no">603</span><span id="line-603"> + Bytes.toString(sourceCell.getValueArray(), sourceCell.getValueOffset(),</span> |
| <span class="source-line-no">604</span><span id="line-604"> sourceCell.getValueLength()));</span> |
| <span class="source-line-no">605</span><span id="line-605"> LOG.debug(" target cell: " + targetCell + " value: "</span> |
| <span class="source-line-no">606</span><span id="line-606"> + Bytes.toString(targetCell.getValueArray(), targetCell.getValueOffset(),</span> |
| <span class="source-line-no">607</span><span id="line-607"> targetCell.getValueLength()));</span> |
| <span class="source-line-no">608</span><span id="line-608"> }</span> |
| <span class="source-line-no">609</span><span id="line-609"> context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);</span> |
| <span class="source-line-no">610</span><span id="line-610"> matchingRow = false;</span> |
| <span class="source-line-no">611</span><span id="line-611"></span> |
| <span class="source-line-no">612</span><span id="line-612"> if (!dryRun && doPuts) {</span> |
| <span class="source-line-no">613</span><span id="line-613"> // overwrite target cell</span> |
| <span class="source-line-no">614</span><span id="line-614"> if (put == null) {</span> |
| <span class="source-line-no">615</span><span id="line-615"> put = new Put(rowKey);</span> |
| <span class="source-line-no">616</span><span id="line-616"> }</span> |
| <span class="source-line-no">617</span><span id="line-617"> sourceCell = checkAndResetTimestamp(sourceCell);</span> |
| <span class="source-line-no">618</span><span id="line-618"> put.add(sourceCell);</span> |
| <span class="source-line-no">619</span><span id="line-619"> }</span> |
| <span class="source-line-no">620</span><span id="line-620"> }</span> |
| <span class="source-line-no">621</span><span id="line-621"> sourceCell = sourceCells.nextCellInRow();</span> |
| <span class="source-line-no">622</span><span id="line-622"> targetCell = targetCells.nextCellInRow();</span> |
| <span class="source-line-no">623</span><span id="line-623"> }</span> |
| <span class="source-line-no">624</span><span id="line-624"></span> |
| <span class="source-line-no">625</span><span id="line-625"> if (!dryRun && sourceTableHash.scanBatch > 0) {</span> |
| <span class="source-line-no">626</span><span id="line-626"> if (put != null && put.size() >= sourceTableHash.scanBatch) {</span> |
| <span class="source-line-no">627</span><span id="line-627"> context.write(new ImmutableBytesWritable(rowKey), put);</span> |
| <span class="source-line-no">628</span><span id="line-628"> put = null;</span> |
| <span class="source-line-no">629</span><span id="line-629"> }</span> |
| <span class="source-line-no">630</span><span id="line-630"> if (delete != null && delete.size() >= sourceTableHash.scanBatch) {</span> |
| <span class="source-line-no">631</span><span id="line-631"> context.write(new ImmutableBytesWritable(rowKey), delete);</span> |
| <span class="source-line-no">632</span><span id="line-632"> delete = null;</span> |
| <span class="source-line-no">633</span><span id="line-633"> }</span> |
| <span class="source-line-no">634</span><span id="line-634"> }</span> |
| <span class="source-line-no">635</span><span id="line-635"> }</span> |
| <span class="source-line-no">636</span><span id="line-636"></span> |
| <span class="source-line-no">637</span><span id="line-637"> if (!dryRun) {</span> |
| <span class="source-line-no">638</span><span id="line-638"> if (put != null) {</span> |
| <span class="source-line-no">639</span><span id="line-639"> context.write(new ImmutableBytesWritable(rowKey), put);</span> |
| <span class="source-line-no">640</span><span id="line-640"> }</span> |
| <span class="source-line-no">641</span><span id="line-641"> if (delete != null) {</span> |
| <span class="source-line-no">642</span><span id="line-642"> context.write(new ImmutableBytesWritable(rowKey), delete);</span> |
| <span class="source-line-no">643</span><span id="line-643"> }</span> |
| <span class="source-line-no">644</span><span id="line-644"> }</span> |
| <span class="source-line-no">645</span><span id="line-645"></span> |
| <span class="source-line-no">646</span><span id="line-646"> if (matchingCells > 0) {</span> |
| <span class="source-line-no">647</span><span id="line-647"> context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);</span> |
| <span class="source-line-no">648</span><span id="line-648"> }</span> |
| <span class="source-line-no">649</span><span id="line-649"> if (matchingRow) {</span> |
| <span class="source-line-no">650</span><span id="line-650"> context.getCounter(Counter.MATCHINGROWS).increment(1);</span> |
| <span class="source-line-no">651</span><span id="line-651"> return true;</span> |
| <span class="source-line-no">652</span><span id="line-652"> } else {</span> |
| <span class="source-line-no">653</span><span id="line-653"> context.getCounter(Counter.ROWSWITHDIFFS).increment(1);</span> |
| <span class="source-line-no">654</span><span id="line-654"> return false;</span> |
| <span class="source-line-no">655</span><span id="line-655"> }</span> |
| <span class="source-line-no">656</span><span id="line-656"> }</span> |
| <span class="source-line-no">657</span><span id="line-657"></span> |
| <span class="source-line-no">658</span><span id="line-658"> /**</span> |
| <span class="source-line-no">659</span><span id="line-659"> * Compare two row keys given as byte arrays, using Bytes.compareTo when both are non-null. A null key sorts after any non-null key; if r1 is null the result is 1 even when r2 is also null.</span> |
| <span class="source-line-no">660</span><span id="line-660"> */</span> |
| <span class="source-line-no">661</span><span id="line-661"> private static int compareRowKeys(byte[] r1, byte[] r2) {</span> |
| <span class="source-line-no">662</span><span id="line-662"> if (r1 == null) {</span> |
| <span class="source-line-no">663</span><span id="line-663"> return 1; // source missing row</span> |
| <span class="source-line-no">664</span><span id="line-664"> } else if (r2 == null) {</span> |
| <span class="source-line-no">665</span><span id="line-665"> return -1; // target missing row</span> |
| <span class="source-line-no">666</span><span id="line-666"> } else {</span> |
| <span class="source-line-no">667</span><span id="line-667"> // Sync runs on non-META tables only, so we can directly do what CellComparator does internally;</span> |
| <span class="source-line-no">668</span><span id="line-668"> // the call never needs to go to MetaCellComparator.</span> |
| <span class="source-line-no">669</span><span id="line-669"> return Bytes.compareTo(r1, 0, r1.length, r2, 0, r2.length);</span> |
| <span class="source-line-no">670</span><span id="line-670"> }</span> |
| <span class="source-line-no">671</span><span id="line-671"> }</span> |
| <span class="source-line-no">672</span><span id="line-672"></span> |
| <span class="source-line-no">673</span><span id="line-673"> /**</span> |
| <span class="source-line-no">674</span><span id="line-674"> * Compare families, qualifiers, and timestamps of the given Cells. They are assumed to be of</span> |
| <span class="source-line-no">675</span><span id="line-675"> * the same row. Nulls are after non-nulls (a null c1 yields 1 even if c2 is also null). If ignoreTimestamp is set, cells with matching family and qualifier compare as equal and timestamps are not consulted.</span> |
| <span class="source-line-no">676</span><span id="line-676"> */</span> |
| <span class="source-line-no">677</span><span id="line-677"> private int compareCellKeysWithinRow(Cell c1, Cell c2) {</span> |
| <span class="source-line-no">678</span><span id="line-678"> if (c1 == null) {</span> |
| <span class="source-line-no">679</span><span id="line-679"> return 1; // source missing cell</span> |
| <span class="source-line-no">680</span><span id="line-680"> }</span> |
| <span class="source-line-no">681</span><span id="line-681"> if (c2 == null) {</span> |
| <span class="source-line-no">682</span><span id="line-682"> return -1; // target missing cell</span> |
| <span class="source-line-no">683</span><span id="line-683"> }</span> |
| <span class="source-line-no">684</span><span id="line-684"></span> |
| <span class="source-line-no">685</span><span id="line-685"> int result = CellComparator.getInstance().compareFamilies(c1, c2);</span> |
| <span class="source-line-no">686</span><span id="line-686"> if (result != 0) {</span> |
| <span class="source-line-no">687</span><span id="line-687"> return result;</span> |
| <span class="source-line-no">688</span><span id="line-688"> }</span> |
| <span class="source-line-no">689</span><span id="line-689"></span> |
| <span class="source-line-no">690</span><span id="line-690"> result = CellComparator.getInstance().compareQualifiers(c1, c2);</span> |
| <span class="source-line-no">691</span><span id="line-691"> if (result != 0) {</span> |
| <span class="source-line-no">692</span><span id="line-692"> return result;</span> |
| <span class="source-line-no">693</span><span id="line-693"> }</span> |
| <span class="source-line-no">694</span><span id="line-694"></span> |
| <span class="source-line-no">695</span><span id="line-695"> if (this.ignoreTimestamp) {</span> |
| <span class="source-line-no">696</span><span id="line-696"> return 0; /* family and qualifier already matched above; the timestamp is deliberately not compared */</span> |
| <span class="source-line-no">697</span><span id="line-697"> } else {</span> |
| <span class="source-line-no">698</span><span id="line-698"> // note timestamp comparison is inverted - more recent cells first</span> |
| <span class="source-line-no">699</span><span id="line-699"> return CellComparator.getInstance().compareTimestamps(c1, c2);</span> |
| <span class="source-line-no">700</span><span id="line-700"> }</span> |
| <span class="source-line-no">701</span><span id="line-701"> }</span> |
| <span class="source-line-no">702</span><span id="line-702"> /** Finishes any remaining hash ranges (only if no mapperException is already set), closes both tables and both connections, then rethrows the first recorded failure. */</span> |
| <span class="source-line-no">703</span><span id="line-703"> @Override</span> |
| <span class="source-line-no">704</span><span id="line-704"> protected void cleanup(Context context) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">705</span><span id="line-705"> if (mapperException == null) {</span> |
| <span class="source-line-no">706</span><span id="line-706"> try {</span> |
| <span class="source-line-no">707</span><span id="line-707"> finishRemainingHashRanges(context);</span> |
| <span class="source-line-no">708</span><span id="line-708"> } catch (Throwable t) { /* remember the failure instead of throwing, so the close() calls below still run */</span> |
| <span class="source-line-no">709</span><span id="line-709"> mapperException = t;</span> |
| <span class="source-line-no">710</span><span id="line-710"> }</span> |
| <span class="source-line-no">711</span><span id="line-711"> }</span> |
| <span class="source-line-no">712</span><span id="line-712"></span> |
| <span class="source-line-no">713</span><span id="line-713"> try { /* NOTE(review): all four close() calls share one try, so if an earlier one throws the later ones are skipped */</span> |
| <span class="source-line-no">714</span><span id="line-714"> sourceTable.close();</span> |
| <span class="source-line-no">715</span><span id="line-715"> targetTable.close();</span> |
| <span class="source-line-no">716</span><span id="line-716"> sourceConnection.close();</span> |
| <span class="source-line-no">717</span><span id="line-717"> targetConnection.close();</span> |
| <span class="source-line-no">718</span><span id="line-718"> } catch (Throwable t) {</span> |
| <span class="source-line-no">719</span><span id="line-719"> if (mapperException == null) {</span> |
| <span class="source-line-no">720</span><span id="line-720"> mapperException = t;</span> |
| <span class="source-line-no">721</span><span id="line-721"> } else { /* an earlier failure takes precedence; the close failure is only logged */</span> |
| <span class="source-line-no">722</span><span id="line-722"> LOG.error("Suppressing exception from closing tables", t);</span> |
| <span class="source-line-no">723</span><span id="line-723"> }</span> |
| <span class="source-line-no">724</span><span id="line-724"> }</span> |
| <span class="source-line-no">725</span><span id="line-725"></span> |
| <span class="source-line-no">726</span><span id="line-726"> // propagate first exception</span> |
| <span class="source-line-no">727</span><span id="line-727"> if (mapperException != null) {</span> |
| <span class="source-line-no">728</span><span id="line-728"> Throwables.throwIfInstanceOf(mapperException, IOException.class);</span> |
| <span class="source-line-no">729</span><span id="line-729"> Throwables.throwIfInstanceOf(mapperException, InterruptedException.class);</span> |
| <span class="source-line-no">730</span><span id="line-730"> Throwables.throwIfUnchecked(mapperException); /* NOTE(review): assuming Guava Throwables semantics, a checked Throwable other than IOException or InterruptedException would not be rethrown here - confirm */</span> |
| <span class="source-line-no">731</span><span id="line-731"> }</span> |
| <span class="source-line-no">732</span><span id="line-732"> }</span> |
| <span class="source-line-no">733</span><span id="line-733"> /** Moves through the hash batches that begin before this split's end row (or all remaining ones when HashTable.isTableEndRow reports the split end as the table end), then completes the last open target hash batch, scanning the target table past the split end if that batch's range extends beyond it, and compares hashes. */</span> |
| <span class="source-line-no">734</span><span id="line-734"> private void finishRemainingHashRanges(Context context)</span> |
| <span class="source-line-no">735</span><span id="line-735"> throws IOException, InterruptedException {</span> |
| <span class="source-line-no">736</span><span id="line-736"> TableSplit split = (TableSplit) context.getInputSplit();</span> |
| <span class="source-line-no">737</span><span id="line-737"> byte[] splitEndRow = split.getEndRow();</span> |
| <span class="source-line-no">738</span><span id="line-738"> boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);</span> |
| <span class="source-line-no">739</span><span id="line-739"></span> |
| <span class="source-line-no">740</span><span id="line-740"> // if there are more hash batches that begin before the end of this split move to them</span> |
| <span class="source-line-no">741</span><span id="line-741"> while (</span> |
| <span class="source-line-no">742</span><span id="line-742"> nextSourceKey != null && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)</span> |
| <span class="source-line-no">743</span><span id="line-743"> ) {</span> |
| <span class="source-line-no">744</span><span id="line-744"> moveToNextBatch(context);</span> |
| <span class="source-line-no">745</span><span id="line-745"> }</span> |
| <span class="source-line-no">746</span><span id="line-746"></span> |
| <span class="source-line-no">747</span><span id="line-747"> if (targetHasher.isBatchStarted()) {</span> |
| <span class="source-line-no">748</span><span id="line-748"> // need to complete the final open hash batch</span> |
| <span class="source-line-no">749</span><span id="line-749"></span> |
| <span class="source-line-no">750</span><span id="line-750"> if (</span> |
| <span class="source-line-no">751</span><span id="line-751"> (nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)</span> |
| <span class="source-line-no">752</span><span id="line-752"> || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))</span> |
| <span class="source-line-no">753</span><span id="line-753"> ) {</span> |
| <span class="source-line-no">754</span><span id="line-754"> // the open hash range continues past the end of this region</span> |
| <span class="source-line-no">755</span><span id="line-755"> // add a scan to complete the current hash range</span> |
| <span class="source-line-no">756</span><span id="line-756"> Scan scan = sourceTableHash.initScan();</span> |
| <span class="source-line-no">757</span><span id="line-757"> scan.withStartRow(splitEndRow);</span> |
| <span class="source-line-no">758</span><span id="line-758"> if (nextSourceKey == null) { /* no further batch key: scan up to the stop row of the source table hash */</span> |
| <span class="source-line-no">759</span><span id="line-759"> scan.withStopRow(sourceTableHash.stopRow);</span> |
| <span class="source-line-no">760</span><span id="line-760"> } else { /* otherwise stop at the key where the next batch begins */</span> |
| <span class="source-line-no">761</span><span id="line-761"> scan.withStopRow(nextSourceKey.copyBytes());</span> |
| <span class="source-line-no">762</span><span id="line-762"> }</span> |
| <span class="source-line-no">763</span><span id="line-763"></span> |
| <span class="source-line-no">764</span><span id="line-764"> ResultScanner targetScanner = null;</span> |
| <span class="source-line-no">765</span><span id="line-765"> try {</span> |
| <span class="source-line-no">766</span><span id="line-766"> targetScanner = targetTable.getScanner(scan);</span> |
| <span class="source-line-no">767</span><span id="line-767"> for (Result row : targetScanner) { /* feed every extra target row into the open hash batch */</span> |
| <span class="source-line-no">768</span><span id="line-768"> targetHasher.hashResult(row);</span> |
| <span class="source-line-no">769</span><span id="line-769"> }</span> |
| <span class="source-line-no">770</span><span id="line-770"> } finally { /* close the scanner even if scanning or hashing throws */</span> |
| <span class="source-line-no">771</span><span id="line-771"> if (targetScanner != null) {</span> |
| <span class="source-line-no">772</span><span id="line-772"> targetScanner.close();</span> |
| <span class="source-line-no">773</span><span id="line-773"> }</span> |
| <span class="source-line-no">774</span><span id="line-774"> }</span> |
| <span class="source-line-no">775</span><span id="line-775"> } // else current batch ends exactly at split end row</span> |
| <span class="source-line-no">776</span><span id="line-776"></span> |
| <span class="source-line-no">777</span><span id="line-777"> finishBatchAndCompareHashes(context); /* runs whether or not the extra scan above was needed */</span> |
| <span class="source-line-no">778</span><span id="line-778"> }</span> |
| <span class="source-line-no">779</span><span id="line-779"> }</span> |
| <span class="source-line-no">780</span><span id="line-780"> }</span> |
| <span class="source-line-no">781</span><span id="line-781"></span> |
| <span class="source-line-no">782</span><span id="line-782"> private static final int NUM_ARGS = 3; /* number of trailing positional arguments: sourcehashdir, sourcetable, targettable */</span> |
| <span class="source-line-no">783</span><span id="line-783"> /** Prints the optional error message followed by the SyncTable usage text (options, positional args and an example) to System.err. */</span> |
| <span class="source-line-no">784</span><span id="line-784"> private static void printUsage(final String errorMsg) {</span> |
| <span class="source-line-no">785</span><span id="line-785"> if (errorMsg != null && errorMsg.length() > 0) { /* print the error banner only when a non-empty message was supplied */</span> |
| <span class="source-line-no">786</span><span id="line-786"> System.err.println("ERROR: " + errorMsg);</span> |
| <span class="source-line-no">787</span><span id="line-787"> System.err.println();</span> |
| <span class="source-line-no">788</span><span id="line-788"> }</span> |
| <span class="source-line-no">789</span><span id="line-789"> System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");</span> |
| <span class="source-line-no">790</span><span id="line-790"> System.err.println();</span> |
| <span class="source-line-no">791</span><span id="line-791"> System.err.println("Options:");</span> |
| <span class="source-line-no">792</span><span id="line-792"></span> |
| <span class="source-line-no">793</span><span id="line-793"> System.err.println(" sourceuri Cluster connection uri of the source table");</span> |
| <span class="source-line-no">794</span><span id="line-794"> System.err.println(" (defaults to cluster in classpath's config)");</span> |
| <span class="source-line-no">795</span><span id="line-795"> System.err.println(" sourcezkcluster ZK cluster key of the source table");</span> |
| <span class="source-line-no">796</span><span id="line-796"> System.err.println(" (defaults to cluster in classpath's config)");</span> |
| <span class="source-line-no">797</span><span id="line-797"> System.err.println(" Do not take effect if sourceuri is specified");</span> |
| <span class="source-line-no">798</span><span id="line-798"> System.err.println(" Deprecated, please use sourceuri instead");</span> |
| <span class="source-line-no">799</span><span id="line-799"> System.err.println(" targeturi Cluster connection uri of the target table");</span> |
| <span class="source-line-no">800</span><span id="line-800"> System.err.println(" (defaults to cluster in classpath's config)");</span> |
| <span class="source-line-no">801</span><span id="line-801"> System.err.println(" targetzkcluster ZK cluster key of the target table");</span> |
| <span class="source-line-no">802</span><span id="line-802"> System.err.println(" (defaults to cluster in classpath's config)");</span> |
| <span class="source-line-no">803</span><span id="line-803"> System.err.println(" Do not take effect if targeturi is specified");</span> |
| <span class="source-line-no">804</span><span id="line-804"> System.err.println(" Deprecated, please use targeturi instead");</span> |
| <span class="source-line-no">805</span><span id="line-805"> System.err.println(" dryrun if true, output counters but no writes");</span> |
| <span class="source-line-no">806</span><span id="line-806"> System.err.println(" (defaults to false)");</span> |
| <span class="source-line-no">807</span><span id="line-807"> System.err.println(" doDeletes if false, does not perform deletes");</span> |
| <span class="source-line-no">808</span><span id="line-808"> System.err.println(" (defaults to true)");</span> |
| <span class="source-line-no">809</span><span id="line-809"> System.err.println(" doPuts if false, does not perform puts");</span> |
| <span class="source-line-no">810</span><span id="line-810"> System.err.println(" (defaults to true)");</span> |
| <span class="source-line-no">811</span><span id="line-811"> System.err.println(" ignoreTimestamps if true, ignores cells timestamps while comparing ");</span> |
| <span class="source-line-no">812</span><span id="line-812"> System.err.println(" cell values. Any missing cell on target then gets");</span> |
| <span class="source-line-no">813</span><span id="line-813"> System.err.println(" added with current time as timestamp ");</span> |
| <span class="source-line-no">814</span><span id="line-814"> System.err.println(" (defaults to false)");</span> |
| <span class="source-line-no">815</span><span id="line-815"> System.err.println();</span> |
| <span class="source-line-no">816</span><span id="line-816"> System.err.println("Args:");</span> |
| <span class="source-line-no">817</span><span id="line-817"> System.err.println(" sourcehashdir path to HashTable output dir for source table");</span> |
| <span class="source-line-no">818</span><span id="line-818"> System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");</span> |
| <span class="source-line-no">819</span><span id="line-819"> System.err.println(" sourcetable Name of the source table to sync from");</span> |
| <span class="source-line-no">820</span><span id="line-820"> System.err.println(" targettable Name of the target table to sync to");</span> |
| <span class="source-line-no">821</span><span id="line-821"> System.err.println();</span> |
| <span class="source-line-no">822</span><span id="line-822"> System.err.println("Examples:");</span> |
| <span class="source-line-no">823</span><span id="line-823"> System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");</span> |
| <span class="source-line-no">824</span><span id="line-824"> System.err.println(" to a local target cluster:");</span> |
| <span class="source-line-no">825</span><span id="line-825"> System.err.println(" $ hbase " + "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"</span> |
| <span class="source-line-no">826</span><span id="line-826"> + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"</span> |
| <span class="source-line-no">827</span><span id="line-827"> + " hdfs://nn:9000/hashes/tableA tableA tableA"); /* NOTE(review): this example still uses --sourcezkcluster, which the option text above marks as deprecated in favour of sourceuri */</span> |
| <span class="source-line-no">828</span><span id="line-828"> }</span> |
| <span class="source-line-no">829</span><span id="line-829"> /** Parses the command line: the last three arguments are sourcehashdir, sourcetable and targettable, and every argument before them must be a recognised --key=value option. Returns false (after printing usage) on too few arguments, a help flag, an unrecognised option or a parse exception; returns true otherwise. */</span> |
| <span class="source-line-no">830</span><span id="line-830"> private boolean doCommandLine(final String[] args) {</span> |
| <span class="source-line-no">831</span><span id="line-831"> if (args.length < NUM_ARGS) { /* not enough arguments to hold the three positional ones */</span> |
| <span class="source-line-no">832</span><span id="line-832"> printUsage(null);</span> |
| <span class="source-line-no">833</span><span id="line-833"> return false;</span> |
| <span class="source-line-no">834</span><span id="line-834"> }</span> |
| <span class="source-line-no">835</span><span id="line-835"> try {</span> |
| <span class="source-line-no">836</span><span id="line-836"> sourceHashDir = new Path(args[args.length - 3]); /* positional arguments are always taken from the last three slots */</span> |
| <span class="source-line-no">837</span><span id="line-837"> sourceTableName = args[args.length - 2];</span> |
| <span class="source-line-no">838</span><span id="line-838"> targetTableName = args[args.length - 1];</span> |
| <span class="source-line-no">839</span><span id="line-839"></span> |
| <span class="source-line-no">840</span><span id="line-840"> for (int i = 0; i < args.length - NUM_ARGS; i++) { /* everything before the positional arguments is treated as an option */</span> |
| <span class="source-line-no">841</span><span id="line-841"> String cmd = args[i];</span> |
| <span class="source-line-no">842</span><span id="line-842"> if (cmd.equals("-h") || cmd.startsWith("--h")) { /* any option starting with --h (e.g. --help) is treated as a help request */</span> |
| <span class="source-line-no">843</span><span id="line-843"> printUsage(null);</span> |
| <span class="source-line-no">844</span><span id="line-844"> return false;</span> |
| <span class="source-line-no">845</span><span id="line-845"> }</span> |
| <span class="source-line-no">846</span><span id="line-846"> final String sourceUriKey = "--sourceuri=";</span> |
| <span class="source-line-no">847</span><span id="line-847"> if (cmd.startsWith(sourceUriKey)) {</span> |
| <span class="source-line-no">848</span><span id="line-848"> sourceUri = new URI(cmd.substring(sourceUriKey.length()));</span> |
| <span class="source-line-no">849</span><span id="line-849"> continue;</span> |
| <span class="source-line-no">850</span><span id="line-850"> }</span> |
| <span class="source-line-no">851</span><span id="line-851"></span> |
| <span class="source-line-no">852</span><span id="line-852"> final String sourceZkClusterKey = "--sourcezkcluster=";</span> |
| <span class="source-line-no">853</span><span id="line-853"> if (cmd.startsWith(sourceZkClusterKey)) {</span> |
| <span class="source-line-no">854</span><span id="line-854"> sourceZkCluster = cmd.substring(sourceZkClusterKey.length());</span> |
| <span class="source-line-no">855</span><span id="line-855"> continue;</span> |
| <span class="source-line-no">856</span><span id="line-856"> }</span> |
| <span class="source-line-no">857</span><span id="line-857"></span> |
| <span class="source-line-no">858</span><span id="line-858"> final String targetUriKey = "--targeturi=";</span> |
| <span class="source-line-no">859</span><span id="line-859"> if (cmd.startsWith(targetUriKey)) {</span> |
| <span class="source-line-no">860</span><span id="line-860"> targetUri = new URI(cmd.substring(targetUriKey.length()));</span> |
| <span class="source-line-no">861</span><span id="line-861"> continue;</span> |
| <span class="source-line-no">862</span><span id="line-862"> }</span> |
| <span class="source-line-no">863</span><span id="line-863"></span> |
| <span class="source-line-no">864</span><span id="line-864"> final String targetZkClusterKey = "--targetzkcluster=";</span> |
| <span class="source-line-no">865</span><span id="line-865"> if (cmd.startsWith(targetZkClusterKey)) {</span> |
| <span class="source-line-no">866</span><span id="line-866"> targetZkCluster = cmd.substring(targetZkClusterKey.length());</span> |
| <span class="source-line-no">867</span><span id="line-867"> continue;</span> |
| <span class="source-line-no">868</span><span id="line-868"> }</span> |
| <span class="source-line-no">869</span><span id="line-869"></span> |
| <span class="source-line-no">870</span><span id="line-870"> final String dryRunKey = "--dryrun=";</span> |
| <span class="source-line-no">871</span><span id="line-871"> if (cmd.startsWith(dryRunKey)) {</span> |
| <span class="source-line-no">872</span><span id="line-872"> dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length())); /* Boolean.parseBoolean yields false for anything other than "true" (case-insensitive); the same applies to the boolean flags below */</span> |
| <span class="source-line-no">873</span><span id="line-873"> continue;</span> |
| <span class="source-line-no">874</span><span id="line-874"> }</span> |
| <span class="source-line-no">875</span><span id="line-875"></span> |
| <span class="source-line-no">876</span><span id="line-876"> final String doDeletesKey = "--doDeletes=";</span> |
| <span class="source-line-no">877</span><span id="line-877"> if (cmd.startsWith(doDeletesKey)) {</span> |
| <span class="source-line-no">878</span><span id="line-878"> doDeletes = Boolean.parseBoolean(cmd.substring(doDeletesKey.length()));</span> |
| <span class="source-line-no">879</span><span id="line-879"> continue;</span> |
| <span class="source-line-no">880</span><span id="line-880"> }</span> |
| <span class="source-line-no">881</span><span id="line-881"></span> |
| <span class="source-line-no">882</span><span id="line-882"> final String doPutsKey = "--doPuts=";</span> |
| <span class="source-line-no">883</span><span id="line-883"> if (cmd.startsWith(doPutsKey)) {</span> |
| <span class="source-line-no">884</span><span id="line-884"> doPuts = Boolean.parseBoolean(cmd.substring(doPutsKey.length()));</span> |
| <span class="source-line-no">885</span><span id="line-885"> continue;</span> |
| <span class="source-line-no">886</span><span id="line-886"> }</span> |
| <span class="source-line-no">887</span><span id="line-887"></span> |
| <span class="source-line-no">888</span><span id="line-888"> final String ignoreTimestampsKey = "--ignoreTimestamps=";</span> |
| <span class="source-line-no">889</span><span id="line-889"> if (cmd.startsWith(ignoreTimestampsKey)) {</span> |
| <span class="source-line-no">890</span><span id="line-890"> ignoreTimestamps = Boolean.parseBoolean(cmd.substring(ignoreTimestampsKey.length()));</span> |
| <span class="source-line-no">891</span><span id="line-891"> continue;</span> |
| <span class="source-line-no">892</span><span id="line-892"> }</span> |
| <span class="source-line-no">893</span><span id="line-893"></span> |
| <span class="source-line-no">894</span><span id="line-894"> printUsage("Invalid argument '" + cmd + "'"); /* reached only when no known option prefix matched */</span> |
| <span class="source-line-no">895</span><span id="line-895"> return false;</span> |
| <span class="source-line-no">896</span><span id="line-896"> }</span> |
| <span class="source-line-no">897</span><span id="line-897"></span> |
| <span class="source-line-no">898</span><span id="line-898"> } catch (Exception e) { /* presumably covers malformed sourceuri/targeturi values rejected by new URI(...) - confirm */</span> |
| <span class="source-line-no">899</span><span id="line-899"> LOG.error("Failed to parse commandLine arguments", e);</span> |
| <span class="source-line-no">900</span><span id="line-900"> printUsage("Can't start because " + e.getMessage());</span> |
| <span class="source-line-no">901</span><span id="line-901"> return false;</span> |
| <span class="source-line-no">902</span><span id="line-902"> }</span> |
| <span class="source-line-no">903</span><span id="line-903"> return true;</span> |
| <span class="source-line-no">904</span><span id="line-904"> }</span> |
| <span class="source-line-no">905</span><span id="line-905"></span> |
| <span class="source-line-no">906</span><span id="line-906"> /**</span> |
| <span class="source-line-no">907</span><span id="line-907"> * Main entry point. Runs SyncTable through ToolRunner with a configuration from HBaseConfiguration.create() and exits the JVM with the returned status code.</span> |
| <span class="source-line-no">908</span><span id="line-908"> */</span> |
| <span class="source-line-no">909</span><span id="line-909"> public static void main(String[] args) throws Exception {</span> |
| <span class="source-line-no">910</span><span id="line-910"> int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);</span> |
| <span class="source-line-no">911</span><span id="line-911"> System.exit(ret);</span> |
| <span class="source-line-no">912</span><span id="line-912"> }</span> |
| <span class="source-line-no">913</span><span id="line-913"> /** Parses the arguments left over by GenericOptionsParser, creates the job and waits for it to complete. Returns 0 on success and 1 if argument parsing or the job fails; the counters field is set only on success. */</span> |
| <span class="source-line-no">914</span><span id="line-914"> @Override</span> |
| <span class="source-line-no">915</span><span id="line-915"> public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">916</span><span id="line-916"> String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs(); /* only the arguments GenericOptionsParser leaves over are parsed below */</span> |
| <span class="source-line-no">917</span><span id="line-917"> if (!doCommandLine(otherArgs)) {</span> |
| <span class="source-line-no">918</span><span id="line-918"> return 1;</span> |
| <span class="source-line-no">919</span><span id="line-919"> }</span> |
| <span class="source-line-no">920</span><span id="line-920"></span> |
| <span class="source-line-no">921</span><span id="line-921"> Job job = createSubmittableJob(otherArgs);</span> |
| <span class="source-line-no">922</span><span id="line-922"> if (!job.waitForCompletion(true)) {</span> |
| <span class="source-line-no">923</span><span id="line-923"> LOG.info("Map-reduce job failed!"); /* NOTE(review): the failure is logged at info level */</span> |
| <span class="source-line-no">924</span><span id="line-924"> return 1;</span> |
| <span class="source-line-no">925</span><span id="line-925"> }</span> |
| <span class="source-line-no">926</span><span id="line-926"> counters = job.getCounters(); /* reached only when the job completed successfully */</span> |
| <span class="source-line-no">927</span><span id="line-927"> return 0;</span> |
| <span class="source-line-no">928</span><span id="line-928"> }</span> |
| <span class="source-line-no">929</span><span id="line-929"></span> |
| <span class="source-line-no">930</span><span id="line-930">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |