| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8"> |
| <!-- NOTE(review): charset assumes the page is written as UTF-8; confirm against the javadoc -docencoding setting. --> |
| <!-- Generated by javadoc (17) --> |
| <title>Source code: org.apache.hadoop.hbase.mapreduce.TestImportTsv</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.mapreduce, class: TestImportTsv"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <link rel="stylesheet" href="../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.mapreduce;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import static org.junit.Assert.assertEquals;</span> |
| <span class="source-line-no">021</span><span id="line-21">import static org.junit.Assert.assertFalse;</span> |
| <span class="source-line-no">022</span><span id="line-22">import static org.junit.Assert.assertTrue;</span> |
| <span class="source-line-no">023</span><span id="line-23"></span> |
| <span class="source-line-no">024</span><span id="line-24">import java.io.IOException;</span> |
| <span class="source-line-no">025</span><span id="line-25">import java.util.Arrays;</span> |
| <span class="source-line-no">026</span><span id="line-26">import java.util.HashMap;</span> |
| <span class="source-line-no">027</span><span id="line-27">import java.util.HashSet;</span> |
| <span class="source-line-no">028</span><span id="line-28">import java.util.Iterator;</span> |
| <span class="source-line-no">029</span><span id="line-29">import java.util.List;</span> |
| <span class="source-line-no">030</span><span id="line-30">import java.util.Map;</span> |
| <span class="source-line-no">031</span><span id="line-31">import java.util.Set;</span> |
| <span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.conf.Configurable;</span> |
| <span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.fs.FSDataOutputStream;</span> |
| <span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.fs.FileStatus;</span> |
| <span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.fs.FileSystem;</span> |
| <span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.fs.Path;</span> |
| <span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.Cell;</span> |
| <span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.CellUtil;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.HBaseClassTestRule;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.HBaseTestingUtil;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.HConstants;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.TableName;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.TableNotFoundException;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.client.Connection;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.client.ConnectionFactory;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.client.Result;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.client.ResultScanner;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.hbase.client.Scan;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.hbase.client.Table;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.hbase.io.hfile.CacheConfig;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.hbase.io.hfile.HFile;</span> |
| <span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.hbase.io.hfile.HFileScanner;</span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;</span> |
| <span class="source-line-no">055</span><span id="line-55">import org.apache.hadoop.hbase.testclassification.LargeTests;</span> |
| <span class="source-line-no">056</span><span id="line-56">import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;</span> |
| <span class="source-line-no">057</span><span id="line-57">import org.apache.hadoop.hbase.util.Bytes;</span> |
| <span class="source-line-no">058</span><span id="line-58">import org.apache.hadoop.io.Text;</span> |
| <span class="source-line-no">059</span><span id="line-59">import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;</span> |
| <span class="source-line-no">060</span><span id="line-60">import org.apache.hadoop.mapreduce.Job;</span> |
| <span class="source-line-no">061</span><span id="line-61">import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;</span> |
| <span class="source-line-no">062</span><span id="line-62">import org.apache.hadoop.util.Tool;</span> |
| <span class="source-line-no">063</span><span id="line-63">import org.apache.hadoop.util.ToolRunner;</span> |
| <span class="source-line-no">064</span><span id="line-64">import org.junit.AfterClass;</span> |
| <span class="source-line-no">065</span><span id="line-65">import org.junit.Before;</span> |
| <span class="source-line-no">066</span><span id="line-66">import org.junit.BeforeClass;</span> |
| <span class="source-line-no">067</span><span id="line-67">import org.junit.ClassRule;</span> |
| <span class="source-line-no">068</span><span id="line-68">import org.junit.Rule;</span> |
| <span class="source-line-no">069</span><span id="line-69">import org.junit.Test;</span> |
| <span class="source-line-no">070</span><span id="line-70">import org.junit.experimental.categories.Category;</span> |
| <span class="source-line-no">071</span><span id="line-71">import org.junit.rules.ExpectedException;</span> |
| <span class="source-line-no">072</span><span id="line-72">import org.slf4j.Logger;</span> |
| <span class="source-line-no">073</span><span id="line-73">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">074</span><span id="line-74"></span> |
| <span class="source-line-no">075</span><span id="line-75">@Category({ VerySlowMapReduceTests.class, LargeTests.class })</span> |
| <span class="source-line-no">076</span><span id="line-76">public class TestImportTsv implements Configurable {</span> |
| <span class="source-line-no">077</span><span id="line-77"></span> |
| <span class="source-line-no">078</span><span id="line-78"> @ClassRule</span> |
| <span class="source-line-no">079</span><span id="line-79"> public static final HBaseClassTestRule CLASS_RULE =</span> |
| <span class="source-line-no">080</span><span id="line-80"> HBaseClassTestRule.forClass(TestImportTsv.class);</span> |
| <span class="source-line-no">081</span><span id="line-81"></span> |
| <span class="source-line-no">082</span><span id="line-82"> private static final Logger LOG = LoggerFactory.getLogger(TestImportTsv.class);</span> |
| <span class="source-line-no">083</span><span id="line-83"> protected static final String NAME = TestImportTsv.class.getSimpleName();</span> |
| <span class="source-line-no">084</span><span id="line-84"> protected static HBaseTestingUtil util = new HBaseTestingUtil();</span> |
| <span class="source-line-no">085</span><span id="line-85"></span> |
| <span class="source-line-no">086</span><span id="line-86"> // Delete the tmp directory after running doMROnTableTest. Boolean. Default is true.</span> |
| <span class="source-line-no">087</span><span id="line-87"> protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";</span> |
| <span class="source-line-no">088</span><span id="line-88"></span> |
| <span class="source-line-no">089</span><span id="line-89"> /**</span> |
| <span class="source-line-no">090</span><span id="line-90"> * Force use of combiner in doMROnTableTest. Boolean. Default is true.</span> |
| <span class="source-line-no">091</span><span id="line-91"> */</span> |
| <span class="source-line-no">092</span><span id="line-92"> protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";</span> |
| <span class="source-line-no">093</span><span id="line-93"></span> |
| <span class="source-line-no">094</span><span id="line-94"> private final String FAMILY = "FAM";</span> |
| <span class="source-line-no">095</span><span id="line-95"> private TableName tn;</span> |
| <span class="source-line-no">096</span><span id="line-96"> private Map&lt;String, String&gt; args;</span> |
| <span class="source-line-no">097</span><span id="line-97"></span> |
| <span class="source-line-no">098</span><span id="line-98"> @Rule</span> |
| <span class="source-line-no">099</span><span id="line-99"> public ExpectedException exception = ExpectedException.none();</span> |
| <span class="source-line-no">100</span><span id="line-100"></span> |
| <span class="source-line-no">101</span><span id="line-101">  public Configuration getConf() {</span> |
| <span class="source-line-no">102</span><span id="line-102">    return util.getConfiguration(); // configuration held by the shared testing util</span> |
| <span class="source-line-no">103</span><span id="line-103">  }</span> |
| <span class="source-line-no">104</span><span id="line-104"></span> |
| <span class="source-line-no">105</span><span id="line-105">  public void setConf(Configuration conf) {</span> |
| <span class="source-line-no">106</span><span id="line-106">    throw new IllegalArgumentException("setConf not supported"); // always rejected</span> |
| <span class="source-line-no">107</span><span id="line-107">  }</span> |
| <span class="source-line-no">108</span><span id="line-108"></span> |
| <span class="source-line-no">109</span><span id="line-109">  @BeforeClass</span> |
| <span class="source-line-no">110</span><span id="line-110">  public static void provisionCluster() throws Exception {</span> |
| <span class="source-line-no">111</span><span id="line-111">    util.startMiniCluster(); // started once, before any test in this class runs</span> |
| <span class="source-line-no">112</span><span id="line-112">  }</span> |
| <span class="source-line-no">113</span><span id="line-113"></span> |
| <span class="source-line-no">114</span><span id="line-114">  @AfterClass</span> |
| <span class="source-line-no">115</span><span id="line-115">  public static void releaseCluster() throws Exception {</span> |
| <span class="source-line-no">116</span><span id="line-116">    util.shutdownMiniCluster(); // stopped once, after the last test in this class</span> |
| <span class="source-line-no">117</span><span id="line-117">  }</span> |
| <span class="source-line-no">118</span><span id="line-118"></span> |
| <span class="source-line-no">119</span><span id="line-119">  @Before</span> |
| <span class="source-line-no">120</span><span id="line-120">  public void setup() throws Exception {</span> |
| <span class="source-line-no">121</span><span id="line-121">    tn = TableName.valueOf("test-" + util.getRandomUUID()); // fresh table name per test</span> |
| <span class="source-line-no">122</span><span id="line-122">    args = new HashMap&lt;&gt;();</span> |
| <span class="source-line-no">123</span><span id="line-123">    // Per-test defaults; individual tests overwrite or extend these entries.</span> |
| <span class="source-line-no">124</span><span id="line-124">    args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A,FAM:B");</span> |
| <span class="source-line-no">125</span><span id="line-125">    args.put(ImportTsv.SEPARATOR_CONF_KEY, "\u001b");</span> |
| <span class="source-line-no">126</span><span id="line-126">  }</span> |
| <span class="source-line-no">127</span><span id="line-127"></span> |
| <span class="source-line-no">128</span><span id="line-128">  @Test</span> |
| <span class="source-line-no">129</span><span id="line-129">  public void testMROnTable() throws Exception {</span> |
| <span class="source-line-no">130</span><span id="line-130">    util.createTable(tn, FAMILY); // the table exists before the helper runs</span> |
| <span class="source-line-no">131</span><span id="line-131">    doMROnTableTest(null, 1);</span> |
| <span class="source-line-no">132</span><span id="line-132">    util.deleteTable(tn);</span> |
| <span class="source-line-no">133</span><span id="line-133">  }</span> |
| <span class="source-line-no">134</span><span id="line-134"></span> |
| <span class="source-line-no">135</span><span id="line-135">  @Test</span> |
| <span class="source-line-no">136</span><span id="line-136">  public void testMROnTableWithTimestamp() throws Exception {</span> |
| <span class="source-line-no">137</span><span id="line-137">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">138</span><span id="line-138">    args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");</span> |
| <span class="source-line-no">139</span><span id="line-139">    args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");</span> |
| <span class="source-line-no">140</span><span id="line-140">    // One comma-separated record whose fields line up with the column spec above.</span> |
| <span class="source-line-no">141</span><span id="line-141">    final String tsvRecord = "KEY,1234,VALUE1,VALUE2\n";</span> |
| <span class="source-line-no">142</span><span id="line-142">    doMROnTableTest(tsvRecord, 1);</span> |
| <span class="source-line-no">143</span><span id="line-143">    util.deleteTable(tn);</span> |
| <span class="source-line-no">144</span><span id="line-144">  }</span> |
| <span class="source-line-no">145</span><span id="line-145"></span> |
| <span class="source-line-no">146</span><span id="line-146">  @Test</span> |
| <span class="source-line-no">147</span><span id="line-147">  public void testMROnTableWithCustomMapper() throws Exception {</span> |
| <span class="source-line-no">148</span><span id="line-148">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">149</span><span id="line-149">    // Select the mapper by its fully qualified class name.</span> |
| <span class="source-line-no">150</span><span id="line-150">    final String customMapper = "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper";</span> |
| <span class="source-line-no">151</span><span id="line-151">    args.put(ImportTsv.MAPPER_CONF_KEY, customMapper);</span> |
| <span class="source-line-no">152</span><span id="line-152">    doMROnTableTest(null, 3);</span> |
| <span class="source-line-no">153</span><span id="line-153">    util.deleteTable(tn);</span> |
| <span class="source-line-no">154</span><span id="line-154">  }</span> |
| <span class="source-line-no">155</span><span id="line-155"></span> |
| <span class="source-line-no">156</span><span id="line-156">  @Test</span> |
| <span class="source-line-no">157</span><span id="line-157">  public void testBulkOutputWithoutAnExistingTable() throws Exception {</span> |
| <span class="source-line-no">158</span><span id="line-158">    // No createTable call here: only a bulk-output directory is configured.</span> |
| <span class="source-line-no">159</span><span id="line-159">    Path bulkOutputDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">160</span><span id="line-160">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputDir.toString());</span> |
| <span class="source-line-no">161</span><span id="line-161"></span> |
| <span class="source-line-no">162</span><span id="line-162">    doMROnTableTest(null, 3);</span> |
| <span class="source-line-no">163</span><span id="line-163">    util.deleteTable(tn);</span> |
| <span class="source-line-no">164</span><span id="line-164">  }</span> |
| <span class="source-line-no">165</span><span id="line-165"></span> |
| <span class="source-line-no">166</span><span id="line-166">  @Test</span> |
| <span class="source-line-no">167</span><span id="line-167">  public void testBulkOutputWithAnExistingTable() throws Exception {</span> |
| <span class="source-line-no">168</span><span id="line-168">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">169</span><span id="line-169"></span> |
| <span class="source-line-no">170</span><span id="line-170">    // The table already exists; point bulk output at an "hfiles" dir under the test dir.</span> |
| <span class="source-line-no">171</span><span id="line-171">    Path bulkOutputDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">172</span><span id="line-172">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputDir.toString());</span> |
| <span class="source-line-no">173</span><span id="line-173"></span> |
| <span class="source-line-no">174</span><span id="line-174">    doMROnTableTest(null, 3);</span> |
| <span class="source-line-no">175</span><span id="line-175">    util.deleteTable(tn);</span> |
| <span class="source-line-no">176</span><span id="line-176">  }</span> |
| <span class="source-line-no">177</span><span id="line-177"></span> |
| <span class="source-line-no">178</span><span id="line-178">  @Test</span> |
| <span class="source-line-no">179</span><span id="line-179">  public void testBulkOutputWithAnExistingTableNoStrictTrue() throws Exception {</span> |
| <span class="source-line-no">180</span><span id="line-180">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">181</span><span id="line-181">    // Bulk output into an "hfiles" dir, with the no-strict column family flag set to true.</span> |
| <span class="source-line-no">182</span><span id="line-182">    Path bulkOutputDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">183</span><span id="line-183">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputDir.toString());</span> |
| <span class="source-line-no">184</span><span id="line-184">    args.put(ImportTsv.NO_STRICT_COL_FAMILY, "true");</span> |
| <span class="source-line-no">185</span><span id="line-185"></span> |
| <span class="source-line-no">186</span><span id="line-186">    doMROnTableTest(null, 3);</span> |
| <span class="source-line-no">187</span><span id="line-187">    util.deleteTable(tn);</span> |
| <span class="source-line-no">188</span><span id="line-188">  }</span> |
| <span class="source-line-no">189</span><span id="line-189"></span> |
| <span class="source-line-no">190</span><span id="line-190">  @Test</span> |
| <span class="source-line-no">191</span><span id="line-191">  public void testJobConfigurationsWithTsvImporterTextMapper() throws Exception {</span> |
| <span class="source-line-no">192</span><span id="line-192">    Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">193</span><span id="line-193">    String inputFile = "InputFile1.csv";</span> |
| <span class="source-line-no">194</span><span id="line-194">    // Command line for the tool; named argv so it does not shadow the args field.</span> |
| <span class="source-line-no">195</span><span id="line-195">    String[] argv = new String[] {</span> |
| <span class="source-line-no">196</span><span id="line-196">      "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",</span> |
| <span class="source-line-no">197</span><span id="line-197">      "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",</span> |
| <span class="source-line-no">198</span><span id="line-198">      "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",</span> |
| <span class="source-line-no">199</span><span id="line-199">      "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(), tn.getNameAsString(),</span> |
| <span class="source-line-no">200</span><span id="line-200">      inputFile };</span> |
| <span class="source-line-no">201</span><span id="line-201">    assertEquals("running test job configuration failed.", 0,</span> |
| <span class="source-line-no">202</span><span id="line-202">      ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {</span> |
| <span class="source-line-no">203</span><span id="line-203">        @Override</span> |
| <span class="source-line-no">204</span><span id="line-204">        public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">205</span><span id="line-205">          Job job = createSubmittableJob(getConf(), args);</span> |
| <span class="source-line-no">206</span><span id="line-206">          assertEquals(TsvImporterTextMapper.class, job.getMapperClass());</span> |
| <span class="source-line-no">207</span><span id="line-207">          assertEquals(TextSortReducer.class, job.getReducerClass());</span> |
| <span class="source-line-no">208</span><span id="line-208">          assertEquals(Text.class, job.getMapOutputValueClass());</span> |
| <span class="source-line-no">209</span><span id="line-209">          return 0; // only the job wiring is checked; the job is not submitted here</span> |
| <span class="source-line-no">210</span><span id="line-210">        }</span> |
| <span class="source-line-no">211</span><span id="line-211">      }, argv));</span> |
| <span class="source-line-no">212</span><span id="line-212">    // Delete table created by createSubmittableJob.</span> |
| <span class="source-line-no">213</span><span id="line-213">    util.deleteTable(tn);</span> |
| <span class="source-line-no">214</span><span id="line-214">  }</span> |
| <span class="source-line-no">215</span><span id="line-215"></span> |
| <span class="source-line-no">216</span><span id="line-216">  @Test</span> |
| <span class="source-line-no">217</span><span id="line-217">  public void testBulkOutputWithTsvImporterTextMapper() throws Exception {</span> |
| <span class="source-line-no">218</span><span id="line-218">    Path hfileDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">219</span><span id="line-219">    args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");</span> |
| <span class="source-line-no">220</span><span id="line-220">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfileDir.toString());</span> |
| <span class="source-line-no">221</span><span id="line-221">    // A single input record: a key followed by two values.</span> |
| <span class="source-line-no">222</span><span id="line-222">    doMROnTableTest("KEY\u001bVALUE4\u001bVALUE8\n", 4);</span> |
| <span class="source-line-no">223</span><span id="line-223">    util.deleteTable(tn);</span> |
| <span class="source-line-no">224</span><span id="line-224">  }</span> |
| <span class="source-line-no">225</span><span id="line-225"></span> |
| <span class="source-line-no">226</span><span id="line-226">  @Test</span> |
| <span class="source-line-no">227</span><span id="line-227">  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {</span> |
| <span class="source-line-no">228</span><span id="line-228">    String[] argv = new String[] { tn.getNameAsString(), "/inputFile" };</span> |
| <span class="source-line-no">229</span><span id="line-229">    // This conf is handed to the tool below so the bulk-output and create-table=no settings apply.</span> |
| <span class="source-line-no">230</span><span id="line-230">    Configuration conf = new Configuration(util.getConfiguration());</span> |
| <span class="source-line-no">231</span><span id="line-231">    conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");</span> |
| <span class="source-line-no">232</span><span id="line-232">    conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");</span> |
| <span class="source-line-no">233</span><span id="line-233">    conf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");</span> |
| <span class="source-line-no">234</span><span id="line-234">    exception.expect(TableNotFoundException.class);</span> |
| <span class="source-line-no">235</span><span id="line-235">    assertEquals("running test job configuration failed.", 0,</span> |
| <span class="source-line-no">236</span><span id="line-236">      ToolRunner.run(conf, new ImportTsv() {</span> |
| <span class="source-line-no">237</span><span id="line-237">        @Override</span> |
| <span class="source-line-no">238</span><span id="line-238">        public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">239</span><span id="line-239">          createSubmittableJob(getConf(), args);</span> |
| <span class="source-line-no">240</span><span id="line-240">          return 0;</span> |
| <span class="source-line-no">241</span><span id="line-241">        }</span> |
| <span class="source-line-no">242</span><span id="line-242">      }, argv));</span> |
| <span class="source-line-no">243</span><span id="line-243">  }</span> |
| <span class="source-line-no">244</span><span id="line-244"></span> |
| <span class="source-line-no">245</span><span id="line-245">  @Test</span> |
| <span class="source-line-no">246</span><span id="line-246">  public void testMRNoMatchedColumnFamily() throws Exception {</span> |
| <span class="source-line-no">247</span><span id="line-247">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">248</span><span id="line-248">    try { // the expected exception skips everything after the run, so clean up in finally</span> |
| <span class="source-line-no">249</span><span id="line-249">      String columns = "HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C";</span> |
| <span class="source-line-no">250</span><span id="line-250">      String[] argv = { "-D" + ImportTsv.COLUMNS_CONF_KEY + "=" + columns,</span> |
| <span class="source-line-no">251</span><span id="line-251">        tn.getNameAsString(), "/inputFile" };</span> |
| <span class="source-line-no">252</span><span id="line-252">      exception.expect(NoSuchColumnFamilyException.class);</span> |
| <span class="source-line-no">253</span><span id="line-253">      assertEquals("running test job configuration failed.", 0,</span> |
| <span class="source-line-no">254</span><span id="line-254">        ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {</span> |
| <span class="source-line-no">255</span><span id="line-255">          @Override</span> |
| <span class="source-line-no">256</span><span id="line-256">          public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">257</span><span id="line-257">            createSubmittableJob(getConf(), args);</span> |
| <span class="source-line-no">258</span><span id="line-258">            return 0;</span> |
| <span class="source-line-no">259</span><span id="line-259">          }</span> |
| <span class="source-line-no">260</span><span id="line-260">        }, argv));</span> |
| <span class="source-line-no">261</span><span id="line-261">    } finally {</span> |
| <span class="source-line-no">262</span><span id="line-262">      util.deleteTable(tn);</span> |
| <span class="source-line-no">263</span><span id="line-263">    }</span> |
| <span class="source-line-no">264</span><span id="line-264">  }</span> |
| <span class="source-line-no">265</span><span id="line-265"></span> |
| <span class="source-line-no">266</span><span id="line-266">  @Test</span> |
| <span class="source-line-no">267</span><span id="line-267">  public void testMRWithoutAnExistingTable() throws Exception {</span> |
| <span class="source-line-no">268</span><span id="line-268">    String[] argv = { tn.getNameAsString(), "/inputFile" };</span> |
| <span class="source-line-no">269</span><span id="line-269">    Tool jobCreationOnly = new ImportTsv() {</span> |
| <span class="source-line-no">270</span><span id="line-270">      @Override</span> |
| <span class="source-line-no">271</span><span id="line-271">      public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">272</span><span id="line-272">        createSubmittableJob(getConf(), args);</span> |
| <span class="source-line-no">273</span><span id="line-273">        return 0;</span> |
| <span class="source-line-no">274</span><span id="line-274">      }</span> |
| <span class="source-line-no">275</span><span id="line-275">    };</span> |
| <span class="source-line-no">276</span><span id="line-276">    exception.expect(TableNotFoundException.class);</span> |
| <span class="source-line-no">277</span><span id="line-277">    int exitCode = ToolRunner.run(new Configuration(util.getConfiguration()), jobCreationOnly, argv);</span> |
| <span class="source-line-no">278</span><span id="line-278">    assertEquals("running test job configuration failed.", 0, exitCode);</span> |
| <span class="source-line-no">279</span><span id="line-279">  }</span> |
| <span class="source-line-no">280</span><span id="line-280"></span> |
| <span class="source-line-no">281</span><span id="line-281">  @Test</span> |
| <span class="source-line-no">282</span><span id="line-282">  public void testJobConfigurationsWithDryMode() throws Exception {</span> |
| <span class="source-line-no">283</span><span id="line-283">    Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">284</span><span id="line-284">    String inputFile = "InputFile1.csv";</span> |
| <span class="source-line-no">285</span><span id="line-285">    // Command line for the tool: bulk output requested with the dry-run flag set to true.</span> |
| <span class="source-line-no">286</span><span id="line-286">    String[] argsArray =</span> |
| <span class="source-line-no">287</span><span id="line-287">      new String[] { "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",</span> |
| <span class="source-line-no">288</span><span id="line-288">        "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",</span> |
| <span class="source-line-no">289</span><span id="line-289">        "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),</span> |
| <span class="source-line-no">290</span><span id="line-290">        "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true", tn.getNameAsString(), inputFile };</span> |
| <span class="source-line-no">291</span><span id="line-291">    assertEquals("running test job configuration failed.", 0,</span> |
| <span class="source-line-no">292</span><span id="line-292">      ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {</span> |
| <span class="source-line-no">293</span><span id="line-293">        @Override</span> |
| <span class="source-line-no">294</span><span id="line-294">        public int run(String[] args) throws Exception {</span> |
| <span class="source-line-no">295</span><span id="line-295">          Job job = createSubmittableJob(getConf(), args);</span> |
| <span class="source-line-no">296</span><span id="line-296">          assertEquals(NullOutputFormat.class, job.getOutputFormatClass());</span> |
| <span class="source-line-no">297</span><span id="line-297">          return 0; // only the job wiring is checked; the job is not submitted here</span> |
| <span class="source-line-no">298</span><span id="line-298">        }</span> |
| <span class="source-line-no">299</span><span id="line-299">      }, argsArray));</span> |
| <span class="source-line-no">300</span><span id="line-300">    // Delete table created by createSubmittableJob.</span> |
| <span class="source-line-no">301</span><span id="line-301">    util.deleteTable(tn);</span> |
| <span class="source-line-no">302</span><span id="line-302">  }</span> |
| <span class="source-line-no">303</span><span id="line-303"></span> |
| <span class="source-line-no">304</span><span id="line-304">  @Test</span> |
| <span class="source-line-no">305</span><span id="line-305">  public void testDryModeWithoutBulkOutputAndTableExists() throws Exception {</span> |
| <span class="source-line-no">306</span><span id="line-306">    args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");</span> |
| <span class="source-line-no">307</span><span id="line-307">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">308</span><span id="line-308">    doMROnTableTest(null, 1);</span> |
| <span class="source-line-no">309</span><span id="line-309">    // The table must survive a dry run: deleting it here throws</span> |
| <span class="source-line-no">310</span><span id="line-310">    // TableNotFoundException if the import removed it.</span> |
| <span class="source-line-no">311</span><span id="line-311">    util.deleteTable(tn);</span> |
| <span class="source-line-no">312</span><span id="line-312">  }</span> |
| <span class="source-line-no">313</span><span id="line-313"></span> |
| <span class="source-line-no">314</span><span id="line-314">  /**</span> |
| <span class="source-line-no">315</span><span id="line-315">   * A dry run without bulk output must fail like a normal run when the table is missing.</span> |
| <span class="source-line-no">316</span><span id="line-316">   */</span> |
| <span class="source-line-no">317</span><span id="line-317">  @Test</span> |
| <span class="source-line-no">318</span><span id="line-318">  public void testDryModeWithoutBulkOutputAndTableDoesNotExists() throws Exception {</span> |
| <span class="source-line-no">319</span><span id="line-319">    exception.expect(TableNotFoundException.class);</span> |
| <span class="source-line-no">320</span><span id="line-320">    args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");</span> |
| <span class="source-line-no">321</span><span id="line-321">    doMROnTableTest(null, 1);</span> |
| <span class="source-line-no">322</span><span id="line-322">  }</span> |
| <span class="source-line-no">323</span><span id="line-323"></span> |
| <span class="source-line-no">324</span><span id="line-324">  @Test</span> |
| <span class="source-line-no">325</span><span id="line-325">  public void testDryModeWithBulkOutputAndTableExists() throws Exception {</span> |
| <span class="source-line-no">326</span><span id="line-326">    // Bulk-output dry run against a table that already exists.</span> |
| <span class="source-line-no">327</span><span id="line-327">    Path bulkDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">328</span><span id="line-328">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkDir.toString());</span> |
| <span class="source-line-no">329</span><span id="line-329">    args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");</span> |
| <span class="source-line-no">330</span><span id="line-330">    util.createTable(tn, FAMILY);</span> |
| <span class="source-line-no">331</span><span id="line-331">    doMROnTableTest(null, 1);</span> |
| <span class="source-line-no">332</span><span id="line-332">    // The table must survive a dry run: deleting it here throws</span> |
| <span class="source-line-no">333</span><span id="line-333">    // TableNotFoundException if the import removed it.</span> |
| <span class="source-line-no">334</span><span id="line-334">    util.deleteTable(tn);</span> |
| <span class="source-line-no">335</span><span id="line-335">  }</span> |
| <span class="source-line-no">336</span><span id="line-336"></span> |
| <span class="source-line-no">337</span><span id="line-337">  /**</span> |
| <span class="source-line-no">338</span><span id="line-338">   * In bulk mode with create.table set to no, importing into a missing table must fail with</span> |
| <span class="source-line-no">339</span><span id="line-339">   * TableNotFoundException, even in a dry run.</span> |
| <span class="source-line-no">340</span><span id="line-340">   */</span> |
| <span class="source-line-no">341</span><span id="line-341">  @Test</span> |
| <span class="source-line-no">342</span><span id="line-342">  public void testDryModeWithBulkOutputAndTableDoesNotExistsCreateTableSetToNo() throws Exception {</span> |
| <span class="source-line-no">343</span><span id="line-343">    exception.expect(TableNotFoundException.class);</span> |
| <span class="source-line-no">344</span><span id="line-344">    // Bulk-output dry run with table creation disabled.</span> |
| <span class="source-line-no">345</span><span id="line-345">    Path bulkDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">346</span><span id="line-346">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkDir.toString());</span> |
| <span class="source-line-no">347</span><span id="line-347">    args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");</span> |
| <span class="source-line-no">348</span><span id="line-348">    args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "no");</span> |
| <span class="source-line-no">349</span><span id="line-349">    doMROnTableTest(null, 1);</span> |
| <span class="source-line-no">350</span><span id="line-350">  }</span> |
| <span class="source-line-no">351</span><span id="line-351"></span> |
| <span class="source-line-no">352</span><span id="line-352">  @Test</span> |
| <span class="source-line-no">353</span><span id="line-353">  public void testDryModeWithBulkModeAndTableDoesNotExistsCreateTableSetToYes() throws Exception {</span> |
| <span class="source-line-no">354</span><span id="line-354">    // Bulk-output dry run that is allowed to create the missing table.</span> |
| <span class="source-line-no">355</span><span id="line-355">    Path bulkDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">356</span><span id="line-356">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkDir.toString());</span> |
| <span class="source-line-no">357</span><span id="line-357">    args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");</span> |
| <span class="source-line-no">358</span><span id="line-358">    args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "yes");</span> |
| <span class="source-line-no">359</span><span id="line-359">    doMROnTableTest(null, 1);</span> |
| <span class="source-line-no">360</span><span id="line-360">    // The temporary table must be gone again, so deleting it is expected to fail.</span> |
| <span class="source-line-no">361</span><span id="line-361">    exception.expect(TableNotFoundException.class);</span> |
| <span class="source-line-no">362</span><span id="line-362">    util.deleteTable(tn);</span> |
| <span class="source-line-no">363</span><span id="line-363">  }</span> |
| <span class="source-line-no">364</span><span id="line-364"></span> |
| <span class="source-line-no">365</span><span id="line-365">  /**</span> |
| <span class="source-line-no">366</span><span id="line-366">   * Rows with invalid data must be skipped while the remaining valid rows are still imported.</span> |
| <span class="source-line-no">367</span><span id="line-367">   */</span> |
| <span class="source-line-no">368</span><span id="line-368">  @Test</span> |
| <span class="source-line-no">369</span><span id="line-369">  public void testTsvImporterTextMapperWithInvalidData() throws Exception {</span> |
| <span class="source-line-no">370</span><span id="line-370">    // 3 input rows: 2 are valid and 1 is invalid because it lacks the timestamp column.</span> |
| <span class="source-line-no">371</span><span id="line-371">    String data = "KEY,1234,VALUE1,VALUE2\nKEY\nKEY,1235,VALUE1,VALUE2\n";</span> |
| <span class="source-line-no">372</span><span id="line-372">    Path hfileDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">373</span><span id="line-373">    args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");</span> |
| <span class="source-line-no">374</span><span id="line-374">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfileDir.toString());</span> |
| <span class="source-line-no">375</span><span id="line-375">    args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");</span> |
| <span class="source-line-no">376</span><span id="line-376">    args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");</span> |
| <span class="source-line-no">377</span><span id="line-377">    doMROnTableTest(util, tn, FAMILY, data, args, 1, 4);</span> |
| <span class="source-line-no">378</span><span id="line-378">    util.deleteTable(tn);</span> |
| <span class="source-line-no">379</span><span id="line-379">  }</span> |
| <span class="source-line-no">380</span><span id="line-380"></span> |
| <span class="source-line-no">381</span><span id="line-381">  @Test</span> |
| <span class="source-line-no">382</span><span id="line-382">  public void testSkipEmptyColumns() throws Exception {</span> |
| <span class="source-line-no">383</span><span id="line-383">    // 2 valid input rows; only 3 of their 4 column values are non-empty.</span> |
| <span class="source-line-no">384</span><span id="line-384">    String data = "KEY,1234,VALUE1,VALUE2\nKEY,1235,,VALUE2\n";</span> |
| <span class="source-line-no">385</span><span id="line-385">    Path hfileDir = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");</span> |
| <span class="source-line-no">386</span><span id="line-386">    args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfileDir.toString());</span> |
| <span class="source-line-no">387</span><span id="line-387">    args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");</span> |
| <span class="source-line-no">388</span><span id="line-388">    args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");</span> |
| <span class="source-line-no">389</span><span id="line-389">    args.put(ImportTsv.SKIP_EMPTY_COLUMNS, "true");</span> |
| <span class="source-line-no">390</span><span id="line-390">    doMROnTableTest(util, tn, FAMILY, data, args, 1, 3);</span> |
| <span class="source-line-no">391</span><span id="line-391">    util.deleteTable(tn);</span> |
| <span class="source-line-no">392</span><span id="line-392">  }</span> |
| <span class="source-line-no">393</span><span id="line-393"></span> |
| <span class="source-line-no">394</span><span id="line-394"> private Tool doMROnTableTest(String data, int valueMultiplier) throws Exception {</span> |
| <span class="source-line-no">395</span><span id="line-395"> return doMROnTableTest(util, tn, FAMILY, data, args, valueMultiplier, /* no KV count check */ -1);</span> |
| <span class="source-line-no">396</span><span id="line-396"> }</span> |
| <span class="source-line-no">397</span><span id="line-397"></span> |
| <span class="source-line-no">398</span><span id="line-398"> protected static Tool doMROnTableTest(HBaseTestingUtil util, TableName table, String family,</span> |
| <span class="source-line-no">399</span><span id="line-399"> String data, Map<String, String> args) throws Exception {</span> |
| <span class="source-line-no">400</span><span id="line-400"> return doMROnTableTest(util, table, family, data, args, /* valueMultiplier */ 1, /* no KV count check */ -1);</span> |
| <span class="source-line-no">401</span><span id="line-401"> }</span> |
| <span class="source-line-no">402</span><span id="line-402"></span> |
| <span class="source-line-no">403</span><span id="line-403">  /**</span> |
| <span class="source-line-no">404</span><span id="line-404">   * Run an ImportTsv job and perform basic validation on the results. This method is static to</span> |
| <span class="source-line-no">405</span><span id="line-405">   * ensure non-reliance on the instance's util/conf facilities.</span> |
| <span class="source-line-no">406</span><span id="line-406">   * @param util            testing utility providing the base configuration and test directories</span> |
| <span class="source-line-no">407</span><span id="line-407">   * @param table           table to import into; also names the per-test data directory</span> |
| <span class="source-line-no">408</span><span id="line-408">   * @param family          column family expected in the imported data</span> |
| <span class="source-line-no">409</span><span id="line-409">   * @param data            input file content; {@code null} selects a default single-row input</span> |
| <span class="source-line-no">410</span><span id="line-410">   * @param args            Any arguments to pass BEFORE inputFile path is appended.</span> |
| <span class="source-line-no">411</span><span id="line-411">   * @param valueMultiplier multiplier for the expected value suffixes in table validation</span> |
| <span class="source-line-no">412</span><span id="line-412">   * @param expectedKVCount expected KV total in the generated hfiles; -1 skips the count check</span> |
| <span class="source-line-no">413</span><span id="line-413">   * @return The Tool instance used to run the test.</span> |
| <span class="source-line-no">414</span><span id="line-414">   */</span> |
| <span class="source-line-no">415</span><span id="line-415">  protected static Tool doMROnTableTest(HBaseTestingUtil util, TableName table, String family,</span> |
| <span class="source-line-no">416</span><span id="line-416">    String data, Map&lt;String, String&gt; args, int valueMultiplier, int expectedKVCount)</span> |
| <span class="source-line-no">417</span><span id="line-417">    throws Exception {</span> |
| <span class="source-line-no">418</span><span id="line-418">    Configuration conf = new Configuration(util.getConfiguration());</span> |
| <span class="source-line-no">419</span><span id="line-419">    // populate input file; try-with-resources closes the stream even if the write fails</span> |
| <span class="source-line-no">420</span><span id="line-420">    FileSystem fs = FileSystem.get(conf);</span> |
| <span class="source-line-no">421</span><span id="line-421">    Path inputPath =</span> |
| <span class="source-line-no">422</span><span id="line-422">      fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));</span> |
| <span class="source-line-no">423</span><span id="line-423">    if (data == null) {</span> |
| <span class="source-line-no">424</span><span id="line-424">      data = "KEY\u001bVALUE1\u001bVALUE2\n";</span> |
| <span class="source-line-no">425</span><span id="line-425">    }</span> |
| <span class="source-line-no">426</span><span id="line-426">    try (FSDataOutputStream op = fs.create(inputPath, true)) {</span> |
| <span class="source-line-no">427</span><span id="line-427">      op.write(Bytes.toBytes(data));</span> |
| <span class="source-line-no">428</span><span id="line-428">    }</span> |
| <span class="source-line-no">429</span><span id="line-429">    LOG.debug(String.format("Wrote test data to file: %s", inputPath));</span> |
| <span class="source-line-no">430</span><span id="line-430"></span> |
| <span class="source-line-no">431</span><span id="line-431">    if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {</span> |
| <span class="source-line-no">432</span><span id="line-432">      LOG.debug("Forcing combiner.");</span> |
| <span class="source-line-no">433</span><span id="line-433">      conf.setInt("mapreduce.map.combine.minspills", 1);</span> |
| <span class="source-line-no">434</span><span id="line-434">    }</span> |
| <span class="source-line-no">435</span><span id="line-435"></span> |
| <span class="source-line-no">436</span><span id="line-436">    // Build args array: one -Dkey=value per entry, then the table name and the input path.</span> |
| <span class="source-line-no">437</span><span id="line-437">    String[] argsArray = new String[args.size() + 2];</span> |
| <span class="source-line-no">438</span><span id="line-438">    int i = 0;</span> |
| <span class="source-line-no">439</span><span id="line-439">    for (Map.Entry&lt;String, String&gt; pair : args.entrySet()) {</span> |
| <span class="source-line-no">440</span><span id="line-440">      argsArray[i++] = "-D" + pair.getKey() + "=" + pair.getValue();</span> |
| <span class="source-line-no">441</span><span id="line-441">    }</span> |
| <span class="source-line-no">442</span><span id="line-442">    argsArray[i] = table.getNameAsString();</span> |
| <span class="source-line-no">443</span><span id="line-443">    argsArray[i + 1] = inputPath.toString();</span> |
| <span class="source-line-no">444</span><span id="line-444">    // run the import</span> |
| <span class="source-line-no">445</span><span id="line-445">    Tool tool = new ImportTsv();</span> |
| <span class="source-line-no">446</span><span id="line-446">    LOG.debug("Running ImportTsv with arguments: " + Arrays.toString(argsArray));</span> |
| <span class="source-line-no">447</span><span id="line-447">    assertEquals(0, ToolRunner.run(conf, tool, argsArray));</span> |
| <span class="source-line-no">448</span><span id="line-448"></span> |
| <span class="source-line-no">449</span><span id="line-449">    // Perform basic validation. If the input args did not include ImportTsv.BULK_OUTPUT_CONF_KEY</span> |
| <span class="source-line-no">450</span><span id="line-450">    // then validate data in the table. Otherwise, validate presence of hfiles.</span> |
| <span class="source-line-no">451</span><span id="line-451">    boolean isDryRun = "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));</span> |
| <span class="source-line-no">452</span><span id="line-452">    if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {</span> |
| <span class="source-line-no">453</span><span id="line-453">      String bulkOutput = args.get(ImportTsv.BULK_OUTPUT_CONF_KEY);</span> |
| <span class="source-line-no">454</span><span id="line-454">      if (isDryRun) {</span> |
| <span class="source-line-no">455</span><span id="line-455">        // Check the configured output directory itself, not a path named after the conf key.</span> |
| <span class="source-line-no">456</span><span id="line-456">        assertFalse(String.format("Dry run mode, %s should not have been created.", bulkOutput),</span> |
| <span class="source-line-no">457</span><span id="line-457">          fs.exists(new Path(bulkOutput)));</span> |
| <span class="source-line-no">458</span><span id="line-458">      } else {</span> |
| <span class="source-line-no">459</span><span id="line-459">        validateHFiles(fs, bulkOutput, family, expectedKVCount);</span> |
| <span class="source-line-no">460</span><span id="line-460">      }</span> |
| <span class="source-line-no">461</span><span id="line-461">    } else {</span> |
| <span class="source-line-no">462</span><span id="line-462">      validateTable(conf, table, family, valueMultiplier, isDryRun);</span> |
| <span class="source-line-no">463</span><span id="line-463">    }</span> |
| <span class="source-line-no">464</span><span id="line-464"></span> |
| <span class="source-line-no">465</span><span id="line-465">    if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {</span> |
| <span class="source-line-no">466</span><span id="line-466">      LOG.debug("Deleting test subdirectory");</span> |
| <span class="source-line-no">467</span><span id="line-467">      util.cleanupDataTestDirOnTestFS(table.getNameAsString());</span> |
| <span class="source-line-no">468</span><span id="line-468">    }</span> |
| <span class="source-line-no">469</span><span id="line-469">    return tool;</span> |
| <span class="source-line-no">470</span><span id="line-470">  }</span> |
| <span class="source-line-no">471</span><span id="line-471"></span> |
| <span class="source-line-no">472</span><span id="line-472">  /**</span> |
| <span class="source-line-no">473</span><span id="line-473">   * Confirm ImportTsv via data in online table. A dry run must leave the family empty; otherwise</span> |
| <span class="source-line-no">474</span><span id="line-474">   * exactly one row "KEY" holding VALUE{m} and VALUE{2m} (m = valueMultiplier) is expected.</span> |
| <span class="source-line-no">475</span><span id="line-475">   * @param valueMultiplier multiplier applied to the numeric suffix of the expected values</span> |
| <span class="source-line-no">476</span><span id="line-476">   * @param isDryRun        whether the import ran in dry-run mode, i.e. no rows are expected</span> |
| <span class="source-line-no">477</span><span id="line-477">   */</span> |
| <span class="source-line-no">478</span><span id="line-478">  private static void validateTable(Configuration conf, TableName tableName, String family,</span> |
| <span class="source-line-no">479</span><span id="line-479">    int valueMultiplier, boolean isDryRun) throws IOException {</span> |
| <span class="source-line-no">480</span><span id="line-480">    LOG.debug("Validating table.");</span> |
| <span class="source-line-no">481</span><span id="line-481">    boolean verified = false;</span> |
| <span class="source-line-no">482</span><span id="line-482">    long pause = conf.getLong("hbase.client.pause", 5 * 1000);</span> |
| <span class="source-line-no">483</span><span id="line-483">    int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);</span> |
| <span class="source-line-no">484</span><span id="line-484">    // try-with-resources: a failed assertion must not leak the connection, table or scanner.</span> |
| <span class="source-line-no">485</span><span id="line-485">    try (Connection connection = ConnectionFactory.createConnection(conf);</span> |
| <span class="source-line-no">486</span><span id="line-486">      Table table = connection.getTable(tableName)) {</span> |
| <span class="source-line-no">487</span><span id="line-487">      for (int i = 0; i &lt; numRetries; i++) {</span> |
| <span class="source-line-no">488</span><span id="line-488">        Scan scan = new Scan();</span> |
| <span class="source-line-no">489</span><span id="line-489">        // Scan entire family.</span> |
| <span class="source-line-no">490</span><span id="line-490">        scan.addFamily(Bytes.toBytes(family));</span> |
| <span class="source-line-no">491</span><span id="line-491">        try (ResultScanner resScanner = table.getScanner(scan)) {</span> |
| <span class="source-line-no">492</span><span id="line-492">          int numRows = 0;</span> |
| <span class="source-line-no">493</span><span id="line-493">          for (Result res : resScanner) {</span> |
| <span class="source-line-no">494</span><span id="line-494">            numRows++;</span> |
| <span class="source-line-no">495</span><span id="line-495">            assertEquals(2, res.size());</span> |
| <span class="source-line-no">496</span><span id="line-496">            List&lt;Cell&gt; kvs = res.listCells();</span> |
| <span class="source-line-no">497</span><span id="line-497">            assertTrue(CellUtil.matchingRows(kvs.get(0), Bytes.toBytes("KEY")));</span> |
| <span class="source-line-no">498</span><span id="line-498">            assertTrue(CellUtil.matchingRows(kvs.get(1), Bytes.toBytes("KEY")));</span> |
| <span class="source-line-no">499</span><span id="line-499">            assertTrue(</span> |
| <span class="source-line-no">500</span><span id="line-500">              CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));</span> |
| <span class="source-line-no">501</span><span id="line-501">            assertTrue(</span> |
| <span class="source-line-no">502</span><span id="line-502">              CellUtil.matchingValue(kvs.get(1), Bytes.toBytes("VALUE" + 2 * valueMultiplier)));</span> |
| <span class="source-line-no">503</span><span id="line-503">            // Only one result set is expected, so let it loop.</span> |
| <span class="source-line-no">504</span><span id="line-504">          }</span> |
| <span class="source-line-no">505</span><span id="line-505">          assertEquals(isDryRun ? 0 : 1, numRows);</span> |
| <span class="source-line-no">506</span><span id="line-506">          verified = true;</span> |
| <span class="source-line-no">507</span><span id="line-507">          break;</span> |
| <span class="source-line-no">508</span><span id="line-508">        } catch (NullPointerException e) {</span> |
| <span class="source-line-no">509</span><span id="line-509">          // If here, a cell was empty. Presume it's because updates came in</span> |
| <span class="source-line-no">510</span><span id="line-510">          // after the scanner had been opened. Wait a while and retry.</span> |
| <span class="source-line-no">511</span><span id="line-511">        }</span> |
| <span class="source-line-no">512</span><span id="line-512">        try {</span> |
| <span class="source-line-no">513</span><span id="line-513">          Thread.sleep(pause);</span> |
| <span class="source-line-no">514</span><span id="line-514">        } catch (InterruptedException e) {</span> |
| <span class="source-line-no">515</span><span id="line-515">          // Preserve the interrupt status and stop retrying instead of swallowing it.</span> |
| <span class="source-line-no">516</span><span id="line-516">          Thread.currentThread().interrupt();</span> |
| <span class="source-line-no">517</span><span id="line-517">          break;</span> |
| <span class="source-line-no">518</span><span id="line-518">        }</span> |
| <span class="source-line-no">519</span><span id="line-519">      }</span> |
| <span class="source-line-no">520</span><span id="line-520">    }</span> |
| <span class="source-line-no">521</span><span id="line-521">    assertTrue(verified);</span> |
| <span class="source-line-no">522</span><span id="line-522">  }</span> |
| <span class="source-line-no">523</span><span id="line-523"></span> |
| <span class="source-line-no">524</span><span id="line-524">  /**</span> |
| <span class="source-line-no">525</span><span id="line-525">   * Confirm ImportTsv via HFiles on fs: every entry under outputPath accepted by</span> |
| <span class="source-line-no">526</span><span id="line-526">   * OutputFilesFilter must be the expected family and contain only non-empty files.</span> |
| <span class="source-line-no">527</span><span id="line-527">   * @param expectedKVCount expected KV total across all hfiles; a negative value skips the check</span> |
| <span class="source-line-no">528</span><span id="line-528">   */</span> |
| <span class="source-line-no">529</span><span id="line-529">  private static void validateHFiles(FileSystem fs, String outputPath, String family,</span> |
| <span class="source-line-no">530</span><span id="line-530">    int expectedKVCount) throws IOException {</span> |
| <span class="source-line-no">531</span><span id="line-531">    // validate number and content of output columns</span> |
| <span class="source-line-no">532</span><span id="line-532">    LOG.debug("Validating HFiles.");</span> |
| <span class="source-line-no">533</span><span id="line-533">    Set&lt;String&gt; configFamilies = new HashSet&lt;&gt;();</span> |
| <span class="source-line-no">534</span><span id="line-534">    configFamilies.add(family);</span> |
| <span class="source-line-no">535</span><span id="line-535">    Set&lt;String&gt; foundFamilies = new HashSet&lt;&gt;();</span> |
| <span class="source-line-no">536</span><span id="line-536">    int actualKVCount = 0;</span> |
| <span class="source-line-no">537</span><span id="line-537">    for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {</span> |
| <span class="source-line-no">538</span><span id="line-538">      // The family name is the last component of the directory path.</span> |
| <span class="source-line-no">539</span><span id="line-539">      String cf = cfStatus.getPath().getName();</span> |
| <span class="source-line-no">540</span><span id="line-540">      foundFamilies.add(cf);</span> |
| <span class="source-line-no">541</span><span id="line-541">      assertTrue(String.format(</span> |
| <span class="source-line-no">542</span><span id="line-542">        "HFile output contains a column family (%s) not present in input families (%s)", cf,</span> |
| <span class="source-line-no">543</span><span id="line-543">        configFamilies), configFamilies.contains(cf));</span> |
| <span class="source-line-no">544</span><span id="line-544">      for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {</span> |
| <span class="source-line-no">545</span><span id="line-545">        assertTrue(String.format("HFile %s appears to contain no data.", hfile.getPath()),</span> |
| <span class="source-line-no">546</span><span id="line-546">          hfile.getLen() &gt; 0);</span> |
| <span class="source-line-no">547</span><span id="line-547">        // count the number of KVs from all the hfiles</span> |
| <span class="source-line-no">548</span><span id="line-548">        if (expectedKVCount &gt; -1) {</span> |
| <span class="source-line-no">549</span><span id="line-549">          actualKVCount += getKVCountFromHfile(fs, hfile.getPath());</span> |
| <span class="source-line-no">550</span><span id="line-550">        }</span> |
| <span class="source-line-no">551</span><span id="line-551">      }</span> |
| <span class="source-line-no">552</span><span id="line-552">    }</span> |
| <span class="source-line-no">553</span><span id="line-553">    assertTrue(String.format("HFile output does not contain the input family '%s'.", family),</span> |
| <span class="source-line-no">554</span><span id="line-554">      foundFamilies.contains(family));</span> |
| <span class="source-line-no">555</span><span id="line-555">    if (expectedKVCount &gt; -1) {</span> |
| <span class="source-line-no">556</span><span id="line-556">      assertEquals("KV count in output hfiles doesn't match the expected KV count",</span> |
| <span class="source-line-no">557</span><span id="line-557">        expectedKVCount, actualKVCount);</span> |
| <span class="source-line-no">558</span><span id="line-558">    }</span> |
| <span class="source-line-no">559</span><span id="line-559">  }</span> |
| <span class="source-line-no">560</span><span id="line-560"></span> |
| <span class="source-line-no">561</span><span id="line-561">  /**</span> |
| <span class="source-line-no">562</span><span id="line-562">   * Returns the total number of KVs in the given hfile; an empty hfile yields 0.</span> |
| <span class="source-line-no">563</span><span id="line-563">   * @param fs File System</span> |
| <span class="source-line-no">564</span><span id="line-564">   * @param p HFile path</span> |
| <span class="source-line-no">565</span><span id="line-565">   * @return KV count in the given hfile</span> |
| <span class="source-line-no">566</span><span id="line-566">   */</span> |
| <span class="source-line-no">567</span><span id="line-567">  private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {</span> |
| <span class="source-line-no">568</span><span id="line-568">    Configuration conf = util.getConfiguration();</span> |
| <span class="source-line-no">569</span><span id="line-569">    try (HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf)) {</span> |
| <span class="source-line-no">570</span><span id="line-570">      HFileScanner scanner = reader.getScanner(conf, false, false);</span> |
| <span class="source-line-no">571</span><span id="line-571">      int count = 0;</span> |
| <span class="source-line-no">572</span><span id="line-572">      // seekTo() reports false for an empty file, so the loop body is skipped in that case</span> |
| <span class="source-line-no">573</span><span id="line-573">      for (boolean more = scanner.seekTo(); more; more = scanner.next()) {</span> |
| <span class="source-line-no">574</span><span id="line-574">        count++;</span> |
| <span class="source-line-no">575</span><span id="line-575">      }</span> |
| <span class="source-line-no">576</span><span id="line-576">      return count;</span> |
| <span class="source-line-no">577</span><span id="line-577">    }</span> |
| <span class="source-line-no">578</span><span id="line-578">  }</span> |
| <span class="source-line-no">579</span><span id="line-579">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |