blob: 3b0c9182509d52f10ccc42f503ae545146188fd5 [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) -->
<title>Source code</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="source: package: org.apache.hadoop.hbase.mapreduce, class: ImportTsv">
<meta name="generator" content="javadoc/SourceToHTMLConverter">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
</head>
<body class="source-page">
<main role="main">
<div class="source-container">
<pre><span class="source-line-no">001</span><span id="line-1">/*</span>
<span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span>
<span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span>
<span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span>
<span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span>
<span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span>
<span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span>
<span class="source-line-no">009</span><span id="line-9"> *</span>
<span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="source-line-no">011</span><span id="line-11"> *</span>
<span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span>
<span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span>
<span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span>
<span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span>
<span class="source-line-no">017</span><span id="line-17"> */</span>
<span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.mapreduce;</span>
<span class="source-line-no">019</span><span id="line-19"></span>
<span class="source-line-no">020</span><span id="line-20">import static java.lang.String.format;</span>
<span class="source-line-no">021</span><span id="line-21"></span>
<span class="source-line-no">022</span><span id="line-22">import java.io.File;</span>
<span class="source-line-no">023</span><span id="line-23">import java.io.IOException;</span>
<span class="source-line-no">024</span><span id="line-24">import java.util.ArrayList;</span>
<span class="source-line-no">025</span><span id="line-25">import java.util.Base64;</span>
<span class="source-line-no">026</span><span id="line-26">import java.util.HashSet;</span>
<span class="source-line-no">027</span><span id="line-27">import java.util.Set;</span>
<span class="source-line-no">028</span><span id="line-28">import org.apache.commons.lang3.StringUtils;</span>
<span class="source-line-no">029</span><span id="line-29">import org.apache.hadoop.conf.Configuration;</span>
<span class="source-line-no">030</span><span id="line-30">import org.apache.hadoop.conf.Configured;</span>
<span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.fs.Path;</span>
<span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.hbase.HBaseConfiguration;</span>
<span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.hbase.HConstants;</span>
<span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.hbase.TableName;</span>
<span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.hbase.TableNotEnabledException;</span>
<span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.TableNotFoundException;</span>
<span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.client.Admin;</span>
<span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;</span>
<span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;</span>
<span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.client.Connection;</span>
<span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.client.ConnectionFactory;</span>
<span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.client.Put;</span>
<span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.client.RegionLocator;</span>
<span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.client.Table;</span>
<span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.client.TableDescriptor;</span>
<span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.client.TableDescriptorBuilder;</span>
<span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.io.ImmutableBytesWritable;</span>
<span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;</span>
<span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.hbase.util.Bytes;</span>
<span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;</span>
<span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.hbase.util.Pair;</span>
<span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.io.Text;</span>
<span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.mapreduce.Job;</span>
<span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;</span>
<span class="source-line-no">055</span><span id="line-55">import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;</span>
<span class="source-line-no">056</span><span id="line-56">import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;</span>
<span class="source-line-no">057</span><span id="line-57">import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;</span>
<span class="source-line-no">058</span><span id="line-58">import org.apache.hadoop.security.Credentials;</span>
<span class="source-line-no">059</span><span id="line-59">import org.apache.hadoop.util.Tool;</span>
<span class="source-line-no">060</span><span id="line-60">import org.apache.hadoop.util.ToolRunner;</span>
<span class="source-line-no">061</span><span id="line-61">import org.apache.yetus.audience.InterfaceAudience;</span>
<span class="source-line-no">062</span><span id="line-62">import org.slf4j.Logger;</span>
<span class="source-line-no">063</span><span id="line-63">import org.slf4j.LoggerFactory;</span>
<span class="source-line-no">064</span><span id="line-64"></span>
<span class="source-line-no">065</span><span id="line-65">import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;</span>
<span class="source-line-no">066</span><span id="line-66">import org.apache.hbase.thirdparty.com.google.common.base.Splitter;</span>
<span class="source-line-no">067</span><span id="line-67">import org.apache.hbase.thirdparty.com.google.common.collect.Lists;</span>
<span class="source-line-no">068</span><span id="line-68"></span>
<span class="source-line-no">069</span><span id="line-69">/**</span>
<span class="source-line-no">070</span><span id="line-70"> * Tool to import data from a TSV file. This tool is rather simplistic - it doesn't do any quoting</span>
<span class="source-line-no">071</span><span id="line-71"> * or escaping, but is useful for many data loads.</span>
<span class="source-line-no">072</span><span id="line-72"> * @see ImportTsv#usage(String)</span>
<span class="source-line-no">073</span><span id="line-73"> */</span>
<span class="source-line-no">074</span><span id="line-74">@InterfaceAudience.Public</span>
<span class="source-line-no">075</span><span id="line-75">public class ImportTsv extends Configured implements Tool {</span>
<span class="source-line-no">076</span><span id="line-76"></span>
<span class="source-line-no">077</span><span id="line-77"> protected static final Logger LOG = LoggerFactory.getLogger(ImportTsv.class);</span>
<span class="source-line-no">078</span><span id="line-78"></span>
<span class="source-line-no">079</span><span id="line-79"> final static String NAME = "importtsv";</span>
<span class="source-line-no">080</span><span id="line-80"></span>
<span class="source-line-no">081</span><span id="line-81"> public final static String MAPPER_CONF_KEY = "importtsv.mapper.class";</span>
<span class="source-line-no">082</span><span id="line-82"> public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output";</span>
<span class="source-line-no">083</span><span id="line-83"> public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp";</span>
<span class="source-line-no">084</span><span id="line-84"> public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";</span>
<span class="source-line-no">085</span><span id="line-85"> // TODO: the remaining config keys below are used exclusively by TsvImporterMapper.</span>
<span class="source-line-no">086</span><span id="line-86"> // Move them out of the tool and let the mapper handle its own validation.</span>
<span class="source-line-no">087</span><span id="line-87"> public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run";</span>
<span class="source-line-no">088</span><span id="line-88"> // If true, bad lines are logged to stderr. Default: false.</span>
<span class="source-line-no">089</span><span id="line-89"> public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines";</span>
<span class="source-line-no">090</span><span id="line-90"> public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines";</span>
<span class="source-line-no">091</span><span id="line-91"> public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns";</span>
<span class="source-line-no">092</span><span id="line-92"> public final static String COLUMNS_CONF_KEY = "importtsv.columns";</span>
<span class="source-line-no">093</span><span id="line-93"> public final static String SEPARATOR_CONF_KEY = "importtsv.separator";</span>
<span class="source-line-no">094</span><span id="line-94"> public final static String ATTRIBUTE_SEPERATOR_CONF_KEY = "attributes.seperator";</span>
<span class="source-line-no">095</span><span id="line-95"> // Used to propagate credentials from a parent MR job that launches ImportTsv jobs.</span>
<span class="source-line-no">096</span><span id="line-96"> // See IntegrationTestImportTsv.</span>
<span class="source-line-no">097</span><span id="line-97"> public final static String CREDENTIALS_LOCATION = "credentials_location";</span>
<span class="source-line-no">098</span><span id="line-98"> final static String DEFAULT_SEPARATOR = "\t";</span>
<span class="source-line-no">099</span><span id="line-99"> final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=&gt;";</span>
<span class="source-line-no">100</span><span id="line-100"> final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ",";</span>
<span class="source-line-no">101</span><span id="line-101"> final static Class DEFAULT_MAPPER = TsvImporterMapper.class;</span>
<span class="source-line-no">102</span><span id="line-102"> public final static String CREATE_TABLE_CONF_KEY = "create.table";</span>
<span class="source-line-no">103</span><span id="line-103"> public final static String NO_STRICT_COL_FAMILY = "no.strict";</span>
<span class="source-line-no">104</span><span id="line-104"> /**</span>
<span class="source-line-no">105</span><span id="line-105"> * Flipped when the table did not exist and was created in dry-run mode, so that the table is</span>
<span class="source-line-no">106</span><span id="line-106"> * deleted again when the MR job ends.</span>
<span class="source-line-no">107</span><span id="line-107"> */</span>
<span class="source-line-no">108</span><span id="line-108"> private static boolean DRY_RUN_TABLE_CREATED;</span>
<span class="source-line-no">109</span><span id="line-109"></span>
<span class="source-line-no">110</span><span id="line-110"> public static class TsvParser {</span>
<span class="source-line-no">111</span><span id="line-111"> /**</span>
<span class="source-line-no">112</span><span id="line-112"> * Column families and qualifiers mapped to the TSV columns, indexed by column position</span>
<span class="source-line-no">113</span><span id="line-113"> */</span>
<span class="source-line-no">114</span><span id="line-114"> private final byte[][] families;</span>
<span class="source-line-no">115</span><span id="line-115"> private final byte[][] qualifiers;</span>
<span class="source-line-no">116</span><span id="line-116"> /** The single byte that separates columns within a line. */</span>
<span class="source-line-no">117</span><span id="line-117"> private final byte separatorByte;</span>
<span class="source-line-no">118</span><span id="line-118"> /** Position of the HBASE_ROW_KEY entry in the spec; an int field, so 0 until assigned. */</span>
<span class="source-line-no">119</span><span id="line-119"> private int rowKeyColumnIndex;</span>
<span class="source-line-no">120</span><span id="line-120"> /** Number of entries in the column specification. */</span>
<span class="source-line-no">121</span><span id="line-121"> private int maxColumnCount;</span>
<span class="source-line-no">122</span><span id="line-122"></span>
<span class="source-line-no">123</span><span id="line-123"> // Default value must be negative so that it never equals a real column position</span>
<span class="source-line-no">124</span><span id="line-124"> public static final int DEFAULT_TIMESTAMP_COLUMN_INDEX = -1;</span>
<span class="source-line-no">125</span><span id="line-125"> /** Position of the HBASE_TS_KEY entry in the spec, or the default when there is none. */</span>
<span class="source-line-no">126</span><span id="line-126"> private int timestampKeyColumnIndex = DEFAULT_TIMESTAMP_COLUMN_INDEX;</span>
<span class="source-line-no">127</span><span id="line-127"> /** Spec token that marks the row key column. */</span>
<span class="source-line-no">128</span><span id="line-128"> public static final String ROWKEY_COLUMN_SPEC = "HBASE_ROW_KEY";</span>
<span class="source-line-no">129</span><span id="line-129"> /** Spec token that marks the timestamp column. */</span>
<span class="source-line-no">130</span><span id="line-130"> public static final String TIMESTAMPKEY_COLUMN_SPEC = "HBASE_TS_KEY";</span>
<span class="source-line-no">131</span><span id="line-131"> /** Spec token that marks the attributes column. */</span>
<span class="source-line-no">132</span><span id="line-132"> public static final String ATTRIBUTES_COLUMN_SPEC = "HBASE_ATTRIBUTES_KEY";</span>
<span class="source-line-no">133</span><span id="line-133"> /** Spec token that marks the cell visibility column. */</span>
<span class="source-line-no">134</span><span id="line-134"> public static final String CELL_VISIBILITY_COLUMN_SPEC = "HBASE_CELL_VISIBILITY";</span>
<span class="source-line-no">135</span><span id="line-135"> /** Spec token that marks the cell TTL column. */</span>
<span class="source-line-no">136</span><span id="line-136"> public static final String CELL_TTL_COLUMN_SPEC = "HBASE_CELL_TTL";</span>
<span class="source-line-no">137</span><span id="line-137"> /** Position of the HBASE_ATTRIBUTES_KEY entry in the spec, or the default when there is none. */</span>
<span class="source-line-no">138</span><span id="line-138"> private int attrKeyColumnIndex = DEFAULT_ATTRIBUTES_COLUMN_INDEX;</span>
<span class="source-line-no">139</span><span id="line-139"> /** Sentinel index meaning the spec has no attributes column. */</span>
<span class="source-line-no">140</span><span id="line-140"> public static final int DEFAULT_ATTRIBUTES_COLUMN_INDEX = -1;</span>
<span class="source-line-no">141</span><span id="line-141"> /** Sentinel index meaning the spec has no cell visibility column. */</span>
<span class="source-line-no">142</span><span id="line-142"> public static final int DEFAULT_CELL_VISIBILITY_COLUMN_INDEX = -1;</span>
<span class="source-line-no">143</span><span id="line-143"> /** Sentinel index meaning the spec has no cell TTL column. */</span>
<span class="source-line-no">144</span><span id="line-144"> public static final int DEFAULT_CELL_TTL_COLUMN_INDEX = -1;</span>
<span class="source-line-no">145</span><span id="line-145"> /** Position of the HBASE_CELL_VISIBILITY entry in the spec, or the default when there is none. */</span>
<span class="source-line-no">146</span><span id="line-146"> private int cellVisibilityColumnIndex = DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;</span>
<span class="source-line-no">147</span><span id="line-147"> /** Position of the HBASE_CELL_TTL entry in the spec, or the default when there is none. */</span>
<span class="source-line-no">148</span><span id="line-148"> private int cellTTLColumnIndex = DEFAULT_CELL_TTL_COLUMN_INDEX;</span>
<span class="source-line-no">149</span><span id="line-149"></span>
<span class="source-line-no">150</span><span id="line-150">    /**</span>
<span class="source-line-no">151</span><span id="line-151">     * Builds a parser for the given column layout and single-byte separator.</span>
<span class="source-line-no">152</span><span id="line-152">     * @param columnsSpecification the list of columns to parse out, comma separated. The row key</span>
<span class="source-line-no">153</span><span id="line-153">     *                             should be the special token TsvParser.ROWKEY_COLUMN_SPEC</span>
<span class="source-line-no">154</span><span id="line-154">     * @param separatorStr         separator between columns; must encode to exactly one byte</span>
<span class="source-line-no">155</span><span id="line-155">     * @throws IllegalArgumentException if the separator is not a single byte</span>
<span class="source-line-no">156</span><span id="line-156">     */</span>
<span class="source-line-no">157</span><span id="line-157">    public TsvParser(String columnsSpecification, String separatorStr) {</span>
<span class="source-line-no">158</span><span id="line-158">      // Lines are scanned byte by byte, so the separator has to fit in a single byte</span>
<span class="source-line-no">159</span><span id="line-159">      byte[] separatorBytes = Bytes.toBytes(separatorStr);</span>
<span class="source-line-no">160</span><span id="line-160">      Preconditions.checkArgument(separatorBytes.length == 1,</span>
<span class="source-line-no">161</span><span id="line-161">        "TsvParser only supports single-byte separators");</span>
<span class="source-line-no">162</span><span id="line-162">      separatorByte = separatorBytes[0];</span>
<span class="source-line-no">163</span><span id="line-163"></span>
<span class="source-line-no">164</span><span id="line-164">      // One family/qualifier slot per spec entry; slots of the special columns stay null</span>
<span class="source-line-no">165</span><span id="line-165">      ArrayList&lt;String&gt; specs =</span>
<span class="source-line-no">166</span><span id="line-166">        Lists.newArrayList(Splitter.on(',').trimResults().split(columnsSpecification));</span>
<span class="source-line-no">167</span><span id="line-167">      maxColumnCount = specs.size();</span>
<span class="source-line-no">168</span><span id="line-168">      families = new byte[maxColumnCount][];</span>
<span class="source-line-no">169</span><span id="line-169">      qualifiers = new byte[maxColumnCount][];</span>
<span class="source-line-no">170</span><span id="line-170"></span>
<span class="source-line-no">171</span><span id="line-171">      int column = 0;</span>
<span class="source-line-no">172</span><span id="line-172">      for (String spec : specs) {</span>
<span class="source-line-no">173</span><span id="line-173">        switch (spec) {</span>
<span class="source-line-no">174</span><span id="line-174">          case ROWKEY_COLUMN_SPEC:</span>
<span class="source-line-no">175</span><span id="line-175">            rowKeyColumnIndex = column;</span>
<span class="source-line-no">176</span><span id="line-176">            break;</span>
<span class="source-line-no">177</span><span id="line-177">          case TIMESTAMPKEY_COLUMN_SPEC:</span>
<span class="source-line-no">178</span><span id="line-178">            timestampKeyColumnIndex = column;</span>
<span class="source-line-no">179</span><span id="line-179">            break;</span>
<span class="source-line-no">180</span><span id="line-180">          case ATTRIBUTES_COLUMN_SPEC:</span>
<span class="source-line-no">181</span><span id="line-181">            attrKeyColumnIndex = column;</span>
<span class="source-line-no">182</span><span id="line-182">            break;</span>
<span class="source-line-no">183</span><span id="line-183">          case CELL_VISIBILITY_COLUMN_SPEC:</span>
<span class="source-line-no">184</span><span id="line-184">            cellVisibilityColumnIndex = column;</span>
<span class="source-line-no">185</span><span id="line-185">            break;</span>
<span class="source-line-no">186</span><span id="line-186">          case CELL_TTL_COLUMN_SPEC:</span>
<span class="source-line-no">187</span><span id="line-187">            cellTTLColumnIndex = column;</span>
<span class="source-line-no">188</span><span id="line-188">            break;</span>
<span class="source-line-no">189</span><span id="line-189">          default:</span>
<span class="source-line-no">190</span><span id="line-190">            // Plain column: "family" or "family:qualifier", cut at the first colon only</span>
<span class="source-line-no">191</span><span id="line-191">            int colon = spec.indexOf(':');</span>
<span class="source-line-no">192</span><span id="line-192">            boolean familyOnly = colon &lt; 0;</span>
<span class="source-line-no">193</span><span id="line-193">            families[column] = Bytes.toBytes(familyOnly ? spec : spec.substring(0, colon));</span>
<span class="source-line-no">194</span><span id="line-194">            qualifiers[column] =</span>
<span class="source-line-no">195</span><span id="line-195">              familyOnly ? HConstants.EMPTY_BYTE_ARRAY : Bytes.toBytes(spec.substring(colon + 1));</span>
<span class="source-line-no">196</span><span id="line-196">            break;</span>
<span class="source-line-no">197</span><span id="line-197">        }</span>
<span class="source-line-no">198</span><span id="line-198">        column++;</span>
<span class="source-line-no">199</span><span id="line-199">      }</span>
<span class="source-line-no">200</span><span id="line-200">    }</span>
<span class="source-line-no">201</span><span id="line-201">    /** Whether the column spec named a timestamp column (HBASE_TS_KEY). */</span>
<span class="source-line-no">202</span><span id="line-202">    public boolean hasTimestamp() {</span>
<span class="source-line-no">203</span><span id="line-203">      return DEFAULT_TIMESTAMP_COLUMN_INDEX != this.timestampKeyColumnIndex;</span>
<span class="source-line-no">204</span><span id="line-204">    }</span>
<span class="source-line-no">205</span><span id="line-205">    /** Position of the timestamp column, or DEFAULT_TIMESTAMP_COLUMN_INDEX when there is none. */</span>
<span class="source-line-no">206</span><span id="line-206">    public int getTimestampKeyColumnIndex() {</span>
<span class="source-line-no">207</span><span id="line-207">      return this.timestampKeyColumnIndex;</span>
<span class="source-line-no">208</span><span id="line-208">    }</span>
<span class="source-line-no">209</span><span id="line-209">    /** Whether the column spec named an attributes column (HBASE_ATTRIBUTES_KEY). */</span>
<span class="source-line-no">210</span><span id="line-210">    public boolean hasAttributes() {</span>
<span class="source-line-no">211</span><span id="line-211">      return DEFAULT_ATTRIBUTES_COLUMN_INDEX != this.attrKeyColumnIndex;</span>
<span class="source-line-no">212</span><span id="line-212">    }</span>
<span class="source-line-no">213</span><span id="line-213">    /** Whether the column spec named a cell visibility column (HBASE_CELL_VISIBILITY). */</span>
<span class="source-line-no">214</span><span id="line-214">    public boolean hasCellVisibility() {</span>
<span class="source-line-no">215</span><span id="line-215">      return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX != this.cellVisibilityColumnIndex;</span>
<span class="source-line-no">216</span><span id="line-216">    }</span>
<span class="source-line-no">217</span><span id="line-217">    /** Whether the column spec named a cell TTL column (HBASE_CELL_TTL). */</span>
<span class="source-line-no">218</span><span id="line-218">    public boolean hasCellTTL() {</span>
<span class="source-line-no">219</span><span id="line-219">      return cellTTLColumnIndex != DEFAULT_CELL_TTL_COLUMN_INDEX;</span>
<span class="source-line-no">220</span><span id="line-220">    }</span>
<span class="source-line-no">221</span><span id="line-221">    /** Position of the attributes column, or DEFAULT_ATTRIBUTES_COLUMN_INDEX when there is none. */</span>
<span class="source-line-no">222</span><span id="line-222">    public int getAttributesKeyColumnIndex() {</span>
<span class="source-line-no">223</span><span id="line-223">      return this.attrKeyColumnIndex;</span>
<span class="source-line-no">224</span><span id="line-224">    }</span>
<span class="source-line-no">225</span><span id="line-225">    /** Position of the cell visibility column, or its default index when there is none. */</span>
<span class="source-line-no">226</span><span id="line-226">    public int getCellVisibilityColumnIndex() {</span>
<span class="source-line-no">227</span><span id="line-227">      return this.cellVisibilityColumnIndex;</span>
<span class="source-line-no">228</span><span id="line-228">    }</span>
<span class="source-line-no">229</span><span id="line-229">    /** Position of the cell TTL column, or DEFAULT_CELL_TTL_COLUMN_INDEX when there is none. */</span>
<span class="source-line-no">230</span><span id="line-230">    public int getCellTTLColumnIndex() {</span>
<span class="source-line-no">231</span><span id="line-231">      return this.cellTTLColumnIndex;</span>
<span class="source-line-no">232</span><span id="line-232">    }</span>
<span class="source-line-no">233</span><span id="line-233">    /** Position of the row key column (HBASE_ROW_KEY) within the column spec. */</span>
<span class="source-line-no">234</span><span id="line-234">    public int getRowKeyColumnIndex() {</span>
<span class="source-line-no">235</span><span id="line-235">      return this.rowKeyColumnIndex;</span>
<span class="source-line-no">236</span><span id="line-236">    }</span>
<span class="source-line-no">237</span><span id="line-237">    /** Column family bytes for spec position {@code idx}; null for the special columns. */</span>
<span class="source-line-no">238</span><span id="line-238">    public byte[] getFamily(int idx) {</span>
<span class="source-line-no">239</span><span id="line-239">      return this.families[idx];</span>
<span class="source-line-no">240</span><span id="line-240">    }</span>
<span class="source-line-no">241</span><span id="line-241">    /** Qualifier bytes for spec position {@code idx}; null for the special columns. */</span>
<span class="source-line-no">242</span><span id="line-242">    public byte[] getQualifier(int idx) {</span>
<span class="source-line-no">243</span><span id="line-243">      return this.qualifiers[idx];</span>
<span class="source-line-no">244</span><span id="line-244">    }</span>
<span class="source-line-no">245</span><span id="line-245">    /**</span>
<span class="source-line-no">246</span><span id="line-246">     * Finds the separator offsets in the first {@code length} bytes of {@code lineBytes}.</span>
<span class="source-line-no">247</span><span id="line-247">     * @throws BadTsvLineException on no separator, excess columns, or a missing special column</span>
<span class="source-line-no">248</span><span id="line-248">     */</span>
<span class="source-line-no">249</span><span id="line-249">    public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException {</span>
<span class="source-line-no">250</span><span id="line-250">      ArrayList&lt;Integer&gt; offsets = new ArrayList&lt;&gt;(maxColumnCount);</span>
<span class="source-line-no">251</span><span id="line-251">      for (int pos = 0; pos &lt; length; pos++) {</span>
<span class="source-line-no">252</span><span id="line-252">        if (separatorByte == lineBytes[pos]) {</span>
<span class="source-line-no">253</span><span id="line-253">          offsets.add(pos);</span>
<span class="source-line-no">254</span><span id="line-254">        }</span>
<span class="source-line-no">255</span><span id="line-255">      }</span>
<span class="source-line-no">256</span><span id="line-256">      if (offsets.isEmpty()) {</span>
<span class="source-line-no">257</span><span id="line-257">        throw new BadTsvLineException("No delimiter");</span>
<span class="source-line-no">258</span><span id="line-258">      }</span>
<span class="source-line-no">259</span><span id="line-259">      offsets.add(length); // the end of the line terminates the last column</span>
<span class="source-line-no">260</span><span id="line-260">      if (maxColumnCount &lt; offsets.size()) {</span>
<span class="source-line-no">261</span><span id="line-261">        throw new BadTsvLineException("Excessive columns");</span>
<span class="source-line-no">262</span><span id="line-262">      } else if (getRowKeyColumnIndex() &gt;= offsets.size()) {</span>
<span class="source-line-no">263</span><span id="line-263">        throw new BadTsvLineException("No row key");</span>
<span class="source-line-no">264</span><span id="line-264">      } else if (hasTimestamp() &amp;&amp; getTimestampKeyColumnIndex() &gt;= offsets.size()) {</span>
<span class="source-line-no">265</span><span id="line-265">        throw new BadTsvLineException("No timestamp");</span>
<span class="source-line-no">266</span><span id="line-266">      } else if (hasAttributes() &amp;&amp; getAttributesKeyColumnIndex() &gt;= offsets.size()) {</span>
<span class="source-line-no">267</span><span id="line-267">        throw new BadTsvLineException("No attributes specified");</span>
<span class="source-line-no">268</span><span id="line-268">      } else if (hasCellVisibility() &amp;&amp; getCellVisibilityColumnIndex() &gt;= offsets.size()) {</span>
<span class="source-line-no">269</span><span id="line-269">        throw new BadTsvLineException("No cell visibility specified");</span>
<span class="source-line-no">270</span><span id="line-270">      } else if (hasCellTTL() &amp;&amp; getCellTTLColumnIndex() &gt;= offsets.size()) {</span>
<span class="source-line-no">271</span><span id="line-271">        throw new BadTsvLineException("No cell TTL specified");</span>
<span class="source-line-no">272</span><span id="line-272">      }</span>
<span class="source-line-no">273</span><span id="line-273">      return new ParsedLine(offsets, lineBytes);</span>
<span class="source-line-no">274</span><span id="line-274">    }</span>
<span class="source-line-no">275</span><span id="line-275"></span>
<span class="source-line-no">276</span><span id="line-276"> class ParsedLine {</span>
<span class="source-line-no">277</span><span id="line-277">      private final ArrayList&lt;Integer&gt; tabOffsets;</span>
<span class="source-line-no">278</span><span id="line-278">      private byte[] lineBytes;</span>
<span class="source-line-no">279</span><span id="line-279">      /** Keeps references to the line bytes and its separator offsets; neither is copied. */</span>
<span class="source-line-no">280</span><span id="line-280">      ParsedLine(ArrayList&lt;Integer&gt; tabOffsets, byte[] lineBytes) {</span>
<span class="source-line-no">281</span><span id="line-281">        this.lineBytes = lineBytes;</span>
<span class="source-line-no">282</span><span id="line-282">        this.tabOffsets = tabOffsets;</span>
<span class="source-line-no">283</span><span id="line-283">      }</span>
<span class="source-line-no">284</span><span id="line-284">      /** Offset of the row key column, as reported by getColumnOffset. */</span>
<span class="source-line-no">285</span><span id="line-285">      public int getRowKeyOffset() {</span>
<span class="source-line-no">286</span><span id="line-286">        return getColumnOffset(TsvParser.this.rowKeyColumnIndex);</span>
<span class="source-line-no">287</span><span id="line-287">      }</span>
<span class="source-line-no">288</span><span id="line-288">      /** Length of the row key column, as reported by getColumnLength. */</span>
<span class="source-line-no">289</span><span id="line-289">      public int getRowKeyLength() {</span>
<span class="source-line-no">290</span><span id="line-290">        return getColumnLength(TsvParser.this.rowKeyColumnIndex);</span>
<span class="source-line-no">291</span><span id="line-291">      }</span>
<span class="source-line-no">292</span><span id="line-292"></span>
<span class="source-line-no">293</span><span id="line-293"> public long getTimestamp(long ts) throws BadTsvLineException {</span>
<span class="source-line-no">294</span><span id="line-294"> // Return ts if HBASE_TS_KEY is not configured in column spec</span>
<span class="source-line-no">295</span><span id="line-295"> if (!hasTimestamp()) {</span>
<span class="source-line-no">296</span><span id="line-296"> return ts;</span>
<span class="source-line-no">297</span><span id="line-297"> }</span>
<span class="source-line-no">298</span><span id="line-298"></span>
<span class="source-line-no">299</span><span id="line-299"> String timeStampStr = Bytes.toString(lineBytes, getColumnOffset(timestampKeyColumnIndex),</span>
<span class="source-line-no">300</span><span id="line-300"> getColumnLength(timestampKeyColumnIndex));</span>
<span class="source-line-no">301</span><span id="line-301"> try {</span>
<span class="source-line-no">302</span><span id="line-302"> return Long.parseLong(timeStampStr);</span>
<span class="source-line-no">303</span><span id="line-303"> } catch (NumberFormatException nfe) {</span>
<span class="source-line-no">304</span><span id="line-304"> // treat this record as bad record</span>
<span class="source-line-no">305</span><span id="line-305"> throw new BadTsvLineException("Invalid timestamp " + timeStampStr);</span>
<span class="source-line-no">306</span><span id="line-306"> }</span>
<span class="source-line-no">307</span><span id="line-307"> }</span>
<span class="source-line-no">308</span><span id="line-308">      /** Returns the text of the attributes column, or null when the spec has none. */</span>
<span class="source-line-no">309</span><span id="line-309">      private String getAttributes() {</span>
<span class="source-line-no">310</span><span id="line-310">        if (!hasAttributes()) {</span>
<span class="source-line-no">311</span><span id="line-311">          return null;</span>
<span class="source-line-no">312</span><span id="line-312">        }</span>
<span class="source-line-no">313</span><span id="line-313">        int start = getColumnOffset(attrKeyColumnIndex);</span>
<span class="source-line-no">314</span><span id="line-314">        int span = getColumnLength(attrKeyColumnIndex);</span>
<span class="source-line-no">315</span><span id="line-315">        return Bytes.toString(lineBytes, start, span);</span>
<span class="source-line-no">316</span><span id="line-316">      }</span>
<span class="source-line-no">317</span><span id="line-317"></span>
/**
 * Splits the attributes column into its individual entries.
 * @return the pieces produced by splitting on {@code DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR},
 *         or {@code null} when {@code getAttributes()} returns null
 */
<span class="source-line-no">318</span><span id="line-318"> public String[] getIndividualAttributes() {</span>
<span class="source-line-no">319</span><span id="line-319"> String attributes = getAttributes();</span>
<span class="source-line-no">320</span><span id="line-320"> if (attributes != null) {</span>
 // String.split interprets its argument as a regular expression, not as literal text.
<span class="source-line-no">321</span><span id="line-321"> return attributes.split(DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR);</span>
<span class="source-line-no">322</span><span id="line-322"> } else {</span>
<span class="source-line-no">323</span><span id="line-323"> return null;</span>
<span class="source-line-no">324</span><span id="line-324"> }</span>
<span class="source-line-no">325</span><span id="line-325"> }</span>
<span class="source-line-no">326</span><span id="line-326"></span>
/**
 * Returns the start offset of the attributes column within the line bytes.
 * @return {@code getColumnOffset(attrKeyColumnIndex)}, or
 *         {@code DEFAULT_ATTRIBUTES_COLUMN_INDEX} when {@code hasAttributes()} is false
 */
<span class="source-line-no">327</span><span id="line-327"> public int getAttributeKeyOffset() {</span>
<span class="source-line-no">328</span><span id="line-328"> if (hasAttributes()) {</span>
<span class="source-line-no">329</span><span id="line-329"> return getColumnOffset(attrKeyColumnIndex);</span>
<span class="source-line-no">330</span><span id="line-330"> } else {</span>
<span class="source-line-no">331</span><span id="line-331"> return DEFAULT_ATTRIBUTES_COLUMN_INDEX;</span>
<span class="source-line-no">332</span><span id="line-332"> }</span>
<span class="source-line-no">333</span><span id="line-333"> }</span>
<span class="source-line-no">334</span><span id="line-334"></span>
/**
 * Returns the length of the attributes column.
 * @return {@code getColumnLength(attrKeyColumnIndex)}, or
 *         {@code DEFAULT_ATTRIBUTES_COLUMN_INDEX} when {@code hasAttributes()} is false
 */
<span class="source-line-no">335</span><span id="line-335"> public int getAttributeKeyLength() {</span>
<span class="source-line-no">336</span><span id="line-336"> if (hasAttributes()) {</span>
<span class="source-line-no">337</span><span id="line-337"> return getColumnLength(attrKeyColumnIndex);</span>
<span class="source-line-no">338</span><span id="line-338"> } else {</span>
 // NOTE(review): the fallback is the default column-index constant rather than a length;
 // presumably callers treat it as a "not present" sentinel - confirm.
<span class="source-line-no">339</span><span id="line-339"> return DEFAULT_ATTRIBUTES_COLUMN_INDEX;</span>
<span class="source-line-no">340</span><span id="line-340"> }</span>
<span class="source-line-no">341</span><span id="line-341"> }</span>
<span class="source-line-no">342</span><span id="line-342"></span>
/**
 * Returns the start offset of the cell visibility column within the line bytes.
 * @return {@code getColumnOffset(cellVisibilityColumnIndex)}, or
 *         {@code DEFAULT_CELL_VISIBILITY_COLUMN_INDEX} when {@code hasCellVisibility()} is false
 */
<span class="source-line-no">343</span><span id="line-343"> public int getCellVisibilityColumnOffset() {</span>
<span class="source-line-no">344</span><span id="line-344"> if (hasCellVisibility()) {</span>
<span class="source-line-no">345</span><span id="line-345"> return getColumnOffset(cellVisibilityColumnIndex);</span>
<span class="source-line-no">346</span><span id="line-346"> } else {</span>
<span class="source-line-no">347</span><span id="line-347"> return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;</span>
<span class="source-line-no">348</span><span id="line-348"> }</span>
<span class="source-line-no">349</span><span id="line-349"> }</span>
<span class="source-line-no">350</span><span id="line-350"></span>
/**
 * Returns the length of the cell visibility column.
 * @return {@code getColumnLength(cellVisibilityColumnIndex)}, or
 *         {@code DEFAULT_CELL_VISIBILITY_COLUMN_INDEX} when {@code hasCellVisibility()} is false
 */
<span class="source-line-no">351</span><span id="line-351"> public int getCellVisibilityColumnLength() {</span>
<span class="source-line-no">352</span><span id="line-352"> if (hasCellVisibility()) {</span>
<span class="source-line-no">353</span><span id="line-353"> return getColumnLength(cellVisibilityColumnIndex);</span>
<span class="source-line-no">354</span><span id="line-354"> } else {</span>
 // NOTE(review): the fallback is the default column-index constant rather than a length;
 // presumably callers treat it as a "not present" sentinel - confirm.
<span class="source-line-no">355</span><span id="line-355"> return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;</span>
<span class="source-line-no">356</span><span id="line-356"> }</span>
<span class="source-line-no">357</span><span id="line-357"> }</span>
<span class="source-line-no">358</span><span id="line-358"></span>
/**
 * Returns the cell visibility column of this line decoded with {@code Bytes.toString}.
 * @return the cell visibility text, or {@code null} when {@code hasCellVisibility()} is false
 */
<span class="source-line-no">359</span><span id="line-359"> public String getCellVisibility() {</span>
<span class="source-line-no">360</span><span id="line-360"> if (!hasCellVisibility()) {</span>
<span class="source-line-no">361</span><span id="line-361"> return null;</span>
<span class="source-line-no">362</span><span id="line-362"> } else {</span>
<span class="source-line-no">363</span><span id="line-363"> return Bytes.toString(lineBytes, getColumnOffset(cellVisibilityColumnIndex),</span>
<span class="source-line-no">364</span><span id="line-364"> getColumnLength(cellVisibilityColumnIndex));</span>
<span class="source-line-no">365</span><span id="line-365"> }</span>
<span class="source-line-no">366</span><span id="line-366"> }</span>
<span class="source-line-no">367</span><span id="line-367"></span>
/**
 * Returns the start offset of the cell TTL column within the line bytes.
 * @return {@code getColumnOffset(cellTTLColumnIndex)}, or
 *         {@code DEFAULT_CELL_TTL_COLUMN_INDEX} when {@code hasCellTTL()} is false
 */
<span class="source-line-no">368</span><span id="line-368"> public int getCellTTLColumnOffset() {</span>
<span class="source-line-no">369</span><span id="line-369"> if (hasCellTTL()) {</span>
<span class="source-line-no">370</span><span id="line-370"> return getColumnOffset(cellTTLColumnIndex);</span>
<span class="source-line-no">371</span><span id="line-371"> } else {</span>
<span class="source-line-no">372</span><span id="line-372"> return DEFAULT_CELL_TTL_COLUMN_INDEX;</span>
<span class="source-line-no">373</span><span id="line-373"> }</span>
<span class="source-line-no">374</span><span id="line-374"> }</span>
<span class="source-line-no">375</span><span id="line-375"></span>
/**
 * Returns the length of the cell TTL column.
 * @return {@code getColumnLength(cellTTLColumnIndex)}, or
 *         {@code DEFAULT_CELL_TTL_COLUMN_INDEX} when {@code hasCellTTL()} is false
 */
<span class="source-line-no">376</span><span id="line-376"> public int getCellTTLColumnLength() {</span>
<span class="source-line-no">377</span><span id="line-377"> if (hasCellTTL()) {</span>
<span class="source-line-no">378</span><span id="line-378"> return getColumnLength(cellTTLColumnIndex);</span>
<span class="source-line-no">379</span><span id="line-379"> } else {</span>
 // NOTE(review): the fallback is the default column-index constant rather than a length;
 // presumably callers treat it as a "not present" sentinel - confirm.
<span class="source-line-no">380</span><span id="line-380"> return DEFAULT_CELL_TTL_COLUMN_INDEX;</span>
<span class="source-line-no">381</span><span id="line-381"> }</span>
<span class="source-line-no">382</span><span id="line-382"> }</span>
<span class="source-line-no">383</span><span id="line-383"></span>
/**
 * Returns the cell TTL carried by this line.
 * @return 0 when {@code hasCellTTL()} is false, otherwise the result of {@code Bytes.toLong}
 *         applied to the bytes of the TTL column
 */
<span class="source-line-no">384</span><span id="line-384"> public long getCellTTL() {</span>
<span class="source-line-no">385</span><span id="line-385"> if (!hasCellTTL()) {</span>
<span class="source-line-no">386</span><span id="line-386"> return 0;</span>
<span class="source-line-no">387</span><span id="line-387"> } else {</span>
 // NOTE(review): unlike getTimestamp, which parses decimal text with Long.parseLong, this
 // decodes the column with Bytes.toLong. Presumably that expects a binary-encoded long of a
 // fixed width - confirm this matches how TTL values appear in the TSV input.
<span class="source-line-no">388</span><span id="line-388"> return Bytes.toLong(lineBytes, getColumnOffset(cellTTLColumnIndex),</span>
<span class="source-line-no">389</span><span id="line-389"> getColumnLength(cellTTLColumnIndex));</span>
<span class="source-line-no">390</span><span id="line-390"> }</span>
<span class="source-line-no">391</span><span id="line-391"> }</span>
<span class="source-line-no">392</span><span id="line-392"></span>
/**
 * Returns the start offset of column {@code idx} within the line bytes.
 * @param idx zero-based column index
 * @return 0 when {@code idx} is not positive, otherwise one past the value stored in
 *         {@code tabOffsets} for the previous column
 */
<span class="source-line-no">393</span><span id="line-393"> public int getColumnOffset(int idx) {</span>
<span class="source-line-no">394</span><span id="line-394"> if (idx &gt; 0) return tabOffsets.get(idx - 1) + 1;</span>
<span class="source-line-no">395</span><span id="line-395"> else return 0;</span>
<span class="source-line-no">396</span><span id="line-396"> }</span>
<span class="source-line-no">397</span><span id="line-397"></span>
/**
 * Returns the length of column {@code idx}: the value stored in {@code tabOffsets} for that
 * column minus the column's start offset.
 * @param idx zero-based column index
 */
<span class="source-line-no">398</span><span id="line-398"> public int getColumnLength(int idx) {</span>
<span class="source-line-no">399</span><span id="line-399"> return tabOffsets.get(idx) - getColumnOffset(idx);</span>
<span class="source-line-no">400</span><span id="line-400"> }</span>
<span class="source-line-no">401</span><span id="line-401"></span>
/** Returns the number of columns in this line, i.e. the number of entries in {@code tabOffsets}. */
<span class="source-line-no">402</span><span id="line-402"> public int getColumnCount() {</span>
<span class="source-line-no">403</span><span id="line-403"> return tabOffsets.size();</span>
<span class="source-line-no">404</span><span id="line-404"> }</span>
<span class="source-line-no">405</span><span id="line-405"></span>
/** Returns the {@code lineBytes} array itself; no copy is made. */
<span class="source-line-no">406</span><span id="line-406"> public byte[] getLineBytes() {</span>
<span class="source-line-no">407</span><span id="line-407"> return lineBytes;</span>
<span class="source-line-no">408</span><span id="line-408"> }</span>
<span class="source-line-no">409</span><span id="line-409"> }</span>
<span class="source-line-no">410</span><span id="line-410"></span>
/**
 * Checked exception carrying a message that describes why a TSV line was rejected, for example
 * an unparsable timestamp or a missing or empty row key.
 */
<span class="source-line-no">411</span><span id="line-411"> public static class BadTsvLineException extends Exception {</span>
 /** Creates the exception with {@code err} as its message. */
<span class="source-line-no">412</span><span id="line-412"> public BadTsvLineException(String err) {</span>
<span class="source-line-no">413</span><span id="line-413"> super(err);</span>
<span class="source-line-no">414</span><span id="line-414"> }</span>
<span class="source-line-no">415</span><span id="line-415"></span>
<span class="source-line-no">416</span><span id="line-416"> private static final long serialVersionUID = 1L;</span>
<span class="source-line-no">417</span><span id="line-417"> }</span>
<span class="source-line-no">418</span><span id="line-418"></span>
<span class="source-line-no">419</span><span id="line-419"> /**</span>
<span class="source-line-no">420</span><span id="line-420"> * Return starting position and length of row key from the specified line bytes.</span>
 * Fields are delimited by {@code separatorByte}; the field whose position equals
 * {@code getRowKeyColumnIndex()} is the row key.
 * @param lineBytes bytes of the line to scan
 * @param length number of leading bytes of {@code lineBytes} to examine
<span class="source-line-no">421</span><span id="line-421"> * @return Pair of row key offset and length.</span>
 * @throws BadTsvLineException if the row key field is empty, or the line ends before the row
 *           key column is reached
<span class="source-line-no">422</span><span id="line-422"> */</span>
<span class="source-line-no">423</span><span id="line-423"> public Pair&lt;Integer, Integer&gt; parseRowKey(byte[] lineBytes, int length)</span>
<span class="source-line-no">424</span><span id="line-424"> throws BadTsvLineException {</span>
<span class="source-line-no">425</span><span id="line-425"> int rkColumnIndex = 0;</span>
<span class="source-line-no">426</span><span id="line-426"> int startPos = 0, endPos = 0;</span>
 // The loop runs one step past the last byte so that the end of the line closes the final field.
<span class="source-line-no">427</span><span id="line-427"> for (int i = 0; i &lt;= length; i++) {</span>
<span class="source-line-no">428</span><span id="line-428"> if (i == length || lineBytes[i] == separatorByte) {</span>
 // endPos is the inclusive index of the last byte of the field that just ended.
<span class="source-line-no">429</span><span id="line-429"> endPos = i - 1;</span>
<span class="source-line-no">430</span><span id="line-430"> if (rkColumnIndex++ == getRowKeyColumnIndex()) {</span>
 // An empty field has its inclusive end one position before its start.
<span class="source-line-no">431</span><span id="line-431"> if ((endPos + 1) == startPos) {</span>
<span class="source-line-no">432</span><span id="line-432"> throw new BadTsvLineException("Empty value for ROW KEY.");</span>
<span class="source-line-no">433</span><span id="line-433"> }</span>
<span class="source-line-no">434</span><span id="line-434"> break;</span>
<span class="source-line-no">435</span><span id="line-435"> } else {</span>
 // Skip the separator at index endPos + 1; the next field starts right after it.
<span class="source-line-no">436</span><span id="line-436"> startPos = endPos + 2;</span>
<span class="source-line-no">437</span><span id="line-437"> }</span>
<span class="source-line-no">438</span><span id="line-438"> }</span>
 // Reached only when the end of the line was hit without breaking out on the row key column.
<span class="source-line-no">439</span><span id="line-439"> if (i == length) {</span>
<span class="source-line-no">440</span><span id="line-440"> throw new BadTsvLineException("Row key does not exist as number of columns in the line"</span>
<span class="source-line-no">441</span><span id="line-441"> + " are less than row key position.");</span>
<span class="source-line-no">442</span><span id="line-442"> }</span>
<span class="source-line-no">443</span><span id="line-443"> }</span>
<span class="source-line-no">444</span><span id="line-444"> return new Pair&lt;&gt;(startPos, endPos - startPos + 1);</span>
<span class="source-line-no">445</span><span id="line-445"> }</span>
<span class="source-line-no">446</span><span id="line-446"> }</span>
<span class="source-line-no">447</span><span id="line-447"></span>
<span class="source-line-no">448</span><span id="line-448"> /**</span>
<span class="source-line-no">449</span><span id="line-449"> * Sets up the actual job.</span>
 * Reads {@code args[0]} as the table name and {@code args[1]} as the input path. When
 * {@code BULK_OUTPUT_CONF_KEY} is set the job is configured to write HFiles through a sort
 * reducer; otherwise it is configured as a map-only job. On a column family mismatch in the
 * bulk path, or when {@code TsvImporterTextMapper} is requested in the non-bulk path, this
 * method prints usage and calls {@code System.exit(-1)}.
<span class="source-line-no">450</span><span id="line-450"> * @param conf The current configuration.</span>
<span class="source-line-no">451</span><span id="line-451"> * @param args The command line parameters.</span>
<span class="source-line-no">452</span><span id="line-452"> * @return The newly created job.</span>
<span class="source-line-no">453</span><span id="line-453"> * @throws IOException When setting up the job fails.</span>
 * @throws ClassNotFoundException When the class named by {@code MAPPER_CONF_KEY} cannot be loaded.
<span class="source-line-no">454</span><span id="line-454"> */</span>
<span class="source-line-no">455</span><span id="line-455"> protected static Job createSubmittableJob(Configuration conf, String[] args)</span>
<span class="source-line-no">456</span><span id="line-456"> throws IOException, ClassNotFoundException {</span>
<span class="source-line-no">457</span><span id="line-457"> Job job = null;</span>
<span class="source-line-no">458</span><span id="line-458"> boolean isDryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);</span>
<span class="source-line-no">459</span><span id="line-459"> try (Connection connection = ConnectionFactory.createConnection(conf)) {</span>
<span class="source-line-no">460</span><span id="line-460"> try (Admin admin = connection.getAdmin()) {</span>
<span class="source-line-no">461</span><span id="line-461"> // Support non-XML supported characters</span>
<span class="source-line-no">462</span><span id="line-462"> // by re-encoding the passed separator as a Base64 string.</span>
 // Note that this overwrites the separator in the caller's conf object.
<span class="source-line-no">463</span><span id="line-463"> String actualSeparator = conf.get(SEPARATOR_CONF_KEY);</span>
<span class="source-line-no">464</span><span id="line-464"> if (actualSeparator != null) {</span>
<span class="source-line-no">465</span><span id="line-465"> conf.set(SEPARATOR_CONF_KEY,</span>
<span class="source-line-no">466</span><span id="line-466"> Bytes.toString(Base64.getEncoder().encode(Bytes.toBytes(actualSeparator))));</span>
<span class="source-line-no">467</span><span id="line-467"> }</span>
<span class="source-line-no">468</span><span id="line-468"></span>
<span class="source-line-no">469</span><span id="line-469"> // See if a non-default Mapper was set</span>
<span class="source-line-no">470</span><span id="line-470"> String mapperClassName = conf.get(MAPPER_CONF_KEY);</span>
<span class="source-line-no">471</span><span id="line-471"> Class mapperClass =</span>
<span class="source-line-no">472</span><span id="line-472"> mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;</span>
<span class="source-line-no">473</span><span id="line-473"></span>
<span class="source-line-no">474</span><span id="line-474"> TableName tableName = TableName.valueOf(args[0]);</span>
<span class="source-line-no">475</span><span id="line-475"> Path inputDir = new Path(args[1]);</span>
<span class="source-line-no">476</span><span id="line-476"> String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());</span>
<span class="source-line-no">477</span><span id="line-477"> job = Job.getInstance(conf, jobName);</span>
<span class="source-line-no">478</span><span id="line-478"> job.setJarByClass(mapperClass);</span>
<span class="source-line-no">479</span><span id="line-479"> FileInputFormat.setInputPaths(job, inputDir);</span>
<span class="source-line-no">480</span><span id="line-480"> job.setInputFormatClass(TextInputFormat.class);</span>
<span class="source-line-no">481</span><span id="line-481"> job.setMapperClass(mapperClass);</span>
<span class="source-line-no">482</span><span id="line-482"> job.setMapOutputKeyClass(ImmutableBytesWritable.class);</span>
<span class="source-line-no">483</span><span id="line-483"> String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);</span>
<span class="source-line-no">484</span><span id="line-484"> String[] columns = conf.getStrings(COLUMNS_CONF_KEY);</span>
 // Optionally load tokens from the file named by CREDENTIALS_LOCATION into the job's credentials.
<span class="source-line-no">485</span><span id="line-485"> if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {</span>
<span class="source-line-no">486</span><span id="line-486"> String fileLoc = conf.get(CREDENTIALS_LOCATION);</span>
<span class="source-line-no">487</span><span id="line-487"> Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);</span>
<span class="source-line-no">488</span><span id="line-488"> job.getCredentials().addAll(cred);</span>
<span class="source-line-no">489</span><span id="line-489"> }</span>
<span class="source-line-no">490</span><span id="line-490"></span>
 // Bulk-output path: BULK_OUTPUT_CONF_KEY is set, so the job is set up to produce HFiles.
<span class="source-line-no">491</span><span id="line-491"> if (hfileOutPath != null) {</span>
<span class="source-line-no">492</span><span id="line-492"> if (!admin.tableExists(tableName)) {</span>
<span class="source-line-no">493</span><span id="line-493"> LOG.warn(format("Table '%s' does not exist.", tableName));</span>
 // The table is created unless CREATE_TABLE_CONF_KEY is set to something other than "yes".
<span class="source-line-no">494</span><span id="line-494"> if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {</span>
<span class="source-line-no">495</span><span id="line-495"> // TODO: this is backwards. Instead of depending on the existence of a table,</span>
<span class="source-line-no">496</span><span id="line-496"> // create a sane splits file for HFileOutputFormat based on data sampling.</span>
<span class="source-line-no">497</span><span id="line-497"> createTable(admin, tableName, columns);</span>
<span class="source-line-no">498</span><span id="line-498"> if (isDryRun) {</span>
<span class="source-line-no">499</span><span id="line-499"> LOG.warn("Dry run: Table will be deleted at end of dry run.");</span>
 // Record, under the class lock, that this dry run created the table.
<span class="source-line-no">500</span><span id="line-500"> synchronized (ImportTsv.class) {</span>
<span class="source-line-no">501</span><span id="line-501"> DRY_RUN_TABLE_CREATED = true;</span>
<span class="source-line-no">502</span><span id="line-502"> }</span>
<span class="source-line-no">503</span><span id="line-503"> }</span>
<span class="source-line-no">504</span><span id="line-504"> } else {</span>
<span class="source-line-no">505</span><span id="line-505"> String errorMsg = format("Table '%s' does not exist and '%s' is set to no.",</span>
<span class="source-line-no">506</span><span id="line-506"> tableName, CREATE_TABLE_CONF_KEY);</span>
<span class="source-line-no">507</span><span id="line-507"> LOG.error(errorMsg);</span>
<span class="source-line-no">508</span><span id="line-508"> throw new TableNotFoundException(errorMsg);</span>
<span class="source-line-no">509</span><span id="line-509"> }</span>
<span class="source-line-no">510</span><span id="line-510"> }</span>
<span class="source-line-no">511</span><span id="line-511"> try (Table table = connection.getTable(tableName);</span>
<span class="source-line-no">512</span><span id="line-512"> RegionLocator regionLocator = connection.getRegionLocator(tableName)) {</span>
<span class="source-line-no">513</span><span id="line-513"> boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);</span>
<span class="source-line-no">514</span><span id="line-514"> // if no.strict is false then check column family</span>
<span class="source-line-no">515</span><span id="line-515"> if (!noStrict) {</span>
<span class="source-line-no">516</span><span id="line-516"> ArrayList&lt;String&gt; unmatchedFamilies = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">517</span><span id="line-517"> Set&lt;String&gt; cfSet = getColumnFamilies(columns);</span>
<span class="source-line-no">518</span><span id="line-518"> TableDescriptor tDesc = table.getDescriptor();</span>
<span class="source-line-no">519</span><span id="line-519"> for (String cf : cfSet) {</span>
<span class="source-line-no">520</span><span id="line-520"> if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {</span>
<span class="source-line-no">521</span><span id="line-521"> unmatchedFamilies.add(cf);</span>
<span class="source-line-no">522</span><span id="line-522"> }</span>
<span class="source-line-no">523</span><span id="line-523"> }</span>
<span class="source-line-no">524</span><span id="line-524"> if (unmatchedFamilies.size() &gt; 0) {</span>
<span class="source-line-no">525</span><span id="line-525"> ArrayList&lt;String&gt; familyNames = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">526</span><span id="line-526"> for (ColumnFamilyDescriptor family : table.getDescriptor().getColumnFamilies()) {</span>
<span class="source-line-no">527</span><span id="line-527"> familyNames.add(family.getNameAsString());</span>
<span class="source-line-no">528</span><span id="line-528"> }</span>
<span class="source-line-no">529</span><span id="line-529"> String msg = "Column Families " + unmatchedFamilies + " specified in "</span>
<span class="source-line-no">530</span><span id="line-530"> + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName</span>
<span class="source-line-no">531</span><span id="line-531"> + " column families " + familyNames + ".\n"</span>
<span class="source-line-no">532</span><span id="line-532"> + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY + "=true.\n";</span>
 // NOTE: this path terminates the JVM rather than throwing, unlike the non-bulk path below.
<span class="source-line-no">533</span><span id="line-533"> usage(msg);</span>
<span class="source-line-no">534</span><span id="line-534"> System.exit(-1);</span>
<span class="source-line-no">535</span><span id="line-535"> }</span>
<span class="source-line-no">536</span><span id="line-536"> }</span>
 // The map output value class and the reducer must agree with the mapper that was chosen.
<span class="source-line-no">537</span><span id="line-537"> if (mapperClass.equals(TsvImporterTextMapper.class)) {</span>
<span class="source-line-no">538</span><span id="line-538"> job.setMapOutputValueClass(Text.class);</span>
<span class="source-line-no">539</span><span id="line-539"> job.setReducerClass(TextSortReducer.class);</span>
<span class="source-line-no">540</span><span id="line-540"> } else {</span>
<span class="source-line-no">541</span><span id="line-541"> job.setMapOutputValueClass(Put.class);</span>
<span class="source-line-no">542</span><span id="line-542"> job.setCombinerClass(PutCombiner.class);</span>
<span class="source-line-no">543</span><span id="line-543"> job.setReducerClass(PutSortReducer.class);</span>
<span class="source-line-no">544</span><span id="line-544"> }</span>
 // A dry run skips the output path and the HFile output configuration.
<span class="source-line-no">545</span><span id="line-545"> if (!isDryRun) {</span>
<span class="source-line-no">546</span><span id="line-546"> Path outputDir = new Path(hfileOutPath);</span>
<span class="source-line-no">547</span><span id="line-547"> FileOutputFormat.setOutputPath(job, outputDir);</span>
<span class="source-line-no">548</span><span id="line-548"> HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),</span>
<span class="source-line-no">549</span><span id="line-549"> regionLocator);</span>
<span class="source-line-no">550</span><span id="line-550"> }</span>
<span class="source-line-no">551</span><span id="line-551"> }</span>
<span class="source-line-no">552</span><span id="line-552"> } else {</span>
 // Non-bulk path: the table must already exist, and the column family check always runs.
<span class="source-line-no">553</span><span id="line-553"> if (!admin.tableExists(tableName)) {</span>
<span class="source-line-no">554</span><span id="line-554"> String errorMsg = format("Table '%s' does not exist.", tableName);</span>
<span class="source-line-no">555</span><span id="line-555"> LOG.error(errorMsg);</span>
<span class="source-line-no">556</span><span id="line-556"> throw new TableNotFoundException(errorMsg);</span>
<span class="source-line-no">557</span><span id="line-557"> }</span>
<span class="source-line-no">558</span><span id="line-558"> try (Table table = connection.getTable(tableName)) {</span>
<span class="source-line-no">559</span><span id="line-559"> ArrayList&lt;String&gt; unmatchedFamilies = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">560</span><span id="line-560"> Set&lt;String&gt; cfSet = getColumnFamilies(columns);</span>
<span class="source-line-no">561</span><span id="line-561"> TableDescriptor tDesc = table.getDescriptor();</span>
<span class="source-line-no">562</span><span id="line-562"> for (String cf : cfSet) {</span>
<span class="source-line-no">563</span><span id="line-563"> if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {</span>
<span class="source-line-no">564</span><span id="line-564"> unmatchedFamilies.add(cf);</span>
<span class="source-line-no">565</span><span id="line-565"> }</span>
<span class="source-line-no">566</span><span id="line-566"> }</span>
<span class="source-line-no">567</span><span id="line-567"> if (unmatchedFamilies.size() &gt; 0) {</span>
<span class="source-line-no">568</span><span id="line-568"> String noSuchColumnFamiliesMsg =</span>
<span class="source-line-no">569</span><span id="line-569"> format("Column families: %s do not exist.", unmatchedFamilies);</span>
<span class="source-line-no">570</span><span id="line-570"> LOG.error(noSuchColumnFamiliesMsg);</span>
<span class="source-line-no">571</span><span id="line-571"> throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg);</span>
<span class="source-line-no">572</span><span id="line-572"> }</span>
<span class="source-line-no">573</span><span id="line-573"> }</span>
 // NOTE: requesting the text mapper here prints usage and terminates the JVM.
<span class="source-line-no">574</span><span id="line-574"> if (mapperClass.equals(TsvImporterTextMapper.class)) {</span>
<span class="source-line-no">575</span><span id="line-575"> usage(TsvImporterTextMapper.class.toString()</span>
<span class="source-line-no">576</span><span id="line-576"> + " should not be used for non bulkloading case. use "</span>
<span class="source-line-no">577</span><span id="line-577"> + TsvImporterMapper.class.toString() + " or custom mapper whose value type is Put.");</span>
<span class="source-line-no">578</span><span id="line-578"> System.exit(-1);</span>
<span class="source-line-no">579</span><span id="line-579"> }</span>
<span class="source-line-no">580</span><span id="line-580"> if (!isDryRun) {</span>
<span class="source-line-no">581</span><span id="line-581"> // No reducers. Just write straight to table. Call initTableReducerJob</span>
<span class="source-line-no">582</span><span id="line-582"> // to set up the TableOutputFormat.</span>
<span class="source-line-no">583</span><span id="line-583"> TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);</span>
<span class="source-line-no">584</span><span id="line-584"> }</span>
<span class="source-line-no">585</span><span id="line-585"> job.setNumReduceTasks(0);</span>
<span class="source-line-no">586</span><span id="line-586"> }</span>
 // Dry run: discard all output, and append the HBase serializations to whatever
 // "io.serializations" already holds.
<span class="source-line-no">587</span><span id="line-587"> if (isDryRun) {</span>
<span class="source-line-no">588</span><span id="line-588"> job.setOutputFormatClass(NullOutputFormat.class);</span>
<span class="source-line-no">589</span><span id="line-589"> job.getConfiguration().setStrings("io.serializations",</span>
<span class="source-line-no">590</span><span id="line-590"> job.getConfiguration().get("io.serializations"), MutationSerialization.class.getName(),</span>
<span class="source-line-no">591</span><span id="line-591"> ResultSerialization.class.getName(), CellSerialization.class.getName());</span>
<span class="source-line-no">592</span><span id="line-592"> }</span>
<span class="source-line-no">593</span><span id="line-593"> TableMapReduceUtil.addDependencyJars(job);</span>
<span class="source-line-no">594</span><span id="line-594"> TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),</span>
<span class="source-line-no">595</span><span id="line-595"> org.apache.hbase.thirdparty.com.google.common.base.Function.class /*</span>
<span class="source-line-no">596</span><span id="line-596"> * Guava used by</span>
<span class="source-line-no">597</span><span id="line-597"> * TsvParser</span>
<span class="source-line-no">598</span><span id="line-598"> */);</span>
<span class="source-line-no">599</span><span id="line-599"> }</span>
<span class="source-line-no">600</span><span id="line-600"> }</span>
<span class="source-line-no">601</span><span id="line-601"> return job;</span>
<span class="source-line-no">602</span><span id="line-602"> }</span>
<span class="source-line-no">603</span><span id="line-603"></span>
/**
 * Creates {@code tableName} with one column family for each family name that
 * {@code getColumnFamilies(columns)} returns, and logs the creation at WARN level.
 * @param admin admin handle used to create the table
 * @param tableName name of the table to create
 * @param columns column specifications from which the family names are derived
 * @throws IOException declared by this method; it contains no catch block of its own
 */
<span class="source-line-no">604</span><span id="line-604"> private static void createTable(Admin admin, TableName tableName, String[] columns)</span>
<span class="source-line-no">605</span><span id="line-605"> throws IOException {</span>
<span class="source-line-no">606</span><span id="line-606"> TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);</span>
<span class="source-line-no">607</span><span id="line-607"> Set&lt;String&gt; cfSet = getColumnFamilies(columns);</span>
<span class="source-line-no">608</span><span id="line-608"> for (String cf : cfSet) {</span>
<span class="source-line-no">609</span><span id="line-609"> builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));</span>
<span class="source-line-no">610</span><span id="line-610"> }</span>
<span class="source-line-no">611</span><span id="line-611"> LOG.warn(</span>
<span class="source-line-no">612</span><span id="line-612"> format("Creating table '%s' with '%s' columns and default descriptors.", tableName, cfSet));</span>
<span class="source-line-no">613</span><span id="line-613"> admin.createTable(builder.build());</span>
<span class="source-line-no">614</span><span id="line-614"> }</span>
<span class="source-line-no">615</span><span id="line-615"></span>
/**
 * Disables and then deletes the table named by {@code args[0]}, using a connection opened
 * for this call. An {@code IOException} is logged at ERROR level and not rethrown.
 * @param conf configuration used to open the connection
 * @param args command line parameters; only {@code args[0]} (the table name) is read
 */
<span class="source-line-no">616</span><span id="line-616"> private static void deleteTable(Configuration conf, String[] args) {</span>
<span class="source-line-no">617</span><span id="line-617"> TableName tableName = TableName.valueOf(args[0]);</span>
<span class="source-line-no">618</span><span id="line-618"> try (Connection connection = ConnectionFactory.createConnection(conf);</span>
<span class="source-line-no">619</span><span id="line-619"> Admin admin = connection.getAdmin()) {</span>
<span class="source-line-no">620</span><span id="line-620"> try {</span>
<span class="source-line-no">621</span><span id="line-621"> admin.disableTable(tableName);</span>
<span class="source-line-no">622</span><span id="line-622"> } catch (TableNotEnabledException e) {</span>
 // Deliberately ignored: a table that is already disabled can be deleted straight away.
<span class="source-line-no">623</span><span id="line-623"> LOG.debug("Dry mode: Table: " + tableName + " already disabled, so just deleting it.");</span>
<span class="source-line-no">624</span><span id="line-624"> }</span>
<span class="source-line-no">625</span><span id="line-625"> admin.deleteTable(tableName);</span>
<span class="source-line-no">626</span><span id="line-626"> } catch (IOException e) {</span>
 // Only e.toString() is logged, so the stack trace is not recorded.
<span class="source-line-no">627</span><span id="line-627"> LOG.error(format("***Dry run: Failed to delete table '%s'.***%n%s", tableName, e.toString()));</span>
<span class="source-line-no">628</span><span id="line-628"> return;</span>
<span class="source-line-no">629</span><span id="line-629"> }</span>
<span class="source-line-no">630</span><span id="line-630"> LOG.info(format("Dry run: Deleted table '%s'.", tableName));</span>
<span class="source-line-no">631</span><span id="line-631"> }</span>
<span class="source-line-no">632</span><span id="line-632"></span>
<span class="source-line-no">633</span><span id="line-633"> private static Set&lt;String&gt; getColumnFamilies(String[] columns) {</span>
<span class="source-line-no">634</span><span id="line-634"> Set&lt;String&gt; cfSet = new HashSet&lt;&gt;();</span>
<span class="source-line-no">635</span><span id="line-635"> for (String aColumn : columns) {</span>
<span class="source-line-no">636</span><span id="line-636"> if (</span>
<span class="source-line-no">637</span><span id="line-637"> TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)</span>
<span class="source-line-no">638</span><span id="line-638"> || TsvParser.TIMESTAMPKEY_COLUMN_SPEC.equals(aColumn)</span>
<span class="source-line-no">639</span><span id="line-639"> || TsvParser.CELL_VISIBILITY_COLUMN_SPEC.equals(aColumn)</span>
<span class="source-line-no">640</span><span id="line-640"> || TsvParser.CELL_TTL_COLUMN_SPEC.equals(aColumn)</span>
<span class="source-line-no">641</span><span id="line-641"> || TsvParser.ATTRIBUTES_COLUMN_SPEC.equals(aColumn)</span>
<span class="source-line-no">642</span><span id="line-642"> ) continue;</span>
<span class="source-line-no">643</span><span id="line-643"> // keep only the column family, i.e. the part before the first ':' (in case this is a cf:cq)</span>
<span class="source-line-no">644</span><span id="line-644"> cfSet.add(aColumn.split(":", 2)[0]);</span>
<span class="source-line-no">645</span><span id="line-645"> }</span>
<span class="source-line-no">646</span><span id="line-646"> return cfSet;</span>
<span class="source-line-no">647</span><span id="line-647"> }</span>
<span class="source-line-no">648</span><span id="line-648"></span>
<span class="source-line-no">649</span><span id="line-649"> /*</span>
<span class="source-line-no">650</span><span id="line-650"> * @param errorMsg Error message. Can be null.</span>
<span class="source-line-no">651</span><span id="line-651"> */</span>
<span class="source-line-no">652</span><span id="line-652"> private static void usage(final String errorMsg) {</span>
<span class="source-line-no">653</span><span id="line-653"> if (errorMsg != null &amp;&amp; errorMsg.length() &gt; 0) {</span>
<span class="source-line-no">654</span><span id="line-654"> System.err.println("ERROR: " + errorMsg);</span>
<span class="source-line-no">655</span><span id="line-655"> }</span>
<span class="source-line-no">656</span><span id="line-656"> String usage = "Usage: " + NAME + " -D" + COLUMNS_CONF_KEY + "=a,b,c &lt;tablename&gt; &lt;inputdir&gt;\n"</span>
<span class="source-line-no">657</span><span id="line-657"> + "\n" + "Imports the given input directory of TSV data into the specified table.\n" + "\n"</span>
<span class="source-line-no">658</span><span id="line-658"> + "The column names of the TSV data must be specified using the -D" + COLUMNS_CONF_KEY + "\n"</span>
<span class="source-line-no">659</span><span id="line-659"> + "option. This option takes the form of comma-separated column names, where each\n"</span>
<span class="source-line-no">660</span><span id="line-660"> + "column name is either a simple column family, or a columnfamily:qualifier. The special\n"</span>
<span class="source-line-no">661</span><span id="line-661"> + "column name " + TsvParser.ROWKEY_COLUMN_SPEC</span>
<span class="source-line-no">662</span><span id="line-662"> + " is used to designate that this column should be used\n"</span>
<span class="source-line-no">663</span><span id="line-663"> + "as the row key for each imported record. You must specify exactly one column\n"</span>
<span class="source-line-no">664</span><span id="line-664"> + "to be the row key, and you must specify a column name for every column that exists in the\n"</span>
<span class="source-line-no">665</span><span id="line-665"> + "input data. Another special column" + TsvParser.TIMESTAMPKEY_COLUMN_SPEC</span>
<span class="source-line-no">666</span><span id="line-666"> + " designates that this column should be\n" + "used as timestamp for each record. Unlike "</span>
<span class="source-line-no">667</span><span id="line-667"> + TsvParser.ROWKEY_COLUMN_SPEC + ", " + TsvParser.TIMESTAMPKEY_COLUMN_SPEC + " is optional."</span>
<span class="source-line-no">668</span><span id="line-668"> + "\n" + "You must specify at most one column as timestamp key for each imported record.\n"</span>
<span class="source-line-no">669</span><span id="line-669"> + "Record with invalid timestamps (blank, non-numeric) will be treated as bad record.\n"</span>
<span class="source-line-no">670</span><span id="line-670"> + "Note: if you use this option, then '" + TIMESTAMP_CONF_KEY + "' option will be ignored.\n"</span>
<span class="source-line-no">671</span><span id="line-671"> + "\n" + "Other special columns that can be specified are " + TsvParser.CELL_TTL_COLUMN_SPEC</span>
<span class="source-line-no">672</span><span id="line-672"> + " and " + TsvParser.CELL_VISIBILITY_COLUMN_SPEC + ".\n" + TsvParser.CELL_TTL_COLUMN_SPEC</span>
<span class="source-line-no">673</span><span id="line-673"> + " designates that this column will be used " + "as a Cell's Time To Live (TTL) attribute.\n"</span>
<span class="source-line-no">674</span><span id="line-674"> + TsvParser.CELL_VISIBILITY_COLUMN_SPEC + " designates that this column contains the "</span>
<span class="source-line-no">675</span><span id="line-675"> + "visibility label expression.\n" + "\n" + TsvParser.ATTRIBUTES_COLUMN_SPEC</span>
<span class="source-line-no">676</span><span id="line-676"> + " can be used to specify Operation Attributes per record.\n"</span>
<span class="source-line-no">677</span><span id="line-677"> + " Should be specified as key=&gt;value where " + TsvParser.DEFAULT_ATTRIBUTES_COLUMN_INDEX</span>
<span class="source-line-no">678</span><span id="line-678"> + " is used \n"</span>
<span class="source-line-no">679</span><span id="line-679"> + " as the seperator. Note that more than one OperationAttributes can be specified.\n"</span>
<span class="source-line-no">680</span><span id="line-680"> + "By default importtsv will load data directly into HBase. To instead generate\n"</span>
<span class="source-line-no">681</span><span id="line-681"> + "HFiles of data to prepare for a bulk data load, pass the option:\n" + " -D"</span>
<span class="source-line-no">682</span><span id="line-682"> + BULK_OUTPUT_CONF_KEY + "=/path/for/output\n"</span>
<span class="source-line-no">683</span><span id="line-683"> + " Note: if you do not use this option, then the target table must already exist in HBase\n"</span>
<span class="source-line-no">684</span><span id="line-684"> + "\n" + "Other options that may be specified with -D include:\n" + " -D" + DRY_RUN_CONF_KEY</span>
<span class="source-line-no">685</span><span id="line-685"> + "=true - Dry run mode. Data is not actually populated into"</span>
<span class="source-line-no">686</span><span id="line-686"> + " table. If table does not exist, it is created but deleted in the end.\n" + " -D"</span>
<span class="source-line-no">687</span><span id="line-687"> + SKIP_LINES_CONF_KEY + "=false - fail if encountering an invalid line\n" + " -D"</span>
<span class="source-line-no">688</span><span id="line-688"> + LOG_BAD_LINES_CONF_KEY + "=true - logs invalid lines to stderr\n" + " -D"</span>
<span class="source-line-no">689</span><span id="line-689"> + SKIP_EMPTY_COLUMNS + "=false - If true then skip empty columns in bulk import\n" + " '-D"</span>
<span class="source-line-no">690</span><span id="line-690"> + SEPARATOR_CONF_KEY + "=|' - eg separate on pipes instead of tabs\n" + " -D"</span>
<span class="source-line-no">691</span><span id="line-691"> + TIMESTAMP_CONF_KEY + "=currentTimeAsLong - use the specified timestamp for the import\n"</span>
<span class="source-line-no">692</span><span id="line-692"> + " -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of "</span>
<span class="source-line-no">693</span><span id="line-693"> + DEFAULT_MAPPER.getName() + "\n" + " -D" + JOB_NAME_CONF_KEY</span>
<span class="source-line-no">694</span><span id="line-694"> + "=jobName - use the specified mapreduce job name for the import\n" + " -D"</span>
<span class="source-line-no">695</span><span id="line-695"> + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n"</span>
<span class="source-line-no">696</span><span id="line-696"> + " Note: if you set this to 'no', then the target table must already exist in HBase\n"</span>
<span class="source-line-no">697</span><span id="line-697"> + " -D" + NO_STRICT_COL_FAMILY + "=true - ignore column family check in hbase table. "</span>
<span class="source-line-no">698</span><span id="line-698"> + "Default is false\n\n" + "For performance consider the following options:\n"</span>
<span class="source-line-no">699</span><span id="line-699"> + " -Dmapreduce.map.speculative=false\n" + " -Dmapreduce.reduce.speculative=false";</span>
<span class="source-line-no">700</span><span id="line-700"></span>
<span class="source-line-no">701</span><span id="line-701"> System.err.println(usage);</span>
<span class="source-line-no">702</span><span id="line-702"> }</span>
<span class="source-line-no">703</span><span id="line-703"></span>
<span class="source-line-no">704</span><span id="line-704"> @Override</span>
<span class="source-line-no">705</span><span id="line-705"> public int run(String[] args) throws Exception {</span>
<span class="source-line-no">706</span><span id="line-706"> if (args.length &lt; 2) {</span>
<span class="source-line-no">707</span><span id="line-707"> usage("Wrong number of arguments: " + args.length);</span>
<span class="source-line-no">708</span><span id="line-708"> return -1;</span>
<span class="source-line-no">709</span><span id="line-709"> }</span>
<span class="source-line-no">710</span><span id="line-710"></span>
<span class="source-line-no">711</span><span id="line-711"> // When MAPPER_CONF_KEY is null, the user wants to use the provided TsvImporterMapper, so</span>
<span class="source-line-no">712</span><span id="line-712"> // perform validation on these additional args. When it's not null, user has provided their</span>
<span class="source-line-no">713</span><span id="line-713"> // own mapper, thus these validations are not relevant.</span>
<span class="source-line-no">714</span><span id="line-714"> // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.</span>
<span class="source-line-no">715</span><span id="line-715"> if (null == getConf().get(MAPPER_CONF_KEY)) {</span>
<span class="source-line-no">716</span><span id="line-716"> // Make sure columns are specified</span>
<span class="source-line-no">717</span><span id="line-717"> String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);</span>
<span class="source-line-no">718</span><span id="line-718"> if (columns == null) {</span>
<span class="source-line-no">719</span><span id="line-719"> usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");</span>
<span class="source-line-no">720</span><span id="line-720"> return -1;</span>
<span class="source-line-no">721</span><span id="line-721"> }</span>
<span class="source-line-no">722</span><span id="line-722"></span>
<span class="source-line-no">723</span><span id="line-723"> // Make sure they specify exactly one column as the row key</span>
<span class="source-line-no">724</span><span id="line-724"> int rowkeysFound = 0;</span>
<span class="source-line-no">725</span><span id="line-725"> for (String col : columns) {</span>
<span class="source-line-no">726</span><span id="line-726"> if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC)) rowkeysFound++;</span>
<span class="source-line-no">727</span><span id="line-727"> }</span>
<span class="source-line-no">728</span><span id="line-728"> if (rowkeysFound != 1) {</span>
<span class="source-line-no">729</span><span id="line-729"> usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);</span>
<span class="source-line-no">730</span><span id="line-730"> return -1;</span>
<span class="source-line-no">731</span><span id="line-731"> }</span>
<span class="source-line-no">732</span><span id="line-732"></span>
<span class="source-line-no">733</span><span id="line-733"> // Make sure we have at most one column as the timestamp key</span>
<span class="source-line-no">734</span><span id="line-734"> int tskeysFound = 0;</span>
<span class="source-line-no">735</span><span id="line-735"> for (String col : columns) {</span>
<span class="source-line-no">736</span><span id="line-736"> if (col.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC)) tskeysFound++;</span>
<span class="source-line-no">737</span><span id="line-737"> }</span>
<span class="source-line-no">738</span><span id="line-738"> if (tskeysFound &gt; 1) {</span>
<span class="source-line-no">739</span><span id="line-739"> usage("Must specify at most one column as " + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);</span>
<span class="source-line-no">740</span><span id="line-740"> return -1;</span>
<span class="source-line-no">741</span><span id="line-741"> }</span>
<span class="source-line-no">742</span><span id="line-742"></span>
<span class="source-line-no">743</span><span id="line-743"> int attrKeysFound = 0;</span>
<span class="source-line-no">744</span><span id="line-744"> for (String col : columns) {</span>
<span class="source-line-no">745</span><span id="line-745"> if (col.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC)) attrKeysFound++;</span>
<span class="source-line-no">746</span><span id="line-746"> }</span>
<span class="source-line-no">747</span><span id="line-747"> if (attrKeysFound &gt; 1) {</span>
<span class="source-line-no">748</span><span id="line-748"> usage("Must specify at most one column as " + TsvParser.ATTRIBUTES_COLUMN_SPEC);</span>
<span class="source-line-no">749</span><span id="line-749"> return -1;</span>
<span class="source-line-no">750</span><span id="line-750"> }</span>
<span class="source-line-no">751</span><span id="line-751"></span>
<span class="source-line-no">752</span><span id="line-752"> // Make sure one or more columns are specified excluding rowkey,</span>
<span class="source-line-no">753</span><span id="line-753"> // timestamp key and attributes key</span>
<span class="source-line-no">754</span><span id="line-754"> if (columns.length - (rowkeysFound + tskeysFound + attrKeysFound) &lt; 1) {</span>
<span class="source-line-no">755</span><span id="line-755"> usage(</span>
<span class="source-line-no">756</span><span id="line-756"> "One or more columns in addition to the row key and timestamp(optional) are required");</span>
<span class="source-line-no">757</span><span id="line-757"> return -1;</span>
<span class="source-line-no">758</span><span id="line-758"> }</span>
<span class="source-line-no">759</span><span id="line-759"> }</span>
<span class="source-line-no">760</span><span id="line-760"></span>
<span class="source-line-no">761</span><span id="line-761"> // If timestamp option is not specified, use current system time.</span>
<span class="source-line-no">762</span><span id="line-762"> long timstamp = getConf().getLong(TIMESTAMP_CONF_KEY, EnvironmentEdgeManager.currentTime());</span>
<span class="source-line-no">763</span><span id="line-763"></span>
<span class="source-line-no">764</span><span id="line-764"> // Set it back so that an unspecified timestamp is replaced with the current</span>
<span class="source-line-no">765</span><span id="line-765"> // system time (a non-numeric value is rejected by getLong above rather than replaced)</span>
<span class="source-line-no">766</span><span id="line-766"> getConf().setLong(TIMESTAMP_CONF_KEY, timstamp);</span>
<span class="source-line-no">767</span><span id="line-767"></span>
<span class="source-line-no">768</span><span id="line-768"> synchronized (ImportTsv.class) {</span>
<span class="source-line-no">769</span><span id="line-769"> DRY_RUN_TABLE_CREATED = false;</span>
<span class="source-line-no">770</span><span id="line-770"> }</span>
<span class="source-line-no">771</span><span id="line-771"> Job job = createSubmittableJob(getConf(), args);</span>
<span class="source-line-no">772</span><span id="line-772"> boolean success = job.waitForCompletion(true);</span>
<span class="source-line-no">773</span><span id="line-773"> boolean delete = false;</span>
<span class="source-line-no">774</span><span id="line-774"> synchronized (ImportTsv.class) {</span>
<span class="source-line-no">775</span><span id="line-775"> delete = DRY_RUN_TABLE_CREATED;</span>
<span class="source-line-no">776</span><span id="line-776"> }</span>
<span class="source-line-no">777</span><span id="line-777"> if (delete) {</span>
<span class="source-line-no">778</span><span id="line-778"> deleteTable(getConf(), args);</span>
<span class="source-line-no">779</span><span id="line-779"> }</span>
<span class="source-line-no">780</span><span id="line-780"> return success ? 0 : 1;</span>
<span class="source-line-no">781</span><span id="line-781"> }</span>
<span class="source-line-no">782</span><span id="line-782"></span>
<span class="source-line-no">783</span><span id="line-783"> public static void main(String[] args) throws Exception {</span>
<span class="source-line-no">784</span><span id="line-784"> int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), args);</span>
<span class="source-line-no">785</span><span id="line-785"> System.exit(status);</span>
<span class="source-line-no">786</span><span id="line-786"> }</span>
<span class="source-line-no">787</span><span id="line-787">}</span>
</pre>
</div>
</main>
</body>
</html>