blob: 50d887c520310d30a7d4a8224e77db47716afba6 [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) -->
<title>Source code</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="source: package: org.apache.hadoop.hbase.mapreduce, class: CopyTable">
<meta name="generator" content="javadoc/SourceToHTMLConverter">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
</head>
<body class="source-page">
<main role="main">
<div class="source-container">
<pre><span class="source-line-no">001</span><span id="line-1">/*</span>
<span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span>
<span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span>
<span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span>
<span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span>
<span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span>
<span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span>
<span class="source-line-no">009</span><span id="line-9"> *</span>
<span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="source-line-no">011</span><span id="line-11"> *</span>
<span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span>
<span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span>
<span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span>
<span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span>
<span class="source-line-no">017</span><span id="line-17"> */</span>
<span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.mapreduce;</span>
<span class="source-line-no">019</span><span id="line-19"></span>
<span class="source-line-no">020</span><span id="line-20">import java.io.IOException;</span>
<span class="source-line-no">021</span><span id="line-21">import java.net.URI;</span>
<span class="source-line-no">022</span><span id="line-22">import java.net.URISyntaxException;</span>
<span class="source-line-no">023</span><span id="line-23">import java.util.HashMap;</span>
<span class="source-line-no">024</span><span id="line-24">import java.util.Map;</span>
<span class="source-line-no">025</span><span id="line-25">import java.util.UUID;</span>
<span class="source-line-no">026</span><span id="line-26">import org.apache.hadoop.conf.Configured;</span>
<span class="source-line-no">027</span><span id="line-27">import org.apache.hadoop.fs.FileSystem;</span>
<span class="source-line-no">028</span><span id="line-28">import org.apache.hadoop.fs.Path;</span>
<span class="source-line-no">029</span><span id="line-29">import org.apache.hadoop.hbase.HBaseConfiguration;</span>
<span class="source-line-no">030</span><span id="line-30">import org.apache.hadoop.hbase.HConstants;</span>
<span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.hbase.TableName;</span>
<span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.hbase.client.Admin;</span>
<span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.hbase.client.Connection;</span>
<span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.hbase.client.ConnectionFactory;</span>
<span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.hbase.client.Scan;</span>
<span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.mapreduce.Import.CellImporter;</span>
<span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.mapreduce.Import.Importer;</span>
<span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.tool.BulkLoadHFiles;</span>
<span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;</span>
<span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.util.Bytes;</span>
<span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.util.CommonFSUtils;</span>
<span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.mapreduce.Job;</span>
<span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.util.Tool;</span>
<span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.util.ToolRunner;</span>
<span class="source-line-no">045</span><span id="line-45">import org.apache.yetus.audience.InterfaceAudience;</span>
<span class="source-line-no">046</span><span id="line-46">import org.slf4j.Logger;</span>
<span class="source-line-no">047</span><span id="line-47">import org.slf4j.LoggerFactory;</span>
<span class="source-line-no">048</span><span id="line-48"></span>
<span class="source-line-no">049</span><span id="line-49">/**</span>
<span class="source-line-no">050</span><span id="line-50"> * Tool used to copy a table to another one which can be on a different setup. It is also</span>
<span class="source-line-no">051</span><span id="line-51"> * configurable with a start and end time as well as a specification of the region server implementation</span>
<span class="source-line-no">052</span><span id="line-52"> * if different from the local cluster.</span>
<span class="source-line-no">053</span><span id="line-53"> */</span>
<span class="source-line-no">054</span><span id="line-54">@InterfaceAudience.Public</span>
<span class="source-line-no">055</span><span id="line-55">public class CopyTable extends Configured implements Tool {</span>
<span class="source-line-no">056</span><span id="line-56"> private static final Logger LOG = LoggerFactory.getLogger(CopyTable.class);</span>
<span class="source-line-no">057</span><span id="line-57"></span>
<span class="source-line-no">058</span><span id="line-58"> final static String NAME = "copytable";</span>
<span class="source-line-no">059</span><span id="line-59"> long startTime = 0;</span>
<span class="source-line-no">060</span><span id="line-60"> long endTime = HConstants.LATEST_TIMESTAMP;</span>
<span class="source-line-no">061</span><span id="line-61"> int batch = Integer.MAX_VALUE;</span>
<span class="source-line-no">062</span><span id="line-62"> int cacheRow = -1;</span>
<span class="source-line-no">063</span><span id="line-63"> int versions = -1;</span>
<span class="source-line-no">064</span><span id="line-64"> String tableName = null;</span>
<span class="source-line-no">065</span><span id="line-65"> String startRow = null;</span>
<span class="source-line-no">066</span><span id="line-66"> String stopRow = null;</span>
<span class="source-line-no">067</span><span id="line-67"> String dstTableName = null;</span>
<span class="source-line-no">068</span><span id="line-68"> URI peerUri = null;</span>
<span class="source-line-no">069</span><span id="line-69"> /**</span>
<span class="source-line-no">070</span><span id="line-70"> * @deprecated Since 3.0.0, will be removed in 4.0.0. Use {@link #peerUri} instead.</span>
<span class="source-line-no">071</span><span id="line-71"> */</span>
<span class="source-line-no">072</span><span id="line-72"> @Deprecated</span>
<span class="source-line-no">073</span><span id="line-73"> String peerAddress = null;</span>
<span class="source-line-no">074</span><span id="line-74"> String families = null;</span>
<span class="source-line-no">075</span><span id="line-75"> boolean allCells = false;</span>
<span class="source-line-no">076</span><span id="line-76"> static boolean shuffle = false;</span>
<span class="source-line-no">077</span><span id="line-77"></span>
<span class="source-line-no">078</span><span id="line-78"> boolean bulkload = false;</span>
<span class="source-line-no">079</span><span id="line-79"> Path bulkloadDir = null;</span>
<span class="source-line-no">080</span><span id="line-80"></span>
<span class="source-line-no">081</span><span id="line-81"> boolean readingSnapshot = false;</span>
<span class="source-line-no">082</span><span id="line-82"> String snapshot = null;</span>
<span class="source-line-no">083</span><span id="line-83"></span>
<span class="source-line-no">084</span><span id="line-84"> private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";</span>
<span class="source-line-no">085</span><span id="line-85"></span>
<span class="source-line-no">086</span><span id="line-86"> private Path generateUniqTempDir(boolean withDirCreated) throws IOException {</span>
<span class="source-line-no">087</span><span id="line-87"> FileSystem fs = CommonFSUtils.getCurrentFileSystem(getConf());</span>
<span class="source-line-no">088</span><span id="line-88"> Path dir = new Path(fs.getWorkingDirectory(), NAME);</span>
<span class="source-line-no">089</span><span id="line-89"> if (!fs.exists(dir)) {</span>
<span class="source-line-no">090</span><span id="line-90"> fs.mkdirs(dir);</span>
<span class="source-line-no">091</span><span id="line-91"> }</span>
<span class="source-line-no">092</span><span id="line-92"> Path newDir = new Path(dir, UUID.randomUUID().toString());</span>
<span class="source-line-no">093</span><span id="line-93"> if (withDirCreated) {</span>
<span class="source-line-no">094</span><span id="line-94"> fs.mkdirs(newDir);</span>
<span class="source-line-no">095</span><span id="line-95"> }</span>
<span class="source-line-no">096</span><span id="line-96"> return newDir;</span>
<span class="source-line-no">097</span><span id="line-97"> }</span>
<span class="source-line-no">098</span><span id="line-98"></span>
<span class="source-line-no">099</span><span id="line-99"> private void initCopyTableMapperJob(Job job, Scan scan) throws IOException {</span>
<span class="source-line-no">100</span><span id="line-100"> Class&lt;? extends TableMapper&gt; mapper = bulkload ? CellImporter.class : Importer.class;</span>
<span class="source-line-no">101</span><span id="line-101"> if (readingSnapshot) {</span>
<span class="source-line-no">102</span><span id="line-102"> TableMapReduceUtil.initTableSnapshotMapperJob(snapshot, scan, mapper, null, null, job, true,</span>
<span class="source-line-no">103</span><span id="line-103"> generateUniqTempDir(true));</span>
<span class="source-line-no">104</span><span id="line-104"> } else {</span>
<span class="source-line-no">105</span><span id="line-105"> TableMapReduceUtil.initTableMapperJob(tableName, scan, mapper, null, null, job);</span>
<span class="source-line-no">106</span><span id="line-106"> }</span>
<span class="source-line-no">107</span><span id="line-107"> }</span>
<span class="source-line-no">108</span><span id="line-108"></span>
<span class="source-line-no">109</span><span id="line-109"> /**</span>
<span class="source-line-no">110</span><span id="line-110"> * Sets up the actual job.</span>
<span class="source-line-no">111</span><span id="line-111"> * @param args The command line parameters.</span>
<span class="source-line-no">112</span><span id="line-112"> * @return The newly created job.</span>
<span class="source-line-no">113</span><span id="line-113"> * @throws IOException When setting up the job fails.</span>
<span class="source-line-no">114</span><span id="line-114"> */</span>
<span class="source-line-no">115</span><span id="line-115"> public Job createSubmittableJob(String[] args) throws IOException {</span>
<span class="source-line-no">116</span><span id="line-116"> if (!doCommandLine(args)) {</span>
<span class="source-line-no">117</span><span id="line-117"> return null;</span>
<span class="source-line-no">118</span><span id="line-118"> }</span>
<span class="source-line-no">119</span><span id="line-119"></span>
<span class="source-line-no">120</span><span id="line-120"> String jobName = NAME + "_" + (tableName == null ? snapshot : tableName);</span>
<span class="source-line-no">121</span><span id="line-121"> Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, jobName));</span>
<span class="source-line-no">122</span><span id="line-122"> job.setJarByClass(CopyTable.class);</span>
<span class="source-line-no">123</span><span id="line-123"> Scan scan = new Scan();</span>
<span class="source-line-no">124</span><span id="line-124"></span>
<span class="source-line-no">125</span><span id="line-125"> scan.setBatch(batch);</span>
<span class="source-line-no">126</span><span id="line-126"> scan.setCacheBlocks(false);</span>
<span class="source-line-no">127</span><span id="line-127"></span>
<span class="source-line-no">128</span><span id="line-128"> if (cacheRow &gt; 0) {</span>
<span class="source-line-no">129</span><span id="line-129"> scan.setCaching(cacheRow);</span>
<span class="source-line-no">130</span><span id="line-130"> } else {</span>
<span class="source-line-no">131</span><span id="line-131"> scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));</span>
<span class="source-line-no">132</span><span id="line-132"> }</span>
<span class="source-line-no">133</span><span id="line-133"></span>
<span class="source-line-no">134</span><span id="line-134"> scan.setTimeRange(startTime, endTime);</span>
<span class="source-line-no">135</span><span id="line-135"></span>
<span class="source-line-no">136</span><span id="line-136"> if (allCells) {</span>
<span class="source-line-no">137</span><span id="line-137"> scan.setRaw(true);</span>
<span class="source-line-no">138</span><span id="line-138"> }</span>
<span class="source-line-no">139</span><span id="line-139"> if (shuffle) {</span>
<span class="source-line-no">140</span><span id="line-140"> job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");</span>
<span class="source-line-no">141</span><span id="line-141"> }</span>
<span class="source-line-no">142</span><span id="line-142"> if (versions &gt;= 0) {</span>
<span class="source-line-no">143</span><span id="line-143"> scan.readVersions(versions);</span>
<span class="source-line-no">144</span><span id="line-144"> }</span>
<span class="source-line-no">145</span><span id="line-145"></span>
<span class="source-line-no">146</span><span id="line-146"> if (startRow != null) {</span>
<span class="source-line-no">147</span><span id="line-147"> scan.withStartRow(Bytes.toBytesBinary(startRow));</span>
<span class="source-line-no">148</span><span id="line-148"> }</span>
<span class="source-line-no">149</span><span id="line-149"></span>
<span class="source-line-no">150</span><span id="line-150"> if (stopRow != null) {</span>
<span class="source-line-no">151</span><span id="line-151"> scan.withStopRow(Bytes.toBytesBinary(stopRow));</span>
<span class="source-line-no">152</span><span id="line-152"> }</span>
<span class="source-line-no">153</span><span id="line-153"></span>
<span class="source-line-no">154</span><span id="line-154"> if (families != null) {</span>
<span class="source-line-no">155</span><span id="line-155"> String[] fams = families.split(",");</span>
<span class="source-line-no">156</span><span id="line-156"> Map&lt;String, String&gt; cfRenameMap = new HashMap&lt;&gt;();</span>
<span class="source-line-no">157</span><span id="line-157"> for (String fam : fams) {</span>
<span class="source-line-no">158</span><span id="line-158"> String sourceCf;</span>
<span class="source-line-no">159</span><span id="line-159"> if (fam.contains(":")) {</span>
<span class="source-line-no">160</span><span id="line-160"> // fam looks like "sourceCfName:destCfName"</span>
<span class="source-line-no">161</span><span id="line-161"> String[] srcAndDest = fam.split(":", 2);</span>
<span class="source-line-no">162</span><span id="line-162"> sourceCf = srcAndDest[0];</span>
<span class="source-line-no">163</span><span id="line-163"> String destCf = srcAndDest[1];</span>
<span class="source-line-no">164</span><span id="line-164"> cfRenameMap.put(sourceCf, destCf);</span>
<span class="source-line-no">165</span><span id="line-165"> } else {</span>
<span class="source-line-no">166</span><span id="line-166"> // fam is just "sourceCf"</span>
<span class="source-line-no">167</span><span id="line-167"> sourceCf = fam;</span>
<span class="source-line-no">168</span><span id="line-168"> }</span>
<span class="source-line-no">169</span><span id="line-169"> scan.addFamily(Bytes.toBytes(sourceCf));</span>
<span class="source-line-no">170</span><span id="line-170"> }</span>
<span class="source-line-no">171</span><span id="line-171"> Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);</span>
<span class="source-line-no">172</span><span id="line-172"> }</span>
<span class="source-line-no">173</span><span id="line-173"> job.setNumReduceTasks(0);</span>
<span class="source-line-no">174</span><span id="line-174"></span>
<span class="source-line-no">175</span><span id="line-175"> if (bulkload) {</span>
<span class="source-line-no">176</span><span id="line-176"> initCopyTableMapperJob(job, scan);</span>
<span class="source-line-no">177</span><span id="line-177"></span>
<span class="source-line-no">178</span><span id="line-178"> // We need to split the inputs by destination tables so that output of Map can be bulk-loaded.</span>
<span class="source-line-no">179</span><span id="line-179"> TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));</span>
<span class="source-line-no">180</span><span id="line-180"></span>
<span class="source-line-no">181</span><span id="line-181"> bulkloadDir = generateUniqTempDir(false);</span>
<span class="source-line-no">182</span><span id="line-182"> LOG.info("HFiles will be stored at " + this.bulkloadDir);</span>
<span class="source-line-no">183</span><span id="line-183"> HFileOutputFormat2.setOutputPath(job, bulkloadDir);</span>
<span class="source-line-no">184</span><span id="line-184"> try (Connection conn = ConnectionFactory.createConnection(getConf());</span>
<span class="source-line-no">185</span><span id="line-185"> Admin admin = conn.getAdmin()) {</span>
<span class="source-line-no">186</span><span id="line-186"> HFileOutputFormat2.configureIncrementalLoadMap(job,</span>
<span class="source-line-no">187</span><span id="line-187"> admin.getDescriptor((TableName.valueOf(dstTableName))));</span>
<span class="source-line-no">188</span><span id="line-188"> }</span>
<span class="source-line-no">189</span><span id="line-189"> } else {</span>
<span class="source-line-no">190</span><span id="line-190"> initCopyTableMapperJob(job, scan);</span>
<span class="source-line-no">191</span><span id="line-191"> if (peerUri != null) {</span>
<span class="source-line-no">192</span><span id="line-192"> TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerUri);</span>
<span class="source-line-no">193</span><span id="line-193"> } else if (peerAddress != null) {</span>
<span class="source-line-no">194</span><span id="line-194"> TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress);</span>
<span class="source-line-no">195</span><span id="line-195"> } else {</span>
<span class="source-line-no">196</span><span id="line-196"> TableMapReduceUtil.initTableReducerJob(dstTableName, null, job);</span>
<span class="source-line-no">197</span><span id="line-197"> }</span>
<span class="source-line-no">198</span><span id="line-198"></span>
<span class="source-line-no">199</span><span id="line-199"> }</span>
<span class="source-line-no">200</span><span id="line-200"></span>
<span class="source-line-no">201</span><span id="line-201"> return job;</span>
<span class="source-line-no">202</span><span id="line-202"> }</span>
<span class="source-line-no">203</span><span id="line-203"></span>
<span class="source-line-no">204</span><span id="line-204"> /*</span>
<span class="source-line-no">205</span><span id="line-205"> * @param errorMsg Error message. Can be null.</span>
<span class="source-line-no">206</span><span id="line-206"> */</span>
<span class="source-line-no">207</span><span id="line-207"> private static void printUsage(final String errorMsg) {</span>
<span class="source-line-no">208</span><span id="line-208"> if (errorMsg != null &amp;&amp; errorMsg.length() &gt; 0) {</span>
<span class="source-line-no">209</span><span id="line-209"> System.err.println("ERROR: " + errorMsg);</span>
<span class="source-line-no">210</span><span id="line-210"> }</span>
<span class="source-line-no">211</span><span id="line-211"> System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] "</span>
<span class="source-line-no">212</span><span id="line-212"> + "[--new.name=NEW] [--peer.uri=URI|--peer.adr=ADR] &lt;tablename | snapshotName&gt;");</span>
<span class="source-line-no">213</span><span id="line-213"> System.err.println();</span>
<span class="source-line-no">214</span><span id="line-214"> System.err.println("Options:");</span>
<span class="source-line-no">215</span><span id="line-215"> System.err.println(" rs.class hbase.regionserver.class of the peer cluster");</span>
<span class="source-line-no">216</span><span id="line-216"> System.err.println(" specify if different from current cluster");</span>
<span class="source-line-no">217</span><span id="line-217"> System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");</span>
<span class="source-line-no">218</span><span id="line-218"> System.err.println(" startrow the start row");</span>
<span class="source-line-no">219</span><span id="line-219"> System.err.println(" stoprow the stop row");</span>
<span class="source-line-no">220</span><span id="line-220"> System.err.println(" starttime beginning of the time range (unixtime in millis)");</span>
<span class="source-line-no">221</span><span id="line-221"> System.err.println(" without endtime means from starttime to forever");</span>
<span class="source-line-no">222</span><span id="line-222"> System.err.println(" endtime end of the time range. Ignored if no starttime specified.");</span>
<span class="source-line-no">223</span><span id="line-223"> System.err.println(" versions number of cell versions to copy");</span>
<span class="source-line-no">224</span><span id="line-224"> System.err.println(" new.name new table's name");</span>
<span class="source-line-no">225</span><span id="line-225"> System.err.println(" peer.uri The URI of the peer cluster");</span>
<span class="source-line-no">226</span><span id="line-226"> System.err.println(" peer.adr Address of the peer cluster given in the format");</span>
<span class="source-line-no">227</span><span id="line-227"> System.err.println(" hbase.zookeeper.quorum:hbase.zookeeper.client"</span>
<span class="source-line-no">228</span><span id="line-228"> + ".port:zookeeper.znode.parent");</span>
<span class="source-line-no">229</span><span id="line-229"> System.err.println(" Do not take effect if peer.uri is specified");</span>
<span class="source-line-no">230</span><span id="line-230"> System.err.println(" Deprecated, please use peer.uri instead");</span>
<span class="source-line-no">231</span><span id="line-231"> System.err.println(" families comma-separated list of families to copy");</span>
<span class="source-line-no">232</span><span id="line-232"> System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");</span>
<span class="source-line-no">233</span><span id="line-233"> System.err.println(" To keep the same name, just give \"cfName\"");</span>
<span class="source-line-no">234</span><span id="line-234"> System.err.println(" all.cells also copy delete markers and deleted cells");</span>
<span class="source-line-no">235</span><span id="line-235"> System.err</span>
<span class="source-line-no">236</span><span id="line-236"> .println(" bulkload Write input into HFiles and bulk load to the destination " + "table");</span>
<span class="source-line-no">237</span><span id="line-237"> System.err.println(" snapshot Copy the data from snapshot to destination table.");</span>
<span class="source-line-no">238</span><span id="line-238"> System.err.println();</span>
<span class="source-line-no">239</span><span id="line-239"> System.err.println("Args:");</span>
<span class="source-line-no">240</span><span id="line-240"> System.err.println(" tablename Name of the table to copy");</span>
<span class="source-line-no">241</span><span id="line-241"> System.err.println();</span>
<span class="source-line-no">242</span><span id="line-242"> System.err.println("Examples:");</span>
<span class="source-line-no">243</span><span id="line-243"> System.err</span>
<span class="source-line-no">244</span><span id="line-244"> .println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");</span>
<span class="source-line-no">245</span><span id="line-245"> System.err.println(" $ hbase "</span>
<span class="source-line-no">246</span><span id="line-246"> + "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 "</span>
<span class="source-line-no">247</span><span id="line-247"> + "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");</span>
<span class="source-line-no">248</span><span id="line-248"> System.err.println(" To copy data from 'sourceTableSnapshot' to 'destTable': ");</span>
<span class="source-line-no">249</span><span id="line-249"> System.err.println(" $ hbase org.apache.hadoop.hbase.mapreduce.CopyTable "</span>
<span class="source-line-no">250</span><span id="line-250"> + "--snapshot --new.name=destTable sourceTableSnapshot");</span>
<span class="source-line-no">251</span><span id="line-251"> System.err.println(" To copy data from 'sourceTableSnapshot' and bulk load to 'destTable': ");</span>
<span class="source-line-no">252</span><span id="line-252"> System.err.println(" $ hbase org.apache.hadoop.hbase.mapreduce.CopyTable "</span>
<span class="source-line-no">253</span><span id="line-253"> + "--new.name=destTable --snapshot --bulkload sourceTableSnapshot");</span>
<span class="source-line-no">254</span><span id="line-254"> System.err.println("For performance consider the following general option:\n"</span>
<span class="source-line-no">255</span><span id="line-255"> + " It is recommended that you set the following to &gt;=100. A higher value uses more memory but\n"</span>
<span class="source-line-no">256</span><span id="line-256"> + " decreases the round trip time to the server and may increase performance.\n"</span>
<span class="source-line-no">257</span><span id="line-257"> + " -Dhbase.client.scanner.caching=100\n"</span>
<span class="source-line-no">258</span><span id="line-258"> + " The following should always be set to false, to prevent writing data twice, which may produce \n"</span>
<span class="source-line-no">259</span><span id="line-259"> + " inaccurate results.\n" + " -Dmapreduce.map.speculative=false");</span>
<span class="source-line-no">260</span><span id="line-260"> }</span>
<span class="source-line-no">261</span><span id="line-261"></span>
<span class="source-line-no">262</span><span id="line-262"> private boolean doCommandLine(final String[] args) {</span>
<span class="source-line-no">263</span><span id="line-263"> if (args.length &lt; 1) {</span>
<span class="source-line-no">264</span><span id="line-264"> printUsage(null);</span>
<span class="source-line-no">265</span><span id="line-265"> return false;</span>
<span class="source-line-no">266</span><span id="line-266"> }</span>
<span class="source-line-no">267</span><span id="line-267"> for (int i = 0; i &lt; args.length; i++) {</span>
<span class="source-line-no">268</span><span id="line-268"> String cmd = args[i];</span>
<span class="source-line-no">269</span><span id="line-269"> if (cmd.equals("-h") || cmd.startsWith("--h")) {</span>
<span class="source-line-no">270</span><span id="line-270"> printUsage(null);</span>
<span class="source-line-no">271</span><span id="line-271"> return false;</span>
<span class="source-line-no">272</span><span id="line-272"> }</span>
<span class="source-line-no">273</span><span id="line-273"></span>
<span class="source-line-no">274</span><span id="line-274"> final String startRowArgKey = "--startrow=";</span>
<span class="source-line-no">275</span><span id="line-275"> if (cmd.startsWith(startRowArgKey)) {</span>
<span class="source-line-no">276</span><span id="line-276"> startRow = cmd.substring(startRowArgKey.length());</span>
<span class="source-line-no">277</span><span id="line-277"> continue;</span>
<span class="source-line-no">278</span><span id="line-278"> }</span>
<span class="source-line-no">279</span><span id="line-279"></span>
<span class="source-line-no">280</span><span id="line-280"> final String stopRowArgKey = "--stoprow=";</span>
<span class="source-line-no">281</span><span id="line-281"> if (cmd.startsWith(stopRowArgKey)) {</span>
<span class="source-line-no">282</span><span id="line-282"> stopRow = cmd.substring(stopRowArgKey.length());</span>
<span class="source-line-no">283</span><span id="line-283"> continue;</span>
<span class="source-line-no">284</span><span id="line-284"> }</span>
<span class="source-line-no">285</span><span id="line-285"></span>
<span class="source-line-no">286</span><span id="line-286"> final String startTimeArgKey = "--starttime=";</span>
<span class="source-line-no">287</span><span id="line-287"> if (cmd.startsWith(startTimeArgKey)) {</span>
<span class="source-line-no">288</span><span id="line-288"> startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));</span>
<span class="source-line-no">289</span><span id="line-289"> continue;</span>
<span class="source-line-no">290</span><span id="line-290"> }</span>
<span class="source-line-no">291</span><span id="line-291"></span>
<span class="source-line-no">292</span><span id="line-292"> final String endTimeArgKey = "--endtime=";</span>
<span class="source-line-no">293</span><span id="line-293"> if (cmd.startsWith(endTimeArgKey)) {</span>
<span class="source-line-no">294</span><span id="line-294"> endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));</span>
<span class="source-line-no">295</span><span id="line-295"> continue;</span>
<span class="source-line-no">296</span><span id="line-296"> }</span>
<span class="source-line-no">297</span><span id="line-297"></span>
<span class="source-line-no">298</span><span id="line-298"> final String batchArgKey = "--batch=";</span>
<span class="source-line-no">299</span><span id="line-299"> if (cmd.startsWith(batchArgKey)) {</span>
<span class="source-line-no">300</span><span id="line-300"> batch = Integer.parseInt(cmd.substring(batchArgKey.length()));</span>
<span class="source-line-no">301</span><span id="line-301"> continue;</span>
<span class="source-line-no">302</span><span id="line-302"> }</span>
<span class="source-line-no">303</span><span id="line-303"></span>
<span class="source-line-no">304</span><span id="line-304"> final String cacheRowArgKey = "--cacheRow=";</span>
<span class="source-line-no">305</span><span id="line-305"> if (cmd.startsWith(cacheRowArgKey)) {</span>
<span class="source-line-no">306</span><span id="line-306"> cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));</span>
<span class="source-line-no">307</span><span id="line-307"> continue;</span>
<span class="source-line-no">308</span><span id="line-308"> }</span>
<span class="source-line-no">309</span><span id="line-309"></span>
<span class="source-line-no">310</span><span id="line-310"> final String versionsArgKey = "--versions=";</span>
<span class="source-line-no">311</span><span id="line-311"> if (cmd.startsWith(versionsArgKey)) {</span>
<span class="source-line-no">312</span><span id="line-312"> versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));</span>
<span class="source-line-no">313</span><span id="line-313"> continue;</span>
<span class="source-line-no">314</span><span id="line-314"> }</span>
<span class="source-line-no">315</span><span id="line-315"></span>
<span class="source-line-no">316</span><span id="line-316"> final String newNameArgKey = "--new.name=";</span>
<span class="source-line-no">317</span><span id="line-317"> if (cmd.startsWith(newNameArgKey)) {</span>
<span class="source-line-no">318</span><span id="line-318"> dstTableName = cmd.substring(newNameArgKey.length());</span>
<span class="source-line-no">319</span><span id="line-319"> continue;</span>
<span class="source-line-no">320</span><span id="line-320"> }</span>
<span class="source-line-no">321</span><span id="line-321"></span>
<span class="source-line-no">322</span><span id="line-322"> final String peerUriArgKey = "--peer.uri=";</span>
<span class="source-line-no">323</span><span id="line-323"> if (cmd.startsWith(peerUriArgKey)) {</span>
<span class="source-line-no">324</span><span id="line-324"> try {</span>
<span class="source-line-no">325</span><span id="line-325"> peerUri = new URI(cmd.substring(peerUriArgKey.length()));</span>
<span class="source-line-no">326</span><span id="line-326"> } catch (URISyntaxException e) {</span>
<span class="source-line-no">327</span><span id="line-327"> LOG.error("Malformed peer uri specified: {}", cmd, e);</span>
<span class="source-line-no">328</span><span id="line-328"> return false;</span>
<span class="source-line-no">329</span><span id="line-329"> }</span>
<span class="source-line-no">330</span><span id="line-330"> continue;</span>
<span class="source-line-no">331</span><span id="line-331"> }</span>
<span class="source-line-no">332</span><span id="line-332"></span>
<span class="source-line-no">333</span><span id="line-333"> final String peerAdrArgKey = "--peer.adr=";</span>
<span class="source-line-no">334</span><span id="line-334"> if (cmd.startsWith(peerAdrArgKey)) {</span>
<span class="source-line-no">335</span><span id="line-335"> peerAddress = cmd.substring(peerAdrArgKey.length());</span>
<span class="source-line-no">336</span><span id="line-336"> continue;</span>
<span class="source-line-no">337</span><span id="line-337"> }</span>
<span class="source-line-no">338</span><span id="line-338"></span>
<span class="source-line-no">339</span><span id="line-339"> final String familiesArgKey = "--families=";</span>
<span class="source-line-no">340</span><span id="line-340"> if (cmd.startsWith(familiesArgKey)) {</span>
<span class="source-line-no">341</span><span id="line-341"> families = cmd.substring(familiesArgKey.length());</span>
<span class="source-line-no">342</span><span id="line-342"> continue;</span>
<span class="source-line-no">343</span><span id="line-343"> }</span>
<span class="source-line-no">344</span><span id="line-344"></span>
<span class="source-line-no">345</span><span id="line-345"> if (cmd.startsWith("--all.cells")) {</span>
<span class="source-line-no">346</span><span id="line-346"> allCells = true;</span>
<span class="source-line-no">347</span><span id="line-347"> continue;</span>
<span class="source-line-no">348</span><span id="line-348"> }</span>
<span class="source-line-no">349</span><span id="line-349"></span>
<span class="source-line-no">350</span><span id="line-350"> if (cmd.startsWith("--bulkload")) {</span>
<span class="source-line-no">351</span><span id="line-351"> bulkload = true;</span>
<span class="source-line-no">352</span><span id="line-352"> continue;</span>
<span class="source-line-no">353</span><span id="line-353"> }</span>
<span class="source-line-no">354</span><span id="line-354"></span>
<span class="source-line-no">355</span><span id="line-355"> if (cmd.startsWith("--shuffle")) {</span>
<span class="source-line-no">356</span><span id="line-356"> shuffle = true;</span>
<span class="source-line-no">357</span><span id="line-357"> continue;</span>
<span class="source-line-no">358</span><span id="line-358"> }</span>
<span class="source-line-no">359</span><span id="line-359"></span>
<span class="source-line-no">360</span><span id="line-360"> if (cmd.startsWith("--snapshot")) {</span>
<span class="source-line-no">361</span><span id="line-361"> readingSnapshot = true;</span>
<span class="source-line-no">362</span><span id="line-362"> continue;</span>
<span class="source-line-no">363</span><span id="line-363"> }</span>
<span class="source-line-no">364</span><span id="line-364"></span>
<span class="source-line-no">365</span><span id="line-365"> if (i == args.length - 1) {</span>
<span class="source-line-no">366</span><span id="line-366"> if (readingSnapshot) {</span>
<span class="source-line-no">367</span><span id="line-367"> snapshot = cmd;</span>
<span class="source-line-no">368</span><span id="line-368"> } else {</span>
<span class="source-line-no">369</span><span id="line-369"> tableName = cmd;</span>
<span class="source-line-no">370</span><span id="line-370"> }</span>
<span class="source-line-no">371</span><span id="line-371"> } else {</span>
<span class="source-line-no">372</span><span id="line-372"> printUsage("Invalid argument '" + cmd + "'");</span>
<span class="source-line-no">373</span><span id="line-373"> return false;</span>
<span class="source-line-no">374</span><span id="line-374"> }</span>
<span class="source-line-no">375</span><span id="line-375"> }</span>
<span class="source-line-no">376</span><span id="line-376"> if (dstTableName == null &amp;&amp; peerAddress == null) {</span>
<span class="source-line-no">377</span><span id="line-377"> printUsage("At least a new table name or a peer address must be specified");</span>
<span class="source-line-no">378</span><span id="line-378"> return false;</span>
<span class="source-line-no">379</span><span id="line-379"> }</span>
<span class="source-line-no">380</span><span id="line-380"> if ((endTime != 0) &amp;&amp; (startTime &gt; endTime)) {</span>
<span class="source-line-no">381</span><span id="line-381"> printUsage("Invalid time range filter: starttime=" + startTime + " &gt; endtime=" + endTime);</span>
<span class="source-line-no">382</span><span id="line-382"> return false;</span>
<span class="source-line-no">383</span><span id="line-383"> }</span>
<span class="source-line-no">384</span><span id="line-384"></span>
<span class="source-line-no">385</span><span id="line-385"> if (bulkload &amp;&amp; (peerUri != null || peerAddress != null)) {</span>
<span class="source-line-no">386</span><span id="line-386"> printUsage("Remote bulkload is not supported!");</span>
<span class="source-line-no">387</span><span id="line-387"> return false;</span>
<span class="source-line-no">388</span><span id="line-388"> }</span>
<span class="source-line-no">389</span><span id="line-389"></span>
<span class="source-line-no">390</span><span id="line-390"> if (readingSnapshot &amp;&amp; (peerUri != null || peerAddress != null)) {</span>
<span class="source-line-no">391</span><span id="line-391"> printUsage("Loading data from snapshot to remote peer cluster is not supported.");</span>
<span class="source-line-no">392</span><span id="line-392"> return false;</span>
<span class="source-line-no">393</span><span id="line-393"> }</span>
<span class="source-line-no">394</span><span id="line-394"></span>
<span class="source-line-no">395</span><span id="line-395"> if (readingSnapshot &amp;&amp; dstTableName == null) {</span>
<span class="source-line-no">396</span><span id="line-396"> printUsage("The --new.name=&lt;table&gt; for destination table should be "</span>
<span class="source-line-no">397</span><span id="line-397"> + "provided when copying data from snapshot .");</span>
<span class="source-line-no">398</span><span id="line-398"> return false;</span>
<span class="source-line-no">399</span><span id="line-399"> }</span>
<span class="source-line-no">400</span><span id="line-400"></span>
<span class="source-line-no">401</span><span id="line-401"> if (readingSnapshot &amp;&amp; snapshot == null) {</span>
<span class="source-line-no">402</span><span id="line-402"> printUsage("Snapshot shouldn't be null when --snapshot is enabled.");</span>
<span class="source-line-no">403</span><span id="line-403"> return false;</span>
<span class="source-line-no">404</span><span id="line-404"> }</span>
<span class="source-line-no">405</span><span id="line-405"></span>
<span class="source-line-no">406</span><span id="line-406"> // set dstTableName if necessary</span>
<span class="source-line-no">407</span><span id="line-407"> if (dstTableName == null) {</span>
<span class="source-line-no">408</span><span id="line-408"> dstTableName = tableName;</span>
<span class="source-line-no">409</span><span id="line-409"> }</span>
<span class="source-line-no">410</span><span id="line-410"> return true;</span>
<span class="source-line-no">411</span><span id="line-411"> }</span>
<span class="source-line-no">412</span><span id="line-412"></span>
<span class="source-line-no">413</span><span id="line-413"> /**</span>
<span class="source-line-no">414</span><span id="line-414"> * Command line entry point: runs a new CopyTable through ToolRunner with a freshly created</span>
<span class="source-line-no">415</span><span id="line-415"> * HBase configuration and exits the JVM with the code returned by the tool.</span>
<span class="source-line-no">416</span><span id="line-416"> * @param args The command line parameters.</span>
<span class="source-line-no">417</span><span id="line-417"> * @throws Exception When running the job fails.</span>
<span class="source-line-no">418</span><span id="line-418"> */</span>
<span class="source-line-no">419</span><span id="line-419"> public static void main(String[] args) throws Exception {</span>
<span class="source-line-no">420</span><span id="line-420"> System.exit(ToolRunner.run(HBaseConfiguration.create(), new CopyTable(), args));</span>
<span class="source-line-no">421</span><span id="line-421"> }</span>
<span class="source-line-no">422</span><span id="line-422"></span>
<span class="source-line-no">423</span><span id="line-423"> /**</span>
<span class="source-line-no">424</span><span id="line-424"> * Runs the copy job described by {@code args} and, if bulkload is set, bulk loads bulkloadDir.</span>
<span class="source-line-no">425</span><span id="line-425"> * @return 1 if no job was created, the job failed or deleting bulkloadDir failed; 0 otherwise.</span>
<span class="source-line-no">426</span><span id="line-426"> */</span>
<span class="source-line-no">427</span><span id="line-427"> @Override</span>
<span class="source-line-no">428</span><span id="line-428"> public int run(String[] args) throws Exception {</span>
<span class="source-line-no">429</span><span id="line-429"> Job job = createSubmittableJob(args);</span>
<span class="source-line-no">430</span><span id="line-430"> if (job == null) {</span>
<span class="source-line-no">431</span><span id="line-431"> return 1;</span>
<span class="source-line-no">432</span><span id="line-432"> }</span>
<span class="source-line-no">433</span><span id="line-433"> if (!job.waitForCompletion(true)) {</span>
<span class="source-line-no">434</span><span id="line-434"> LOG.error("Map-reduce job failed!");</span>
<span class="source-line-no">435</span><span id="line-435"> if (bulkload) {</span>
<span class="source-line-no">436</span><span id="line-436"> LOG.error("Files are not bulkloaded!");</span>
<span class="source-line-no">437</span><span id="line-437"> }</span>
<span class="source-line-no">438</span><span id="line-438"> return 1;</span>
<span class="source-line-no">439</span><span id="line-439"> }</span>
<span class="source-line-no">440</span><span id="line-440"> int code = 0;</span>
<span class="source-line-no">441</span><span id="line-441"> if (bulkload) {</span>
<span class="source-line-no">442</span><span id="line-442"> LOG.info("Trying to bulk load data to destination table: {}", dstTableName);</span>
<span class="source-line-no">443</span><span id="line-443"> LOG.info("command: ./bin/hbase {} {} {}", BulkLoadHFilesTool.NAME, bulkloadDir, dstTableName);</span>
<span class="source-line-no">444</span><span id="line-444"> TableName dstTable = TableName.valueOf(dstTableName);</span>
<span class="source-line-no">445</span><span id="line-445"> if (!BulkLoadHFiles.create(getConf()).bulkLoad(dstTable, bulkloadDir).isEmpty()) {</span>
<span class="source-line-no">446</span><span id="line-446"> // bulkloadDir is deleted only if BulkLoadHFiles was successful, so that one can rerun it.</span>
<span class="source-line-no">447</span><span id="line-447"> FileSystem fs = CommonFSUtils.getCurrentFileSystem(getConf());</span>
<span class="source-line-no">448</span><span id="line-448"> if (!fs.delete(bulkloadDir, true)) {</span>
<span class="source-line-no">449</span><span id="line-449"> LOG.error("Deleting folder {} failed!", bulkloadDir);</span>
<span class="source-line-no">450</span><span id="line-450"> code = 1;</span>
<span class="source-line-no">451</span><span id="line-451"> }</span>
<span class="source-line-no">452</span><span id="line-452"> }</span>
<span class="source-line-no">453</span><span id="line-453"> }</span>
<span class="source-line-no">454</span><span id="line-454"> return code;</span>
<span class="source-line-no">455</span><span id="line-455"> }</span>
<span class="source-line-no">456</span><span id="line-456">}</span>
</pre>
</div>
</main>
</body>
</html>