blob: c3b570e90f177099a6d9732ba199d75c2bc68872 [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) -->
<title>Source code</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="source: package: org.apache.hadoop.hbase.regionserver, class: DataBlockEncodingTool">
<meta name="generator" content="javadoc/SourceToHTMLConverter">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
</head>
<body class="source-page">
<main role="main">
<div class="source-container">
<pre><span class="source-line-no">001</span><span id="line-1">/*</span>
<span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span>
<span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span>
<span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span>
<span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span>
<span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span>
<span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span>
<span class="source-line-no">009</span><span id="line-9"> *</span>
<span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="source-line-no">011</span><span id="line-11"> *</span>
<span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span>
<span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span>
<span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span>
<span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span>
<span class="source-line-no">017</span><span id="line-17"> */</span>
<span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.regionserver;</span>
<span class="source-line-no">019</span><span id="line-19"></span>
<span class="source-line-no">020</span><span id="line-20">import java.io.ByteArrayInputStream;</span>
<span class="source-line-no">021</span><span id="line-21">import java.io.ByteArrayOutputStream;</span>
<span class="source-line-no">022</span><span id="line-22">import java.io.DataOutputStream;</span>
<span class="source-line-no">023</span><span id="line-23">import java.io.IOException;</span>
<span class="source-line-no">024</span><span id="line-24">import java.io.InputStream;</span>
<span class="source-line-no">025</span><span id="line-25">import java.text.DecimalFormat;</span>
<span class="source-line-no">026</span><span id="line-26">import java.util.ArrayList;</span>
<span class="source-line-no">027</span><span id="line-27">import java.util.Iterator;</span>
<span class="source-line-no">028</span><span id="line-28">import java.util.List;</span>
<span class="source-line-no">029</span><span id="line-29">import java.util.Locale;</span>
<span class="source-line-no">030</span><span id="line-30">import org.apache.hadoop.conf.Configuration;</span>
<span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.fs.FileSystem;</span>
<span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.fs.Path;</span>
<span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.hbase.ExtendedCell;</span>
<span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.hbase.HBaseConfiguration;</span>
<span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.hbase.KeyValue;</span>
<span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.KeyValueUtil;</span>
<span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.io.compress.Compression;</span>
<span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;</span>
<span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;</span>
<span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;</span>
<span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;</span>
<span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.io.hfile.CacheConfig;</span>
<span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.io.hfile.HFileBlock;</span>
<span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.io.hfile.HFileContext;</span>
<span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;</span>
<span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl;</span>
<span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.util.Bytes;</span>
<span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.io.WritableUtils;</span>
<span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.io.compress.CompressionOutputStream;</span>
<span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.io.compress.Compressor;</span>
<span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.io.compress.Decompressor;</span>
<span class="source-line-no">052</span><span id="line-52">import org.slf4j.Logger;</span>
<span class="source-line-no">053</span><span id="line-53">import org.slf4j.LoggerFactory;</span>
<span class="source-line-no">054</span><span id="line-54"></span>
<span class="source-line-no">055</span><span id="line-55">import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;</span>
<span class="source-line-no">056</span><span id="line-56">import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;</span>
<span class="source-line-no">057</span><span id="line-57">import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;</span>
<span class="source-line-no">058</span><span id="line-58">import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;</span>
<span class="source-line-no">059</span><span id="line-59">import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;</span>
<span class="source-line-no">060</span><span id="line-60">import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;</span>
<span class="source-line-no">061</span><span id="line-61"></span>
<span class="source-line-no">062</span><span id="line-62">/**</span>
<span class="source-line-no">063</span><span id="line-63"> * Tests various algorithms for key compression on an existing HFile. Useful for testing, debugging</span>
<span class="source-line-no">064</span><span id="line-64"> * and benchmarking.</span>
<span class="source-line-no">065</span><span id="line-65"> */</span>
<span class="source-line-no">066</span><span id="line-66">public class DataBlockEncodingTool {</span>
<span class="source-line-no">067</span><span id="line-67"> private static final Logger LOG = LoggerFactory.getLogger(DataBlockEncodingTool.class);</span>
<span class="source-line-no">068</span><span id="line-68"> /** When true, each serialized cell is followed by its sequence id and encoders are built with MVCC included. */</span>
<span class="source-line-no">069</span><span id="line-69"> private static final boolean includesMemstoreTS = true;</span>
<span class="source-line-no">070</span><span id="line-70"></span>
<span class="source-line-no">071</span><span id="line-71"> /**</span>
<span class="source-line-no">072</span><span id="line-72"> * Default number of benchmark runs. More runs give statistically better data at the cost of</span>
<span class="source-line-no">073</span><span id="line-73"> * slower execution. Has to be strictly larger than {@link #DEFAULT_BENCHMARK_N_OMIT}.</span>
<span class="source-line-no">074</span><span id="line-74"> */</span>
<span class="source-line-no">075</span><span id="line-75"> private static final int DEFAULT_BENCHMARK_N_TIMES = 12;</span>
<span class="source-line-no">076</span><span id="line-76"></span>
<span class="source-line-no">077</span><span id="line-77"> /**</span>
<span class="source-line-no">078</span><span id="line-78"> * Default number of leading runs that are left out of the benchmark statistics, so that setup</span>
<span class="source-line-no">079</span><span id="line-79"> * cost is excluded.</span>
<span class="source-line-no">080</span><span id="line-80"> */</span>
<span class="source-line-no">081</span><span id="line-81"> private static final int DEFAULT_BENCHMARK_N_OMIT = 2;</span>
<span class="source-line-no">082</span><span id="line-82"></span>
<span class="source-line-no">083</span><span id="line-83"> /** Option name: HFile to be used in the benchmark */</span>
<span class="source-line-no">084</span><span id="line-84"> private static final String OPT_HFILE_NAME = "f";</span>
<span class="source-line-no">085</span><span id="line-85"></span>
<span class="source-line-no">086</span><span id="line-86"> /** Option name: maximum number of key/value pairs to process in a single benchmark run */</span>
<span class="source-line-no">087</span><span id="line-87"> private static final String OPT_KV_LIMIT = "n";</span>
<span class="source-line-no">088</span><span id="line-88"></span>
<span class="source-line-no">089</span><span id="line-89"> /** Option name: whether to run a benchmark to measure read throughput */</span>
<span class="source-line-no">090</span><span id="line-90"> private static final String OPT_MEASURE_THROUGHPUT = "b";</span>
<span class="source-line-no">091</span><span id="line-91"></span>
<span class="source-line-no">092</span><span id="line-92"> /** Option name: if this is specified, no correctness testing will be done */</span>
<span class="source-line-no">093</span><span id="line-93"> private static final String OPT_OMIT_CORRECTNESS_TEST = "c";</span>
<span class="source-line-no">094</span><span id="line-94"></span>
<span class="source-line-no">095</span><span id="line-95"> /** Option name: which compression algorithm to test */</span>
<span class="source-line-no">096</span><span id="line-96"> private static final String OPT_COMPRESSION_ALGORITHM = "a";</span>
<span class="source-line-no">097</span><span id="line-97"></span>
<span class="source-line-no">098</span><span id="line-98"> /** Option name: number of times to run each benchmark */</span>
<span class="source-line-no">099</span><span id="line-99"> private static final String OPT_BENCHMARK_N_TIMES = "t";</span>
<span class="source-line-no">100</span><span id="line-100"></span>
<span class="source-line-no">101</span><span id="line-101"> /** Option name: number of first runs of every benchmark to omit from statistics */</span>
<span class="source-line-no">102</span><span id="line-102"> private static final String OPT_BENCHMARK_N_OMIT = "omit";</span>
<span class="source-line-no">103</span><span id="line-103"></span>
<span class="source-line-no">104</span><span id="line-104"> /** Compression algorithm to use if none is specified on the command line */</span>
<span class="source-line-no">105</span><span id="line-105"> private static final Algorithm DEFAULT_COMPRESSION = Compression.Algorithm.GZ;</span>
<span class="source-line-no">106</span><span id="line-106"> /** Decimal format whose grouping size is set to three digits by the static initializer below. */</span>
<span class="source-line-no">107</span><span id="line-107"> private static final DecimalFormat DELIMITED_DECIMAL_FORMAT = new DecimalFormat();</span>
<span class="source-line-no">108</span><span id="line-108"></span>
<span class="source-line-no">109</span><span id="line-109"> static {</span>
<span class="source-line-no">110</span><span id="line-110"> DELIMITED_DECIMAL_FORMAT.setGroupingSize(3);</span>
<span class="source-line-no">111</span><span id="line-111"> }</span>
<span class="source-line-no">112</span><span id="line-112"> /* Format strings: a percentage with two decimals followed by a percent sign, and a plain integer. */</span>
<span class="source-line-no">113</span><span id="line-113"> private static final String PCT_FORMAT = "%.2f %%";</span>
<span class="source-line-no">114</span><span id="line-114"> private static final String INT_FORMAT = "%d";</span>
<span class="source-line-no">115</span><span id="line-115"> /* Benchmark repetition settings, initialised to the defaults declared above. */</span>
<span class="source-line-no">116</span><span id="line-116"> private static int benchmarkNTimes = DEFAULT_BENCHMARK_N_TIMES;</span>
<span class="source-line-no">117</span><span id="line-117"> private static int benchmarkNOmit = DEFAULT_BENCHMARK_N_OMIT;</span>
<span class="source-line-no">118</span><span id="line-118"> /* Configuration handed to each EncodedDataBlock, the blocks themselves, and byte counters accumulated by checkStatistics. */</span>
<span class="source-line-no">119</span><span id="line-119"> private final Configuration conf;</span>
<span class="source-line-no">120</span><span id="line-120"> private List&lt;EncodedDataBlock&gt; codecs = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">121</span><span id="line-121"> private long totalPrefixLength = 0;</span>
<span class="source-line-no">122</span><span id="line-122"> private long totalKeyLength = 0;</span>
<span class="source-line-no">123</span><span id="line-123"> private long totalValueLength = 0;</span>
<span class="source-line-no">124</span><span id="line-124"> private long totalKeyRedundancyLength = 0;</span>
<span class="source-line-no">125</span><span id="line-125"> private long totalCFLength = 0;</span>
<span class="source-line-no">126</span><span id="line-126"> /* rawKVs holds the cells serialized by checkStatistics; useHBaseChecksum selects the block header size in verifyCodecs. */</span>
<span class="source-line-no">127</span><span id="line-127"> private byte[] rawKVs;</span>
<span class="source-line-no">128</span><span id="line-128"> private boolean useHBaseChecksum = false;</span>
<span class="source-line-no">129</span><span id="line-129"> /* Baseline compression algorithm plus the compressor and decompressor the constructor obtains from it. */</span>
<span class="source-line-no">130</span><span id="line-130"> private final String compressionAlgorithmName;</span>
<span class="source-line-no">131</span><span id="line-131"> private final Algorithm compressionAlgorithm;</span>
<span class="source-line-no">132</span><span id="line-132"> private final Compressor compressor;</span>
<span class="source-line-no">133</span><span id="line-133"> private final Decompressor decompressor;</span>
<span class="source-line-no">134</span><span id="line-134"></span>
<span class="source-line-no">135</span><span id="line-135"> // Whether the HFile contains cells with tags.</span>
<span class="source-line-no">136</span><span id="line-136"> private static boolean USE_TAG = false;</span>
<span class="source-line-no">137</span><span id="line-137"></span>
<span class="source-line-no">138</span><span id="line-138"> private enum Manipulation {</span>
<span class="source-line-no">139</span><span id="line-139"> ENCODING,</span>
<span class="source-line-no">140</span><span id="line-140"> DECODING,</span>
<span class="source-line-no">141</span><span id="line-141"> COMPRESSION,</span>
<span class="source-line-no">142</span><span id="line-142"> DECOMPRESSION;</span>
<span class="source-line-no">143</span><span id="line-143"></span>
<span class="source-line-no">144</span><span id="line-144"> /** Returns the constant name with its first character kept and the rest lower-cased, e.g. "Encoding". */</span>
<span class="source-line-no">145</span><span id="line-145"> @Override</span>
<span class="source-line-no">146</span><span id="line-146"> public String toString() {</span>
<span class="source-line-no">147</span><span id="line-147"> String constantName = name();</span>
<span class="source-line-no">148</span><span id="line-148"> return constantName.charAt(0) + constantName.substring(1).toLowerCase(Locale.ROOT);</span>
<span class="source-line-no">151</span><span id="line-151"> }</span>
<span class="source-line-no">152</span><span id="line-152"> }</span>
<span class="source-line-no">153</span><span id="line-153"></span>
<span class="source-line-no">154</span><span id="line-154"> /**</span>
<span class="source-line-no">155</span><span id="line-155"> * Creates a tool that keeps {@code conf} and uses the named compression algorithm as the comparison baseline.</span>
<span class="source-line-no">156</span><span id="line-156"> * @param compressionAlgorithmName name of the baseline algorithm (e.g. lzo, gz), looked up via Compression.getCompressionAlgorithmByName.</span>
<span class="source-line-no">157</span><span id="line-157"> */</span>
<span class="source-line-no">158</span><span id="line-158"> public DataBlockEncodingTool(Configuration conf, String compressionAlgorithmName) {</span>
<span class="source-line-no">159</span><span id="line-159"> this.compressionAlgorithmName = compressionAlgorithmName;</span>
<span class="source-line-no">160</span><span id="line-160"> this.compressionAlgorithm = Compression.getCompressionAlgorithmByName(compressionAlgorithmName);</span>
<span class="source-line-no">161</span><span id="line-161"> this.compressor = compressionAlgorithm.getCompressor();</span>
<span class="source-line-no">162</span><span id="line-162"> this.decompressor = compressionAlgorithm.getDecompressor();</span>
<span class="source-line-no">163</span><span id="line-163"> this.conf = conf;</span>
<span class="source-line-no">164</span><span id="line-164"> }</span>
<span class="source-line-no">165</span><span id="line-165"></span>
<span class="source-line-no">166</span><span id="line-166"> /**</span>
<span class="source-line-no">167</span><span id="line-167"> * Scans up to kvLimit cells, accumulates size statistics, stores the serialized cells in rawKVs and builds one EncodedDataBlock per encoding except NONE.</span>
<span class="source-line-no">168</span><span id="line-168"> * @param scanner Of file which will be compressed.</span>
<span class="source-line-no">169</span><span id="line-169"> * @param kvLimit Maximal count of KeyValue which will be processed.</span>
<span class="source-line-no">170</span><span id="line-170"> * @throws IOException thrown if scanner is invalid</span>
<span class="source-line-no">171</span><span id="line-171"> */</span>
<span class="source-line-no">172</span><span id="line-172"> public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException {</span>
<span class="source-line-no">173</span><span id="line-173"> scanner.seek(KeyValue.LOWESTKEY);</span>
<span class="source-line-no">174</span><span id="line-174"></span>
<span class="source-line-no">175</span><span id="line-175"> KeyValue currentKV;</span>
<span class="source-line-no">176</span><span id="line-176"></span>
<span class="source-line-no">177</span><span id="line-177"> byte[] previousKey = null;</span>
<span class="source-line-no">178</span><span id="line-178"> byte[] currentKey;</span>
<span class="source-line-no">179</span><span id="line-179"></span>
<span class="source-line-no">180</span><span id="line-180"> DataBlockEncoding[] encodings = DataBlockEncoding.values();</span>
<span class="source-line-no">181</span><span id="line-181"> // Serialized cells are collected here; the DataOutputStream wrapper is created once rather than once per cell.</span>
<span class="source-line-no">182</span><span id="line-182"> ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream();</span>
<span class="source-line-no">183</span><span id="line-183"> DataOutputStream vlongWriter = new DataOutputStream(uncompressedOutputStream);</span>
<span class="source-line-no">184</span><span id="line-184"> int j = 0;</span>
<span class="source-line-no">185</span><span id="line-185"> while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null &amp;&amp; j &lt; kvLimit) {</span>
<span class="source-line-no">186</span><span id="line-186"> // Iterates through key/value pairs</span>
<span class="source-line-no">187</span><span id="line-187"> j++;</span>
<span class="source-line-no">188</span><span id="line-188"> currentKey = currentKV.getKey();</span>
<span class="source-line-no">189</span><span id="line-189"> if (previousKey != null) {</span>
<span class="source-line-no">190</span><span id="line-190"> for (int i = 0; i &lt; previousKey.length &amp;&amp; i &lt; currentKey.length</span>
<span class="source-line-no">191</span><span id="line-191"> &amp;&amp; previousKey[i] == currentKey[i]; ++i) {</span>
<span class="source-line-no">192</span><span id="line-192"> totalKeyRedundancyLength++;</span>
<span class="source-line-no">193</span><span id="line-193"> }</span>
<span class="source-line-no">194</span><span id="line-194"> }</span>
<span class="source-line-no">195</span><span id="line-195"></span>
<span class="source-line-no">196</span><span id="line-196"> // Every cell is written as-is. When USE_TAG is true the HFile contains cells with tags,</span>
<span class="source-line-no">197</span><span id="line-197"> // but converting the byte array to a KV in the scanner drops the tagsLen part when it is</span>
<span class="source-line-no">198</span><span id="line-198"> // zero. That part is still needed to tell whether a cell includes tags, so a zero tagsLen</span>
<span class="source-line-no">199</span><span id="line-199"> // is appended after each cell that has no tags.</span>
<span class="source-line-no">201</span><span id="line-201"> uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(),</span>
<span class="source-line-no">202</span><span id="line-202"> currentKV.getLength());</span>
<span class="source-line-no">203</span><span id="line-203"> if (USE_TAG &amp;&amp; currentKV.getTagsLength() == 0) {</span>
<span class="source-line-no">204</span><span id="line-204"> // write tagsLen = 0.</span>
<span class="source-line-no">205</span><span id="line-205"> uncompressedOutputStream.write(Bytes.toBytes((short) 0));</span>
<span class="source-line-no">206</span><span id="line-206"> }</span>
<span class="source-line-no">210</span><span id="line-210"></span>
<span class="source-line-no">211</span><span id="line-211"> if (includesMemstoreTS) {</span>
<span class="source-line-no">212</span><span id="line-212"> WritableUtils.writeVLong(vlongWriter, currentKV.getSequenceId());</span>
<span class="source-line-no">214</span><span id="line-214"> }</span>
<span class="source-line-no">215</span><span id="line-215"></span>
<span class="source-line-no">216</span><span id="line-216"> previousKey = currentKey;</span>
<span class="source-line-no">217</span><span id="line-217"></span>
<span class="source-line-no">218</span><span id="line-218"> int kLen = currentKV.getKeyLength();</span>
<span class="source-line-no">219</span><span id="line-219"> int vLen = currentKV.getValueLength();</span>
<span class="source-line-no">221</span><span id="line-221"> int cfLen = currentKV.getFamilyLength();</span>
<span class="source-line-no">222</span><span id="line-222"> int restLen = currentKV.getLength() - kLen - vLen; // bytes of the cell that are neither key nor value</span>
<span class="source-line-no">223</span><span id="line-223"></span>
<span class="source-line-no">224</span><span id="line-224"> totalKeyLength += kLen;</span>
<span class="source-line-no">225</span><span id="line-225"> totalValueLength += vLen;</span>
<span class="source-line-no">226</span><span id="line-226"> totalPrefixLength += restLen;</span>
<span class="source-line-no">227</span><span id="line-227"> totalCFLength += cfLen;</span>
<span class="source-line-no">228</span><span id="line-228"> }</span>
<span class="source-line-no">229</span><span id="line-229"></span>
<span class="source-line-no">230</span><span id="line-230"> rawKVs = uncompressedOutputStream.toByteArray();</span>
<span class="source-line-no">231</span><span id="line-231"> for (DataBlockEncoding encoding : encodings) {</span>
<span class="source-line-no">232</span><span id="line-232"> if (encoding == DataBlockEncoding.NONE) {</span>
<span class="source-line-no">233</span><span id="line-233"> continue;</span>
<span class="source-line-no">234</span><span id="line-234"> }</span>
<span class="source-line-no">235</span><span id="line-235"> DataBlockEncoder d = encoding.getEncoder();</span>
<span class="source-line-no">236</span><span id="line-236"> HFileContext meta = new HFileContextBuilder().withDataBlockEncoding(encoding)</span>
<span class="source-line-no">237</span><span id="line-237"> .withCompression(Compression.Algorithm.NONE).withIncludesMvcc(includesMemstoreTS)</span>
<span class="source-line-no">238</span><span id="line-238"> .withIncludesTags(USE_TAG).build();</span>
<span class="source-line-no">239</span><span id="line-239"> codecs.add(new EncodedDataBlock(conf, d, encoding, rawKVs, meta));</span>
<span class="source-line-no">240</span><span id="line-240"> }</span>
<span class="source-line-no">241</span><span id="line-241"> }</span>
<span class="source-line-no">242</span><span id="line-242"></span>
<span class="source-line-no">243</span><span id="line-243"> /**</span>
<span class="source-line-no">244</span><span id="line-244"> * Replays the scanner against every encoded block and checks that each decoded cell has the same bytes as the cell read from the file.</span>
<span class="source-line-no">245</span><span id="line-245"> * @param scanner Of file which was compressed.</span>
<span class="source-line-no">246</span><span id="line-246"> * @param kvLimit Maximal count of KeyValue which will be processed.</span>
<span class="source-line-no">247</span><span id="line-247"> * @return true if every codec reproduced all compared cells; false at the first null or differing cell, after logging the details.</span>
<span class="source-line-no">248</span><span id="line-248"> * @throws IOException thrown if scanner is invalid</span>
<span class="source-line-no">249</span><span id="line-249"> */</span>
<span class="source-line-no">250</span><span id="line-250"> public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit) throws IOException {</span>
<span class="source-line-no">251</span><span id="line-251"> KeyValue currentKv;</span>
<span class="source-line-no">252</span><span id="line-252"></span>
<span class="source-line-no">253</span><span id="line-253"> scanner.seek(KeyValue.LOWESTKEY);</span>
<span class="source-line-no">254</span><span id="line-254"> List&lt;Iterator&lt;ExtendedCell&gt;&gt; codecIterators = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">255</span><span id="line-255"> for (EncodedDataBlock codec : codecs) {</span>
<span class="source-line-no">256</span><span id="line-256"> codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)));</span>
<span class="source-line-no">257</span><span id="line-257"> }</span>
<span class="source-line-no">258</span><span id="line-258"></span>
<span class="source-line-no">259</span><span id="line-259"> int j = 0;</span>
<span class="source-line-no">260</span><span id="line-260"> while ((currentKv = KeyValueUtil.ensureKeyValue(scanner.next())) != null &amp;&amp; j &lt; kvLimit) {</span>
<span class="source-line-no">261</span><span id="line-261"> // Each codec iterator is advanced once per scanned cell and compared with that cell.</span>
<span class="source-line-no">262</span><span id="line-262"> ++j;</span>
<span class="source-line-no">263</span><span id="line-263"> for (Iterator&lt;ExtendedCell&gt; it : codecIterators) {</span>
<span class="source-line-no">264</span><span id="line-264"> ExtendedCell c = it.next();</span>
<span class="source-line-no">265</span><span id="line-265"> KeyValue codecKv = KeyValueUtil.ensureKeyValue(c);</span>
<span class="source-line-no">266</span><span id="line-266"> if (</span>
<span class="source-line-no">267</span><span id="line-267"> codecKv == null</span>
<span class="source-line-no">268</span><span id="line-268"> || 0 != Bytes.compareTo(codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(),</span>
<span class="source-line-no">269</span><span id="line-269"> currentKv.getBuffer(), currentKv.getOffset(), currentKv.getLength())</span>
<span class="source-line-no">270</span><span id="line-270"> ) {</span>
<span class="source-line-no">271</span><span id="line-271"> if (codecKv == null) {</span>
<span class="source-line-no">272</span><span id="line-272"> LOG.error("There is a bug in codec " + it + " it returned null KeyValue,");</span>
<span class="source-line-no">273</span><span id="line-273"> } else {</span>
<span class="source-line-no">274</span><span id="line-274"> int prefix = 0;</span>
<span class="source-line-no">275</span><span id="line-275"> // Stay within the bytes both cells occupy so the scan cannot run past the shorter cell.</span>
<span class="source-line-no">276</span><span id="line-276"> int limitLength = Math.min(codecKv.getLength(), currentKv.getLength());</span>
<span class="source-line-no">277</span><span id="line-277"> while (</span>
<span class="source-line-no">278</span><span id="line-278"> prefix &lt; limitLength &amp;&amp; codecKv.getBuffer()[prefix + codecKv.getOffset()]</span>
<span class="source-line-no">279</span><span id="line-279"> == currentKv.getBuffer()[prefix + currentKv.getOffset()]</span>
<span class="source-line-no">280</span><span id="line-280"> ) {</span>
<span class="source-line-no">281</span><span id="line-281"> prefix++;</span>
<span class="source-line-no">282</span><span id="line-282"> }</span>
<span class="source-line-no">283</span><span id="line-283"> // Log the lengths of both cells and their bytes split at the first differing position.</span>
<span class="source-line-no">284</span><span id="line-284"> LOG.error("There is bug in codec " + it.toString() + "\n on element " + j</span>
<span class="source-line-no">285</span><span id="line-285"> + "\n codecKv.getKeyLength() " + codecKv.getKeyLength()</span>
<span class="source-line-no">286</span><span id="line-286"> + "\n codecKv.getValueLength() " + codecKv.getValueLength()</span>
<span class="source-line-no">287</span><span id="line-287"> + "\n codecKv.getLength() " + codecKv.getLength() + "\n currentKv.getKeyLength() "</span>
<span class="source-line-no">288</span><span id="line-288"> + currentKv.getKeyLength() + "\n currentKv.getValueLength() "</span>
<span class="source-line-no">289</span><span id="line-289"> + currentKv.getValueLength() + "\n currentKv.getLength() " + currentKv.getLength()</span>
<span class="source-line-no">290</span><span id="line-290"> + "\n currentKV rowLength " + currentKv.getRowLength() + " familyLength "</span>
<span class="source-line-no">291</span><span id="line-291"> + currentKv.getFamilyLength() + " qualifierLength " + currentKv.getQualifierLength()</span>
<span class="source-line-no">292</span><span id="line-292"> + "\n prefix " + prefix + "\n codecKv '"</span>
<span class="source-line-no">293</span><span id="line-293"> + Bytes.toStringBinary(codecKv.getBuffer(), codecKv.getOffset(), prefix) + "' diff '"</span>
<span class="source-line-no">294</span><span id="line-294"> + Bytes.toStringBinary(codecKv.getBuffer(), codecKv.getOffset() + prefix,</span>
<span class="source-line-no">295</span><span id="line-295"> codecKv.getLength() - prefix)</span>
<span class="source-line-no">296</span><span id="line-296"> + "'" + "\n currentKv '"</span>
<span class="source-line-no">297</span><span id="line-297"> + Bytes.toStringBinary(currentKv.getBuffer(), currentKv.getOffset(), prefix)</span>
<span class="source-line-no">298</span><span id="line-298"> + "' diff '" + Bytes.toStringBinary(currentKv.getBuffer(),</span>
<span class="source-line-no">299</span><span id="line-299"> currentKv.getOffset() + prefix, currentKv.getLength() - prefix)</span>
<span class="source-line-no">300</span><span id="line-300"> + "'");</span>
<span class="source-line-no">301</span><span id="line-301"> }</span>
<span class="source-line-no">302</span><span id="line-302"> return false;</span>
<span class="source-line-no">303</span><span id="line-303"> }</span>
<span class="source-line-no">304</span><span id="line-304"> }</span>
<span class="source-line-no">305</span><span id="line-305"> }</span>
<span class="source-line-no">306</span><span id="line-306"></span>
<span class="source-line-no">307</span><span id="line-307"> LOG.info("Verification was successful!");</span>
<span class="source-line-no">308</span><span id="line-308"></span>
<span class="source-line-no">309</span><span id="line-309"> return true;</span>
<span class="source-line-no">310</span><span id="line-310"> }</span>
<span class="source-line-no">311</span><span id="line-311"></span>
<span class="source-line-no">312</span><span id="line-312"> /**</span>
<span class="source-line-no">313</span><span id="line-313"> * Benchmark codec's speed.</span>
<span class="source-line-no">314</span><span id="line-314"> */</span>
<span class="source-line-no">315</span><span id="line-315"> public void benchmarkCodecs() throws IOException {</span>
<span class="source-line-no">316</span><span id="line-316"> LOG.info("Starting a throughput benchmark for data block encoding codecs");</span>
<span class="source-line-no">317</span><span id="line-317"> int prevTotalSize = -1;</span>
<span class="source-line-no">318</span><span id="line-318"> for (EncodedDataBlock codec : codecs) {</span>
<span class="source-line-no">319</span><span id="line-319"> prevTotalSize = benchmarkEncoder(prevTotalSize, codec);</span>
<span class="source-line-no">320</span><span id="line-320"> }</span>
<span class="source-line-no">321</span><span id="line-321"></span>
<span class="source-line-no">322</span><span id="line-322"> benchmarkDefaultCompression(prevTotalSize, rawKVs);</span>
<span class="source-line-no">323</span><span id="line-323"> }</span>
<span class="source-line-no">324</span><span id="line-324"></span>
<span class="source-line-no">325</span><span id="line-325"> /**</span>
<span class="source-line-no">326</span><span id="line-326"> * Benchmark compression/decompression throughput.</span>
<span class="source-line-no">327</span><span id="line-327"> * @param previousTotalSize Total size used for verification. Use -1 if unknown.</span>
<span class="source-line-no">328</span><span id="line-328"> * @param codec Tested encoder.</span>
<span class="source-line-no">329</span><span id="line-329"> * @return Size of uncompressed data.</span>
<span class="source-line-no">330</span><span id="line-330"> */</span>
<span class="source-line-no">331</span><span id="line-331"> private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) {</span>
<span class="source-line-no">332</span><span id="line-332"> int prevTotalSize = previousTotalSize;</span>
<span class="source-line-no">333</span><span id="line-333"> int totalSize = 0;</span>
<span class="source-line-no">334</span><span id="line-334"></span>
<span class="source-line-no">335</span><span id="line-335"> // decompression time</span>
<span class="source-line-no">336</span><span id="line-336"> List&lt;Long&gt; durations = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">337</span><span id="line-337"> for (int itTime = 0; itTime &lt; benchmarkNTimes; ++itTime) {</span>
<span class="source-line-no">338</span><span id="line-338"> totalSize = 0;</span>
<span class="source-line-no">339</span><span id="line-339"></span>
<span class="source-line-no">340</span><span id="line-340"> Iterator&lt;ExtendedCell&gt; it;</span>
<span class="source-line-no">341</span><span id="line-341"></span>
<span class="source-line-no">342</span><span id="line-342"> it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum));</span>
<span class="source-line-no">343</span><span id="line-343"></span>
<span class="source-line-no">344</span><span id="line-344"> // count only the algorithm time, without memory allocations</span>
<span class="source-line-no">345</span><span id="line-345"> // (expect first time)</span>
<span class="source-line-no">346</span><span id="line-346"> final long startTime = System.nanoTime();</span>
<span class="source-line-no">347</span><span id="line-347"> while (it.hasNext()) {</span>
<span class="source-line-no">348</span><span id="line-348"> totalSize += KeyValueUtil.ensureKeyValue(it.next()).getLength();</span>
<span class="source-line-no">349</span><span id="line-349"> }</span>
<span class="source-line-no">350</span><span id="line-350"> final long finishTime = System.nanoTime();</span>
<span class="source-line-no">351</span><span id="line-351"> if (itTime &gt;= benchmarkNOmit) {</span>
<span class="source-line-no">352</span><span id="line-352"> durations.add(finishTime - startTime);</span>
<span class="source-line-no">353</span><span id="line-353"> }</span>
<span class="source-line-no">354</span><span id="line-354"></span>
<span class="source-line-no">355</span><span id="line-355"> if (prevTotalSize != -1 &amp;&amp; prevTotalSize != totalSize) {</span>
<span class="source-line-no">356</span><span id="line-356"> throw new IllegalStateException(</span>
<span class="source-line-no">357</span><span id="line-357"> String.format("Algorithm '%s' decoded data to different size", codec.toString()));</span>
<span class="source-line-no">358</span><span id="line-358"> }</span>
<span class="source-line-no">359</span><span id="line-359"> prevTotalSize = totalSize;</span>
<span class="source-line-no">360</span><span id="line-360"> }</span>
<span class="source-line-no">361</span><span id="line-361"></span>
<span class="source-line-no">362</span><span id="line-362"> List&lt;Long&gt; encodingDurations = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">363</span><span id="line-363"> for (int itTime = 0; itTime &lt; benchmarkNTimes; ++itTime) {</span>
<span class="source-line-no">364</span><span id="line-364"> final long startTime = System.nanoTime();</span>
<span class="source-line-no">365</span><span id="line-365"> codec.encodeData();</span>
<span class="source-line-no">366</span><span id="line-366"> final long finishTime = System.nanoTime();</span>
<span class="source-line-no">367</span><span id="line-367"> if (itTime &gt;= benchmarkNOmit) {</span>
<span class="source-line-no">368</span><span id="line-368"> encodingDurations.add(finishTime - startTime);</span>
<span class="source-line-no">369</span><span id="line-369"> }</span>
<span class="source-line-no">370</span><span id="line-370"> }</span>
<span class="source-line-no">371</span><span id="line-371"></span>
<span class="source-line-no">372</span><span id="line-372"> System.out.println(codec.toString() + ":");</span>
<span class="source-line-no">373</span><span id="line-373"> printBenchmarkResult(totalSize, encodingDurations, Manipulation.ENCODING);</span>
<span class="source-line-no">374</span><span id="line-374"> printBenchmarkResult(totalSize, durations, Manipulation.DECODING);</span>
<span class="source-line-no">375</span><span id="line-375"> System.out.println();</span>
<span class="source-line-no">376</span><span id="line-376"></span>
<span class="source-line-no">377</span><span id="line-377"> return prevTotalSize;</span>
<span class="source-line-no">378</span><span id="line-378"> }</span>
<span class="source-line-no">379</span><span id="line-379"></span>
<span class="source-line-no">380</span><span id="line-380"> private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) throws IOException {</span>
<span class="source-line-no">381</span><span id="line-381"> benchmarkAlgorithm(compressionAlgorithm, compressionAlgorithmName.toUpperCase(Locale.ROOT),</span>
<span class="source-line-no">382</span><span id="line-382"> rawBuffer, 0, totalSize);</span>
<span class="source-line-no">383</span><span id="line-383"> }</span>
<span class="source-line-no">384</span><span id="line-384"></span>
<span class="source-line-no">385</span><span id="line-385"> /**</span>
<span class="source-line-no">386</span><span id="line-386"> * Check decompress performance of a given algorithm and print it.</span>
<span class="source-line-no">387</span><span id="line-387"> * @param algorithm Compression algorithm.</span>
<span class="source-line-no">388</span><span id="line-388"> * @param name Name of algorithm.</span>
<span class="source-line-no">389</span><span id="line-389"> * @param buffer Buffer to be compressed.</span>
<span class="source-line-no">390</span><span id="line-390"> * @param offset Position of the beginning of the data.</span>
<span class="source-line-no">391</span><span id="line-391"> * @param length Length of data in buffer.</span>
<span class="source-line-no">392</span><span id="line-392"> */</span>
<span class="source-line-no">393</span><span id="line-393"> public void benchmarkAlgorithm(Compression.Algorithm algorithm, String name, byte[] buffer,</span>
<span class="source-line-no">394</span><span id="line-394"> int offset, int length) throws IOException {</span>
<span class="source-line-no">395</span><span id="line-395"> System.out.println(name + ":");</span>
<span class="source-line-no">396</span><span id="line-396"></span>
<span class="source-line-no">397</span><span id="line-397"> // compress it</span>
<span class="source-line-no">398</span><span id="line-398"> List&lt;Long&gt; compressDurations = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">399</span><span id="line-399"> ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();</span>
<span class="source-line-no">400</span><span id="line-400"> CompressionOutputStream compressingStream =</span>
<span class="source-line-no">401</span><span id="line-401"> algorithm.createPlainCompressionStream(compressedStream, compressor);</span>
<span class="source-line-no">402</span><span id="line-402"> try {</span>
<span class="source-line-no">403</span><span id="line-403"> for (int itTime = 0; itTime &lt; benchmarkNTimes; ++itTime) {</span>
<span class="source-line-no">404</span><span id="line-404"> final long startTime = System.nanoTime();</span>
<span class="source-line-no">405</span><span id="line-405"> // The compressedStream should reset before compressingStream resetState since in GZ</span>
<span class="source-line-no">406</span><span id="line-406"> // resetStatue will write header in the outputstream.</span>
<span class="source-line-no">407</span><span id="line-407"> compressedStream.reset();</span>
<span class="source-line-no">408</span><span id="line-408"> compressingStream.resetState();</span>
<span class="source-line-no">409</span><span id="line-409"> compressingStream.write(buffer, offset, length);</span>
<span class="source-line-no">410</span><span id="line-410"> compressingStream.flush();</span>
<span class="source-line-no">411</span><span id="line-411"> compressedStream.toByteArray();</span>
<span class="source-line-no">412</span><span id="line-412"></span>
<span class="source-line-no">413</span><span id="line-413"> final long finishTime = System.nanoTime();</span>
<span class="source-line-no">414</span><span id="line-414"></span>
<span class="source-line-no">415</span><span id="line-415"> // add time record</span>
<span class="source-line-no">416</span><span id="line-416"> if (itTime &gt;= benchmarkNOmit) {</span>
<span class="source-line-no">417</span><span id="line-417"> compressDurations.add(finishTime - startTime);</span>
<span class="source-line-no">418</span><span id="line-418"> }</span>
<span class="source-line-no">419</span><span id="line-419"> }</span>
<span class="source-line-no">420</span><span id="line-420"> } catch (IOException e) {</span>
<span class="source-line-no">421</span><span id="line-421"> throw new RuntimeException(</span>
<span class="source-line-no">422</span><span id="line-422"> String.format("Benchmark, or encoding algorithm '%s' cause some stream problems", name), e);</span>
<span class="source-line-no">423</span><span id="line-423"> }</span>
<span class="source-line-no">424</span><span id="line-424"> compressingStream.close();</span>
<span class="source-line-no">425</span><span id="line-425"> printBenchmarkResult(length, compressDurations, Manipulation.COMPRESSION);</span>
<span class="source-line-no">426</span><span id="line-426"></span>
<span class="source-line-no">427</span><span id="line-427"> byte[] compBuffer = compressedStream.toByteArray();</span>
<span class="source-line-no">428</span><span id="line-428"></span>
<span class="source-line-no">429</span><span id="line-429"> // uncompress it several times and measure performance</span>
<span class="source-line-no">430</span><span id="line-430"> List&lt;Long&gt; durations = new ArrayList&lt;&gt;();</span>
<span class="source-line-no">431</span><span id="line-431"> for (int itTime = 0; itTime &lt; benchmarkNTimes; ++itTime) {</span>
<span class="source-line-no">432</span><span id="line-432"> final long startTime = System.nanoTime();</span>
<span class="source-line-no">433</span><span id="line-433"> byte[] newBuf = new byte[length + 1];</span>
<span class="source-line-no">434</span><span id="line-434"></span>
<span class="source-line-no">435</span><span id="line-435"> try {</span>
<span class="source-line-no">436</span><span id="line-436"> ByteArrayInputStream downStream =</span>
<span class="source-line-no">437</span><span id="line-437"> new ByteArrayInputStream(compBuffer, 0, compBuffer.length);</span>
<span class="source-line-no">438</span><span id="line-438"> InputStream decompressedStream =</span>
<span class="source-line-no">439</span><span id="line-439"> algorithm.createDecompressionStream(downStream, decompressor, 0);</span>
<span class="source-line-no">440</span><span id="line-440"></span>
<span class="source-line-no">441</span><span id="line-441"> int destOffset = 0;</span>
<span class="source-line-no">442</span><span id="line-442"> int nextChunk;</span>
<span class="source-line-no">443</span><span id="line-443"> while ((nextChunk = decompressedStream.available()) &gt; 0) {</span>
<span class="source-line-no">444</span><span id="line-444"> destOffset += decompressedStream.read(newBuf, destOffset, nextChunk);</span>
<span class="source-line-no">445</span><span id="line-445"> }</span>
<span class="source-line-no">446</span><span id="line-446"> decompressedStream.close();</span>
<span class="source-line-no">447</span><span id="line-447"></span>
<span class="source-line-no">448</span><span id="line-448"> } catch (IOException e) {</span>
<span class="source-line-no">449</span><span id="line-449"> throw new RuntimeException(</span>
<span class="source-line-no">450</span><span id="line-450"> String.format("Decoding path in '%s' algorithm cause exception ", name), e);</span>
<span class="source-line-no">451</span><span id="line-451"> }</span>
<span class="source-line-no">452</span><span id="line-452"></span>
<span class="source-line-no">453</span><span id="line-453"> final long finishTime = System.nanoTime();</span>
<span class="source-line-no">454</span><span id="line-454"></span>
<span class="source-line-no">455</span><span id="line-455"> // check correctness</span>
<span class="source-line-no">456</span><span id="line-456"> if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) {</span>
<span class="source-line-no">457</span><span id="line-457"> int prefix = 0;</span>
<span class="source-line-no">458</span><span id="line-458"> for (; prefix &lt; buffer.length &amp;&amp; prefix &lt; newBuf.length; ++prefix) {</span>
<span class="source-line-no">459</span><span id="line-459"> if (buffer[prefix] != newBuf[prefix]) {</span>
<span class="source-line-no">460</span><span id="line-460"> break;</span>
<span class="source-line-no">461</span><span id="line-461"> }</span>
<span class="source-line-no">462</span><span id="line-462"> }</span>
<span class="source-line-no">463</span><span id="line-463"> throw new RuntimeException(String.format("Algorithm '%s' is corrupting the data", name));</span>
<span class="source-line-no">464</span><span id="line-464"> }</span>
<span class="source-line-no">465</span><span id="line-465"></span>
<span class="source-line-no">466</span><span id="line-466"> // add time record</span>
<span class="source-line-no">467</span><span id="line-467"> if (itTime &gt;= benchmarkNOmit) {</span>
<span class="source-line-no">468</span><span id="line-468"> durations.add(finishTime - startTime);</span>
<span class="source-line-no">469</span><span id="line-469"> }</span>
<span class="source-line-no">470</span><span id="line-470"> }</span>
<span class="source-line-no">471</span><span id="line-471"> printBenchmarkResult(length, durations, Manipulation.DECOMPRESSION);</span>
<span class="source-line-no">472</span><span id="line-472"> System.out.println();</span>
<span class="source-line-no">473</span><span id="line-473"> }</span>
<span class="source-line-no">474</span><span id="line-474"></span>
<span class="source-line-no">475</span><span id="line-475"> private static final double BYTES_IN_MB = 1024 * 1024.0;</span>
<span class="source-line-no">476</span><span id="line-476"> private static final double NS_IN_SEC = 1000.0 * 1000.0 * 1000.0;</span>
<span class="source-line-no">477</span><span id="line-477"> private static final double MB_SEC_COEF = NS_IN_SEC / BYTES_IN_MB;</span>
<span class="source-line-no">478</span><span id="line-478"></span>
<span class="source-line-no">479</span><span id="line-479"> private static void printBenchmarkResult(int totalSize, List&lt;Long&gt; durationsInNanoSec,</span>
<span class="source-line-no">480</span><span id="line-480"> Manipulation manipulation) {</span>
<span class="source-line-no">481</span><span id="line-481"> final int n = durationsInNanoSec.size();</span>
<span class="source-line-no">482</span><span id="line-482"> long meanTime = 0;</span>
<span class="source-line-no">483</span><span id="line-483"> for (long time : durationsInNanoSec) {</span>
<span class="source-line-no">484</span><span id="line-484"> meanTime += time;</span>
<span class="source-line-no">485</span><span id="line-485"> }</span>
<span class="source-line-no">486</span><span id="line-486"> meanTime /= n;</span>
<span class="source-line-no">487</span><span id="line-487"></span>
<span class="source-line-no">488</span><span id="line-488"> double meanMBPerSec = totalSize * MB_SEC_COEF / meanTime;</span>
<span class="source-line-no">489</span><span id="line-489"> double mbPerSecSTD = 0;</span>
<span class="source-line-no">490</span><span id="line-490"> if (n &gt; 0) {</span>
<span class="source-line-no">491</span><span id="line-491"> for (long time : durationsInNanoSec) {</span>
<span class="source-line-no">492</span><span id="line-492"> double mbPerSec = totalSize * MB_SEC_COEF / time;</span>
<span class="source-line-no">493</span><span id="line-493"> double dev = mbPerSec - meanMBPerSec;</span>
<span class="source-line-no">494</span><span id="line-494"> mbPerSecSTD += dev * dev;</span>
<span class="source-line-no">495</span><span id="line-495"> }</span>
<span class="source-line-no">496</span><span id="line-496"> mbPerSecSTD = Math.sqrt(mbPerSecSTD / n);</span>
<span class="source-line-no">497</span><span id="line-497"> }</span>
<span class="source-line-no">498</span><span id="line-498"></span>
<span class="source-line-no">499</span><span id="line-499"> outputTuple(manipulation + " performance", "%6.2f MB/s (+/- %.2f MB/s)", meanMBPerSec,</span>
<span class="source-line-no">500</span><span id="line-500"> mbPerSecSTD);</span>
<span class="source-line-no">501</span><span id="line-501"> }</span>
<span class="source-line-no">502</span><span id="line-502"></span>
<span class="source-line-no">503</span><span id="line-503"> private static void outputTuple(String caption, String format, Object... values) {</span>
<span class="source-line-no">504</span><span id="line-504"> if (format.startsWith(INT_FORMAT)) {</span>
<span class="source-line-no">505</span><span id="line-505"> format = "%s" + format.substring(INT_FORMAT.length());</span>
<span class="source-line-no">506</span><span id="line-506"> values[0] = DELIMITED_DECIMAL_FORMAT.format(values[0]);</span>
<span class="source-line-no">507</span><span id="line-507"> }</span>
<span class="source-line-no">508</span><span id="line-508"></span>
<span class="source-line-no">509</span><span id="line-509"> StringBuilder sb = new StringBuilder();</span>
<span class="source-line-no">510</span><span id="line-510"> sb.append(" ");</span>
<span class="source-line-no">511</span><span id="line-511"> sb.append(caption);</span>
<span class="source-line-no">512</span><span id="line-512"> sb.append(":");</span>
<span class="source-line-no">513</span><span id="line-513"></span>
<span class="source-line-no">514</span><span id="line-514"> String v = String.format(format, values);</span>
<span class="source-line-no">515</span><span id="line-515"> int padding = 60 - sb.length() - v.length();</span>
<span class="source-line-no">516</span><span id="line-516"> for (int i = 0; i &lt; padding; ++i) {</span>
<span class="source-line-no">517</span><span id="line-517"> sb.append(' ');</span>
<span class="source-line-no">518</span><span id="line-518"> }</span>
<span class="source-line-no">519</span><span id="line-519"> sb.append(v);</span>
<span class="source-line-no">520</span><span id="line-520"> System.out.println(sb);</span>
<span class="source-line-no">521</span><span id="line-521"> }</span>
<span class="source-line-no">522</span><span id="line-522"></span>
<span class="source-line-no">523</span><span id="line-523"> /**</span>
<span class="source-line-no">524</span><span id="line-524"> * Display statistics of different compression algorithms.</span>
<span class="source-line-no">525</span><span id="line-525"> */</span>
<span class="source-line-no">526</span><span id="line-526"> public void displayStatistics() throws IOException {</span>
<span class="source-line-no">527</span><span id="line-527"> final String comprAlgo = compressionAlgorithmName.toUpperCase(Locale.ROOT);</span>
<span class="source-line-no">528</span><span id="line-528"> long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength;</span>
<span class="source-line-no">529</span><span id="line-529"></span>
<span class="source-line-no">530</span><span id="line-530"> System.out.println("Raw data size:");</span>
<span class="source-line-no">531</span><span id="line-531"> outputTuple("Raw bytes", INT_FORMAT, rawBytes);</span>
<span class="source-line-no">532</span><span id="line-532"> outputTuplePct("Key bytes", totalKeyLength);</span>
<span class="source-line-no">533</span><span id="line-533"> outputTuplePct("Value bytes", totalValueLength);</span>
<span class="source-line-no">534</span><span id="line-534"> outputTuplePct("KV infrastructure", totalPrefixLength);</span>
<span class="source-line-no">535</span><span id="line-535"> outputTuplePct("CF overhead", totalCFLength);</span>
<span class="source-line-no">536</span><span id="line-536"> outputTuplePct("Total key redundancy", totalKeyRedundancyLength);</span>
<span class="source-line-no">537</span><span id="line-537"></span>
<span class="source-line-no">538</span><span id="line-538"> int compressedSize = EncodedDataBlock.getCompressedSize(compressionAlgorithm, compressor,</span>
<span class="source-line-no">539</span><span id="line-539"> rawKVs, 0, rawKVs.length);</span>
<span class="source-line-no">540</span><span id="line-540"> outputTuple(comprAlgo + " only size", INT_FORMAT, compressedSize);</span>
<span class="source-line-no">541</span><span id="line-541"> outputSavings(comprAlgo + " only", compressedSize, rawBytes);</span>
<span class="source-line-no">542</span><span id="line-542"> System.out.println();</span>
<span class="source-line-no">543</span><span id="line-543"></span>
<span class="source-line-no">544</span><span id="line-544"> for (EncodedDataBlock codec : codecs) {</span>
<span class="source-line-no">545</span><span id="line-545"> System.out.println(codec.toString());</span>
<span class="source-line-no">546</span><span id="line-546"> long encodedBytes = codec.getSize();</span>
<span class="source-line-no">547</span><span id="line-547"> outputTuple("Encoded bytes", INT_FORMAT, encodedBytes);</span>
<span class="source-line-no">548</span><span id="line-548"> outputSavings("Key encoding", encodedBytes - totalValueLength, rawBytes - totalValueLength);</span>
<span class="source-line-no">549</span><span id="line-549"> outputSavings("Total encoding", encodedBytes, rawBytes);</span>
<span class="source-line-no">550</span><span id="line-550"></span>
<span class="source-line-no">551</span><span id="line-551"> int encodedCompressedSize = codec.getEncodedCompressedSize(compressionAlgorithm, compressor);</span>
<span class="source-line-no">552</span><span id="line-552"> outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT, encodedCompressedSize);</span>
<span class="source-line-no">553</span><span id="line-553"> outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes);</span>
<span class="source-line-no">554</span><span id="line-554"> outputSavings("Encoding with " + comprAlgo, encodedCompressedSize, compressedSize);</span>
<span class="source-line-no">555</span><span id="line-555"></span>
<span class="source-line-no">556</span><span id="line-556"> System.out.println();</span>
<span class="source-line-no">557</span><span id="line-557"> }</span>
<span class="source-line-no">558</span><span id="line-558"> }</span>
<span class="source-line-no">559</span><span id="line-559"></span>
<span class="source-line-no">560</span><span id="line-560"> private void outputTuplePct(String caption, long size) {</span>
<span class="source-line-no">561</span><span id="line-561"> outputTuple(caption, INT_FORMAT + " (" + PCT_FORMAT + ")", size, size * 100.0 / rawKVs.length);</span>
<span class="source-line-no">562</span><span id="line-562"> }</span>
<span class="source-line-no">563</span><span id="line-563"></span>
<span class="source-line-no">564</span><span id="line-564"> private void outputSavings(String caption, long part, long whole) {</span>
<span class="source-line-no">565</span><span id="line-565"> double pct = 100.0 * (1 - 1.0 * part / whole);</span>
<span class="source-line-no">566</span><span id="line-566"> double times = whole * 1.0 / part;</span>
<span class="source-line-no">567</span><span id="line-567"> outputTuple(caption + " savings", PCT_FORMAT + " (%.2f x)", pct, times);</span>
<span class="source-line-no">568</span><span id="line-568"> }</span>
<span class="source-line-no">569</span><span id="line-569"></span>
<span class="source-line-no">570</span><span id="line-570"> /**</span>
<span class="source-line-no">571</span><span id="line-571"> * Test a data block encoder on the given HFile. Output results to console.</span>
<span class="source-line-no">572</span><span id="line-572"> * @param kvLimit The limit of KeyValue which will be analyzed.</span>
<span class="source-line-no">573</span><span id="line-573"> * @param hfilePath an HFile path on the file system.</span>
<span class="source-line-no">574</span><span id="line-574"> * @param compressionName Compression algorithm used for comparison.</span>
<span class="source-line-no">575</span><span id="line-575"> * @param doBenchmark Run performance benchmarks.</span>
<span class="source-line-no">576</span><span id="line-576"> * @param doVerify Verify correctness.</span>
<span class="source-line-no">577</span><span id="line-577"> * @throws IOException When pathName is incorrect.</span>
<span class="source-line-no">578</span><span id="line-578"> */</span>
<span class="source-line-no">579</span><span id="line-579"> public static void testCodecs(Configuration conf, int kvLimit, String hfilePath,</span>
<span class="source-line-no">580</span><span id="line-580"> String compressionName, boolean doBenchmark, boolean doVerify) throws IOException {</span>
<span class="source-line-no">581</span><span id="line-581"> // create environment</span>
<span class="source-line-no">582</span><span id="line-582"> Path path = new Path(hfilePath);</span>
<span class="source-line-no">583</span><span id="line-583"> CacheConfig cacheConf = new CacheConfig(conf);</span>
<span class="source-line-no">584</span><span id="line-584"> FileSystem fs = FileSystem.get(conf);</span>
<span class="source-line-no">585</span><span id="line-585"> HStoreFile hsf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);</span>
<span class="source-line-no">586</span><span id="line-586"> hsf.initReader();</span>
<span class="source-line-no">587</span><span id="line-587"> StoreFileReader reader = hsf.getReader();</span>
<span class="source-line-no">588</span><span id="line-588"> reader.loadFileInfo();</span>
<span class="source-line-no">589</span><span id="line-589"> KeyValueScanner scanner =</span>
<span class="source-line-no">590</span><span id="line-590"> reader.getStoreFileScanner(true, true, false, hsf.getMaxMemStoreTS(), 0, false);</span>
<span class="source-line-no">591</span><span id="line-591"> USE_TAG = reader.getHFileReader().getFileContext().isIncludesTags();</span>
<span class="source-line-no">592</span><span id="line-592"> // run the utilities</span>
<span class="source-line-no">593</span><span id="line-593"> DataBlockEncodingTool comp = new DataBlockEncodingTool(conf, compressionName);</span>
<span class="source-line-no">594</span><span id="line-594"> int majorVersion = reader.getHFileVersion();</span>
<span class="source-line-no">595</span><span id="line-595"> comp.useHBaseChecksum = majorVersion &gt; 2 || (majorVersion == 2</span>
<span class="source-line-no">596</span><span id="line-596"> &amp;&amp; reader.getHFileMinorVersion() &gt;= HFileReaderImpl.MINOR_VERSION_WITH_CHECKSUM);</span>
<span class="source-line-no">597</span><span id="line-597"> comp.checkStatistics(scanner, kvLimit);</span>
<span class="source-line-no">598</span><span id="line-598"> if (doVerify) {</span>
<span class="source-line-no">599</span><span id="line-599"> comp.verifyCodecs(scanner, kvLimit);</span>
<span class="source-line-no">600</span><span id="line-600"> }</span>
<span class="source-line-no">601</span><span id="line-601"> if (doBenchmark) {</span>
<span class="source-line-no">602</span><span id="line-602"> comp.benchmarkCodecs();</span>
<span class="source-line-no">603</span><span id="line-603"> }</span>
<span class="source-line-no">604</span><span id="line-604"> comp.displayStatistics();</span>
<span class="source-line-no">605</span><span id="line-605"></span>
<span class="source-line-no">606</span><span id="line-606"> // cleanup</span>
<span class="source-line-no">607</span><span id="line-607"> scanner.close();</span>
<span class="source-line-no">608</span><span id="line-608"> reader.close(cacheConf.shouldEvictOnClose());</span>
<span class="source-line-no">609</span><span id="line-609"> }</span>
<span class="source-line-no">610</span><span id="line-610"></span>
<span class="source-line-no">611</span><span id="line-611"> private static void printUsage(Options options) {</span>
<span class="source-line-no">612</span><span id="line-612"> System.err.println("Usage:");</span>
<span class="source-line-no">613</span><span id="line-613"> System.err</span>
<span class="source-line-no">614</span><span id="line-614"> .println(String.format("./hbase %s &lt;options&gt;", DataBlockEncodingTool.class.getName()));</span>
<span class="source-line-no">615</span><span id="line-615"> System.err.println("Options:");</span>
<span class="source-line-no">616</span><span id="line-616"> for (Object it : options.getOptions()) {</span>
<span class="source-line-no">617</span><span id="line-617"> Option opt = (Option) it;</span>
<span class="source-line-no">618</span><span id="line-618"> if (opt.hasArg()) {</span>
<span class="source-line-no">619</span><span id="line-619"> System.err.println(</span>
<span class="source-line-no">620</span><span id="line-620"> String.format("-%s %s: %s", opt.getOpt(), opt.getArgName(), opt.getDescription()));</span>
<span class="source-line-no">621</span><span id="line-621"> } else {</span>
<span class="source-line-no">622</span><span id="line-622"> System.err.println(String.format("-%s: %s", opt.getOpt(), opt.getDescription()));</span>
<span class="source-line-no">623</span><span id="line-623"> }</span>
<span class="source-line-no">624</span><span id="line-624"> }</span>
<span class="source-line-no">625</span><span id="line-625"> }</span>
<span class="source-line-no">626</span><span id="line-626"></span>
<span class="source-line-no">627</span><span id="line-627"> /**</span>
<span class="source-line-no">628</span><span id="line-628"> * A command line interface to benchmarks. Parses command-line arguments and runs the appropriate</span>
<span class="source-line-no">629</span><span id="line-629"> * benchmarks.</span>
<span class="source-line-no">630</span><span id="line-630"> * @param args Should have length at least 1 and holds the file path to HFile.</span>
<span class="source-line-no">631</span><span id="line-631"> * @throws IOException If you specified the wrong file.</span>
<span class="source-line-no">632</span><span id="line-632"> */</span>
<span class="source-line-no">633</span><span id="line-633"> public static void main(final String[] args) throws IOException {</span>
<span class="source-line-no">634</span><span id="line-634"> // set up user arguments</span>
<span class="source-line-no">635</span><span id="line-635"> Options options = new Options();</span>
<span class="source-line-no">636</span><span id="line-636"> options.addOption(OPT_HFILE_NAME, true, "HFile to analyse (REQUIRED)");</span>
<span class="source-line-no">637</span><span id="line-637"> options.getOption(OPT_HFILE_NAME).setArgName("FILENAME");</span>
<span class="source-line-no">638</span><span id="line-638"> options.addOption(OPT_KV_LIMIT, true,</span>
<span class="source-line-no">639</span><span id="line-639"> "Maximum number of KeyValues to process. A benchmark stops running "</span>
<span class="source-line-no">640</span><span id="line-640"> + "after iterating over this many KV pairs.");</span>
<span class="source-line-no">641</span><span id="line-641"> options.getOption(OPT_KV_LIMIT).setArgName("NUMBER");</span>
<span class="source-line-no">642</span><span id="line-642"> options.addOption(OPT_MEASURE_THROUGHPUT, false, "Measure read throughput");</span>
<span class="source-line-no">643</span><span id="line-643"> options.addOption(OPT_OMIT_CORRECTNESS_TEST, false, "Omit corectness tests.");</span>
<span class="source-line-no">644</span><span id="line-644"> options.addOption(OPT_COMPRESSION_ALGORITHM, true,</span>
<span class="source-line-no">645</span><span id="line-645"> "What kind of compression algorithm use for comparison.");</span>
<span class="source-line-no">646</span><span id="line-646"> options.addOption(OPT_BENCHMARK_N_TIMES, true,</span>
<span class="source-line-no">647</span><span id="line-647"> "Number of times to run each benchmark. Default value: " + DEFAULT_BENCHMARK_N_TIMES);</span>
<span class="source-line-no">648</span><span id="line-648"> options.addOption(OPT_BENCHMARK_N_OMIT, true,</span>
<span class="source-line-no">649</span><span id="line-649"> "Number of first runs of every benchmark to exclude from " + "statistics ("</span>
<span class="source-line-no">650</span><span id="line-650"> + DEFAULT_BENCHMARK_N_OMIT + " by default, so that " + "only the last "</span>
<span class="source-line-no">651</span><span id="line-651"> + (DEFAULT_BENCHMARK_N_TIMES - DEFAULT_BENCHMARK_N_OMIT)</span>
<span class="source-line-no">652</span><span id="line-652"> + " times are included in statistics.)");</span>
<span class="source-line-no">653</span><span id="line-653"></span>
<span class="source-line-no">654</span><span id="line-654"> // parse arguments</span>
<span class="source-line-no">655</span><span id="line-655"> CommandLineParser parser = new PosixParser();</span>
<span class="source-line-no">656</span><span id="line-656"> CommandLine cmd = null;</span>
<span class="source-line-no">657</span><span id="line-657"> try {</span>
<span class="source-line-no">658</span><span id="line-658"> cmd = parser.parse(options, args);</span>
<span class="source-line-no">659</span><span id="line-659"> } catch (ParseException e) {</span>
<span class="source-line-no">660</span><span id="line-660"> System.err.println("Could not parse arguments!");</span>
<span class="source-line-no">661</span><span id="line-661"> System.exit(-1);</span>
<span class="source-line-no">662</span><span id="line-662"> return; // avoid warning</span>
<span class="source-line-no">663</span><span id="line-663"> }</span>
<span class="source-line-no">664</span><span id="line-664"></span>
<span class="source-line-no">665</span><span id="line-665"> int kvLimit = Integer.MAX_VALUE;</span>
<span class="source-line-no">666</span><span id="line-666"> if (cmd.hasOption(OPT_KV_LIMIT)) {</span>
<span class="source-line-no">667</span><span id="line-667"> kvLimit = Integer.parseInt(cmd.getOptionValue(OPT_KV_LIMIT));</span>
<span class="source-line-no">668</span><span id="line-668"> if (kvLimit &lt;= 0) {</span>
<span class="source-line-no">669</span><span id="line-669"> LOG.error("KV_LIMIT should not less than 1.");</span>
<span class="source-line-no">670</span><span id="line-670"> }</span>
<span class="source-line-no">671</span><span id="line-671"> }</span>
<span class="source-line-no">672</span><span id="line-672"></span>
<span class="source-line-no">673</span><span id="line-673"> // basic argument sanity checks</span>
<span class="source-line-no">674</span><span id="line-674"> if (!cmd.hasOption(OPT_HFILE_NAME)) {</span>
<span class="source-line-no">675</span><span id="line-675"> LOG.error("Please specify HFile name using the " + OPT_HFILE_NAME + " option");</span>
<span class="source-line-no">676</span><span id="line-676"> printUsage(options);</span>
<span class="source-line-no">677</span><span id="line-677"> System.exit(-1);</span>
<span class="source-line-no">678</span><span id="line-678"> }</span>
<span class="source-line-no">679</span><span id="line-679"></span>
<span class="source-line-no">680</span><span id="line-680"> String pathName = cmd.getOptionValue(OPT_HFILE_NAME);</span>
<span class="source-line-no">681</span><span id="line-681"> String compressionName = DEFAULT_COMPRESSION.getName();</span>
<span class="source-line-no">682</span><span id="line-682"> if (cmd.hasOption(OPT_COMPRESSION_ALGORITHM)) {</span>
<span class="source-line-no">683</span><span id="line-683"> compressionName = cmd.getOptionValue(OPT_COMPRESSION_ALGORITHM).toLowerCase(Locale.ROOT);</span>
<span class="source-line-no">684</span><span id="line-684"> }</span>
<span class="source-line-no">685</span><span id="line-685"> boolean doBenchmark = cmd.hasOption(OPT_MEASURE_THROUGHPUT);</span>
<span class="source-line-no">686</span><span id="line-686"> boolean doVerify = !cmd.hasOption(OPT_OMIT_CORRECTNESS_TEST);</span>
<span class="source-line-no">687</span><span id="line-687"></span>
<span class="source-line-no">688</span><span id="line-688"> if (cmd.hasOption(OPT_BENCHMARK_N_TIMES)) {</span>
<span class="source-line-no">689</span><span id="line-689"> benchmarkNTimes = Integer.valueOf(cmd.getOptionValue(OPT_BENCHMARK_N_TIMES));</span>
<span class="source-line-no">690</span><span id="line-690"> }</span>
<span class="source-line-no">691</span><span id="line-691"> if (cmd.hasOption(OPT_BENCHMARK_N_OMIT)) {</span>
<span class="source-line-no">692</span><span id="line-692"> benchmarkNOmit = Integer.valueOf(cmd.getOptionValue(OPT_BENCHMARK_N_OMIT));</span>
<span class="source-line-no">693</span><span id="line-693"> }</span>
<span class="source-line-no">694</span><span id="line-694"> if (benchmarkNTimes &lt; benchmarkNOmit) {</span>
<span class="source-line-no">695</span><span id="line-695"> LOG.error("The number of times to run each benchmark (" + benchmarkNTimes</span>
<span class="source-line-no">696</span><span id="line-696"> + ") must be greater than the number of benchmark runs to exclude " + "from statistics ("</span>
<span class="source-line-no">697</span><span id="line-697"> + benchmarkNOmit + ")");</span>
<span class="source-line-no">698</span><span id="line-698"> System.exit(1);</span>
<span class="source-line-no">699</span><span id="line-699"> }</span>
<span class="source-line-no">700</span><span id="line-700"> LOG.info("Running benchmark " + benchmarkNTimes + " times. " + "Excluding the first "</span>
<span class="source-line-no">701</span><span id="line-701"> + benchmarkNOmit + " times from statistics.");</span>
<span class="source-line-no">702</span><span id="line-702"></span>
<span class="source-line-no">703</span><span id="line-703"> final Configuration conf = HBaseConfiguration.create();</span>
<span class="source-line-no">704</span><span id="line-704"> testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark, doVerify);</span>
<span class="source-line-no">705</span><span id="line-705"> }</span>
<span class="source-line-no">706</span><span id="line-706"></span>
<span class="source-line-no">707</span><span id="line-707">}</span>
</pre>
</div>
</main>
</body>
</html>