| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (17) --> |
| <title>Source code</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.io.hfile, class: HFile, interface: Reader"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.io.hfile;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import java.io.Closeable;</span> |
| <span class="source-line-no">021</span><span id="line-21">import java.io.DataInput;</span> |
| <span class="source-line-no">022</span><span id="line-22">import java.io.IOException;</span> |
| <span class="source-line-no">023</span><span id="line-23">import java.net.InetSocketAddress;</span> |
| <span class="source-line-no">024</span><span id="line-24">import java.util.ArrayList;</span> |
| <span class="source-line-no">025</span><span id="line-25">import java.util.List;</span> |
| <span class="source-line-no">026</span><span id="line-26">import java.util.Optional;</span> |
| <span class="source-line-no">027</span><span id="line-27">import java.util.concurrent.atomic.LongAdder;</span> |
| <span class="source-line-no">028</span><span id="line-28">import org.apache.commons.io.IOUtils;</span> |
| <span class="source-line-no">029</span><span id="line-29">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">030</span><span id="line-30">import org.apache.hadoop.fs.FSDataOutputStream;</span> |
| <span class="source-line-no">031</span><span id="line-31">import org.apache.hadoop.fs.FileStatus;</span> |
| <span class="source-line-no">032</span><span id="line-32">import org.apache.hadoop.fs.FileSystem;</span> |
| <span class="source-line-no">033</span><span id="line-33">import org.apache.hadoop.fs.Path;</span> |
| <span class="source-line-no">034</span><span id="line-34">import org.apache.hadoop.fs.PathFilter;</span> |
| <span class="source-line-no">035</span><span id="line-35">import org.apache.hadoop.hbase.CellComparator;</span> |
| <span class="source-line-no">036</span><span id="line-36">import org.apache.hadoop.hbase.ExtendedCell;</span> |
| <span class="source-line-no">037</span><span id="line-37">import org.apache.hadoop.hbase.HConstants;</span> |
| <span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;</span> |
| <span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.hbase.io.MetricsIO;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.hbase.io.compress.Compression;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.ipc.RpcServer;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.regionserver.CellSink;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.regionserver.ShipperListener;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.util.BloomFilterWriter;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.util.Bytes;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.util.FSUtils;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.io.Writable;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.yetus.audience.InterfaceAudience;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.slf4j.Logger;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">053</span><span id="line-53"></span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;</span> |
| <span class="source-line-no">055</span><span id="line-55"></span> |
| <span class="source-line-no">056</span><span id="line-56">/**</span> |
| <span class="source-line-no">057</span><span id="line-57"> * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays.</span> |
| <span class="source-line-no">058</span><span id="line-58"> * <p></span> |
| <span class="source-line-no">059</span><span id="line-59"> * The memory footprint of an HFile includes the following (below is taken from the <a</span> |
| <span class="source-line-no">060</span><span id="line-60"> * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation but applies also</span> |
| <span class="source-line-no">061</span><span id="line-61"> * to HFile):</span> |
| <span class="source-line-no">062</span><span id="line-62"> * <ul></span> |
| <span class="source-line-no">063</span><span id="line-63"> * <li>Some constant overhead of reading or writing a compressed block.</span> |
| <span class="source-line-no">064</span><span id="line-64"> * <ul></span> |
| <span class="source-line-no">065</span><span id="line-65"> * <li>Each compressed block requires one compression/decompression codec for I/O.</span> |
| <span class="source-line-no">066</span><span id="line-66"> * <li>Temporary space to buffer the key.</span> |
| <span class="source-line-no">067</span><span id="line-67"> * <li>Temporary space to buffer the value.</span> |
| <span class="source-line-no">068</span><span id="line-68"> * </ul></span> |
| <span class="source-line-no">069</span><span id="line-69"> * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of</span> |
| <span class="source-line-no">070</span><span id="line-70"> * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks.</span> |
| <span class="source-line-no">071</span><span id="line-71"> * </ul></span> |
| <span class="source-line-no">072</span><span id="line-72"> * Suggestions on performance optimization.</span> |
| <span class="source-line-no">073</span><span id="line-73"> * <ul></span> |
| <span class="source-line-no">074</span><span id="line-74"> * <li>Minimum block size. We recommend a setting of minimum block size between 8KB and 1MB for</span> |
| <span class="source-line-no">075</span><span id="line-75"> * general usage. Larger block size is preferred if files are primarily for sequential access.</span> |
| <span class="source-line-no">076</span><span id="line-76"> * However, it would lead to inefficient random access (because there are more data to decompress).</span> |
| <span class="source-line-no">077</span><span id="line-77"> * Smaller blocks are good for random access, but require more memory to hold the block index, and</span> |
| <span class="source-line-no">078</span><span id="line-78"> * may be slower to create (because we must flush the compressor stream at the conclusion of each</span> |
| <span class="source-line-no">079</span><span id="line-79"> * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression</span> |
| <span class="source-line-no">080</span><span id="line-80"> * codec, the smallest possible block size would be around 20KB-30KB.</span> |
| <span class="source-line-no">081</span><span id="line-81"> * <li>The current implementation does not offer true multi-threading for reading. The</span> |
| <span class="source-line-no">082</span><span id="line-82"> * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than</span> |
| <span class="source-line-no">083</span><span id="line-83"> * positioned-read call in single thread mode. However, it also means that if multiple threads</span> |
| <span class="source-line-no">084</span><span id="line-84"> * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is</span> |
| <span class="source-line-no">085</span><span id="line-85"> * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be</span> |
| <span class="source-line-no">086</span><span id="line-86"> * 10% faster than seek+read in my testing -- stack).</span> |
| <span class="source-line-no">087</span><span id="line-87"> * <li>Compression codec. Use "none" if the data is not very compressible (by compressible, I mean a</span> |
| <span class="source-line-no">088</span><span id="line-88"> * compression ratio of at least 2:1). Generally, use "lzo" as the starting point for experimenting.</span> |
| <span class="source-line-no">089</span><span id="line-89"> * "gz" offers slightly better compression ratio than "lzo" but requires 4x CPU to compress and 2x</span> |
| <span class="source-line-no">090</span><span id="line-90"> * CPU to decompress, compared to "lzo".</span> |
| <span class="source-line-no">091</span><span id="line-91"> * </ul></span> |
| <span class="source-line-no">092</span><span id="line-92"> * For more on the background behind HFile, see <a</span> |
| <span class="source-line-no">093</span><span id="line-93"> * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.</span> |
| <span class="source-line-no">094</span><span id="line-94"> * <p></span> |
| <span class="source-line-no">095</span><span id="line-95"> * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block</span> |
| <span class="source-line-no">096</span><span id="line-96"> * index, meta data block index, and a fixed size trailer which records the offsets at which file</span> |
| <span class="source-line-no">097</span><span id="line-97"> * changes content type.</span> |
| <span class="source-line-no">098</span><span id="line-98"> *</span> |
| <span class="source-line-no">099</span><span id="line-99"> * <pre></span> |
| <span class="source-line-no">100</span><span id="line-100"> * &lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;</span> |
| <span class="source-line-no">101</span><span id="line-101"> * data index&gt;&lt;meta index&gt;&lt;trailer&gt;</span> |
| <span class="source-line-no">102</span><span id="line-102"> * </pre></span> |
| <span class="source-line-no">103</span><span id="line-103"> *</span> |
| <span class="source-line-no">104</span><span id="line-104"> * Each block has a bit of magic at its start. Blocks are comprised of key/values. In data blocks,</span> |
| <span class="source-line-no">105</span><span id="line-105"> * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file</span> |
| <span class="source-line-no">106</span><span id="line-106"> * looks like this:</span> |
| <span class="source-line-no">107</span><span id="line-107"> *</span> |
| <span class="source-line-no">108</span><span id="line-108"> * <pre></span> |
| <span class="source-line-no">109</span><span id="line-109"> * &lt;fileinfo&gt;&lt;trailer&gt;</span> |
| <span class="source-line-no">110</span><span id="line-110"> * </pre></span> |
| <span class="source-line-no">111</span><span id="line-111"> *</span> |
| <span class="source-line-no">112</span><span id="line-112"> * That is, there are neither data nor meta blocks present.</span> |
| <span class="source-line-no">113</span><span id="line-113"> * <p></span> |
| <span class="source-line-no">114</span><span id="line-114"> * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the</span> |
| <span class="source-line-no">115</span><span id="line-115"> * name of its file? Should it have a Path that points at its file say for the case where an index</span> |
| <span class="source-line-no">116</span><span id="line-116"> * lives apart from an HFile instance?</span> |
| <span class="source-line-no">117</span><span id="line-117"> */</span> |
| <span class="source-line-no">118</span><span id="line-118">@InterfaceAudience.Private</span> |
| <span class="source-line-no">119</span><span id="line-119">public final class HFile {</span> |
| <span class="source-line-no">120</span><span id="line-120"> // LOG is being used in HFileBlock and CheckSumUtil</span> |
| <span class="source-line-no">121</span><span id="line-121"> static final Logger LOG = LoggerFactory.getLogger(HFile.class);</span> |
| <span class="source-line-no">122</span><span id="line-122"></span> |
| <span class="source-line-no">123</span><span id="line-123"> /**</span> |
| <span class="source-line-no">124</span><span id="line-124"> * Maximum length of key in HFile.</span> |
| <span class="source-line-no">125</span><span id="line-125"> */</span> |
| <span class="source-line-no">126</span><span id="line-126"> public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;</span> |
| <span class="source-line-no">127</span><span id="line-127"></span> |
| <span class="source-line-no">128</span><span id="line-128"> /**</span> |
| <span class="source-line-no">129</span><span id="line-129"> * Default compression: none.</span> |
| <span class="source-line-no">130</span><span id="line-130"> */</span> |
| <span class="source-line-no">131</span><span id="line-131"> public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =</span> |
| <span class="source-line-no">132</span><span id="line-132"> Compression.Algorithm.NONE;</span> |
| <span class="source-line-no">133</span><span id="line-133"></span> |
| <span class="source-line-no">134</span><span id="line-134"> /** Minimum supported HFile format version */</span> |
| <span class="source-line-no">135</span><span id="line-135"> public static final int MIN_FORMAT_VERSION = 2;</span> |
| <span class="source-line-no">136</span><span id="line-136"></span> |
| <span class="source-line-no">137</span><span id="line-137"> /**</span> |
| <span class="source-line-no">138</span><span id="line-138"> * Maximum supported HFile format version</span> |
| <span class="source-line-no">139</span><span id="line-139"> */</span> |
| <span class="source-line-no">140</span><span id="line-140"> public static final int MAX_FORMAT_VERSION = 3;</span> |
| <span class="source-line-no">141</span><span id="line-141"></span> |
| <span class="source-line-no">142</span><span id="line-142"> /**</span> |
| <span class="source-line-no">143</span><span id="line-143"> * Minimum HFile format version with support for persisting cell tags</span> |
| <span class="source-line-no">144</span><span id="line-144"> */</span> |
| <span class="source-line-no">145</span><span id="line-145"> public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;</span> |
| <span class="source-line-no">146</span><span id="line-146"></span> |
| <span class="source-line-no">147</span><span id="line-147"> /** Default compression name: none. */</span> |
| <span class="source-line-no">148</span><span id="line-148"> public final static String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName();</span> |
| <span class="source-line-no">149</span><span id="line-149"></span> |
| <span class="source-line-no">150</span><span id="line-150"> /** Meta data block name for bloom filter bits. */</span> |
| <span class="source-line-no">151</span><span id="line-151"> public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";</span> |
| <span class="source-line-no">152</span><span id="line-152"></span> |
| <span class="source-line-no">153</span><span id="line-153"> /**</span> |
| <span class="source-line-no">154</span><span id="line-154"> * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at</span> |
| <span class="source-line-no">155</span><span id="line-155"> * least this many levels of nesting. This is needed for identifying table and CF name from an</span> |
| <span class="source-line-no">156</span><span id="line-156"> * HFile path.</span> |
| <span class="source-line-no">157</span><span id="line-157"> */</span> |
| <span class="source-line-no">158</span><span id="line-158"> public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;</span> |
| <span class="source-line-no">159</span><span id="line-159"></span> |
| <span class="source-line-no">160</span><span id="line-160"> /**</span> |
| <span class="source-line-no">161</span><span id="line-161"> * The number of bytes per checksum.</span> |
| <span class="source-line-no">162</span><span id="line-162"> */</span> |
| <span class="source-line-no">163</span><span id="line-163"> public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;</span> |
| <span class="source-line-no">164</span><span id="line-164"></span> |
| <span class="source-line-no">165</span><span id="line-165"> // For measuring number of checksum failures</span> |
| <span class="source-line-no">166</span><span id="line-166"> static final LongAdder CHECKSUM_FAILURES = new LongAdder();</span> |
| <span class="source-line-no">167</span><span id="line-167"></span> |
| <span class="source-line-no">168</span><span id="line-168"> // For tests. Gets incremented when we read a block whether from HDFS or from Cache.</span> |
| <span class="source-line-no">169</span><span id="line-169"> public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();</span> |
| <span class="source-line-no">170</span><span id="line-170"></span> |
| <span class="source-line-no">171</span><span id="line-171"> /**</span> |
| <span class="source-line-no">172</span><span id="line-172"> * Private constructor: prevents instantiation from outside this class.</span> |
| <span class="source-line-no">173</span><span id="line-173"> */</span> |
| <span class="source-line-no">174</span><span id="line-174"> private HFile() {</span> |
| <span class="source-line-no">175</span><span id="line-175"> }</span> |
| <span class="source-line-no">176</span><span id="line-176"></span> |
| <span class="source-line-no">177</span><span id="line-177"> /**</span> |
| <span class="source-line-no">178</span><span id="line-178"> * Number of checksum verification failures. It also clears the counter.</span> |
| <span class="source-line-no">179</span><span id="line-179"> */</span> |
| <span class="source-line-no">180</span><span id="line-180"> public static final long getAndResetChecksumFailuresCount() {</span> |
| <span class="source-line-no">181</span><span id="line-181"> return CHECKSUM_FAILURES.sumThenReset();</span> |
| <span class="source-line-no">182</span><span id="line-182"> }</span> |
| <span class="source-line-no">183</span><span id="line-183"></span> |
| <span class="source-line-no">184</span><span id="line-184"> /**</span> |
| <span class="source-line-no">185</span><span id="line-185"> * Number of checksum verification failures. The counter is left unchanged.</span> |
| <span class="source-line-no">186</span><span id="line-186"> */</span> |
| <span class="source-line-no">187</span><span id="line-187"> public static final long getChecksumFailuresCount() {</span> |
| <span class="source-line-no">188</span><span id="line-188"> return CHECKSUM_FAILURES.sum();</span> |
| <span class="source-line-no">189</span><span id="line-189"> }</span> |
| <span class="source-line-no">190</span><span id="line-190"></span> |
| <span class="source-line-no">191</span><span id="line-191"> public static final void updateReadLatency(long latencyMillis, boolean pread, boolean tooSlow) {</span> |
| <span class="source-line-no">192</span><span id="line-192"> RpcServer.getCurrentCall().ifPresent(call -> call.updateFsReadTime(latencyMillis));</span> |
| <span class="source-line-no">193</span><span id="line-193"> if (pread) {</span> |
| <span class="source-line-no">194</span><span id="line-194"> MetricsIO.getInstance().updateFsPreadTime(latencyMillis);</span> |
| <span class="source-line-no">195</span><span id="line-195"> } else {</span> |
| <span class="source-line-no">196</span><span id="line-196"> MetricsIO.getInstance().updateFsReadTime(latencyMillis);</span> |
| <span class="source-line-no">197</span><span id="line-197"> }</span> |
| <span class="source-line-no">198</span><span id="line-198"> if (tooSlow) {</span> |
| <span class="source-line-no">199</span><span id="line-199"> MetricsIO.getInstance().incrSlowFsRead();</span> |
| <span class="source-line-no">200</span><span id="line-200"> }</span> |
| <span class="source-line-no">201</span><span id="line-201"> }</span> |
| <span class="source-line-no">202</span><span id="line-202"></span> |
| <span class="source-line-no">203</span><span id="line-203"> public static final void updateWriteLatency(long latencyMillis) {</span> |
| <span class="source-line-no">204</span><span id="line-204"> MetricsIO.getInstance().updateFsWriteTime(latencyMillis);</span> |
| <span class="source-line-no">205</span><span id="line-205"> }</span> |
| <span class="source-line-no">206</span><span id="line-206"></span> |
| <span class="source-line-no">207</span><span id="line-207"> /** API required to write an {@link HFile} */</span> |
| <span class="source-line-no">208</span><span id="line-208"> public interface Writer extends Closeable, CellSink, ShipperListener {</span> |
| <span class="source-line-no">209</span><span id="line-209"> /** Max memstore (mvcc) timestamp in FileInfo */</span> |
| <span class="source-line-no">210</span><span id="line-210"> public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");</span> |
| <span class="source-line-no">211</span><span id="line-211"></span> |
| <span class="source-line-no">212</span><span id="line-212"> /** Add an element to the file info map. */</span> |
| <span class="source-line-no">213</span><span id="line-213"> void appendFileInfo(byte[] key, byte[] value) throws IOException;</span> |
| <span class="source-line-no">214</span><span id="line-214"></span> |
| <span class="source-line-no">215</span><span id="line-215"> /** Returns the path to this {@link HFile} */</span> |
| <span class="source-line-no">216</span><span id="line-216"> Path getPath();</span> |
| <span class="source-line-no">217</span><span id="line-217"></span> |
| <span class="source-line-no">218</span><span id="line-218"> /**</span> |
| <span class="source-line-no">219</span><span id="line-219"> * Adds an inline block writer such as a multi-level block index writer or a compound Bloom</span> |
| <span class="source-line-no">220</span><span id="line-220"> * filter writer.</span> |
| <span class="source-line-no">221</span><span id="line-221"> */</span> |
| <span class="source-line-no">222</span><span id="line-222"> void addInlineBlockWriter(InlineBlockWriter bloomWriter);</span> |
| <span class="source-line-no">223</span><span id="line-223"></span> |
| <span class="source-line-no">224</span><span id="line-224"> // The below three methods take Writables. We'd like to undo Writables but undoing the below</span> |
| <span class="source-line-no">225</span><span id="line-225"> // would be pretty painful. Could take a byte [] or a Message but we want to be backward</span> |
| <span class="source-line-no">226</span><span id="line-226"> // compatible around hfiles so would need to map between Message and Writable or byte [] and</span> |
| <span class="source-line-no">227</span><span id="line-227"> // current Writable serialization. This would be a bit of work to little gain. That's my</span> |
| <span class="source-line-no">228</span><span id="line-228"> // thinking at moment. St.Ack 20121129</span> |
| <span class="source-line-no">229</span><span id="line-229"></span> |
| <span class="source-line-no">230</span><span id="line-230"> void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);</span> |
| <span class="source-line-no">231</span><span id="line-231"></span> |
| <span class="source-line-no">232</span><span id="line-232"> /**</span> |
| <span class="source-line-no">233</span><span id="line-233"> * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is</span> |
| <span class="source-line-no">234</span><span id="line-234"> * necessary, since Bloom filters are stored differently in HFile version 1 and version 2.</span> |
| <span class="source-line-no">235</span><span id="line-235"> */</span> |
| <span class="source-line-no">236</span><span id="line-236"> void addGeneralBloomFilter(BloomFilterWriter bfw);</span> |
| <span class="source-line-no">237</span><span id="line-237"></span> |
| <span class="source-line-no">238</span><span id="line-238"> /**</span> |
| <span class="source-line-no">239</span><span id="line-239"> * Store delete family Bloom filter in the file, which is only supported in HFile V2.</span> |
| <span class="source-line-no">240</span><span id="line-240"> */</span> |
| <span class="source-line-no">241</span><span id="line-241"> void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;</span> |
| <span class="source-line-no">242</span><span id="line-242"></span> |
| <span class="source-line-no">243</span><span id="line-243"> /**</span> |
| <span class="source-line-no">244</span><span id="line-244"> * Return the file context for the HFile this writer belongs to</span> |
| <span class="source-line-no">245</span><span id="line-245"> */</span> |
| <span class="source-line-no">246</span><span id="line-246"> HFileContext getFileContext();</span> |
| <span class="source-line-no">247</span><span id="line-247"> }</span> |
| <span class="source-line-no">248</span><span id="line-248"></span> |
| <span class="source-line-no">249</span><span id="line-249"> /**</span> |
| <span class="source-line-no">250</span><span id="line-250"> * This variety of ways to construct writers is used throughout the code, and we want to be able</span> |
| <span class="source-line-no">251</span><span id="line-251"> * to swap writer implementations.</span> |
| <span class="source-line-no">252</span><span id="line-252"> */</span> |
| <span class="source-line-no">253</span><span id="line-253"> public static class WriterFactory {</span> |
| <span class="source-line-no">254</span><span id="line-254"> protected final Configuration conf;</span> |
| <span class="source-line-no">255</span><span id="line-255"> protected final CacheConfig cacheConf;</span> |
| <span class="source-line-no">256</span><span id="line-256"> protected FileSystem fs;</span> |
| <span class="source-line-no">257</span><span id="line-257"> protected Path path;</span> |
| <span class="source-line-no">258</span><span id="line-258"> protected FSDataOutputStream ostream;</span> |
| <span class="source-line-no">259</span><span id="line-259"> protected InetSocketAddress[] favoredNodes;</span> |
| <span class="source-line-no">260</span><span id="line-260"> private HFileContext fileContext;</span> |
| <span class="source-line-no">261</span><span id="line-261"> protected boolean shouldDropBehind = false;</span> |
| <span class="source-line-no">262</span><span id="line-262"></span> |
| <span class="source-line-no">263</span><span id="line-263"> WriterFactory(Configuration conf, CacheConfig cacheConf) {</span> |
| <span class="source-line-no">264</span><span id="line-264"> this.conf = conf;</span> |
| <span class="source-line-no">265</span><span id="line-265"> this.cacheConf = cacheConf;</span> |
| <span class="source-line-no">266</span><span id="line-266"> }</span> |
| <span class="source-line-no">267</span><span id="line-267"></span> |
| <span class="source-line-no">268</span><span id="line-268"> public WriterFactory withPath(FileSystem fs, Path path) {</span> |
| <span class="source-line-no">269</span><span id="line-269"> Preconditions.checkNotNull(fs);</span> |
| <span class="source-line-no">270</span><span id="line-270"> Preconditions.checkNotNull(path);</span> |
| <span class="source-line-no">271</span><span id="line-271"> this.fs = fs;</span> |
| <span class="source-line-no">272</span><span id="line-272"> this.path = path;</span> |
| <span class="source-line-no">273</span><span id="line-273"> return this;</span> |
| <span class="source-line-no">274</span><span id="line-274"> }</span> |
| <span class="source-line-no">275</span><span id="line-275"></span> |
| <span class="source-line-no">276</span><span id="line-276"> public WriterFactory withOutputStream(FSDataOutputStream ostream) {</span> |
| <span class="source-line-no">277</span><span id="line-277"> Preconditions.checkNotNull(ostream);</span> |
| <span class="source-line-no">278</span><span id="line-278"> this.ostream = ostream;</span> |
| <span class="source-line-no">279</span><span id="line-279"> return this;</span> |
| <span class="source-line-no">280</span><span id="line-280"> }</span> |
| <span class="source-line-no">281</span><span id="line-281"></span> |
| <span class="source-line-no">282</span><span id="line-282"> public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {</span> |
| <span class="source-line-no">283</span><span id="line-283"> // Deliberately not checking for null here.</span> |
| <span class="source-line-no">284</span><span id="line-284"> this.favoredNodes = favoredNodes;</span> |
| <span class="source-line-no">285</span><span id="line-285"> return this;</span> |
| <span class="source-line-no">286</span><span id="line-286"> }</span> |
| <span class="source-line-no">287</span><span id="line-287"></span> |
| <span class="source-line-no">288</span><span id="line-288"> public WriterFactory withFileContext(HFileContext fileContext) {</span> |
| <span class="source-line-no">289</span><span id="line-289"> this.fileContext = fileContext;</span> |
| <span class="source-line-no">290</span><span id="line-290"> return this;</span> |
| <span class="source-line-no">291</span><span id="line-291"> }</span> |
| <span class="source-line-no">292</span><span id="line-292"></span> |
| <span class="source-line-no">293</span><span id="line-293"> public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {</span> |
| <span class="source-line-no">294</span><span id="line-294"> this.shouldDropBehind = shouldDropBehind;</span> |
| <span class="source-line-no">295</span><span id="line-295"> return this;</span> |
| <span class="source-line-no">296</span><span id="line-296"> }</span> |
| <span class="source-line-no">297</span><span id="line-297"></span> |
| <span class="source-line-no">298</span><span id="line-298"> public Writer create() throws IOException {</span> |
| <span class="source-line-no">299</span><span id="line-299"> if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) { // exactly one destination allowed</span> |
| <span class="source-line-no">300</span><span id="line-300"> throw new AssertionError("Please specify exactly one of filesystem/path or output stream");</span> |
| <span class="source-line-no">301</span><span id="line-301"> }</span> |
| <span class="source-line-no">302</span><span id="line-302"> if (path != null) { // only a path was given, so open the output stream ourselves</span> |
| <span class="source-line-no">303</span><span id="line-303"> ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);</span> |
| <span class="source-line-no">304</span><span id="line-304"> try {</span> |
| <span class="source-line-no">305</span><span id="line-305"> ostream.setDropBehind(shouldDropBehind &amp;&amp; cacheConf.shouldDropBehindCompaction());</span> |
| <span class="source-line-no">306</span><span id="line-306"> } catch (UnsupportedOperationException uoe) { // best effort: log and continue</span> |
| <span class="source-line-no">307</span><span id="line-307"> LOG.trace("Unable to set drop behind on {}", path, uoe);</span> |
| <span class="source-line-no">308</span><span id="line-308"> LOG.debug("Unable to set drop behind on {}", path.getName());</span> |
| <span class="source-line-no">309</span><span id="line-309"> }</span> |
| <span class="source-line-no">310</span><span id="line-310"> }</span> |
| <span class="source-line-no">311</span><span id="line-311"> return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext);</span> |
| <span class="source-line-no">312</span><span id="line-312"> }</span> |
| <span class="source-line-no">313</span><span id="line-313"> }</span> |
| <span class="source-line-no">314</span><span id="line-314"></span> |
| <span class="source-line-no">315</span><span id="line-315"> /** The configuration key for HFile version to use for new files */</span> |
| <span class="source-line-no">316</span><span id="line-316"> public static final String FORMAT_VERSION_KEY = "hfile.format.version";</span> |
| <span class="source-line-no">317</span><span id="line-317"></span> |
| <span class="source-line-no">318</span><span id="line-318"> public static int getFormatVersion(Configuration conf) {</span> |
| <span class="source-line-no">319</span><span id="line-319"> int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);</span> |
| <span class="source-line-no">320</span><span id="line-320"> checkFormatVersion(version);</span> |
| <span class="source-line-no">321</span><span id="line-321"> return version;</span> |
| <span class="source-line-no">322</span><span id="line-322"> }</span> |
| <span class="source-line-no">323</span><span id="line-323"></span> |
| <span class="source-line-no">324</span><span id="line-324"> /**</span> |
| <span class="source-line-no">325</span><span id="line-325"> * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for</span> |
| <span class="source-line-no">326</span><span id="line-326"> * all writers created through the returned factory.</span> |
| <span class="source-line-no">327</span><span id="line-327"> */</span> |
| <span class="source-line-no">328</span><span id="line-328"> public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {</span> |
| <span class="source-line-no">329</span><span id="line-329"> return HFile.getWriterFactory(conf, CacheConfig.DISABLED);</span> |
| <span class="source-line-no">330</span><span id="line-330"> }</span> |
| <span class="source-line-no">331</span><span id="line-331"></span> |
| <span class="source-line-no">332</span><span id="line-332"> /**</span> |
| <span class="source-line-no">333</span><span id="line-333"> * Returns the factory to be used to create {@link HFile} writers</span> |
| <span class="source-line-no">334</span><span id="line-334"> */</span> |
| <span class="source-line-no">335</span><span id="line-335"> public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) {</span> |
| <span class="source-line-no">336</span><span id="line-336"> int version = getFormatVersion(conf);</span> |
| <span class="source-line-no">337</span><span id="line-337"> switch (version) {</span> |
| <span class="source-line-no">338</span><span id="line-338"> case 2:</span> |
| <span class="source-line-no">339</span><span id="line-339"> throw new IllegalArgumentException("This should never happen. "</span> |
| <span class="source-line-no">340</span><span id="line-340"> + "Did you change hfile.format.version to read v2? This version of the software writes v3"</span> |
| <span class="source-line-no">341</span><span id="line-341"> + " hfiles only (but it can read v2 files without having to update hfile.format.version "</span> |
| <span class="source-line-no">342</span><span id="line-342"> + "in hbase-site.xml)");</span> |
| <span class="source-line-no">343</span><span id="line-343"> case 3:</span> |
| <span class="source-line-no">344</span><span id="line-344"> return new HFile.WriterFactory(conf, cacheConf);</span> |
| <span class="source-line-no">345</span><span id="line-345"> default:</span> |
| <span class="source-line-no">346</span><span id="line-346"> throw new IllegalArgumentException(</span> |
| <span class="source-line-no">347</span><span id="line-347"> "Cannot create writer for HFile " + "format version " + version);</span> |
| <span class="source-line-no">348</span><span id="line-348"> }</span> |
| <span class="source-line-no">349</span><span id="line-349"> }</span> |
| <span class="source-line-no">350</span><span id="line-350"></span> |
| <span class="source-line-no">351</span><span id="line-351"> /**</span> |
| <span class="source-line-no">352</span><span id="line-352"> * An abstraction used by the block index. Implementations will check cache for any asked-for</span> |
| <span class="source-line-no">353</span><span id="line-353"> * block and return cached block if found. Otherwise, after reading from fs, will try and put</span> |
| <span class="source-line-no">354</span><span id="line-354"> * block into cache before returning.</span> |
| <span class="source-line-no">355</span><span id="line-355"> */</span> |
| <span class="source-line-no">356</span><span id="line-356"> public interface CachingBlockReader {</span> |
| <span class="source-line-no">357</span><span id="line-357"> /**</span> |
| <span class="source-line-no">358</span><span id="line-358"> * Read in a file block.</span> |
| <span class="source-line-no">359</span><span id="line-359"> * @param offset offset to read.</span> |
| <span class="source-line-no">360</span><span id="line-360"> * @param onDiskBlockSize size of the block</span> |
| <span class="source-line-no">361</span><span id="line-361"> * @param isCompaction is this block being read as part of a compaction</span> |
| <span class="source-line-no">362</span><span id="line-362"> * @param expectedBlockType the block type we are expecting to read with this read</span> |
| <span class="source-line-no">363</span><span id="line-363"> * operation, or null to read whatever block type is available</span> |
| <span class="source-line-no">364</span><span id="line-364"> * and avoid checking (that might reduce caching efficiency of</span> |
| <span class="source-line-no">365</span><span id="line-365"> * encoded data blocks)</span> |
| <span class="source-line-no">366</span><span id="line-366"> * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks</span> |
| <span class="source-line-no">367</span><span id="line-367"> * to be in, or null to not perform this check and return the</span> |
| <span class="source-line-no">368</span><span id="line-368"> * block irrespective of the encoding. This check only applies</span> |
| <span class="source-line-no">369</span><span id="line-369"> * to data blocks and can be set to null when the caller is</span> |
| <span class="source-line-no">370</span><span id="line-370"> * expecting to read a non-data block and has set</span> |
| <span class="source-line-no">371</span><span id="line-371"> * expectedBlockType accordingly.</span> |
| <span class="source-line-no">372</span><span id="line-372"> * @return Block wrapped in a ByteBuffer.</span> |
| <span class="source-line-no">373</span><span id="line-373"> */</span> |
| <span class="source-line-no">374</span><span id="line-374"> HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,</span> |
| <span class="source-line-no">375</span><span id="line-375"> final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,</span> |
| <span class="source-line-no">376</span><span id="line-376"> DataBlockEncoding expectedDataBlockEncoding) throws IOException;</span> |
| <span class="source-line-no">377</span><span id="line-377"></span> |
| <span class="source-line-no">378</span><span id="line-378"> HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,</span> |
| <span class="source-line-no">379</span><span id="line-379"> final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,</span> |
| <span class="source-line-no">380</span><span id="line-380"> DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException;</span> |
| <span class="source-line-no">381</span><span id="line-381"> }</span> |
| <span class="source-line-no">382</span><span id="line-382"></span> |
| <span class="source-line-no">383</span><span id="line-383"> /** An interface used by clients to open and iterate an {@link HFile}. */</span> |
| <span class="source-line-no">384</span><span id="line-384"> public interface Reader extends Closeable, CachingBlockReader {</span> |
| <span class="source-line-no">385</span><span id="line-385"> /**</span> |
| <span class="source-line-no">386</span><span id="line-386"> * Returns this reader's "name". Usually the last component of the path. Needs to be constant as</span> |
| <span class="source-line-no">387</span><span id="line-387"> * the file is being moved to support caching on write.</span> |
| <span class="source-line-no">388</span><span id="line-388"> */</span> |
| <span class="source-line-no">389</span><span id="line-389"> String getName();</span> |
| <span class="source-line-no">390</span><span id="line-390"></span> |
| <span class="source-line-no">391</span><span id="line-391"> CellComparator getComparator();</span> |
| <span class="source-line-no">392</span><span id="line-392"></span> |
| <span class="source-line-no">393</span><span id="line-393"> HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread,</span> |
| <span class="source-line-no">394</span><span id="line-394"> boolean isCompaction);</span> |
| <span class="source-line-no">395</span><span id="line-395"></span> |
| <span class="source-line-no">396</span><span id="line-396"> HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;</span> |
| <span class="source-line-no">397</span><span id="line-397"></span> |
| <span class="source-line-no">398</span><span id="line-398"> Optional&lt;ExtendedCell&gt; getLastKey();</span> |
| <span class="source-line-no">399</span><span id="line-399"></span> |
| <span class="source-line-no">400</span><span id="line-400"> Optional&lt;ExtendedCell&gt; midKey() throws IOException;</span> |
| <span class="source-line-no">401</span><span id="line-401"></span> |
| <span class="source-line-no">402</span><span id="line-402"> long length();</span> |
| <span class="source-line-no">403</span><span id="line-403"></span> |
| <span class="source-line-no">404</span><span id="line-404"> long getEntries();</span> |
| <span class="source-line-no">405</span><span id="line-405"></span> |
| <span class="source-line-no">406</span><span id="line-406"> Optional&lt;ExtendedCell&gt; getFirstKey();</span> |
| <span class="source-line-no">407</span><span id="line-407"></span> |
| <span class="source-line-no">408</span><span id="line-408"> long indexSize();</span> |
| <span class="source-line-no">409</span><span id="line-409"></span> |
| <span class="source-line-no">410</span><span id="line-410"> Optional&lt;byte[]&gt; getFirstRowKey();</span> |
| <span class="source-line-no">411</span><span id="line-411"></span> |
| <span class="source-line-no">412</span><span id="line-412"> Optional&lt;byte[]&gt; getLastRowKey();</span> |
| <span class="source-line-no">413</span><span id="line-413"></span> |
| <span class="source-line-no">414</span><span id="line-414"> FixedFileTrailer getTrailer();</span> |
| <span class="source-line-no">415</span><span id="line-415"></span> |
| <span class="source-line-no">416</span><span id="line-416"> void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader);</span> |
| <span class="source-line-no">417</span><span id="line-417"></span> |
| <span class="source-line-no">418</span><span id="line-418"> HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader();</span> |
| <span class="source-line-no">419</span><span id="line-419"></span> |
| <span class="source-line-no">420</span><span id="line-420"> void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader);</span> |
| <span class="source-line-no">421</span><span id="line-421"></span> |
| <span class="source-line-no">422</span><span id="line-422"> HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader();</span> |
| <span class="source-line-no">423</span><span id="line-423"></span> |
| <span class="source-line-no">424</span><span id="line-424"> HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread);</span> |
| <span class="source-line-no">425</span><span id="line-425"></span> |
| <span class="source-line-no">426</span><span id="line-426"> /**</span> |
| <span class="source-line-no">427</span><span id="line-427"> * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows</span> |
| <span class="source-line-no">428</span><span id="line-428"> * nothing about how that metadata is structured.</span> |
| <span class="source-line-no">429</span><span id="line-429"> */</span> |
| <span class="source-line-no">430</span><span id="line-430"> DataInput getGeneralBloomFilterMetadata() throws IOException;</span> |
| <span class="source-line-no">431</span><span id="line-431"></span> |
| <span class="source-line-no">432</span><span id="line-432"> /**</span> |
| <span class="source-line-no">433</span><span id="line-433"> * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version.</span> |
| <span class="source-line-no">434</span><span id="line-434"> * Knows nothing about how that metadata is structured.</span> |
| <span class="source-line-no">435</span><span id="line-435"> */</span> |
| <span class="source-line-no">436</span><span id="line-436"> DataInput getDeleteBloomFilterMetadata() throws IOException;</span> |
| <span class="source-line-no">437</span><span id="line-437"></span> |
| <span class="source-line-no">438</span><span id="line-438"> Path getPath();</span> |
| <span class="source-line-no">439</span><span id="line-439"></span> |
| <span class="source-line-no">440</span><span id="line-440"> /** Close method with optional evictOnClose */</span> |
| <span class="source-line-no">441</span><span id="line-441"> void close(boolean evictOnClose) throws IOException;</span> |
| <span class="source-line-no">442</span><span id="line-442"></span> |
| <span class="source-line-no">443</span><span id="line-443"> DataBlockEncoding getDataBlockEncoding();</span> |
| <span class="source-line-no">444</span><span id="line-444"></span> |
| <span class="source-line-no">445</span><span id="line-445"> boolean hasMVCCInfo();</span> |
| <span class="source-line-no">446</span><span id="line-446"></span> |
| <span class="source-line-no">447</span><span id="line-447"> /**</span> |
| <span class="source-line-no">448</span><span id="line-448"> * Return the file context of the HFile this reader belongs to</span> |
| <span class="source-line-no">449</span><span id="line-449"> */</span> |
| <span class="source-line-no">450</span><span id="line-450"> HFileContext getFileContext();</span> |
| <span class="source-line-no">451</span><span id="line-451"></span> |
| <span class="source-line-no">452</span><span id="line-452"> boolean isPrimaryReplicaReader();</span> |
| <span class="source-line-no">453</span><span id="line-453"></span> |
| <span class="source-line-no">454</span><span id="line-454"> DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);</span> |
| <span class="source-line-no">455</span><span id="line-455"></span> |
| <span class="source-line-no">456</span><span id="line-456"> HFileBlock.FSReader getUncachedBlockReader();</span> |
| <span class="source-line-no">457</span><span id="line-457"></span> |
| <span class="source-line-no">458</span><span id="line-458"> boolean prefetchComplete();</span> |
| <span class="source-line-no">459</span><span id="line-459"></span> |
| <span class="source-line-no">460</span><span id="line-460"> boolean prefetchStarted();</span> |
| <span class="source-line-no">461</span><span id="line-461"></span> |
| <span class="source-line-no">462</span><span id="line-462"> /**</span> |
| <span class="source-line-no">463</span><span id="line-463"> * To close the stream's socket. Note: This can be concurrently called from multiple threads and</span> |
| <span class="source-line-no">464</span><span id="line-464"> * implementation should take care of thread safety.</span> |
| <span class="source-line-no">465</span><span id="line-465"> */</span> |
| <span class="source-line-no">466</span><span id="line-466"> void unbufferStream();</span> |
| <span class="source-line-no">467</span><span id="line-467"></span> |
| <span class="source-line-no">468</span><span id="line-468"> ReaderContext getContext();</span> |
| <span class="source-line-no">469</span><span id="line-469"></span> |
| <span class="source-line-no">470</span><span id="line-470"> HFileInfo getHFileInfo();</span> |
| <span class="source-line-no">471</span><span id="line-471"></span> |
| <span class="source-line-no">472</span><span id="line-472"> void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder);</span> |
| <span class="source-line-no">473</span><span id="line-473"> }</span> |
| <span class="source-line-no">474</span><span id="line-474"></span> |
| <span class="source-line-no">475</span><span id="line-475"> /**</span> |
| <span class="source-line-no">476</span><span id="line-476"> * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See</span> |
| <span class="source-line-no">477</span><span id="line-477"> * HBASE-6635.</span> |
| <span class="source-line-no">478</span><span id="line-478"> * @param context Reader context info</span> |
| <span class="source-line-no">479</span><span id="line-479"> * @param fileInfo HFile info</span> |
| <span class="source-line-no">480</span><span id="line-480"> * @param cacheConf Cache configuration values, cannot be null.</span> |
| <span class="source-line-no">481</span><span id="line-481"> * @param conf Configuration</span> |
| <span class="source-line-no">482</span><span id="line-482"> * @return an appropriate instance of HFileReader</span> |
| <span class="source-line-no">483</span><span id="line-483"> * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException</span> |
| <span class="source-line-no">484</span><span id="line-484"> */</span> |
| <span class="source-line-no">485</span><span id="line-485"> @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH",</span> |
| <span class="source-line-no">486</span><span id="line-486"> justification = "Intentional")</span> |
| <span class="source-line-no">487</span><span id="line-487"> public static Reader createReader(ReaderContext context, HFileInfo fileInfo,</span> |
| <span class="source-line-no">488</span><span id="line-488"> CacheConfig cacheConf, Configuration conf) throws IOException {</span> |
| <span class="source-line-no">489</span><span id="line-489"> try {</span> |
| <span class="source-line-no">490</span><span id="line-490"> if (context.getReaderType() == ReaderType.STREAM) {</span> |
| <span class="source-line-no">491</span><span id="line-491"> // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields</span> |
| <span class="source-line-no">492</span><span id="line-492"> return new HFileStreamReader(context, fileInfo, cacheConf, conf);</span> |
| <span class="source-line-no">493</span><span id="line-493"> }</span> |
| <span class="source-line-no">494</span><span id="line-494"> FixedFileTrailer trailer = fileInfo.getTrailer();</span> |
| <span class="source-line-no">495</span><span id="line-495"> switch (trailer.getMajorVersion()) {</span> |
| <span class="source-line-no">496</span><span id="line-496"> case 2:</span> |
| <span class="source-line-no">497</span><span id="line-497"> LOG.debug("Opening HFile v2 with v3 reader");</span> |
| <span class="source-line-no">498</span><span id="line-498"> // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH</span> |
| <span class="source-line-no">499</span><span id="line-499"> case 3:</span> |
| <span class="source-line-no">500</span><span id="line-500"> return new HFilePreadReader(context, fileInfo, cacheConf, conf);</span> |
| <span class="source-line-no">501</span><span id="line-501"> default:</span> |
| <span class="source-line-no">502</span><span id="line-502"> throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());</span> |
| <span class="source-line-no">503</span><span id="line-503"> }</span> |
| <span class="source-line-no">504</span><span id="line-504"> } catch (Throwable t) {</span> |
| <span class="source-line-no">505</span><span id="line-505"> IOUtils.closeQuietly(context.getInputStreamWrapper(),</span> |
| <span class="source-line-no">506</span><span id="line-506"> e -> LOG.warn("failed to close input stream wrapper", e));</span> |
| <span class="source-line-no">507</span><span id="line-507"> throw new CorruptHFileException(</span> |
| <span class="source-line-no">508</span><span id="line-508"> "Problem reading HFile Trailer from file " + context.getFilePath(), t);</span> |
| <span class="source-line-no">509</span><span id="line-509"> } finally {</span> |
| <span class="source-line-no">510</span><span id="line-510"> context.getInputStreamWrapper().unbuffer();</span> |
| <span class="source-line-no">511</span><span id="line-511"> }</span> |
| <span class="source-line-no">512</span><span id="line-512"> }</span> |
| <span class="source-line-no">513</span><span id="line-513"></span> |
| <span class="source-line-no">514</span><span id="line-514"> /**</span> |
| <span class="source-line-no">515</span><span id="line-515"> * Creates reader with cache configuration disabled</span> |
| <span class="source-line-no">516</span><span id="line-516"> * @param fs filesystem</span> |
| <span class="source-line-no">517</span><span id="line-517"> * @param path Path to file to read</span> |
| <span class="source-line-no">518</span><span id="line-518"> * @param conf Configuration</span> |
| <span class="source-line-no">519</span><span id="line-519"> * @return an active Reader instance</span> |
| <span class="source-line-no">520</span><span id="line-520"> * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile</span> |
| <span class="source-line-no">521</span><span id="line-521"> * is corrupt/invalid.</span> |
| <span class="source-line-no">522</span><span id="line-522"> */</span> |
| <span class="source-line-no">523</span><span id="line-523"> public static Reader createReader(FileSystem fs, Path path, Configuration conf)</span> |
| <span class="source-line-no">524</span><span id="line-524"> throws IOException {</span> |
| <span class="source-line-no">525</span><span id="line-525"> // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use</span> |
| <span class="source-line-no">526</span><span id="line-526"> // block cache then it is OK to set it as any value. We use true here.</span> |
| <span class="source-line-no">527</span><span id="line-527"> return createReader(fs, path, CacheConfig.DISABLED, true, conf);</span> |
| <span class="source-line-no">528</span><span id="line-528"> }</span> |
| <span class="source-line-no">529</span><span id="line-529"></span> |
| <span class="source-line-no">530</span><span id="line-530"> /**</span> |
| <span class="source-line-no">531</span><span id="line-531"> * @param fs filesystem</span> |
| <span class="source-line-no">532</span><span id="line-532"> * @param path Path to file to read</span> |
| <span class="source-line-no">533</span><span id="line-533"> * @param cacheConf This must not be null.</span> |
| <span class="source-line-no">534</span><span id="line-534"> * @param primaryReplicaReader true if this is a reader for primary replica</span> |
| <span class="source-line-no">535</span><span id="line-535"> * @param conf Configuration</span> |
| <span class="source-line-no">536</span><span id="line-536"> * @return an active Reader instance</span> |
| <span class="source-line-no">537</span><span id="line-537"> * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile</span> |
| <span class="source-line-no">538</span><span id="line-538"> * is corrupt/invalid.</span> |
| <span class="source-line-no">539</span><span id="line-539"> * @see CacheConfig#CacheConfig(Configuration)</span> |
| <span class="source-line-no">540</span><span id="line-540"> */</span> |
| <span class="source-line-no">541</span><span id="line-541"> public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf,</span> |
| <span class="source-line-no">542</span><span id="line-542"> boolean primaryReplicaReader, Configuration conf) throws IOException {</span> |
| <span class="source-line-no">543</span><span id="line-543"> Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");</span> |
| <span class="source-line-no">544</span><span id="line-544"> long fileSize = fs.getFileStatus(path).getLen(); // stat first: a failure here must not leak a stream</span> |
| <span class="source-line-no">545</span><span id="line-545"> FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);</span> |
| <span class="source-line-no">546</span><span id="line-546"> ReaderContext context = new ReaderContextBuilder().withFilePath(path)</span> |
| <span class="source-line-no">547</span><span id="line-547"> .withInputStreamWrapper(stream).withFileSize(fileSize).withFileSystem(stream.getHfs())</span> |
| <span class="source-line-no">548</span><span id="line-548"> .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build();</span> |
| <span class="source-line-no">549</span><span id="line-549"> HFileInfo fileInfo = new HFileInfo(context, conf);</span> |
| <span class="source-line-no">550</span><span id="line-550"> Reader reader = createReader(context, fileInfo, cacheConf, conf);</span> |
| <span class="source-line-no">551</span><span id="line-551"> fileInfo.initMetaAndIndex(reader); // loads meta and index data into the freshly created reader</span> |
| <span class="source-line-no">552</span><span id="line-552"> return reader;</span> |
| <span class="source-line-no">553</span><span id="line-553"> }</span> |
| <span class="source-line-no">554</span><span id="line-554"></span> |
| <span class="source-line-no">555</span><span id="line-555"> /**</span> |
| <span class="source-line-no">556</span><span id="line-556"> * Returns true if the specified file has a valid HFile Trailer.</span> |
| <span class="source-line-no">557</span><span id="line-557"> * @param fs filesystem</span> |
| <span class="source-line-no">558</span><span id="line-558"> * @param path Path to file to verify</span> |
| <span class="source-line-no">559</span><span id="line-559"> * @return true if the file has a valid HFile Trailer, otherwise false</span> |
| <span class="source-line-no">560</span><span id="line-560"> * @throws IOException if failed to read from the underlying stream</span> |
| <span class="source-line-no">561</span><span id="line-561"> */</span> |
| <span class="source-line-no">562</span><span id="line-562"> public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {</span> |
| <span class="source-line-no">563</span><span id="line-563"> return isHFileFormat(fs, fs.getFileStatus(path));</span> |
| <span class="source-line-no">564</span><span id="line-564"> }</span> |
| <span class="source-line-no">565</span><span id="line-565"></span> |
| <span class="source-line-no">566</span><span id="line-566"> /**</span> |
| <span class="source-line-no">567</span><span id="line-567"> * Returns true if the specified file has a valid HFile Trailer.</span> |
| <span class="source-line-no">568</span><span id="line-568"> * @param fs filesystem</span> |
| <span class="source-line-no">569</span><span id="line-569"> * @param fileStatus the file to verify</span> |
| <span class="source-line-no">570</span><span id="line-570"> * @return true if the file has a valid HFile Trailer, otherwise false</span> |
| <span class="source-line-no">571</span><span id="line-571"> * @throws IOException if failed to read from the underlying stream</span> |
| <span class="source-line-no">572</span><span id="line-572"> */</span> |
| <span class="source-line-no">573</span><span id="line-573"> public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)</span> |
| <span class="source-line-no">574</span><span id="line-574"> throws IOException {</span> |
| <span class="source-line-no">575</span><span id="line-575"> final Path path = fileStatus.getPath();</span> |
| <span class="source-line-no">576</span><span id="line-576"> final long size = fileStatus.getLen();</span> |
| <span class="source-line-no">577</span><span id="line-577"> try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) {</span> |
| <span class="source-line-no">578</span><span id="line-578"> boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();</span> |
| <span class="source-line-no">579</span><span id="line-579"> assert !isHBaseChecksum; // Initially we must read with FS checksum.</span> |
| <span class="source-line-no">580</span><span id="line-580"> FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);</span> |
| <span class="source-line-no">581</span><span id="line-581"> return true;</span> |
| <span class="source-line-no">582</span><span id="line-582"> } catch (IllegalArgumentException e) {</span> |
| <span class="source-line-no">583</span><span id="line-583"> return false;</span> |
| <span class="source-line-no">584</span><span id="line-584"> }</span> |
| <span class="source-line-no">585</span><span id="line-585"> }</span> |
| <span class="source-line-no">586</span><span id="line-586"></span> |
| <span class="source-line-no">587</span><span id="line-587"> /**</span> |
| <span class="source-line-no">588</span><span id="line-588"> * Get names of supported compression algorithms. The names are accepted by HFile.Writer.</span> |
| <span class="source-line-no">589</span><span id="line-589"> * @return Array of strings, each representing a supported compression algorithm. Currently, the</span> |
| <span class="source-line-no">590</span><span id="line-590"> * following compression algorithms are supported.</span> |
| <span class="source-line-no">591</span><span id="line-591"> * &lt;ul&gt;</span> |
| <span class="source-line-no">592</span><span id="line-592"> * &lt;li&gt;"none" - No compression.</span> |
| <span class="source-line-no">593</span><span id="line-593"> * &lt;li&gt;"gz" - GZIP compression.</span> |
| <span class="source-line-no">594</span><span id="line-594"> * &lt;/ul&gt;</span> |
| <span class="source-line-no">595</span><span id="line-595"> */</span> |
| <span class="source-line-no">596</span><span id="line-596"> public static String[] getSupportedCompressionAlgorithms() {</span> |
| <span class="source-line-no">597</span><span id="line-597"> return Compression.getSupportedAlgorithms();</span> |
| <span class="source-line-no">598</span><span id="line-598"> }</span> |
| <span class="source-line-no">599</span><span id="line-599"></span> |
| <span class="source-line-no">600</span><span id="line-600"> // Utility methods.</span> |
| <span class="source-line-no">601</span><span id="line-601"> /*</span> |
| <span class="source-line-no">602</span><span id="line-602"> * @param l Long to convert to an int.</span> |
| <span class="source-line-no">603</span><span id="line-603"> * @return &lt;code&gt;l&lt;/code&gt; cast as an int.</span> |
| <span class="source-line-no">604</span><span id="line-604"> */</span> |
| <span class="source-line-no">605</span><span id="line-605"> static int longToInt(final long l) {</span> |
| <span class="source-line-no">606</span><span id="line-606"> // A block's size() is expected to stay below 4GB. The mask keeps the low 32 bits, so a</span> |
| <span class="source-line-no">607</span><span id="line-607"> // size() above 2GB wraps to a negative int (From tfile).</span> |
| <span class="source-line-no">608</span><span id="line-608"> return (int) (l &amp; 0x00000000ffffffffL);</span> |
| <span class="source-line-no">609</span><span id="line-609"> }</span> |
| <span class="source-line-no">610</span><span id="line-610"></span> |
| <span class="source-line-no">611</span><span id="line-611"> /**</span> |
| <span class="source-line-no">612</span><span id="line-612"> * Returns all HFiles belonging to the given region directory. Could return an empty list.</span> |
| <span class="source-line-no">613</span><span id="line-613"> * @param fs The file system reference.</span> |
| <span class="source-line-no">614</span><span id="line-614"> * @param regionDir The region directory to scan.</span> |
| <span class="source-line-no">615</span><span id="line-615"> * @return The list of files found.</span> |
| <span class="source-line-no">616</span><span id="line-616"> * @throws IOException When scanning the files fails.</span> |
| <span class="source-line-no">617</span><span id="line-617"> */</span> |
| <span class="source-line-no">618</span><span id="line-618"> public static List&lt;Path&gt; getStoreFiles(FileSystem fs, Path regionDir) throws IOException {</span> |
| <span class="source-line-no">619</span><span id="line-619"> List&lt;Path&gt; regionHFiles = new ArrayList&lt;&gt;();</span> |
| <span class="source-line-no">620</span><span id="line-620"> PathFilter dirFilter = new FSUtils.DirFilter(fs);</span> |
| <span class="source-line-no">621</span><span id="line-621"> FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);</span> |
| <span class="source-line-no">622</span><span id="line-622"> for (FileStatus dir : familyDirs) {</span> |
| <span class="source-line-no">623</span><span id="line-623"> FileStatus[] files = fs.listStatus(dir.getPath());</span> |
| <span class="source-line-no">624</span><span id="line-624"> for (FileStatus file : files) {</span> |
| <span class="source-line-no">625</span><span id="line-625"> if ( // keep plain files whose path mentions neither the old-log nor recovered-edits dir</span> |
| <span class="source-line-no">626</span><span id="line-626"> !file.isDirectory()</span> |
| <span class="source-line-no">627</span><span id="line-627"> &amp;&amp; (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME))</span> |
| <span class="source-line-no">628</span><span id="line-628"> &amp;&amp; (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))</span> |
| <span class="source-line-no">629</span><span id="line-629"> ) {</span> |
| <span class="source-line-no">630</span><span id="line-630"> regionHFiles.add(file.getPath());</span> |
| <span class="source-line-no">631</span><span id="line-631"> }</span> |
| <span class="source-line-no">632</span><span id="line-632"> }</span> |
| <span class="source-line-no">633</span><span id="line-633"> }</span> |
| <span class="source-line-no">634</span><span id="line-634"> return regionHFiles;</span> |
| <span class="source-line-no">635</span><span id="line-635"> }</span> |
| <span class="source-line-no">636</span><span id="line-636"></span> |
| <span class="source-line-no">637</span><span id="line-637"> /**</span> |
| <span class="source-line-no">638</span><span id="line-638"> * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if</span> |
| <span class="source-line-no">639</span><span id="line-639"> * the version number comes from an input file and has not been verified, the caller needs to</span> |
| <span class="source-line-no">640</span><span id="line-640"> * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted</span> |
| <span class="source-line-no">641</span><span id="line-641"> * input.</span> |
| <span class="source-line-no">642</span><span id="line-642"> * @param version an HFile version</span> |
| <span class="source-line-no">643</span><span id="line-643"> * @throws IllegalArgumentException if the version is invalid</span> |
| <span class="source-line-no">644</span><span id="line-644"> */</span> |
| <span class="source-line-no">645</span><span id="line-645"> public static void checkFormatVersion(int version) throws IllegalArgumentException {</span> |
| <span class="source-line-no">646</span><span id="line-646"> if (version &lt; MIN_FORMAT_VERSION || version &gt; MAX_FORMAT_VERSION) {</span> |
| <span class="source-line-no">647</span><span id="line-647"> throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be "</span> |
| <span class="source-line-no">648</span><span id="line-648"> + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")");</span> |
| <span class="source-line-no">649</span><span id="line-649"> }</span> |
| <span class="source-line-no">650</span><span id="line-650"> }</span> |
| <span class="source-line-no">651</span><span id="line-651"></span> |
| <span class="source-line-no">652</span><span id="line-652"> public static void checkHFileVersion(final Configuration c) {</span> |
| <span class="source-line-no">653</span><span id="line-653"> int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); // default when key unset</span> |
| <span class="source-line-no">654</span><span id="line-654"> if (version != MAX_FORMAT_VERSION) { // any other configured value is rejected</span> |
| <span class="source-line-no">655</span><span id="line-655"> throw new IllegalArgumentException(</span> |
| <span class="source-line-no">656</span><span id="line-656"> "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version</span> |
| <span class="source-line-no">657</span><span id="line-657"> + " which does not match " + MAX_FORMAT_VERSION</span> |
| <span class="source-line-no">658</span><span id="line-658"> + "; are you running with a configuration from an older or newer hbase install (an "</span> |
| <span class="source-line-no">659</span><span id="line-659"> + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");</span> |
| <span class="source-line-no">660</span><span id="line-660"> }</span> |
| <span class="source-line-no">661</span><span id="line-661"> }</span> |
| <span class="source-line-no">662</span><span id="line-662"></span> |
| <span class="source-line-no">663</span><span id="line-663"> public static void main(String[] args) throws Exception {</span> |
| <span class="source-line-no">664</span><span id="line-664"> // Old behavior is preserved by handing the arguments straight to HFilePrettyPrinter.</span> |
| <span class="source-line-no">665</span><span id="line-665"> HFilePrettyPrinter.main(args);</span> |
| <span class="source-line-no">666</span><span id="line-666"> }</span> |
| <span class="source-line-no">667</span><span id="line-667">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |