| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <html lang="en"> |
| <head> |
| <!-- Declare the character encoding explicitly so rendering does not depend on HTTP headers or browser guessing. --> |
| <!-- NOTE(review): assumes the file is stored as UTF-8 (the visible content is pure ASCII) - confirm. --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>Source code</title> |
| <!-- Shared stylesheet, resolved relative to this page (seven directory levels up). --> |
| <link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body> |
| <div class="sourceContainer"> |
| <pre><span class="sourceLineNo">001</span>/*<a name="line.1"></a> |
| <span class="sourceLineNo">002</span> * Licensed to the Apache Software Foundation (ASF) under one<a name="line.2"></a> |
| <span class="sourceLineNo">003</span> * or more contributor license agreements. See the NOTICE file<a name="line.3"></a> |
| <span class="sourceLineNo">004</span> * distributed with this work for additional information<a name="line.4"></a> |
| <span class="sourceLineNo">005</span> * regarding copyright ownership. The ASF licenses this file<a name="line.5"></a> |
| <span class="sourceLineNo">006</span> * to you under the Apache License, Version 2.0 (the<a name="line.6"></a> |
| <span class="sourceLineNo">007</span> * "License"); you may not use this file except in compliance<a name="line.7"></a> |
| <span class="sourceLineNo">008</span> * with the License. You may obtain a copy of the License at<a name="line.8"></a> |
| <span class="sourceLineNo">009</span> *<a name="line.9"></a> |
| <span class="sourceLineNo">010</span> * http://www.apache.org/licenses/LICENSE-2.0<a name="line.10"></a> |
| <span class="sourceLineNo">011</span> *<a name="line.11"></a> |
| <span class="sourceLineNo">012</span> * Unless required by applicable law or agreed to in writing, software<a name="line.12"></a> |
| <span class="sourceLineNo">013</span> * distributed under the License is distributed on an "AS IS" BASIS,<a name="line.13"></a> |
| <span class="sourceLineNo">014</span> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<a name="line.14"></a> |
| <span class="sourceLineNo">015</span> * See the License for the specific language governing permissions and<a name="line.15"></a> |
| <span class="sourceLineNo">016</span> * limitations under the License.<a name="line.16"></a> |
| <span class="sourceLineNo">017</span> */<a name="line.17"></a> |
| <span class="sourceLineNo">018</span>package org.apache.hadoop.hbase.io.hfile;<a name="line.18"></a> |
| <span class="sourceLineNo">019</span><a name="line.19"></a> |
| <span class="sourceLineNo">020</span>import java.io.ByteArrayOutputStream;<a name="line.20"></a> |
| <span class="sourceLineNo">021</span>import java.io.DataInput;<a name="line.21"></a> |
| <span class="sourceLineNo">022</span>import java.io.DataInputStream;<a name="line.22"></a> |
| <span class="sourceLineNo">023</span>import java.io.DataOutput;<a name="line.23"></a> |
| <span class="sourceLineNo">024</span>import java.io.DataOutputStream;<a name="line.24"></a> |
| <span class="sourceLineNo">025</span>import java.io.IOException;<a name="line.25"></a> |
| <span class="sourceLineNo">026</span>import java.nio.ByteBuffer;<a name="line.26"></a> |
| <span class="sourceLineNo">027</span>import java.util.ArrayList;<a name="line.27"></a> |
| <span class="sourceLineNo">028</span>import java.util.Collections;<a name="line.28"></a> |
| <span class="sourceLineNo">029</span>import java.util.List;<a name="line.29"></a> |
| <span class="sourceLineNo">030</span>import java.util.concurrent.atomic.AtomicReference;<a name="line.30"></a> |
| <span class="sourceLineNo">031</span>import org.apache.hadoop.conf.Configuration;<a name="line.31"></a> |
| <span class="sourceLineNo">032</span>import org.apache.hadoop.fs.FSDataOutputStream;<a name="line.32"></a> |
| <span class="sourceLineNo">033</span>import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;<a name="line.33"></a> |
| <span class="sourceLineNo">034</span>import org.apache.hadoop.hbase.Cell;<a name="line.34"></a> |
| <span class="sourceLineNo">035</span>import org.apache.hadoop.hbase.CellComparator;<a name="line.35"></a> |
| <span class="sourceLineNo">036</span>import org.apache.hadoop.hbase.CellUtil;<a name="line.36"></a> |
| <span class="sourceLineNo">037</span>import org.apache.hadoop.hbase.KeyValue;<a name="line.37"></a> |
| <span class="sourceLineNo">038</span>import org.apache.hadoop.hbase.KeyValue.KeyOnlyKeyValue;<a name="line.38"></a> |
| <span class="sourceLineNo">039</span>import org.apache.hadoop.hbase.PrivateCellUtil;<a name="line.39"></a> |
| <span class="sourceLineNo">040</span>import org.apache.hadoop.hbase.io.HeapSize;<a name="line.40"></a> |
| <span class="sourceLineNo">041</span>import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;<a name="line.41"></a> |
| <span class="sourceLineNo">042</span>import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;<a name="line.42"></a> |
| <span class="sourceLineNo">043</span>import org.apache.hadoop.hbase.nio.ByteBuff;<a name="line.43"></a> |
| <span class="sourceLineNo">044</span>import org.apache.hadoop.hbase.regionserver.KeyValueScanner;<a name="line.44"></a> |
| <span class="sourceLineNo">045</span>import org.apache.hadoop.hbase.util.Bytes;<a name="line.45"></a> |
| <span class="sourceLineNo">046</span>import org.apache.hadoop.hbase.util.ClassSize;<a name="line.46"></a> |
| <span class="sourceLineNo">047</span>import org.apache.hadoop.hbase.util.ObjectIntPair;<a name="line.47"></a> |
| <span class="sourceLineNo">048</span>import org.apache.hadoop.io.WritableUtils;<a name="line.48"></a> |
| <span class="sourceLineNo">049</span>import org.apache.hadoop.util.StringUtils;<a name="line.49"></a> |
| <span class="sourceLineNo">050</span>import org.apache.yetus.audience.InterfaceAudience;<a name="line.50"></a> |
| <span class="sourceLineNo">051</span>import org.slf4j.Logger;<a name="line.51"></a> |
| <span class="sourceLineNo">052</span>import org.slf4j.LoggerFactory;<a name="line.52"></a> |
| <span class="sourceLineNo">053</span><a name="line.53"></a> |
| <span class="sourceLineNo">054</span>/**<a name="line.54"></a> |
| <span class="sourceLineNo">055</span> * Provides functionality to write ({@link BlockIndexWriter}) and read (BlockIndexReader) single-level<a name="line.55"></a> |
| <span class="sourceLineNo">056</span> * and multi-level block indexes. Examples of how to use the block index writer can be found in<a name="line.56"></a> |
| <span class="sourceLineNo">057</span> * {@link org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter} and {@link HFileWriterImpl}.<a name="line.57"></a> |
| <span class="sourceLineNo">058</span> * Examples of how to use the reader can be found in {@link HFileReaderImpl} and<a name="line.58"></a> |
| <span class="sourceLineNo">059</span> * org.apache.hadoop.hbase.io.hfile.TestHFileBlockIndex.<a name="line.59"></a> |
| <span class="sourceLineNo">060</span> */<a name="line.60"></a> |
| <span class="sourceLineNo">061</span>@InterfaceAudience.Private<a name="line.61"></a> |
| <span class="sourceLineNo">062</span>public class HFileBlockIndex {<a name="line.62"></a> |
| <span class="sourceLineNo">063</span><a name="line.63"></a> |
| <span class="sourceLineNo">064</span> private static final Logger LOG = LoggerFactory.getLogger(HFileBlockIndex.class);<a name="line.64"></a> |
| <span class="sourceLineNo">065</span><a name="line.65"></a> |
| <span class="sourceLineNo">066</span> static final int DEFAULT_MAX_CHUNK_SIZE = 128 * 1024;<a name="line.66"></a> |
| <span class="sourceLineNo">067</span><a name="line.67"></a> |
| <span class="sourceLineNo">068</span> /**<a name="line.68"></a> |
| <span class="sourceLineNo">069</span> * The maximum size guideline for index blocks (leaf, intermediate, and root). If not<a name="line.69"></a> |
| <span class="sourceLineNo">070</span> * specified, &lt;code&gt;DEFAULT_MAX_CHUNK_SIZE&lt;/code&gt; is used.<a name="line.70"></a> |
| <span class="sourceLineNo">071</span> */<a name="line.71"></a> |
| <span class="sourceLineNo">072</span> public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";<a name="line.72"></a> |
| <span class="sourceLineNo">073</span><a name="line.73"></a> |
| <span class="sourceLineNo">074</span> /**<a name="line.74"></a> |
| <span class="sourceLineNo">075</span> * Minimum number of entries in a single index block. Even if we are above the<a name="line.75"></a> |
| <span class="sourceLineNo">076</span> * hfile.index.block.max.size we will keep writing to the same block unless we have that many<a name="line.76"></a> |
| <span class="sourceLineNo">077</span> * entries. We should have at least a few entries so that we don't have too many levels in the<a name="line.77"></a> |
| <span class="sourceLineNo">078</span> * multi-level index. This should be at least 2 to make sure there is no infinite recursion.<a name="line.78"></a> |
| <span class="sourceLineNo">079</span> */<a name="line.79"></a> |
| <span class="sourceLineNo">080</span> public static final String MIN_INDEX_NUM_ENTRIES_KEY = "hfile.index.block.min.entries";<a name="line.80"></a> |
| <span class="sourceLineNo">081</span><a name="line.81"></a> |
| <span class="sourceLineNo">082</span> static final int DEFAULT_MIN_INDEX_NUM_ENTRIES = 16;<a name="line.82"></a> |
| <span class="sourceLineNo">083</span><a name="line.83"></a> |
| <span class="sourceLineNo">084</span> /**<a name="line.84"></a> |
| <span class="sourceLineNo">085</span> * The number of bytes stored in each "secondary index" entry in addition to key bytes in the<a name="line.85"></a> |
| <span class="sourceLineNo">086</span> * non-root index block format. The first long is the file offset of the deeper-level block the<a name="line.86"></a> |
| <span class="sourceLineNo">087</span> * entry points to, and the int that follows is that block's on-disk size without including<a name="line.87"></a> |
| <span class="sourceLineNo">088</span> * header.<a name="line.88"></a> |
| <span class="sourceLineNo">089</span> */<a name="line.89"></a> |
| <span class="sourceLineNo">090</span> static final int SECONDARY_INDEX_ENTRY_OVERHEAD = Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG;<a name="line.90"></a> |
| <span class="sourceLineNo">091</span><a name="line.91"></a> |
| <span class="sourceLineNo">092</span> /**<a name="line.92"></a> |
| <span class="sourceLineNo">093</span> * Error message when trying to use inline block API in single-level mode.<a name="line.93"></a> |
| <span class="sourceLineNo">094</span> */<a name="line.94"></a> |
| <span class="sourceLineNo">095</span> private static final String INLINE_BLOCKS_NOT_ALLOWED =<a name="line.95"></a> |
| <span class="sourceLineNo">096</span> "Inline blocks are not allowed in the single-level-only mode";<a name="line.96"></a> |
| <span class="sourceLineNo">097</span><a name="line.97"></a> |
| <span class="sourceLineNo">098</span> /**<a name="line.98"></a> |
| <span class="sourceLineNo">099</span> * The size of a meta-data record used for finding the mid-key in a multi-level index. Consists of<a name="line.99"></a> |
| <span class="sourceLineNo">100</span> * the middle leaf-level index block offset (long), its on-disk size without header included<a name="line.100"></a> |
| <span class="sourceLineNo">101</span> * (int), and the mid-key entry's zero-based index in that leaf index block.<a name="line.101"></a> |
| <span class="sourceLineNo">102</span> */<a name="line.102"></a> |
| <span class="sourceLineNo">103</span> protected static final int MID_KEY_METADATA_SIZE = Bytes.SIZEOF_LONG + 2 * Bytes.SIZEOF_INT;<a name="line.103"></a> |
| <span class="sourceLineNo">104</span><a name="line.104"></a> |
| <span class="sourceLineNo">105</span> /**<a name="line.105"></a> |
| <span class="sourceLineNo">106</span> * An implementation of the BlockIndexReader that deals with block keys which are plain byte[]<a name="line.106"></a> |
| <span class="sourceLineNo">107</span> * like MetaBlock or the Bloom Block for ROW bloom. Does not need a comparator. It can work on<a name="line.107"></a> |
| <span class="sourceLineNo">108</span> * Bytes.BYTES_RAWCOMPARATOR.<a name="line.108"></a> |
| <span class="sourceLineNo">109</span> */<a name="line.109"></a> |
| <span class="sourceLineNo">110</span> static class ByteArrayKeyBlockIndexReader extends BlockIndexReader {<a name="line.110"></a> |
| <span class="sourceLineNo">111</span><a name="line.111"></a> |
| <span class="sourceLineNo">112</span> private byte[][] blockKeys;<a name="line.112"></a> |
| <span class="sourceLineNo">113</span><a name="line.113"></a> |
| <span class="sourceLineNo">114</span> public ByteArrayKeyBlockIndexReader(final int treeLevel) {<a name="line.114"></a> |
| <span class="sourceLineNo">115</span> // No comparator is needed here; block keys are plain byte[]<a name="line.115"></a> |
| <span class="sourceLineNo">116</span> searchTreeLevel = treeLevel;<a name="line.116"></a> |
| <span class="sourceLineNo">117</span> }<a name="line.117"></a> |
| <span class="sourceLineNo">118</span><a name="line.118"></a> |
| <span class="sourceLineNo">119</span> @Override<a name="line.119"></a> |
| <span class="sourceLineNo">120</span> protected long calculateHeapSizeForBlockKeys(long heapSize) {<a name="line.120"></a> |
| <span class="sourceLineNo">121</span> // Calculating the size of blockKeys<a name="line.121"></a> |
| <span class="sourceLineNo">122</span> if (blockKeys != null) {<a name="line.122"></a> |
| <span class="sourceLineNo">123</span> heapSize += ClassSize.REFERENCE;<a name="line.123"></a> |
| <span class="sourceLineNo">124</span> // Adding array + references overhead<a name="line.124"></a> |
| <span class="sourceLineNo">125</span> heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);<a name="line.125"></a> |
| <span class="sourceLineNo">126</span><a name="line.126"></a> |
| <span class="sourceLineNo">127</span> // Adding bytes<a name="line.127"></a> |
| <span class="sourceLineNo">128</span> for (byte[] key : blockKeys) {<a name="line.128"></a> |
| <span class="sourceLineNo">129</span> heapSize += ClassSize.align(ClassSize.ARRAY + key.length);<a name="line.129"></a> |
| <span class="sourceLineNo">130</span> }<a name="line.130"></a> |
| <span class="sourceLineNo">131</span> }<a name="line.131"></a> |
| <span class="sourceLineNo">132</span> return heapSize;<a name="line.132"></a> |
| <span class="sourceLineNo">133</span> }<a name="line.133"></a> |
| <span class="sourceLineNo">134</span><a name="line.134"></a> |
| <span class="sourceLineNo">135</span> @Override<a name="line.135"></a> |
| <span class="sourceLineNo">136</span> public boolean isEmpty() {<a name="line.136"></a> |
| <span class="sourceLineNo">137</span> return blockKeys.length == 0;<a name="line.137"></a> |
| <span class="sourceLineNo">138</span> }<a name="line.138"></a> |
| <span class="sourceLineNo">139</span><a name="line.139"></a> |
| <span class="sourceLineNo">140</span> /**<a name="line.140"></a> |
| <span class="sourceLineNo">141</span> * from 0 to {@link #getRootBlockCount()} - 1<a name="line.141"></a> |
| <span class="sourceLineNo">142</span> */<a name="line.142"></a> |
| <span class="sourceLineNo">143</span> public byte[] getRootBlockKey(int i) {<a name="line.143"></a> |
| <span class="sourceLineNo">144</span> return blockKeys[i];<a name="line.144"></a> |
| <span class="sourceLineNo">145</span> }<a name="line.145"></a> |
| <span class="sourceLineNo">146</span><a name="line.146"></a> |
| <span class="sourceLineNo">147</span> @Override<a name="line.147"></a> |
| <span class="sourceLineNo">148</span> public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,<a name="line.148"></a> |
| <span class="sourceLineNo">149</span> boolean cacheBlocks, boolean pread, boolean isCompaction,<a name="line.149"></a> |
| <span class="sourceLineNo">150</span> DataBlockEncoding expectedDataBlockEncoding, CachingBlockReader cachingBlockReader)<a name="line.150"></a> |
| <span class="sourceLineNo">151</span> throws IOException {<a name="line.151"></a> |
| <span class="sourceLineNo">152</span> // this would not be needed<a name="line.152"></a> |
| <span class="sourceLineNo">153</span> return null;<a name="line.153"></a> |
| <span class="sourceLineNo">154</span> }<a name="line.154"></a> |
| <span class="sourceLineNo">155</span><a name="line.155"></a> |
| <span class="sourceLineNo">156</span> @Override<a name="line.156"></a> |
| <span class="sourceLineNo">157</span> public Cell midkey(CachingBlockReader cachingBlockReader) throws IOException {<a name="line.157"></a> |
| <span class="sourceLineNo">158</span> // Not needed here<a name="line.158"></a> |
| <span class="sourceLineNo">159</span> return null;<a name="line.159"></a> |
| <span class="sourceLineNo">160</span> }<a name="line.160"></a> |
| <span class="sourceLineNo">161</span><a name="line.161"></a> |
| <span class="sourceLineNo">162</span> @Override<a name="line.162"></a> |
| <span class="sourceLineNo">163</span> protected void initialize(int numEntries) {<a name="line.163"></a> |
| <span class="sourceLineNo">164</span> blockKeys = new byte[numEntries][];<a name="line.164"></a> |
| <span class="sourceLineNo">165</span> }<a name="line.165"></a> |
| <span class="sourceLineNo">166</span><a name="line.166"></a> |
| <span class="sourceLineNo">167</span> @Override<a name="line.167"></a> |
| <span class="sourceLineNo">168</span> protected void add(final byte[] key, final long offset, final int dataSize) {<a name="line.168"></a> |
| <span class="sourceLineNo">169</span> blockOffsets[rootCount] = offset;<a name="line.169"></a> |
| <span class="sourceLineNo">170</span> blockKeys[rootCount] = key;<a name="line.170"></a> |
| <span class="sourceLineNo">171</span> blockDataSizes[rootCount] = dataSize;<a name="line.171"></a> |
| <span class="sourceLineNo">172</span> rootCount++;<a name="line.172"></a> |
| <span class="sourceLineNo">173</span> }<a name="line.173"></a> |
| <span class="sourceLineNo">174</span><a name="line.174"></a> |
| <span class="sourceLineNo">175</span> @Override<a name="line.175"></a> |
| <span class="sourceLineNo">176</span> public int rootBlockContainingKey(byte[] key, int offset, int length, CellComparator comp) {<a name="line.176"></a> |
| <span class="sourceLineNo">177</span> int pos = Bytes.binarySearch(blockKeys, key, offset, length);<a name="line.177"></a> |
| <span class="sourceLineNo">178</span> // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see<a name="line.178"></a> |
| <span class="sourceLineNo">179</span> // binarySearch's javadoc.<a name="line.179"></a> |
| <span class="sourceLineNo">180</span><a name="line.180"></a> |
| <span class="sourceLineNo">181</span> if (pos >= 0) {<a name="line.181"></a> |
| <span class="sourceLineNo">182</span> // This means this is an exact match with an element of blockKeys.<a name="line.182"></a> |
| <span class="sourceLineNo">183</span> assert pos < blockKeys.length;<a name="line.183"></a> |
| <span class="sourceLineNo">184</span> return pos;<a name="line.184"></a> |
| <span class="sourceLineNo">185</span> }<a name="line.185"></a> |
| <span class="sourceLineNo">186</span><a name="line.186"></a> |
| <span class="sourceLineNo">187</span> // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],<a name="line.187"></a> |
| <span class="sourceLineNo">188</span> // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that<a name="line.188"></a> |
| <span class="sourceLineNo">189</span> // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if<a name="line.189"></a> |
| <span class="sourceLineNo">190</span> // key < blockKeys[0], meaning the file does not contain the given key.<a name="line.190"></a> |
| <span class="sourceLineNo">191</span><a name="line.191"></a> |
| <span class="sourceLineNo">192</span> int i = -pos - 1;<a name="line.192"></a> |
| <span class="sourceLineNo">193</span> assert 0 <= i && i <= blockKeys.length;<a name="line.193"></a> |
| <span class="sourceLineNo">194</span> return i - 1;<a name="line.194"></a> |
| <span class="sourceLineNo">195</span> }<a name="line.195"></a> |
| <span class="sourceLineNo">196</span><a name="line.196"></a> |
| <span class="sourceLineNo">197</span> @Override<a name="line.197"></a> |
| <span class="sourceLineNo">198</span> public int rootBlockContainingKey(Cell key) {<a name="line.198"></a> |
| <span class="sourceLineNo">199</span> // Should not be called on this because here it deals only with byte[]<a name="line.199"></a> |
| <span class="sourceLineNo">200</span> throw new UnsupportedOperationException(<a name="line.200"></a> |
| <span class="sourceLineNo">201</span> "Cannot search for a key that is of Cell type. Only plain byte array keys "<a name="line.201"></a> |
| <span class="sourceLineNo">202</span> + "can be searched for");<a name="line.202"></a> |
| <span class="sourceLineNo">203</span> }<a name="line.203"></a> |
| <span class="sourceLineNo">204</span><a name="line.204"></a> |
| <span class="sourceLineNo">205</span> @Override<a name="line.205"></a> |
| <span class="sourceLineNo">206</span> public String toString() {<a name="line.206"></a> |
| <span class="sourceLineNo">207</span> StringBuilder sb = new StringBuilder();<a name="line.207"></a> |
| <span class="sourceLineNo">208</span> sb.append("size=" + rootCount).append("\n");<a name="line.208"></a> |
| <span class="sourceLineNo">209</span> for (int i = 0; i < rootCount; i++) {<a name="line.209"></a> |
| <span class="sourceLineNo">210</span> sb.append("key=").append(KeyValue.keyToString(blockKeys[i])).append("\n offset=")<a name="line.210"></a> |
| <span class="sourceLineNo">211</span> .append(blockOffsets[i]).append(", dataSize=" + blockDataSizes[i]).append("\n");<a name="line.211"></a> |
| <span class="sourceLineNo">212</span> }<a name="line.212"></a> |
| <span class="sourceLineNo">213</span> return sb.toString();<a name="line.213"></a> |
| <span class="sourceLineNo">214</span> }<a name="line.214"></a> |
| <span class="sourceLineNo">215</span> }<a name="line.215"></a> |
| <span class="sourceLineNo">216</span><a name="line.216"></a> |
| <span class="sourceLineNo">217</span> /**<a name="line.217"></a> |
| <span class="sourceLineNo">218</span> * An implementation of the BlockIndexReader that deals with block keys which are the key part of<a name="line.218"></a> |
| <span class="sourceLineNo">219</span> * a cell like the Data block index or the ROW_COL bloom blocks. This needs a comparator to work<a name="line.219"></a> |
| <span class="sourceLineNo">220</span> * with the Cells.<a name="line.220"></a> |
| <span class="sourceLineNo">221</span> */<a name="line.221"></a> |
| <span class="sourceLineNo">222</span> static class CellBasedKeyBlockIndexReader extends BlockIndexReader {<a name="line.222"></a> |
| <span class="sourceLineNo">223</span><a name="line.223"></a> |
| <span class="sourceLineNo">224</span> private Cell[] blockKeys;<a name="line.224"></a> |
| <span class="sourceLineNo">225</span> /** Pre-computed mid-key */<a name="line.225"></a> |
| <span class="sourceLineNo">226</span> private AtomicReference&lt;Cell&gt; midKey = new AtomicReference&lt;&gt;();<a name="line.226"></a> |
| <span class="sourceLineNo">227</span> /** Needed for doing lookups on blocks. */<a name="line.227"></a> |
| <span class="sourceLineNo">228</span> protected CellComparator comparator;<a name="line.228"></a> |
| <span class="sourceLineNo">229</span><a name="line.229"></a> |
| <span class="sourceLineNo">230</span> public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel) {<a name="line.230"></a> |
| <span class="sourceLineNo">231</span> // Comparator can be null for METAINDEX block<a name="line.231"></a> |
| <span class="sourceLineNo">232</span> comparator = c;<a name="line.232"></a> |
| <span class="sourceLineNo">233</span> searchTreeLevel = treeLevel;<a name="line.233"></a> |
| <span class="sourceLineNo">234</span> }<a name="line.234"></a> |
| <span class="sourceLineNo">235</span><a name="line.235"></a> |
| <span class="sourceLineNo">236</span> @Override<a name="line.236"></a> |
| <span class="sourceLineNo">237</span> protected long calculateHeapSizeForBlockKeys(long heapSize) {<a name="line.237"></a> |
| <span class="sourceLineNo">238</span> if (blockKeys != null) {<a name="line.238"></a> |
| <span class="sourceLineNo">239</span> heapSize += ClassSize.REFERENCE;<a name="line.239"></a> |
| <span class="sourceLineNo">240</span> // Adding array + references overhead<a name="line.240"></a> |
| <span class="sourceLineNo">241</span> heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);<a name="line.241"></a> |
| <span class="sourceLineNo">242</span><a name="line.242"></a> |
| <span class="sourceLineNo">243</span> // Adding blockKeys<a name="line.243"></a> |
| <span class="sourceLineNo">244</span> for (Cell key : blockKeys) {<a name="line.244"></a> |
| <span class="sourceLineNo">245</span> heapSize += ClassSize.align(key.heapSize());<a name="line.245"></a> |
| <span class="sourceLineNo">246</span> }<a name="line.246"></a> |
| <span class="sourceLineNo">247</span> }<a name="line.247"></a> |
| <span class="sourceLineNo">248</span> // Add comparator and the midkey atomicreference<a name="line.248"></a> |
| <span class="sourceLineNo">249</span> heapSize += 2 * ClassSize.REFERENCE;<a name="line.249"></a> |
| <span class="sourceLineNo">250</span> return heapSize;<a name="line.250"></a> |
| <span class="sourceLineNo">251</span> }<a name="line.251"></a> |
| <span class="sourceLineNo">252</span><a name="line.252"></a> |
| <span class="sourceLineNo">253</span> @Override<a name="line.253"></a> |
| <span class="sourceLineNo">254</span> public boolean isEmpty() {<a name="line.254"></a> |
| <span class="sourceLineNo">255</span> return blockKeys.length == 0;<a name="line.255"></a> |
| <span class="sourceLineNo">256</span> }<a name="line.256"></a> |
| <span class="sourceLineNo">257</span><a name="line.257"></a> |
| <span class="sourceLineNo">258</span> /**<a name="line.258"></a> |
| <span class="sourceLineNo">259</span> * from 0 to {@link #getRootBlockCount()} - 1<a name="line.259"></a> |
| <span class="sourceLineNo">260</span> */<a name="line.260"></a> |
| <span class="sourceLineNo">261</span> public Cell getRootBlockKey(int i) {<a name="line.261"></a> |
| <span class="sourceLineNo">262</span> return blockKeys[i];<a name="line.262"></a> |
| <span class="sourceLineNo">263</span> }<a name="line.263"></a> |
| <span class="sourceLineNo">264</span><a name="line.264"></a> |
| <span class="sourceLineNo">265</span> @Override<a name="line.265"></a> |
| <span class="sourceLineNo">266</span> public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,<a name="line.266"></a> |
| <span class="sourceLineNo">267</span> boolean cacheBlocks, boolean pread, boolean isCompaction,<a name="line.267"></a> |
| <span class="sourceLineNo">268</span> DataBlockEncoding expectedDataBlockEncoding, CachingBlockReader cachingBlockReader)<a name="line.268"></a> |
| <span class="sourceLineNo">269</span> throws IOException {<a name="line.269"></a> |
| <span class="sourceLineNo">270</span> int rootLevelIndex = rootBlockContainingKey(key);<a name="line.270"></a> |
| <span class="sourceLineNo">271</span> if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {<a name="line.271"></a> |
| <span class="sourceLineNo">272</span> return null;<a name="line.272"></a> |
| <span class="sourceLineNo">273</span> }<a name="line.273"></a> |
| <span class="sourceLineNo">274</span><a name="line.274"></a> |
| <span class="sourceLineNo">275</span> // the next indexed key<a name="line.275"></a> |
| <span class="sourceLineNo">276</span> Cell nextIndexedKey = null;<a name="line.276"></a> |
| <span class="sourceLineNo">277</span><a name="line.277"></a> |
| <span class="sourceLineNo">278</span> // Read the next-level (intermediate or leaf) index block.<a name="line.278"></a> |
| <span class="sourceLineNo">279</span> long currentOffset = blockOffsets[rootLevelIndex];<a name="line.279"></a> |
| <span class="sourceLineNo">280</span> int currentOnDiskSize = blockDataSizes[rootLevelIndex];<a name="line.280"></a> |
| <span class="sourceLineNo">281</span><a name="line.281"></a> |
| <span class="sourceLineNo">282</span> if (rootLevelIndex < blockKeys.length - 1) {<a name="line.282"></a> |
| <span class="sourceLineNo">283</span> nextIndexedKey = blockKeys[rootLevelIndex + 1];<a name="line.283"></a> |
| <span class="sourceLineNo">284</span> } else {<a name="line.284"></a> |
| <span class="sourceLineNo">285</span> nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;<a name="line.285"></a> |
| <span class="sourceLineNo">286</span> }<a name="line.286"></a> |
| <span class="sourceLineNo">287</span><a name="line.287"></a> |
| <span class="sourceLineNo">288</span> int lookupLevel = 1; // How many levels deep we are in our lookup.<a name="line.288"></a> |
| <span class="sourceLineNo">289</span> int index = -1;<a name="line.289"></a> |
| <span class="sourceLineNo">290</span><a name="line.290"></a> |
| <span class="sourceLineNo">291</span> HFileBlock block = null;<a name="line.291"></a> |
| <span class="sourceLineNo">292</span> KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();<a name="line.292"></a> |
| <span class="sourceLineNo">293</span> while (true) {<a name="line.293"></a> |
| <span class="sourceLineNo">294</span> try {<a name="line.294"></a> |
| <span class="sourceLineNo">295</span> // Must reset it to null here: otherwise, if an exception happens in readBlock, we would<a name="line.295"></a> |
| <span class="sourceLineNo">296</span> // release the previously assigned block twice in the finally block.<a name="line.296"></a> |
| <span class="sourceLineNo">297</span> // (See HBASE-22422)<a name="line.297"></a> |
| <span class="sourceLineNo">298</span> block = null;<a name="line.298"></a> |
| <span class="sourceLineNo">299</span> if (currentBlock != null && currentBlock.getOffset() == currentOffset) {<a name="line.299"></a> |
| <span class="sourceLineNo">300</span> // Avoid reading the same block again, even with caching turned off.<a name="line.300"></a> |
| <span class="sourceLineNo">301</span> // This is crucial for compaction-type workload which might have<a name="line.301"></a> |
| <span class="sourceLineNo">302</span> // caching turned off. This is like a one-block cache inside the<a name="line.302"></a> |
| <span class="sourceLineNo">303</span> // scanner.<a name="line.303"></a> |
| <span class="sourceLineNo">304</span> block = currentBlock;<a name="line.304"></a> |
| <span class="sourceLineNo">305</span> } else {<a name="line.305"></a> |
| <span class="sourceLineNo">306</span> // Call HFile's caching block reader API. We always cache index<a name="line.306"></a> |
| <span class="sourceLineNo">307</span> // blocks, otherwise we might get terrible performance.<a name="line.307"></a> |
| <span class="sourceLineNo">308</span> boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);<a name="line.308"></a> |
| <span class="sourceLineNo">309</span> BlockType expectedBlockType;<a name="line.309"></a> |
| <span class="sourceLineNo">310</span> if (lookupLevel < searchTreeLevel - 1) {<a name="line.310"></a> |
| <span class="sourceLineNo">311</span> expectedBlockType = BlockType.INTERMEDIATE_INDEX;<a name="line.311"></a> |
| <span class="sourceLineNo">312</span> } else if (lookupLevel == searchTreeLevel - 1) {<a name="line.312"></a> |
| <span class="sourceLineNo">313</span> expectedBlockType = BlockType.LEAF_INDEX;<a name="line.313"></a> |
| <span class="sourceLineNo">314</span> } else {<a name="line.314"></a> |
| <span class="sourceLineNo">315</span> // this also accounts for ENCODED_DATA<a name="line.315"></a> |
| <span class="sourceLineNo">316</span> expectedBlockType = BlockType.DATA;<a name="line.316"></a> |
| <span class="sourceLineNo">317</span> }<a name="line.317"></a> |
| <span class="sourceLineNo">318</span> block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,<a name="line.318"></a> |
| <span class="sourceLineNo">319</span> pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);<a name="line.319"></a> |
| <span class="sourceLineNo">320</span> }<a name="line.320"></a> |
| <span class="sourceLineNo">321</span><a name="line.321"></a> |
| <span class="sourceLineNo">322</span> if (block == null) {<a name="line.322"></a> |
| <span class="sourceLineNo">323</span> throw new IOException("Failed to read block at offset " + currentOffset<a name="line.323"></a> |
| <span class="sourceLineNo">324</span> + ", onDiskSize=" + currentOnDiskSize);<a name="line.324"></a> |
| <span class="sourceLineNo">325</span> }<a name="line.325"></a> |
| <span class="sourceLineNo">326</span><a name="line.326"></a> |
| <span class="sourceLineNo">327</span> // Found a data block, break the loop and check our level in the tree.<a name="line.327"></a> |
| <span class="sourceLineNo">328</span> if (block.getBlockType().isData()) {<a name="line.328"></a> |
| <span class="sourceLineNo">329</span> break;<a name="line.329"></a> |
| <span class="sourceLineNo">330</span> }<a name="line.330"></a> |
| <span class="sourceLineNo">331</span><a name="line.331"></a> |
| <span class="sourceLineNo">332</span> // Not a data block. This must be a leaf-level or intermediate-level<a name="line.332"></a> |
| <span class="sourceLineNo">333</span> // index block. We don't allow going deeper than searchTreeLevel.<a name="line.333"></a> |
| <span class="sourceLineNo">334</span> if (++lookupLevel > searchTreeLevel) {<a name="line.334"></a> |
| <span class="sourceLineNo">335</span> throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel<a name="line.335"></a> |
| <span class="sourceLineNo">336</span> + ", searchTreeLevel=" + searchTreeLevel);<a name="line.336"></a> |
| <span class="sourceLineNo">337</span> }<a name="line.337"></a> |
| <span class="sourceLineNo">338</span><a name="line.338"></a> |
| <span class="sourceLineNo">339</span> // Locate the entry corresponding to the given key in the non-root<a name="line.339"></a> |
| <span class="sourceLineNo">340</span> // (leaf or intermediate-level) index block.<a name="line.340"></a> |
| <span class="sourceLineNo">341</span> ByteBuff buffer = block.getBufferWithoutHeader();<a name="line.341"></a> |
| <span class="sourceLineNo">342</span> index = locateNonRootIndexEntry(buffer, key, comparator);<a name="line.342"></a> |
| <span class="sourceLineNo">343</span> if (index == -1) {<a name="line.343"></a> |
| <span class="sourceLineNo">344</span> // This has to be changed<a name="line.344"></a> |
| <span class="sourceLineNo">345</span> // For now change this to key value<a name="line.345"></a> |
| <span class="sourceLineNo">346</span> throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"<a name="line.346"></a> |
| <span class="sourceLineNo">347</span> + " first key of the non-root index block " + block);<a name="line.347"></a> |
| <span class="sourceLineNo">348</span> }<a name="line.348"></a> |
| <span class="sourceLineNo">349</span><a name="line.349"></a> |
| <span class="sourceLineNo">350</span> currentOffset = buffer.getLong();<a name="line.350"></a> |
| <span class="sourceLineNo">351</span> currentOnDiskSize = buffer.getInt();<a name="line.351"></a> |
| <span class="sourceLineNo">352</span><a name="line.352"></a> |
| <span class="sourceLineNo">353</span> // Only update next indexed key if there is a next indexed key in the current level<a name="line.353"></a> |
| <span class="sourceLineNo">354</span> byte[] nonRootIndexedKey = getNonRootIndexedKey(buffer, index + 1);<a name="line.354"></a> |
| <span class="sourceLineNo">355</span> if (nonRootIndexedKey != null) {<a name="line.355"></a> |
| <span class="sourceLineNo">356</span> tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);<a name="line.356"></a> |
| <span class="sourceLineNo">357</span> nextIndexedKey = tmpNextIndexKV;<a name="line.357"></a> |
| <span class="sourceLineNo">358</span> }<a name="line.358"></a> |
| <span class="sourceLineNo">359</span> } finally {<a name="line.359"></a> |
| <span class="sourceLineNo">360</span> if (block != null && !block.getBlockType().isData()) {<a name="line.360"></a> |
| <span class="sourceLineNo">361</span> // Release the block immediately if it is not the data block<a name="line.361"></a> |
| <span class="sourceLineNo">362</span> block.release();<a name="line.362"></a> |
| <span class="sourceLineNo">363</span> }<a name="line.363"></a> |
| <span class="sourceLineNo">364</span> }<a name="line.364"></a> |
| <span class="sourceLineNo">365</span> }<a name="line.365"></a> |
| <span class="sourceLineNo">366</span><a name="line.366"></a> |
| <span class="sourceLineNo">367</span> if (lookupLevel != searchTreeLevel) {<a name="line.367"></a> |
| <span class="sourceLineNo">368</span> assert block.getBlockType().isData();<a name="line.368"></a> |
| <span class="sourceLineNo">369</span> // Though we have retrieved a data block we have found an issue<a name="line.369"></a> |
| <span class="sourceLineNo">370</span> // in the retrieved data block. Hence returned the block so that<a name="line.370"></a> |
| <span class="sourceLineNo">371</span> // the ref count can be decremented<a name="line.371"></a> |
| <span class="sourceLineNo">372</span> if (block != null) {<a name="line.372"></a> |
| <span class="sourceLineNo">373</span> block.release();<a name="line.373"></a> |
| <span class="sourceLineNo">374</span> }<a name="line.374"></a> |
| <span class="sourceLineNo">375</span> throw new IOException("Reached a data block at level " + lookupLevel<a name="line.375"></a> |
| <span class="sourceLineNo">376</span> + " but the number of levels is " + searchTreeLevel);<a name="line.376"></a> |
| <span class="sourceLineNo">377</span> }<a name="line.377"></a> |
| <span class="sourceLineNo">378</span><a name="line.378"></a> |
| <span class="sourceLineNo">379</span> // set the next indexed key for the current block.<a name="line.379"></a> |
| <span class="sourceLineNo">380</span> return new BlockWithScanInfo(block, nextIndexedKey);<a name="line.380"></a> |
| <span class="sourceLineNo">381</span> }<a name="line.381"></a> |
| <span class="sourceLineNo">382</span><a name="line.382"></a> |
| <span class="sourceLineNo">383</span> @Override<a name="line.383"></a> |
| <span class="sourceLineNo">384</span> public Cell midkey(CachingBlockReader cachingBlockReader) throws IOException {<a name="line.384"></a> |
| <span class="sourceLineNo">385</span> if (rootCount == 0) throw new IOException("HFile empty");<a name="line.385"></a> |
| <span class="sourceLineNo">386</span><a name="line.386"></a> |
| <span class="sourceLineNo">387</span> Cell targetMidKey = this.midKey.get();<a name="line.387"></a> |
| <span class="sourceLineNo">388</span> if (targetMidKey != null) {<a name="line.388"></a> |
| <span class="sourceLineNo">389</span> return targetMidKey;<a name="line.389"></a> |
| <span class="sourceLineNo">390</span> }<a name="line.390"></a> |
| <span class="sourceLineNo">391</span><a name="line.391"></a> |
| <span class="sourceLineNo">392</span> if (midLeafBlockOffset >= 0) {<a name="line.392"></a> |
| <span class="sourceLineNo">393</span> if (cachingBlockReader == null) {<a name="line.393"></a> |
| <span class="sourceLineNo">394</span> throw new IOException(<a name="line.394"></a> |
| <span class="sourceLineNo">395</span> "Have to read the middle leaf block but " + "no block reader available");<a name="line.395"></a> |
| <span class="sourceLineNo">396</span> }<a name="line.396"></a> |
| <span class="sourceLineNo">397</span><a name="line.397"></a> |
| <span class="sourceLineNo">398</span> // Caching, using pread, assuming this is not a compaction.<a name="line.398"></a> |
| <span class="sourceLineNo">399</span> HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,<a name="line.399"></a> |
| <span class="sourceLineNo">400</span> midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);<a name="line.400"></a> |
| <span class="sourceLineNo">401</span> try {<a name="line.401"></a> |
| <span class="sourceLineNo">402</span> byte[] bytes = getNonRootIndexedKey(midLeafBlock.getBufferWithoutHeader(), midKeyEntry);<a name="line.402"></a> |
| <span class="sourceLineNo">403</span> assert bytes != null;<a name="line.403"></a> |
| <span class="sourceLineNo">404</span> targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);<a name="line.404"></a> |
| <span class="sourceLineNo">405</span> } finally {<a name="line.405"></a> |
| <span class="sourceLineNo">406</span> midLeafBlock.release();<a name="line.406"></a> |
| <span class="sourceLineNo">407</span> }<a name="line.407"></a> |
| <span class="sourceLineNo">408</span> } else {<a name="line.408"></a> |
| <span class="sourceLineNo">409</span> // The middle of the root-level index.<a name="line.409"></a> |
| <span class="sourceLineNo">410</span> targetMidKey = blockKeys[rootCount / 2];<a name="line.410"></a> |
| <span class="sourceLineNo">411</span> }<a name="line.411"></a> |
| <span class="sourceLineNo">412</span><a name="line.412"></a> |
| <span class="sourceLineNo">413</span> this.midKey.set(targetMidKey);<a name="line.413"></a> |
| <span class="sourceLineNo">414</span> return targetMidKey;<a name="line.414"></a> |
| <span class="sourceLineNo">415</span> }<a name="line.415"></a> |
| <span class="sourceLineNo">416</span><a name="line.416"></a> |
| <span class="sourceLineNo">417</span> @Override<a name="line.417"></a> |
| <span class="sourceLineNo">418</span> protected void initialize(int numEntries) {<a name="line.418"></a> |
| <span class="sourceLineNo">419</span> blockKeys = new Cell[numEntries];<a name="line.419"></a> |
| <span class="sourceLineNo">420</span> }<a name="line.420"></a> |
| <span class="sourceLineNo">421</span><a name="line.421"></a> |
| <span class="sourceLineNo">422</span> /**<a name="line.422"></a> |
| <span class="sourceLineNo">423</span> * Adds a new entry in the root block index. Only used when reading.<a name="line.423"></a> |
| <span class="sourceLineNo">424</span> * @param key Last key in the block<a name="line.424"></a> |
| <span class="sourceLineNo">425</span> * @param offset file offset where the block is stored<a name="line.425"></a> |
| <span class="sourceLineNo">426</span> * @param dataSize the uncompressed data size<a name="line.426"></a> |
| <span class="sourceLineNo">427</span> */<a name="line.427"></a> |
| <span class="sourceLineNo">428</span> @Override<a name="line.428"></a> |
| <span class="sourceLineNo">429</span> protected void add(final byte[] key, final long offset, final int dataSize) {<a name="line.429"></a> |
| <span class="sourceLineNo">430</span> blockOffsets[rootCount] = offset;<a name="line.430"></a> |
| <span class="sourceLineNo">431</span> // Create the blockKeys as Cells once when the reader is opened<a name="line.431"></a> |
| <span class="sourceLineNo">432</span> blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);<a name="line.432"></a> |
| <span class="sourceLineNo">433</span> blockDataSizes[rootCount] = dataSize;<a name="line.433"></a> |
| <span class="sourceLineNo">434</span> rootCount++;<a name="line.434"></a> |
| <span class="sourceLineNo">435</span> }<a name="line.435"></a> |
| <span class="sourceLineNo">436</span><a name="line.436"></a> |
| <span class="sourceLineNo">437</span> @Override<a name="line.437"></a> |
| <span class="sourceLineNo">438</span> public int rootBlockContainingKey(final byte[] key, int offset, int length,<a name="line.438"></a> |
| <span class="sourceLineNo">439</span> CellComparator comp) {<a name="line.439"></a> |
| <span class="sourceLineNo">440</span> // This should always be called with Cell not with a byte[] key<a name="line.440"></a> |
| <span class="sourceLineNo">441</span> throw new UnsupportedOperationException("Cannot find for a key containing plain byte "<a name="line.441"></a> |
| <span class="sourceLineNo">442</span> + "array. Only cell based keys can be searched for");<a name="line.442"></a> |
| <span class="sourceLineNo">443</span> }<a name="line.443"></a> |
| <span class="sourceLineNo">444</span><a name="line.444"></a> |
| <span class="sourceLineNo">445</span> @Override<a name="line.445"></a> |
| <span class="sourceLineNo">446</span> public int rootBlockContainingKey(Cell key) {<a name="line.446"></a> |
| <span class="sourceLineNo">447</span> // Here the comparator should not be null as this happens for the root-level block<a name="line.447"></a> |
| <span class="sourceLineNo">448</span> int pos = Bytes.binarySearch(blockKeys, key, comparator);<a name="line.448"></a> |
| <span class="sourceLineNo">449</span> // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see<a name="line.449"></a> |
| <span class="sourceLineNo">450</span> // binarySearch's javadoc.<a name="line.450"></a> |
| <span class="sourceLineNo">451</span><a name="line.451"></a> |
| <span class="sourceLineNo">452</span> if (pos >= 0) {<a name="line.452"></a> |
| <span class="sourceLineNo">453</span> // This means this is an exact match with an element of blockKeys.<a name="line.453"></a> |
| <span class="sourceLineNo">454</span> assert pos < blockKeys.length;<a name="line.454"></a> |
| <span class="sourceLineNo">455</span> return pos;<a name="line.455"></a> |
| <span class="sourceLineNo">456</span> }<a name="line.456"></a> |
| <span class="sourceLineNo">457</span><a name="line.457"></a> |
| <span class="sourceLineNo">458</span> // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],<a name="line.458"></a> |
| <span class="sourceLineNo">459</span> // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that<a name="line.459"></a> |
| <span class="sourceLineNo">460</span> // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if<a name="line.460"></a> |
| <span class="sourceLineNo">461</span> // key < blockKeys[0], meaning the file does not contain the given key.<a name="line.461"></a> |
| <span class="sourceLineNo">462</span><a name="line.462"></a> |
| <span class="sourceLineNo">463</span> int i = -pos - 1;<a name="line.463"></a> |
| <span class="sourceLineNo">464</span> assert 0 <= i && i <= blockKeys.length;<a name="line.464"></a> |
| <span class="sourceLineNo">465</span> return i - 1;<a name="line.465"></a> |
| <span class="sourceLineNo">466</span> }<a name="line.466"></a> |
| <span class="sourceLineNo">467</span><a name="line.467"></a> |
| <span class="sourceLineNo">468</span> @Override<a name="line.468"></a> |
| <span class="sourceLineNo">469</span> public String toString() {<a name="line.469"></a> |
| <span class="sourceLineNo">470</span> StringBuilder sb = new StringBuilder();<a name="line.470"></a> |
| <span class="sourceLineNo">471</span> sb.append("size=" + rootCount).append("\n");<a name="line.471"></a> |
| <span class="sourceLineNo">472</span> for (int i = 0; i < rootCount; i++) {<a name="line.472"></a> |
| <span class="sourceLineNo">473</span> sb.append("key=").append((blockKeys[i])).append("\n offset=").append(blockOffsets[i])<a name="line.473"></a> |
| <span class="sourceLineNo">474</span> .append(", dataSize=" + blockDataSizes[i]).append("\n");<a name="line.474"></a> |
| <span class="sourceLineNo">475</span> }<a name="line.475"></a> |
| <span class="sourceLineNo">476</span> return sb.toString();<a name="line.476"></a> |
| <span class="sourceLineNo">477</span> }<a name="line.477"></a> |
| <span class="sourceLineNo">478</span> }<a name="line.478"></a> |
| <span class="sourceLineNo">479</span><a name="line.479"></a> |
| <span class="sourceLineNo">480</span> static class CellBasedKeyBlockIndexReaderV2 extends CellBasedKeyBlockIndexReader {<a name="line.480"></a> |
| <span class="sourceLineNo">481</span><a name="line.481"></a> |
| <span class="sourceLineNo">482</span> private HFileIndexBlockEncoder indexBlockEncoder;<a name="line.482"></a> |
| <span class="sourceLineNo">483</span><a name="line.483"></a> |
| <span class="sourceLineNo">484</span> private HFileIndexBlockEncoder.EncodedSeeker seeker;<a name="line.484"></a> |
| <span class="sourceLineNo">485</span><a name="line.485"></a> |
| <span class="sourceLineNo">486</span> public CellBasedKeyBlockIndexReaderV2(final CellComparator c, final int treeLevel) {<a name="line.486"></a> |
| <span class="sourceLineNo">487</span> this(c, treeLevel, null);<a name="line.487"></a> |
| <span class="sourceLineNo">488</span> }<a name="line.488"></a> |
| <span class="sourceLineNo">489</span><a name="line.489"></a> |
| <span class="sourceLineNo">490</span> public CellBasedKeyBlockIndexReaderV2(final CellComparator c, final int treeLevel,<a name="line.490"></a> |
| <span class="sourceLineNo">491</span> HFileIndexBlockEncoder indexBlockEncoder) {<a name="line.491"></a> |
| <span class="sourceLineNo">492</span> super(c, treeLevel);<a name="line.492"></a> |
| <span class="sourceLineNo">493</span> // Can be null for METAINDEX block<a name="line.493"></a> |
| <span class="sourceLineNo">494</span> this.indexBlockEncoder =<a name="line.494"></a> |
| <span class="sourceLineNo">495</span> indexBlockEncoder != null ? indexBlockEncoder : NoOpIndexBlockEncoder.INSTANCE;<a name="line.495"></a> |
| <span class="sourceLineNo">496</span> }<a name="line.496"></a> |
| <span class="sourceLineNo">497</span><a name="line.497"></a> |
| <span class="sourceLineNo">498</span> @Override<a name="line.498"></a> |
| <span class="sourceLineNo">499</span> public boolean isEmpty() {<a name="line.499"></a> |
| <span class="sourceLineNo">500</span> return seeker.isEmpty();<a name="line.500"></a> |
| <span class="sourceLineNo">501</span> }<a name="line.501"></a> |
| <span class="sourceLineNo">502</span><a name="line.502"></a> |
| <span class="sourceLineNo">503</span> @Override<a name="line.503"></a> |
| <span class="sourceLineNo">504</span> public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,<a name="line.504"></a> |
| <span class="sourceLineNo">505</span> boolean cacheBlocks, boolean pread, boolean isCompaction,<a name="line.505"></a> |
| <span class="sourceLineNo">506</span> DataBlockEncoding expectedDataBlockEncoding, CachingBlockReader cachingBlockReader)<a name="line.506"></a> |
| <span class="sourceLineNo">507</span> throws IOException {<a name="line.507"></a> |
| <span class="sourceLineNo">508</span> return seeker.loadDataBlockWithScanInfo(key, currentBlock, cacheBlocks, pread, isCompaction,<a name="line.508"></a> |
| <span class="sourceLineNo">509</span> expectedDataBlockEncoding, cachingBlockReader);<a name="line.509"></a> |
| <span class="sourceLineNo">510</span> }<a name="line.510"></a> |
| <span class="sourceLineNo">511</span><a name="line.511"></a> |
| <span class="sourceLineNo">512</span> @Override<a name="line.512"></a> |
| <span class="sourceLineNo">513</span> public Cell midkey(CachingBlockReader cachingBlockReader) throws IOException {<a name="line.513"></a> |
| <span class="sourceLineNo">514</span> return seeker.midkey(cachingBlockReader);<a name="line.514"></a> |
| <span class="sourceLineNo">515</span> }<a name="line.515"></a> |
| <span class="sourceLineNo">516</span><a name="line.516"></a> |
| <span class="sourceLineNo">517</span> /**<a name="line.517"></a> |
| <span class="sourceLineNo">518</span> * from 0 to {@link #getRootBlockCount() - 1}<a name="line.518"></a> |
| <span class="sourceLineNo">519</span> */<a name="line.519"></a> |
| <span class="sourceLineNo">520</span> public Cell getRootBlockKey(int i) {<a name="line.520"></a> |
| <span class="sourceLineNo">521</span> return seeker.getRootBlockKey(i);<a name="line.521"></a> |
| <span class="sourceLineNo">522</span> }<a name="line.522"></a> |
| <span class="sourceLineNo">523</span><a name="line.523"></a> |
| <span class="sourceLineNo">524</span> @Override<a name="line.524"></a> |
| <span class="sourceLineNo">525</span> public int getRootBlockCount() {<a name="line.525"></a> |
| <span class="sourceLineNo">526</span> return seeker.getRootBlockCount();<a name="line.526"></a> |
| <span class="sourceLineNo">527</span> }<a name="line.527"></a> |
| <span class="sourceLineNo">528</span><a name="line.528"></a> |
| <span class="sourceLineNo">529</span> @Override<a name="line.529"></a> |
| <span class="sourceLineNo">530</span> public int rootBlockContainingKey(Cell key) {<a name="line.530"></a> |
| <span class="sourceLineNo">531</span> return seeker.rootBlockContainingKey(key);<a name="line.531"></a> |
| <span class="sourceLineNo">532</span> }<a name="line.532"></a> |
| <span class="sourceLineNo">533</span><a name="line.533"></a> |
| <span class="sourceLineNo">534</span> @Override<a name="line.534"></a> |
| <span class="sourceLineNo">535</span> protected long calculateHeapSizeForBlockKeys(long heapSize) {<a name="line.535"></a> |
| <span class="sourceLineNo">536</span> heapSize = super.calculateHeapSizeForBlockKeys(heapSize);<a name="line.536"></a> |
| <span class="sourceLineNo">537</span> if (seeker != null) {<a name="line.537"></a> |
| <span class="sourceLineNo">538</span> heapSize += ClassSize.REFERENCE;<a name="line.538"></a> |
| <span class="sourceLineNo">539</span> heapSize += ClassSize.align(seeker.heapSize());<a name="line.539"></a> |
| <span class="sourceLineNo">540</span> }<a name="line.540"></a> |
| <span class="sourceLineNo">541</span> return heapSize;<a name="line.541"></a> |
| <span class="sourceLineNo">542</span> }<a name="line.542"></a> |
| <span class="sourceLineNo">543</span><a name="line.543"></a> |
| <span class="sourceLineNo">544</span> @Override<a name="line.544"></a> |
| <span class="sourceLineNo">545</span> public void readMultiLevelIndexRoot(HFileBlock blk, final int numEntries) throws IOException {<a name="line.545"></a> |
| <span class="sourceLineNo">546</span> seeker = indexBlockEncoder.createSeeker();<a name="line.546"></a> |
| <span class="sourceLineNo">547</span> seeker.initRootIndex(blk, numEntries, comparator, searchTreeLevel);<a name="line.547"></a> |
| <span class="sourceLineNo">548</span> }<a name="line.548"></a> |
| <span class="sourceLineNo">549</span><a name="line.549"></a> |
| <span class="sourceLineNo">550</span> @Override<a name="line.550"></a> |
| <span class="sourceLineNo">551</span> public String toString() {<a name="line.551"></a> |
| <span class="sourceLineNo">552</span> return seeker.toString();<a name="line.552"></a> |
| <span class="sourceLineNo">553</span> }<a name="line.553"></a> |
| <span class="sourceLineNo">554</span> }<a name="line.554"></a> |
| <span class="sourceLineNo">555</span><a name="line.555"></a> |
| <span class="sourceLineNo">556</span> /**<a name="line.556"></a> |
| <span class="sourceLineNo">557</span> * The reader will always hold the root level index in the memory. Index blocks at all other<a name="line.557"></a> |
| <span class="sourceLineNo">558</span> * levels will be cached in the LRU cache in practice, although this API does not enforce that.<a name="line.558"></a> |
| <span class="sourceLineNo">559</span> * <p><a name="line.559"></a> |
| <span class="sourceLineNo">560</span> * All non-root (leaf and intermediate) index blocks contain what we call a "secondary index": an<a name="line.560"></a> |
| <span class="sourceLineNo">561</span> * array of offsets to the entries within the block. This allows us to do binary search for the<a name="line.561"></a> |
| <span class="sourceLineNo">562</span> * entry corresponding to the given key without having to deserialize the block.<a name="line.562"></a> |
| <span class="sourceLineNo">563</span> */<a name="line.563"></a> |
| <span class="sourceLineNo">564</span> static abstract class BlockIndexReader implements HeapSize {<a name="line.564"></a> |
| <span class="sourceLineNo">565</span><a name="line.565"></a> |
| <span class="sourceLineNo">566</span> protected long[] blockOffsets;<a name="line.566"></a> |
| <span class="sourceLineNo">567</span> protected int[] blockDataSizes;<a name="line.567"></a> |
| <span class="sourceLineNo">568</span> protected int rootCount = 0;<a name="line.568"></a> |
| <span class="sourceLineNo">569</span><a name="line.569"></a> |
| <span class="sourceLineNo">570</span> // Mid-key metadata.<a name="line.570"></a> |
| <span class="sourceLineNo">571</span> protected long midLeafBlockOffset = -1;<a name="line.571"></a> |
| <span class="sourceLineNo">572</span> protected int midLeafBlockOnDiskSize = -1;<a name="line.572"></a> |
| <span class="sourceLineNo">573</span> protected int midKeyEntry = -1;<a name="line.573"></a> |
| <span class="sourceLineNo">574</span><a name="line.574"></a> |
| <span class="sourceLineNo">575</span> /**<a name="line.575"></a> |
| <span class="sourceLineNo">576</span> * The number of levels in the block index tree. One if there is only root level, two for root<a name="line.576"></a> |
| <span class="sourceLineNo">577</span> * and leaf levels, etc.<a name="line.577"></a> |
| <span class="sourceLineNo">578</span> */<a name="line.578"></a> |
| <span class="sourceLineNo">579</span> protected int searchTreeLevel;<a name="line.579"></a> |
| <span class="sourceLineNo">580</span><a name="line.580"></a> |
| <span class="sourceLineNo">581</span> /** Returns true if the block index is empty. */<a name="line.581"></a> |
| <span class="sourceLineNo">582</span> public abstract boolean isEmpty();<a name="line.582"></a> |
| <span class="sourceLineNo">583</span><a name="line.583"></a> |
| <span class="sourceLineNo">584</span> /**<a name="line.584"></a> |
| <span class="sourceLineNo">585</span> * Verifies that the block index is non-empty and throws an {@link IllegalStateException}<a name="line.585"></a> |
| <span class="sourceLineNo">586</span> * otherwise.<a name="line.586"></a> |
| <span class="sourceLineNo">587</span> */<a name="line.587"></a> |
| <span class="sourceLineNo">588</span> public void ensureNonEmpty() {<a name="line.588"></a> |
| <span class="sourceLineNo">589</span> if (isEmpty()) {<a name="line.589"></a> |
| <span class="sourceLineNo">590</span> throw new IllegalStateException("Block index is empty or not loaded");<a name="line.590"></a> |
| <span class="sourceLineNo">591</span> }<a name="line.591"></a> |
| <span class="sourceLineNo">592</span> }<a name="line.592"></a> |
| <span class="sourceLineNo">593</span><a name="line.593"></a> |
| <span class="sourceLineNo">594</span> /**<a name="line.594"></a> |
| <span class="sourceLineNo">595</span> * Return the data block which contains this key. This function will only be called when the<a name="line.595"></a> |
| <span class="sourceLineNo">596</span> * HFile version is larger than 1.<a name="line.596"></a> |
| <span class="sourceLineNo">597</span> * @param key the key we are looking for<a name="line.597"></a> |
| <span class="sourceLineNo">598</span> * @param currentBlock the current block, to avoid re-reading the same block<a name="line.598"></a> |
| <span class="sourceLineNo">599</span> * @param expectedDataBlockEncoding the data block encoding the caller is expecting the data<a name="line.599"></a> |
| <span class="sourceLineNo">600</span> * block to be in, or null to not perform this check and return<a name="line.600"></a> |
| <span class="sourceLineNo">601</span> * the block irrespective of the encoding<a name="line.601"></a> |
| <span class="sourceLineNo">602</span> * @return reader a basic way to load blocks<a name="line.602"></a> |
| <span class="sourceLineNo">603</span> */<a name="line.603"></a> |
| <span class="sourceLineNo">604</span> public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,<a name="line.604"></a> |
| <span class="sourceLineNo">605</span> boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding,<a name="line.605"></a> |
| <span class="sourceLineNo">606</span> CachingBlockReader cachingBlockReader) throws IOException {<a name="line.606"></a> |
| <span class="sourceLineNo">607</span> BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,<a name="line.607"></a> |
| <span class="sourceLineNo">608</span> cacheBlocks, pread, isCompaction, expectedDataBlockEncoding, cachingBlockReader);<a name="line.608"></a> |
| <span class="sourceLineNo">609</span> if (blockWithScanInfo == null) {<a name="line.609"></a> |
| <span class="sourceLineNo">610</span> return null;<a name="line.610"></a> |
| <span class="sourceLineNo">611</span> } else {<a name="line.611"></a> |
| <span class="sourceLineNo">612</span> return blockWithScanInfo.getHFileBlock();<a name="line.612"></a> |
| <span class="sourceLineNo">613</span> }<a name="line.613"></a> |
| <span class="sourceLineNo">614</span> }<a name="line.614"></a> |
| <span class="sourceLineNo">615</span><a name="line.615"></a> |
| <span class="sourceLineNo">616</span> /**<a name="line.616"></a> |
| <span class="sourceLineNo">617</span> * Return the BlockWithScanInfo, a data structure which contains the Data HFileBlock with other<a name="line.617"></a> |
| <span class="sourceLineNo">618</span> * scan info such as the key that starts the next HFileBlock. This function will only be called<a name="line.618"></a> |
| <span class="sourceLineNo">619</span> * when the HFile version is larger than 1.<a name="line.619"></a> |
| <span class="sourceLineNo">620</span> * @param key the key we are looking for<a name="line.620"></a> |
| <span class="sourceLineNo">621</span> * @param currentBlock the current block, to avoid re-reading the same block<a name="line.621"></a> |
| <span class="sourceLineNo">622</span> * @param expectedDataBlockEncoding the data block encoding the caller is expecting the data<a name="line.622"></a> |
| <span class="sourceLineNo">623</span> * block to be in, or null to not perform this check and return<a name="line.623"></a> |
| <span class="sourceLineNo">624</span> * the block irrespective of the encoding.<a name="line.624"></a> |
| <span class="sourceLineNo">625</span> * @return the BlockWithScanInfo which contains the DataBlock with other scan info such as<a name="line.625"></a> |
| <span class="sourceLineNo">626</span> * nextIndexedKey.<a name="line.626"></a> |
| <span class="sourceLineNo">627</span> */<a name="line.627"></a> |
| <span class="sourceLineNo">628</span> public abstract BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,<a name="line.628"></a> |
| <span class="sourceLineNo">629</span> boolean cacheBlocks, boolean pread, boolean isCompaction,<a name="line.629"></a> |
| <span class="sourceLineNo">630</span> DataBlockEncoding expectedDataBlockEncoding, CachingBlockReader cachingBlockReader)<a name="line.630"></a> |
| <span class="sourceLineNo">631</span> throws IOException;<a name="line.631"></a> |
| <span class="sourceLineNo">632</span><a name="line.632"></a> |
| <span class="sourceLineNo">633</span> /**<a name="line.633"></a> |
| <span class="sourceLineNo">634</span> * An approximation to the {@link HFile}'s mid-key. Operates on block boundaries, and does not<a name="line.634"></a> |
| <span class="sourceLineNo">635</span> * go inside blocks. In other words, returns the first key of the middle block of the file.<a name="line.635"></a> |
| <span class="sourceLineNo">636</span> * @return the first key of the middle block<a name="line.636"></a> |
| <span class="sourceLineNo">637</span> */<a name="line.637"></a> |
| <span class="sourceLineNo">638</span> public abstract Cell midkey(CachingBlockReader cachingBlockReader) throws IOException;<a name="line.638"></a> |
| <span class="sourceLineNo">639</span><a name="line.639"></a> |
| <span class="sourceLineNo">640</span> /**<a name="line.640"></a> |
| <span class="sourceLineNo">641</span> * @param i from 0 to {@link #getRootBlockCount() - 1}<a name="line.641"></a> |
| <span class="sourceLineNo">642</span> */<a name="line.642"></a> |
| <span class="sourceLineNo">643</span> public long getRootBlockOffset(int i) {<a name="line.643"></a> |
| <span class="sourceLineNo">644</span> return blockOffsets[i];<a name="line.644"></a> |
| <span class="sourceLineNo">645</span> }<a name="line.645"></a> |
| <span class="sourceLineNo">646</span><a name="line.646"></a> |
| <span class="sourceLineNo">647</span> /** Returns the data size recorded for the root-level block at index i (no range check is done here).<a name="line.647"></a> |
| <span class="sourceLineNo">648</span> * @param i zero-based index of a root-level block<a name="line.648"></a> |
| <span class="sourceLineNo">649</span> * @return the on-disk size of the root-level block for version 2, or the uncompressed size for<a name="line.649"></a> |
| <span class="sourceLineNo">650</span> * version 1<a name="line.650"></a> |
| <span class="sourceLineNo">651</span> */<a name="line.651"></a> |
| <span class="sourceLineNo">652</span> public int getRootBlockDataSize(int i) {<a name="line.652"></a> |
| <span class="sourceLineNo">653</span> return blockDataSizes[i];<a name="line.653"></a> |
| <span class="sourceLineNo">654</span> }<a name="line.654"></a> |
| <span class="sourceLineNo">655</span><a name="line.655"></a> |
| <span class="sourceLineNo">656</span> /** Returns the number of root-level blocks in this block index (the {@code rootCount} field). */<a name="line.656"></a> |
| <span class="sourceLineNo">657</span> public int getRootBlockCount() {<a name="line.657"></a> |
| <span class="sourceLineNo">658</span> return rootCount;<a name="line.658"></a> |
| <span class="sourceLineNo">659</span> }<a name="line.659"></a> |
| <span class="sourceLineNo">660</span><a name="line.660"></a> |
| <span class="sourceLineNo">661</span> /** Finds the root-level index block containing the given key.<a name="line.661"></a> |
| <span class="sourceLineNo">662</span> * @param key the key to find; {@code offset} and {@code length} presumably delimit it inside this array (confirm in subclasses)<a name="line.662"></a> |
| <span class="sourceLineNo">663</span> * @param comp the comparator to be used<a name="line.663"></a> |
| <span class="sourceLineNo">664</span> * @return Index of the block containing <code>key</code> (between 0 and the number of blocks - 1)<a name="line.664"></a> |
| <span class="sourceLineNo">665</span> * or -1 if this file does not contain the request.<a name="line.665"></a> |
| <span class="sourceLineNo">666</span> */<a name="line.666"></a> |
| <span class="sourceLineNo">667</span> // When we want to find the meta index block or bloom block for ROW bloom<a name="line.667"></a> |
| <span class="sourceLineNo">668</span> // type Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we need the<a name="line.668"></a> |
| <span class="sourceLineNo">669</span> // CellComparator.<a name="line.669"></a> |
| <span class="sourceLineNo">670</span> public abstract int rootBlockContainingKey(final byte[] key, int offset, int length,<a name="line.670"></a> |
| <span class="sourceLineNo">671</span> CellComparator comp);<a name="line.671"></a> |
| <span class="sourceLineNo">672</span><a name="line.672"></a> |
| <span class="sourceLineNo">673</span> /** Finds the root-level index block containing the given key.<a name="line.673"></a> |
| <span class="sourceLineNo">674</span> * Convenience overload that delegates to the four-argument variant with a {@code null} comparator.<a name="line.674"></a> |
| <span class="sourceLineNo">675</span> * @return Index of the block containing <code>key</code> (between 0 and the number of blocks - 1)<a name="line.675"></a> |
| <span class="sourceLineNo">676</span> * or -1 if this file does not contain the request.<a name="line.676"></a> |
| <span class="sourceLineNo">677</span> */<a name="line.677"></a> |
| <span class="sourceLineNo">678</span> // When we want to find the meta index block or bloom block for ROW bloom type,<a name="line.678"></a> |
| <span class="sourceLineNo">679</span> // Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we need the<a name="line.679"></a> |
| <span class="sourceLineNo">680</span> // CellComparator. This overload passes null as the comparator; how null is interpreted<a name="line.680"></a> |
| <span class="sourceLineNo">681</span> // is decided by the subclass implementing the four-argument variant.<a name="line.681"></a> |
| <span class="sourceLineNo">682</span> public int rootBlockContainingKey(final byte[] key, int offset, int length) {<a name="line.682"></a> |
| <span class="sourceLineNo">683</span> return rootBlockContainingKey(key, offset, length, null);<a name="line.683"></a> |
| <span class="sourceLineNo">684</span> }<a name="line.684"></a> |
| <span class="sourceLineNo">685</span><a name="line.685"></a> |
| <span class="sourceLineNo">686</span> /** Finds the root-level index block containing the given key.<a name="line.686"></a> |
| <span class="sourceLineNo">687</span> * @param key the key to find<a name="line.687"></a> |
| <span class="sourceLineNo">688</span> * @return presumably the block index, or -1, as for the byte[] overloads (confirm in subclasses) */<a name="line.688"></a> |
| <span class="sourceLineNo">689</span> public abstract int rootBlockContainingKey(final Cell key);<a name="line.689"></a> |
| <span class="sourceLineNo">690</span><a name="line.690"></a> |
| <span class="sourceLineNo">691</span> /** The indexed key at the ith position in the nonRootIndex. The position starts at 0.<a name="line.691"></a> |
| <span class="sourceLineNo">692</span> * @param nonRootIndex buffer holding a non-root index block: entry count, secondary index, then entries<a name="line.692"></a> |
| <span class="sourceLineNo">693</span> * @param i the ith position<a name="line.693"></a> |
| <span class="sourceLineNo">694</span> * @return The indexed key at the ith position in the nonRootIndex, or null if i is out of range.<a name="line.694"></a> |
| <span class="sourceLineNo">695</span> */<a name="line.695"></a> |
| <span class="sourceLineNo">696</span> static byte[] getNonRootIndexedKey(ByteBuff nonRootIndex, int i) {<a name="line.696"></a> |
| <span class="sourceLineNo">697</span> int numEntries = nonRootIndex.getInt(0);<a name="line.697"></a> |
| <span class="sourceLineNo">698</span> if (i < 0 || i >= numEntries) {<a name="line.698"></a> |
| <span class="sourceLineNo">699</span> return null;<a name="line.699"></a> |
| <span class="sourceLineNo">700</span> }<a name="line.700"></a> |
| <span class="sourceLineNo">701</span><a name="line.701"></a> |
| <span class="sourceLineNo">702</span> // Entries start after the number of entries and the secondary index.<a name="line.702"></a> |
| <span class="sourceLineNo">703</span> // The secondary index takes numEntries + 1 ints.<a name="line.703"></a> |
| <span class="sourceLineNo">704</span> int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);<a name="line.704"></a> |
| <span class="sourceLineNo">705</span> // Target key's entry offset relative to the end of secondary index<a name="line.705"></a> |
| <span class="sourceLineNo">706</span> int targetKeyRelOffset = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 1));<a name="line.706"></a> |
| <span class="sourceLineNo">707</span><a name="line.707"></a> |
| <span class="sourceLineNo">708</span> // The offset of the target key in the blockIndex buffer<a name="line.708"></a> |
| <span class="sourceLineNo">709</span> int targetKeyOffset = entriesOffset // Skip secondary index<a name="line.709"></a> |
| <span class="sourceLineNo">710</span> + targetKeyRelOffset // Skip all entries before the target entry<a name="line.710"></a> |
| <span class="sourceLineNo">711</span> + SECONDARY_INDEX_ENTRY_OVERHEAD; // Skip offset and on-disk-size<a name="line.711"></a> |
| <span class="sourceLineNo">712</span><a name="line.712"></a> |
| <span class="sourceLineNo">713</span> // We subtract the two consecutive secondary index elements, which<a name="line.713"></a> |
| <span class="sourceLineNo">714</span> // gives us the size of the whole (offset, onDiskSize, key) tuple. We<a name="line.714"></a> |
| <span class="sourceLineNo">715</span> // then need to subtract the overhead of offset and onDiskSize.<a name="line.715"></a> |
| <span class="sourceLineNo">716</span> int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) - targetKeyRelOffset<a name="line.716"></a> |
| <span class="sourceLineNo">717</span> - SECONDARY_INDEX_ENTRY_OVERHEAD;<a name="line.717"></a> |
| <span class="sourceLineNo">718</span><a name="line.718"></a> |
| <span class="sourceLineNo">719</span> // TODO check whether we can make BB backed Cell here? So can avoid bytes copy.<a name="line.719"></a> |
| <span class="sourceLineNo">720</span> return nonRootIndex.toBytes(targetKeyOffset, targetKeyLength);<a name="line.720"></a> |
| <span class="sourceLineNo">721</span> }<a name="line.721"></a> |
| <span class="sourceLineNo">722</span><a name="line.722"></a> |
| <span class="sourceLineNo">723</span> /**<a name="line.723"></a> |
| <span class="sourceLineNo">724</span> * Performs a binary search over a non-root level index block. Utilizes the secondary index,<a name="line.724"></a> |
| <span class="sourceLineNo">725</span> * which records the offsets of (offset, onDiskSize, firstKey) tuples of all entries.<a name="line.725"></a> |
| <span class="sourceLineNo">726</span> * @param key the key we are searching for<a name="line.726"></a> |
| <span class="sourceLineNo">727</span> * @param nonRootIndex the non-root index block buffer, starting with the entry count and secondary index; read via getIntAfterPosition<a name="line.727"></a> |
| <span class="sourceLineNo">728</span> * @param comparator the comparator used to order key against the keys stored in the index entries<a name="line.728"></a> |
| <span class="sourceLineNo">729</span> * @return the index i in [0, numEntries - 1] such that keys[i] <= key < keys[i + 1], or -1 if key sorts before every entry<a name="line.729"></a> |
| <span class="sourceLineNo">730</span> */<a name="line.730"></a> |
| <span class="sourceLineNo">731</span> static int binarySearchNonRootIndex(Cell key, ByteBuff nonRootIndex,<a name="line.731"></a> |
| <span class="sourceLineNo">732</span> CellComparator comparator) {<a name="line.732"></a> |
| <span class="sourceLineNo">733</span><a name="line.733"></a> |
| <span class="sourceLineNo">734</span> int numEntries = nonRootIndex.getIntAfterPosition(0);<a name="line.734"></a> |
| <span class="sourceLineNo">735</span> int low = 0;<a name="line.735"></a> |
| <span class="sourceLineNo">736</span> int high = numEntries - 1;<a name="line.736"></a> |
| <span class="sourceLineNo">737</span> int mid = 0;<a name="line.737"></a> |
| <span class="sourceLineNo">738</span><a name="line.738"></a> |
| <span class="sourceLineNo">739</span> // Entries start after the number of entries and the secondary index.<a name="line.739"></a> |
| <span class="sourceLineNo">740</span> // The secondary index takes numEntries + 1 ints.<a name="line.740"></a> |
| <span class="sourceLineNo">741</span> int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);<a name="line.741"></a> |
| <span class="sourceLineNo">742</span><a name="line.742"></a> |
| <span class="sourceLineNo">743</span> // If we imagine that keys[-1] = -Infinity and<a name="line.743"></a> |
| <span class="sourceLineNo">744</span> // keys[numEntries] = Infinity, then we are maintaining an invariant that<a name="line.744"></a> |
| <span class="sourceLineNo">745</span> // keys[low - 1] < key < keys[high + 1] while narrowing down the range.<a name="line.745"></a> |
| <span class="sourceLineNo">746</span> ByteBufferKeyOnlyKeyValue nonRootIndexkeyOnlyKV = new ByteBufferKeyOnlyKeyValue();<a name="line.746"></a> |
| <span class="sourceLineNo">747</span> ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();<a name="line.747"></a> |
| <span class="sourceLineNo">748</span> while (low <= high) {<a name="line.748"></a> |
| <span class="sourceLineNo">749</span> mid = low + ((high - low) >> 1);<a name="line.749"></a> |
| <span class="sourceLineNo">750</span><a name="line.750"></a> |
| <span class="sourceLineNo">751</span> // Midkey's offset relative to the end of secondary index<a name="line.751"></a> |
| <span class="sourceLineNo">752</span> int midKeyRelOffset = nonRootIndex.getIntAfterPosition(Bytes.SIZEOF_INT * (mid + 1));<a name="line.752"></a> |
| <span class="sourceLineNo">753</span><a name="line.753"></a> |
| <span class="sourceLineNo">754</span> // The offset of the middle key in the blockIndex buffer<a name="line.754"></a> |
| <span class="sourceLineNo">755</span> int midKeyOffset = entriesOffset // Skip secondary index<a name="line.755"></a> |
| <span class="sourceLineNo">756</span> + midKeyRelOffset // Skip all entries until mid<a name="line.756"></a> |
| <span class="sourceLineNo">757</span> + SECONDARY_INDEX_ENTRY_OVERHEAD; // Skip offset and on-disk-size<a name="line.757"></a> |
| <span class="sourceLineNo">758</span><a name="line.758"></a> |
| <span class="sourceLineNo">759</span> // We subtract the two consecutive secondary index elements, which<a name="line.759"></a> |
| <span class="sourceLineNo">760</span> // gives us the size of the whole (offset, onDiskSize, key) tuple. We<a name="line.760"></a> |
| <span class="sourceLineNo">761</span> // then need to subtract the overhead of offset and onDiskSize.<a name="line.761"></a> |
| <span class="sourceLineNo">762</span> int midLength = nonRootIndex.getIntAfterPosition(Bytes.SIZEOF_INT * (mid + 2))<a name="line.762"></a> |
| <span class="sourceLineNo">763</span> - midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;<a name="line.763"></a> |
| <span class="sourceLineNo">764</span><a name="line.764"></a> |
| <span class="sourceLineNo">765</span> // we have to compare in this order, because the comparator order<a name="line.765"></a> |
| <span class="sourceLineNo">766</span> // has special logic when the 'left side' is a special key.<a name="line.766"></a> |
| <span class="sourceLineNo">767</span> // TODO make KeyOnlyKeyValue to be Buffer backed and avoid array() call. This has to be<a name="line.767"></a> |
| <span class="sourceLineNo">768</span> // done after HBASE-12224 & HBASE-12282<a name="line.768"></a> |
| <span class="sourceLineNo">769</span> // TODO avoid array call.<a name="line.769"></a> |
| <span class="sourceLineNo">770</span> nonRootIndex.asSubByteBuffer(midKeyOffset, midLength, pair);<a name="line.770"></a> |
| <span class="sourceLineNo">771</span> nonRootIndexkeyOnlyKV.setKey(pair.getFirst(), pair.getSecond(), midLength);<a name="line.771"></a> |
| <span class="sourceLineNo">772</span> int cmp = PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, nonRootIndexkeyOnlyKV);<a name="line.772"></a> |
| <span class="sourceLineNo">773</span><a name="line.773"></a> |
| <span class="sourceLineNo">774</span> // key lives above the midpoint<a name="line.774"></a> |
| <span class="sourceLineNo">775</span> if (cmp > 0) low = mid + 1; // Maintain the invariant that keys[low - 1] < key<a name="line.775"></a> |
| <span class="sourceLineNo">776</span> // key lives below the midpoint<a name="line.776"></a> |
| <span class="sourceLineNo">777</span> else if (cmp < 0) high = mid - 1; // Maintain the invariant that key < keys[high + 1]<a name="line.777"></a> |
| <span class="sourceLineNo">778</span> else return mid; // exact match<a name="line.778"></a> |
| <span class="sourceLineNo">779</span> }<a name="line.779"></a> |
| <span class="sourceLineNo">780</span><a name="line.780"></a> |
| <span class="sourceLineNo">781</span> // As per our invariant, keys[low - 1] < key < keys[high + 1], meaning<a name="line.781"></a> |
| <span class="sourceLineNo">782</span> // that low - 1 < high + 1 and (low - high) <= 1. As per the loop break<a name="line.782"></a> |
| <span class="sourceLineNo">783</span> // condition, low >= high + 1. Therefore, low = high + 1.<a name="line.783"></a> |
| <span class="sourceLineNo">784</span><a name="line.784"></a> |
| <span class="sourceLineNo">785</span> if (low != high + 1) {<a name="line.785"></a> |
| <span class="sourceLineNo">786</span> throw new IllegalStateException(<a name="line.786"></a> |
| <span class="sourceLineNo">787</span> "Binary search broken: low=" + low + " " + "instead of " + (high + 1));<a name="line.787"></a> |
| <span class="sourceLineNo">788</span> }<a name="line.788"></a> |
| <span class="sourceLineNo">789</span><a name="line.789"></a> |
| <span class="sourceLineNo">790</span> // OK, our invariant says that keys[low - 1] < key < keys[low]. We need to<a name="line.790"></a> |
| <span class="sourceLineNo">791</span> // return i such that keys[i] <= key < keys[i + 1]. Therefore i = low - 1.<a name="line.791"></a> |
| <span class="sourceLineNo">792</span> int i = low - 1;<a name="line.792"></a> |
| <span class="sourceLineNo">793</span><a name="line.793"></a> |
| <span class="sourceLineNo">794</span> // Some extra validation on the result.<a name="line.794"></a> |
| <span class="sourceLineNo">795</span> if (i < -1 || i >= numEntries) {<a name="line.795"></a> |
| <span class="sourceLineNo">796</span> throw new IllegalStateException("Binary search broken: result is " + i<a name="line.796"></a> |
| <span class="sourceLineNo">797</span> + " but expected to be between -1 and (numEntries - 1) = " + (numEntries - 1));<a name="line.797"></a> |
| <span class="sourceLineNo">798</span> }<a name="line.798"></a> |
| <span class="sourceLineNo">799</span><a name="line.799"></a> |
| <span class="sourceLineNo">800</span> return i;<a name="line.800"></a> |
| <span class="sourceLineNo">801</span> }<a name="line.801"></a> |
| <span class="sourceLineNo">802</span><a name="line.802"></a> |
| <span class="sourceLineNo">803</span> /**<a name="line.803"></a> |
| <span class="sourceLineNo">804</span> * Search for one key using the secondary index in a non-root block. In case of success,<a name="line.804"></a> |
| <span class="sourceLineNo">805</span> * positions the provided buffer at the entry of interest, where the file offset and the<a name="line.805"></a> |
| <span class="sourceLineNo">806</span> * on-disk-size can be read. If the result is -1 the buffer position is left unchanged.<a name="line.806"></a> |
| <span class="sourceLineNo">807</span> * @param nonRootBlock a non-root block without header @param key the key to search for @param comparator passed to binarySearchNonRootIndex<a name="line.807"></a> |
| <span class="sourceLineNo">808</span> * @return the index position where the given key was found, otherwise return -1 in the case the<a name="line.808"></a> |
| <span class="sourceLineNo">809</span> * given key is before the first key.<a name="line.809"></a> |
| <span class="sourceLineNo">810</span> */<a name="line.810"></a> |
| <span class="sourceLineNo">811</span> static int locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key, CellComparator comparator) {<a name="line.811"></a> |
| <span class="sourceLineNo">812</span> int entryIndex = binarySearchNonRootIndex(key, nonRootBlock, comparator);<a name="line.812"></a> |
| <span class="sourceLineNo">813</span><a name="line.813"></a> |
| <span class="sourceLineNo">814</span> if (entryIndex != -1) {<a name="line.814"></a> |
| <span class="sourceLineNo">815</span> int numEntries = nonRootBlock.getIntAfterPosition(0);<a name="line.815"></a> |
| <span class="sourceLineNo">816</span><a name="line.816"></a> |
| <span class="sourceLineNo">817</span> // The end of secondary index and the beginning of entries themselves.<a name="line.817"></a> |
| <span class="sourceLineNo">818</span> int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);<a name="line.818"></a> |
| <span class="sourceLineNo">819</span><a name="line.819"></a> |
| <span class="sourceLineNo">820</span> // The offset of the entry we are interested in relative to the end of<a name="line.820"></a> |
| <span class="sourceLineNo">821</span> // the secondary index.<a name="line.821"></a> |
| <span class="sourceLineNo">822</span> int entryRelOffset = nonRootBlock.getIntAfterPosition(Bytes.SIZEOF_INT * (1 + entryIndex));<a name="line.822"></a> |
| <span class="sourceLineNo">823</span><a name="line.823"></a> |
| <span class="sourceLineNo">824</span> nonRootBlock.position(entriesOffset + entryRelOffset);<a name="line.824"></a> |
| <span class="sourceLineNo">825</span> }<a name="line.825"></a> |
| <span class="sourceLineNo">826</span><a name="line.826"></a> |
| <span class="sourceLineNo">827</span> return entryIndex;<a name="line.827"></a> |
| <span class="sourceLineNo">828</span> }<a name="line.828"></a> |
| <span class="sourceLineNo">829</span><a name="line.829"></a> |
| <span class="sourceLineNo">830</span> /**<a name="line.830"></a> |
| <span class="sourceLineNo">831</span> * Read in the root-level index from the given input stream: numEntries records of (long offset,<a name="line.831"></a> |
| <span class="sourceLineNo">832</span> * int data size, byte-array key), each passed to add(). Must match what was written into the root<a name="line.832"></a> |
| <span class="sourceLineNo">833</span> * level by {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the offset it returned.<a name="line.833"></a> |
| <span class="sourceLineNo">834</span> * @param in the buffered input stream or wrapped byte input stream<a name="line.834"></a> |
| <span class="sourceLineNo">835</span> * @param numEntries the number of root-level index entries<a name="line.835"></a> |
| <span class="sourceLineNo">836</span> */<a name="line.836"></a> |
| <span class="sourceLineNo">837</span> public void readRootIndex(DataInput in, final int numEntries) throws IOException {<a name="line.837"></a> |
| <span class="sourceLineNo">838</span> blockOffsets = new long[numEntries];<a name="line.838"></a> |
| <span class="sourceLineNo">839</span> initialize(numEntries);<a name="line.839"></a> |
| <span class="sourceLineNo">840</span> blockDataSizes = new int[numEntries];<a name="line.840"></a> |
| <span class="sourceLineNo">841</span><a name="line.841"></a> |
| <span class="sourceLineNo">842</span> // If index size is zero, no index was written, so nothing is read from the stream.<a name="line.842"></a> |
| <span class="sourceLineNo">843</span> if (numEntries > 0) {<a name="line.843"></a> |
| <span class="sourceLineNo">844</span> for (int i = 0; i < numEntries; ++i) {<a name="line.844"></a> |
| <span class="sourceLineNo">845</span> long offset = in.readLong();<a name="line.845"></a> |
| <span class="sourceLineNo">846</span> int dataSize = in.readInt();<a name="line.846"></a> |
| <span class="sourceLineNo">847</span> byte[] key = Bytes.readByteArray(in);<a name="line.847"></a> |
| <span class="sourceLineNo">848</span> add(key, offset, dataSize);<a name="line.848"></a> |
| <span class="sourceLineNo">849</span> }<a name="line.849"></a> |
| <span class="sourceLineNo">850</span> }<a name="line.850"></a> |
| <span class="sourceLineNo">851</span> }<a name="line.851"></a> |
| <span class="sourceLineNo">852</span><a name="line.852"></a> |
| <span class="sourceLineNo">853</span> /** Called once by readRootIndex(DataInput, int) with the entry count, before any entry is passed to add(). */ protected abstract void initialize(int numEntries);<a name="line.853"></a> |
| <span class="sourceLineNo">854</span><a name="line.854"></a> |
| <span class="sourceLineNo">855</span> /** Called by readRootIndex(DataInput, int) once per entry, in stream order, with the key, offset and data size just read. */ protected abstract void add(final byte[] key, final long offset, final int dataSize);<a name="line.855"></a> |
| <span class="sourceLineNo">856</span><a name="line.856"></a> |
| <span class="sourceLineNo">857</span> /**<a name="line.857"></a> |
| <span class="sourceLineNo">858</span> * Read in the root-level index from the given block's byte stream. Must match what was written into<a name="line.858"></a> |
| <span class="sourceLineNo">859</span> * the root level by {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the offset<a name="line.859"></a> |
| <span class="sourceLineNo">860</span> * that function returned.<a name="line.860"></a> |
| <span class="sourceLineNo">861</span> * @param blk the HFile block<a name="line.861"></a> |
| <span class="sourceLineNo">862</span> * @param numEntries the number of root-level index entries<a name="line.862"></a> |
| <span class="sourceLineNo">863</span> * @return the stream obtained from blk.getByteStream(), after the root index entries have been read from it<a name="line.863"></a> |
| <span class="sourceLineNo">864</span> */<a name="line.864"></a> |
| <span class="sourceLineNo">865</span> public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {<a name="line.865"></a> |
| <span class="sourceLineNo">866</span> DataInputStream in = blk.getByteStream();<a name="line.866"></a> |
| <span class="sourceLineNo">867</span> readRootIndex(in, numEntries);<a name="line.867"></a> |
| <span class="sourceLineNo">868</span> return in;<a name="line.868"></a> |
| <span class="sourceLineNo">869</span> }<a name="line.869"></a> |
| <span class="sourceLineNo">870</span><a name="line.870"></a> |
| <span class="sourceLineNo">871</span> /**<a name="line.871"></a> |
| <span class="sourceLineNo">872</span> * Read the root-level metadata of a multi-level block index. Based on<a name="line.872"></a> |
| <span class="sourceLineNo">873</span> * {@link #readRootIndex(DataInput, int)}, but also reads the mid-key metadata (leaf block offset,<a name="line.873"></a> |
| <span class="sourceLineNo">874</span> * on-disk size, entry index) when at least MID_KEY_METADATA_SIZE bytes remain; otherwise returns early.<a name="line.874"></a> |
| <span class="sourceLineNo">875</span> * @param blk the HFile block<a name="line.875"></a> |
| <span class="sourceLineNo">876</span> * @param numEntries the number of root-level index entries<a name="line.876"></a> |
| <span class="sourceLineNo">877</span> */<a name="line.877"></a> |
| <span class="sourceLineNo">878</span> public void readMultiLevelIndexRoot(HFileBlock blk, final int numEntries) throws IOException {<a name="line.878"></a> |
| <span class="sourceLineNo">879</span> DataInputStream in = readRootIndex(blk, numEntries);<a name="line.879"></a> |
| <span class="sourceLineNo">880</span> // HFileBlock.getByteStream() returns a byte stream for reading the data(excluding checksum)<a name="line.880"></a> |
| <span class="sourceLineNo">881</span> // of root index block, so after reading the root index there is no need to subtract the<a name="line.881"></a> |
| <span class="sourceLineNo">882</span> // checksum bytes.<a name="line.882"></a> |
| <span class="sourceLineNo">883</span> if (in.available() < MID_KEY_METADATA_SIZE) {<a name="line.883"></a> |
| <span class="sourceLineNo">884</span> // No mid-key metadata available; the mid-key fields are left as they were.<a name="line.884"></a> |
| <span class="sourceLineNo">885</span> return;<a name="line.885"></a> |
| <span class="sourceLineNo">886</span> }<a name="line.886"></a> |
| <span class="sourceLineNo">887</span> midLeafBlockOffset = in.readLong();<a name="line.887"></a> |
| <span class="sourceLineNo">888</span> midLeafBlockOnDiskSize = in.readInt();<a name="line.888"></a> |
| <span class="sourceLineNo">889</span> midKeyEntry = in.readInt();<a name="line.889"></a> |
| <span class="sourceLineNo">890</span> }<a name="line.890"></a> |
| <span class="sourceLineNo">891</span><a name="line.891"></a> |
| <span class="sourceLineNo">892</span> @Override<a name="line.892"></a> |
| <span class="sourceLineNo">893</span> public long heapSize() {<a name="line.893"></a> |
| <span class="sourceLineNo">894</span> // Fixed part: 3 references, 2 ints and the object header. The BlockIndexReader does not have the blockKey, comparator and the midkey atomic reference<a name="line.894"></a> |
| <span class="sourceLineNo">895</span> long heapSize =<a name="line.895"></a> |
| <span class="sourceLineNo">896</span> ClassSize.align(3 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + ClassSize.OBJECT);<a name="line.896"></a> |
| <span class="sourceLineNo">897</span><a name="line.897"></a> |
| <span class="sourceLineNo">898</span> // Mid-key metadata.<a name="line.898"></a> |
| <span class="sourceLineNo">899</span> heapSize += MID_KEY_METADATA_SIZE;<a name="line.899"></a> |
| <span class="sourceLineNo">900</span><a name="line.900"></a> |
| <span class="sourceLineNo">901</span> heapSize = calculateHeapSizeForBlockKeys(heapSize);<a name="line.901"></a> |
| <span class="sourceLineNo">902</span><a name="line.902"></a> |
| <span class="sourceLineNo">903</span> if (blockOffsets != null) {<a name="line.903"></a> |
| <span class="sourceLineNo">904</span> heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);<a name="line.904"></a> |
| <span class="sourceLineNo">905</span> }<a name="line.905"></a> |
| <span class="sourceLineNo">906</span><a name="line.906"></a> |
| <span class="sourceLineNo">907</span> if (blockDataSizes != null) {<a name="line.907"></a> |
| <span class="sourceLineNo">908</span> heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);<a name="line.908"></a> |
| <span class="sourceLineNo">909</span> }<a name="line.909"></a> |
| <span class="sourceLineNo">910</span><a name="line.910"></a> |
| <span class="sourceLineNo">911</span> return ClassSize.align(heapSize);<a name="line.911"></a> |
| <span class="sourceLineNo">912</span> }<a name="line.912"></a> |
| <span class="sourceLineNo">913</span><a name="line.913"></a> |
| <span class="sourceLineNo">914</span> /** Called by heapSize() with the running total; the returned value replaces that total (presumably the total plus the block keys' size; confirm in subclasses). */ protected abstract long calculateHeapSizeForBlockKeys(long heapSize);<a name="line.914"></a> |
| <span class="sourceLineNo">915</span> }<a name="line.915"></a> |
| <span class="sourceLineNo">916</span><a name="line.916"></a> |
| <span class="sourceLineNo">917</span> /**<a name="line.917"></a> |
| <span class="sourceLineNo">918</span> * Writes the block index into the output stream. Generate the tree from bottom up. The leaf level<a name="line.918"></a> |
| <span class="sourceLineNo">919</span> * is written to disk as a sequence of inline blocks, if it is larger than a certain number of<a name="line.919"></a> |
| <span class="sourceLineNo">920</span> * bytes. If the leaf level is not large enough, we write all entries to the root level instead.<a name="line.920"></a> |
| <span class="sourceLineNo">921</span> * After all leaf blocks have been written, we end up with an index referencing the resulting leaf<a name="line.921"></a> |
| <span class="sourceLineNo">922</span> * index blocks. If that index is larger than the allowed root index size, the writer will break<a name="line.922"></a> |
| <span class="sourceLineNo">923</span> * it up into reasonable-size intermediate-level index block chunks, write those chunks out, and<a name="line.923"></a> |
| <span class="sourceLineNo">924</span> * create another index referencing those chunks. This will be repeated until the remaining index<a name="line.924"></a> |
| <span class="sourceLineNo">925</span> * is small enough to become the root index. However, in most practical cases we will only have<a name="line.925"></a> |
| <span class="sourceLineNo">926</span> * leaf-level blocks and the root index, or just the root index.<a name="line.926"></a> |
| <span class="sourceLineNo">927</span> */<a name="line.927"></a> |
| <span class="sourceLineNo">928</span> public static class BlockIndexWriter implements InlineBlockWriter {<a name="line.928"></a> |
| <span class="sourceLineNo">929</span> /**<a name="line.929"></a> |
| <span class="sourceLineNo">930</span> * While the index is being written, this represents the current block index referencing all<a name="line.930"></a> |
| <span class="sourceLineNo">931</span> * leaf blocks, with one exception. If the file is being closed and there are not enough blocks<a name="line.931"></a> |
| <span class="sourceLineNo">932</span> * to complete even a single leaf block, no leaf blocks get written and this contains the entire<a name="line.932"></a> |
| <span class="sourceLineNo">933</span> * block index. After all levels of the index were written by<a name="line.933"></a> |
| <span class="sourceLineNo">934</span> * {@link #writeIndexBlocks(FSDataOutputStream)}, this contains the final root-level index.<a name="line.934"></a> |
| <span class="sourceLineNo">935</span> */<a name="line.935"></a> |
| <span class="sourceLineNo">936</span> private BlockIndexChunk rootChunk = new BlockIndexChunkImpl();<a name="line.936"></a> |
| <span class="sourceLineNo">937</span><a name="line.937"></a> |
| <span class="sourceLineNo">938</span> /**<a name="line.938"></a> |
| <span class="sourceLineNo">939</span> * Current leaf-level chunk. New entries referencing data blocks get added to this chunk until<a name="line.939"></a> |
| <span class="sourceLineNo">940</span> * it grows large enough to be written to disk.<a name="line.940"></a> |
| <span class="sourceLineNo">941</span> */<a name="line.941"></a> |
| <span class="sourceLineNo">942</span> private BlockIndexChunk curInlineChunk = new BlockIndexChunkImpl();<a name="line.942"></a> |
| <span class="sourceLineNo">943</span><a name="line.943"></a> |
| <span class="sourceLineNo">944</span> /**<a name="line.944"></a> |
| <span class="sourceLineNo">945</span> * The number of block index levels. This is one if there is only root level (even empty), two<a name="line.945"></a> |
| <span class="sourceLineNo">946</span> * if there is a leaf level and root level, and is higher if there are intermediate levels. This is<a name="line.946"></a> |
| <span class="sourceLineNo">947</span> * only final after {@link #writeIndexBlocks(FSDataOutputStream)} has been called. The initial<a name="line.947"></a> |
| <span class="sourceLineNo">948</span> * value accounts for the root level, and will be increased to two as soon as we find out there<a name="line.948"></a> |
| <span class="sourceLineNo">949</span> * is a leaf-level in {@link #blockWritten(long, int, int)}.<a name="line.949"></a> |
| <span class="sourceLineNo">950</span> */<a name="line.950"></a> |
| <span class="sourceLineNo">951</span> private int numLevels = 1;<a name="line.951"></a> |
| <span class="sourceLineNo">952</span><a name="line.952"></a> |
| <span class="sourceLineNo">953</span> private HFileBlock.Writer blockWriter;<a name="line.953"></a> |
| <span class="sourceLineNo">954</span> private byte[] firstKey = null;<a name="line.954"></a> |
| <span class="sourceLineNo">955</span><a name="line.955"></a> |
| <span class="sourceLineNo">956</span> /**<a name="line.956"></a> |
| <span class="sourceLineNo">957</span> * The total number of leaf-level entries, i.e. entries referenced by leaf-level blocks. For the<a name="line.957"></a> |
| <span class="sourceLineNo">958</span> * data block index this is equal to the number of data blocks.<a name="line.958"></a> |
| <span class="sourceLineNo">959</span> */<a name="line.959"></a> |
| <span class="sourceLineNo">960</span> private long totalNumEntries;<a name="line.960"></a> |
| <span class="sourceLineNo">961</span><a name="line.961"></a> |
| <span class="sourceLineNo">962</span> /** Total compressed size of all index blocks. */<a name="line.962"></a> |
| <span class="sourceLineNo">963</span> private long totalBlockOnDiskSize;<a name="line.963"></a> |
| <span class="sourceLineNo">964</span><a name="line.964"></a> |
| <span class="sourceLineNo">965</span> /** Total uncompressed size of all index blocks. */<a name="line.965"></a> |
| <span class="sourceLineNo">966</span> private long totalBlockUncompressedSize;<a name="line.966"></a> |
| <span class="sourceLineNo">967</span><a name="line.967"></a> |
| <span class="sourceLineNo">968</span> /** The maximum size guideline of all multi-level index blocks. */<a name="line.968"></a> |
| <span class="sourceLineNo">969</span> private int maxChunkSize;<a name="line.969"></a> |
| <span class="sourceLineNo">970</span><a name="line.970"></a> |
| <span class="sourceLineNo">971</span> /** The minimum number of entries an index chunk must hold before it may be split (HBASE-16288) */<a name="line.971"></a> |
| <span class="sourceLineNo">972</span> private int minIndexNumEntries;<a name="line.972"></a> |
| <span class="sourceLineNo">973</span><a name="line.973"></a> |
| <span class="sourceLineNo">974</span> /** Whether we require this block index to always be single-level. */<a name="line.974"></a> |
| <span class="sourceLineNo">975</span> private boolean singleLevelOnly;<a name="line.975"></a> |
| <span class="sourceLineNo">976</span><a name="line.976"></a> |
| <span class="sourceLineNo">977</span> /** CacheConfig, or null if cache-on-write is disabled */<a name="line.977"></a> |
| <span class="sourceLineNo">978</span> private CacheConfig cacheConf;<a name="line.978"></a> |
| <span class="sourceLineNo">979</span><a name="line.979"></a> |
| <span class="sourceLineNo">980</span> /** Name to use for computing cache keys */<a name="line.980"></a> |
| <span class="sourceLineNo">981</span> private String nameForCaching;<a name="line.981"></a> |
| <span class="sourceLineNo">982</span><a name="line.982"></a> |
| <span class="sourceLineNo">983</span> /** Type of encoding used for index blocks in HFile */<a name="line.983"></a> |
| <span class="sourceLineNo">984</span> private HFileIndexBlockEncoder indexBlockEncoder;<a name="line.984"></a> |
| <span class="sourceLineNo">985</span><a name="line.985"></a> |
| <span class="sourceLineNo">986</span> /** Creates a single-level block index writer */<a name="line.986"></a> |
| <span class="sourceLineNo">987</span> public BlockIndexWriter() {<a name="line.987"></a> |
| <span class="sourceLineNo">988</span> this(null, null, null, null);<a name="line.988"></a> |
| <span class="sourceLineNo">989</span> singleLevelOnly = true;<a name="line.989"></a> |
| <span class="sourceLineNo">990</span> }<a name="line.990"></a> |
| <span class="sourceLineNo">991</span><a name="line.991"></a> |
| <span class="sourceLineNo">992</span> /**<a name="line.992"></a> |
| <span class="sourceLineNo">993</span> * Creates a multi-level block index writer.<a name="line.993"></a> |
| <span class="sourceLineNo">994</span> * @param blockWriter the block writer to use to write index blocks<a name="line.994"></a> |
| <span class="sourceLineNo">995</span> * @param cacheConf used to determine when and how a block should be cached-on-write.<a name="line.995"></a> |
| <span class="sourceLineNo">996</span> */<a name="line.996"></a> |
| <span class="sourceLineNo">997</span> public BlockIndexWriter(HFileBlock.Writer blockWriter, CacheConfig cacheConf,<a name="line.997"></a> |
| <span class="sourceLineNo">998</span> String nameForCaching, HFileIndexBlockEncoder indexBlockEncoder) {<a name="line.998"></a> |
| <span class="sourceLineNo">999</span> if ((cacheConf == null) != (nameForCaching == null)) {<a name="line.999"></a> |
| <span class="sourceLineNo">1000</span> throw new IllegalArgumentException(<a name="line.1000"></a> |
| <span class="sourceLineNo">1001</span> "Block cache and file name for " + "caching must be both specified or both null");<a name="line.1001"></a> |
| <span class="sourceLineNo">1002</span> }<a name="line.1002"></a> |
| <span class="sourceLineNo">1003</span><a name="line.1003"></a> |
| <span class="sourceLineNo">1004</span> this.blockWriter = blockWriter;<a name="line.1004"></a> |
| <span class="sourceLineNo">1005</span> this.cacheConf = cacheConf;<a name="line.1005"></a> |
| <span class="sourceLineNo">1006</span> this.nameForCaching = nameForCaching;<a name="line.1006"></a> |
| <span class="sourceLineNo">1007</span> this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;<a name="line.1007"></a> |
| <span class="sourceLineNo">1008</span> this.minIndexNumEntries = HFileBlockIndex.DEFAULT_MIN_INDEX_NUM_ENTRIES;<a name="line.1008"></a> |
| <span class="sourceLineNo">1009</span> this.indexBlockEncoder =<a name="line.1009"></a> |
| <span class="sourceLineNo">1010</span> indexBlockEncoder != null ? indexBlockEncoder : NoOpIndexBlockEncoder.INSTANCE;<a name="line.1010"></a> |
| <span class="sourceLineNo">1011</span> }<a name="line.1011"></a> |
| <span class="sourceLineNo">1012</span><a name="line.1012"></a> |
| <span class="sourceLineNo">1013</span> public void setMaxChunkSize(int maxChunkSize) {<a name="line.1013"></a> |
| <span class="sourceLineNo">1014</span> if (maxChunkSize <= 0) {<a name="line.1014"></a> |
| <span class="sourceLineNo">1015</span> throw new IllegalArgumentException("Invalid maximum index block size");<a name="line.1015"></a> |
| <span class="sourceLineNo">1016</span> }<a name="line.1016"></a> |
| <span class="sourceLineNo">1017</span> this.maxChunkSize = maxChunkSize;<a name="line.1017"></a> |
| <span class="sourceLineNo">1018</span> }<a name="line.1018"></a> |
| <span class="sourceLineNo">1019</span><a name="line.1019"></a> |
| <span class="sourceLineNo">1020</span> public void setMinIndexNumEntries(int minIndexNumEntries) {<a name="line.1020"></a> |
| <span class="sourceLineNo">1021</span> if (minIndexNumEntries <= 1) {<a name="line.1021"></a> |
| <span class="sourceLineNo">1022</span> throw new IllegalArgumentException("Invalid maximum index level, should be >= 2");<a name="line.1022"></a> |
| <span class="sourceLineNo">1023</span> }<a name="line.1023"></a> |
| <span class="sourceLineNo">1024</span> this.minIndexNumEntries = minIndexNumEntries;<a name="line.1024"></a> |
| <span class="sourceLineNo">1025</span> }<a name="line.1025"></a> |
| <span class="sourceLineNo">1026</span><a name="line.1026"></a> |
| <span class="sourceLineNo">1027</span> /**<a name="line.1027"></a> |
| <span class="sourceLineNo">1028</span> * Writes the root level and intermediate levels of the block index into the output stream,<a name="line.1028"></a> |
| <span class="sourceLineNo">1029</span> * generating the tree from bottom up. Assumes that the leaf level has been inline-written to<a name="line.1029"></a> |
| <span class="sourceLineNo">1030</span> * the disk if there is enough data for more than one leaf block. We iterate by breaking the<a name="line.1030"></a> |
| <span class="sourceLineNo">1031</span> * current level of the block index, starting with the index of all leaf-level blocks, into<a name="line.1031"></a> |
| <span class="sourceLineNo">1032</span> * chunks small enough to be written to disk, and generate its parent level, until we end up<a name="line.1032"></a> |
| <span class="sourceLineNo">1033</span> * with a level small enough to become the root level. If the leaf level is not large enough,<a name="line.1033"></a> |
| <span class="sourceLineNo">1034</span> * there is no inline block index anymore, so we only write that level of block index to disk as<a name="line.1034"></a> |
| <span class="sourceLineNo">1035</span> * the root level.<a name="line.1035"></a> |
| <span class="sourceLineNo">1036</span> * @param out FSDataOutputStream<a name="line.1036"></a> |
| <span class="sourceLineNo">1037</span> * @return position at which we entered the root-level index.<a name="line.1037"></a> |
| <span class="sourceLineNo">1038</span> */<a name="line.1038"></a> |
| <span class="sourceLineNo">1039</span> public long writeIndexBlocks(FSDataOutputStream out) throws IOException {<a name="line.1039"></a> |
| <span class="sourceLineNo">1040</span> if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {<a name="line.1040"></a> |
| <span class="sourceLineNo">1041</span> throw new IOException("Trying to write a multi-level block index, " + "but are "<a name="line.1041"></a> |
| <span class="sourceLineNo">1042</span> + curInlineChunk.getNumEntries() + " entries in the " + "last inline chunk.");<a name="line.1042"></a> |
| <span class="sourceLineNo">1043</span> }<a name="line.1043"></a> |
| <span class="sourceLineNo">1044</span><a name="line.1044"></a> |
| <span class="sourceLineNo">1045</span> // We need to get mid-key metadata before we create intermediate<a name="line.1045"></a> |
| <span class="sourceLineNo">1046</span> // indexes and overwrite the root chunk.<a name="line.1046"></a> |
| <span class="sourceLineNo">1047</span> byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata() : null;<a name="line.1047"></a> |
| <span class="sourceLineNo">1048</span><a name="line.1048"></a> |
| <span class="sourceLineNo">1049</span> if (curInlineChunk != null) {<a name="line.1049"></a> |
| <span class="sourceLineNo">1050</span> while (<a name="line.1050"></a> |
| <span class="sourceLineNo">1051</span> rootChunk.getRootSize() > maxChunkSize<a name="line.1051"></a> |
| <span class="sourceLineNo">1052</span> // HBASE-16288: if firstKey is larger than maxChunkSize we will loop indefinitely<a name="line.1052"></a> |
| <span class="sourceLineNo">1053</span> && rootChunk.getNumEntries() > minIndexNumEntries<a name="line.1053"></a> |
| <span class="sourceLineNo">1054</span> // Sanity check. We will not hit this: (minIndexNumEntries ^ 16) blocks can be addressed.<a name="line.1054"></a> |
| <span class="sourceLineNo">1055</span> && numLevels < 16<a name="line.1055"></a> |
| <span class="sourceLineNo">1056</span> ) {<a name="line.1056"></a> |
| <span class="sourceLineNo">1057</span> rootChunk = writeIntermediateLevel(out, rootChunk);<a name="line.1057"></a> |
| <span class="sourceLineNo">1058</span> numLevels += 1;<a name="line.1058"></a> |
| <span class="sourceLineNo">1059</span> }<a name="line.1059"></a> |
| <span class="sourceLineNo">1060</span> }<a name="line.1060"></a> |
| <span class="sourceLineNo">1061</span><a name="line.1061"></a> |
| <span class="sourceLineNo">1062</span> // write the root level<a name="line.1062"></a> |
| <span class="sourceLineNo">1063</span> long rootLevelIndexPos = out.getPos();<a name="line.1063"></a> |
| <span class="sourceLineNo">1064</span><a name="line.1064"></a> |
| <span class="sourceLineNo">1065</span> {<a name="line.1065"></a> |
| <span class="sourceLineNo">1066</span> DataOutput blockStream = blockWriter.startWriting(BlockType.ROOT_INDEX);<a name="line.1066"></a> |
| <span class="sourceLineNo">1067</span> indexBlockEncoder.encode(rootChunk, true, blockStream);<a name="line.1067"></a> |
| <span class="sourceLineNo">1068</span> if (midKeyMetadata != null) blockStream.write(midKeyMetadata);<a name="line.1068"></a> |
| <span class="sourceLineNo">1069</span> blockWriter.writeHeaderAndData(out);<a name="line.1069"></a> |
| <span class="sourceLineNo">1070</span> if (cacheConf != null) {<a name="line.1070"></a> |
| <span class="sourceLineNo">1071</span> cacheConf.getBlockCache().ifPresent(cache -> {<a name="line.1071"></a> |
| <span class="sourceLineNo">1072</span> HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);<a name="line.1072"></a> |
| <span class="sourceLineNo">1073</span> cache.cacheBlock(new BlockCacheKey(nameForCaching, rootLevelIndexPos, true,<a name="line.1073"></a> |
| <span class="sourceLineNo">1074</span> blockForCaching.getBlockType()), blockForCaching);<a name="line.1074"></a> |
| <span class="sourceLineNo">1075</span> });<a name="line.1075"></a> |
| <span class="sourceLineNo">1076</span> }<a name="line.1076"></a> |
| <span class="sourceLineNo">1077</span> }<a name="line.1077"></a> |
| <span class="sourceLineNo">1078</span><a name="line.1078"></a> |
| <span class="sourceLineNo">1079</span> // Add root index block size<a name="line.1079"></a> |
| <span class="sourceLineNo">1080</span> totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();<a name="line.1080"></a> |
| <span class="sourceLineNo">1081</span> totalBlockUncompressedSize += blockWriter.getUncompressedSizeWithoutHeader();<a name="line.1081"></a> |
| <span class="sourceLineNo">1082</span><a name="line.1082"></a> |
| <span class="sourceLineNo">1083</span> if (LOG.isTraceEnabled()) {<a name="line.1083"></a> |
| <span class="sourceLineNo">1084</span> LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "<a name="line.1084"></a> |
| <span class="sourceLineNo">1085</span> + rootLevelIndexPos + ", " + rootChunk.getNumEntries() + " root-level entries, "<a name="line.1085"></a> |
| <span class="sourceLineNo">1086</span> + totalNumEntries + " total entries, "<a name="line.1086"></a> |
| <span class="sourceLineNo">1087</span> + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) + " on-disk size, "<a name="line.1087"></a> |
| <span class="sourceLineNo">1088</span> + StringUtils.humanReadableInt(totalBlockUncompressedSize) + " total uncompressed size.");<a name="line.1088"></a> |
| <span class="sourceLineNo">1089</span> }<a name="line.1089"></a> |
| <span class="sourceLineNo">1090</span> return rootLevelIndexPos;<a name="line.1090"></a> |
| <span class="sourceLineNo">1091</span> }<a name="line.1091"></a> |
| <span class="sourceLineNo">1092</span><a name="line.1092"></a> |
| <span class="sourceLineNo">1093</span> /**<a name="line.1093"></a> |
| <span class="sourceLineNo">1094</span> * Writes the block index data as a single level only. Does not do any block framing.<a name="line.1094"></a> |
| <span class="sourceLineNo">1095</span> * @param out the buffered output stream to write the index to. Typically a stream<a name="line.1095"></a> |
| <span class="sourceLineNo">1096</span> * writing into an {@link HFile} block.<a name="line.1096"></a> |
| <span class="sourceLineNo">1097</span> * @param description a short description of the index being written. Used in a log message.<a name="line.1097"></a> |
| <span class="sourceLineNo">1098</span> */<a name="line.1098"></a> |
| <span class="sourceLineNo">1099</span> public void writeSingleLevelIndex(DataOutput out, String description) throws IOException {<a name="line.1099"></a> |
| <span class="sourceLineNo">1100</span> expectNumLevels(1);<a name="line.1100"></a> |
| <span class="sourceLineNo">1101</span><a name="line.1101"></a> |
| <span class="sourceLineNo">1102</span> if (!singleLevelOnly) throw new IOException("Single-level mode is turned off");<a name="line.1102"></a> |
| <span class="sourceLineNo">1103</span><a name="line.1103"></a> |
| <span class="sourceLineNo">1104</span> if (rootChunk.getNumEntries() > 0)<a name="line.1104"></a> |
| <span class="sourceLineNo">1105</span> throw new IOException("Root-level entries already added in " + "single-level mode");<a name="line.1105"></a> |
| <span class="sourceLineNo">1106</span><a name="line.1106"></a> |
| <span class="sourceLineNo">1107</span> rootChunk = curInlineChunk;<a name="line.1107"></a> |
| <span class="sourceLineNo">1108</span> curInlineChunk = new BlockIndexChunkImpl();<a name="line.1108"></a> |
| <span class="sourceLineNo">1109</span><a name="line.1109"></a> |
| <span class="sourceLineNo">1110</span> if (LOG.isTraceEnabled()) {<a name="line.1110"></a> |
| <span class="sourceLineNo">1111</span> LOG.trace("Wrote a single-level " + description + " index with " + rootChunk.getNumEntries()<a name="line.1111"></a> |
| <span class="sourceLineNo">1112</span> + " entries, " + rootChunk.getRootSize() + " bytes");<a name="line.1112"></a> |
| <span class="sourceLineNo">1113</span> }<a name="line.1113"></a> |
| <span class="sourceLineNo">1114</span> indexBlockEncoder.encode(rootChunk, true, out);<a name="line.1114"></a> |
| <span class="sourceLineNo">1115</span> }<a name="line.1115"></a> |
| <span class="sourceLineNo">1116</span><a name="line.1116"></a> |
| <span class="sourceLineNo">1117</span> /**<a name="line.1117"></a> |
| <span class="sourceLineNo">1118</span> * Split the current level of the block index into intermediate index blocks of permitted size<a name="line.1118"></a> |
| <span class="sourceLineNo">1119</span> * and write those blocks to disk. Return the next level of the block index referencing those<a name="line.1119"></a> |
| <span class="sourceLineNo">1120</span> * intermediate-level blocks.<a name="line.1120"></a> |
| <span class="sourceLineNo">1121</span> * @param currentLevel the current level of the block index, such as a chunk referencing all<a name="line.1121"></a> |
| <span class="sourceLineNo">1122</span> * leaf-level index blocks<a name="line.1122"></a> |
| <span class="sourceLineNo">1123</span> * @return the parent level block index, which becomes the root index after a few (usually zero)<a name="line.1123"></a> |
| <span class="sourceLineNo">1124</span> * iterations<a name="line.1124"></a> |
| <span class="sourceLineNo">1125</span> */<a name="line.1125"></a> |
| <span class="sourceLineNo">1126</span> private BlockIndexChunk writeIntermediateLevel(FSDataOutputStream out,<a name="line.1126"></a> |
| <span class="sourceLineNo">1127</span> BlockIndexChunk currentLevel) throws IOException {<a name="line.1127"></a> |
| <span class="sourceLineNo">1128</span> // Entries referencing intermediate-level blocks we are about to create.<a name="line.1128"></a> |
| <span class="sourceLineNo">1129</span> BlockIndexChunk parent = new BlockIndexChunkImpl();<a name="line.1129"></a> |
| <span class="sourceLineNo">1130</span><a name="line.1130"></a> |
| <span class="sourceLineNo">1131</span> // The current intermediate-level block index chunk.<a name="line.1131"></a> |
| <span class="sourceLineNo">1132</span> BlockIndexChunk curChunk = new BlockIndexChunkImpl();<a name="line.1132"></a> |
| <span class="sourceLineNo">1133</span><a name="line.1133"></a> |
| <span class="sourceLineNo">1134</span> for (int i = 0; i < currentLevel.getNumEntries(); ++i) {<a name="line.1134"></a> |
| <span class="sourceLineNo">1135</span> curChunk.add(currentLevel.getBlockKey(i), currentLevel.getBlockOffset(i),<a name="line.1135"></a> |
| <span class="sourceLineNo">1136</span> currentLevel.getOnDiskDataSize(i));<a name="line.1136"></a> |
| <span class="sourceLineNo">1137</span><a name="line.1137"></a> |
| <span class="sourceLineNo">1138</span> // HBASE-16288: We have to have at least minIndexNumEntries(16) items in the index so that<a name="line.1138"></a> |
| <span class="sourceLineNo">1139</span> // we won't end up with too many levels for an index with very large rowKeys. Also, if the<a name="line.1139"></a> |
| <span class="sourceLineNo">1140</span> // first key is larger than maxChunkSize this will cause infinite recursion.<a name="line.1140"></a> |
| <span class="sourceLineNo">1141</span> if (i >= minIndexNumEntries && curChunk.getRootSize() >= maxChunkSize) {<a name="line.1141"></a> |
| <span class="sourceLineNo">1142</span> writeIntermediateBlock(out, parent, curChunk);<a name="line.1142"></a> |
| <span class="sourceLineNo">1143</span> }<a name="line.1143"></a> |
| <span class="sourceLineNo">1144</span> }<a name="line.1144"></a> |
| <span class="sourceLineNo">1145</span><a name="line.1145"></a> |
| <span class="sourceLineNo">1146</span> if (curChunk.getNumEntries() > 0) {<a name="line.1146"></a> |
| <span class="sourceLineNo">1147</span> writeIntermediateBlock(out, parent, curChunk);<a name="line.1147"></a> |
| <span class="sourceLineNo">1148</span> }<a name="line.1148"></a> |
| <span class="sourceLineNo">1149</span><a name="line.1149"></a> |
| <span class="sourceLineNo">1150</span> return parent;<a name="line.1150"></a> |
| <span class="sourceLineNo">1151</span> }<a name="line.1151"></a> |
| <span class="sourceLineNo">1152</span><a name="line.1152"></a> |
| <span class="sourceLineNo">1153</span> private void writeIntermediateBlock(FSDataOutputStream out, BlockIndexChunk parent,<a name="line.1153"></a> |
| <span class="sourceLineNo">1154</span> BlockIndexChunk curChunk) throws IOException {<a name="line.1154"></a> |
| <span class="sourceLineNo">1155</span> long beginOffset = out.getPos();<a name="line.1155"></a> |
| <span class="sourceLineNo">1156</span> DataOutputStream dos = blockWriter.startWriting(BlockType.INTERMEDIATE_INDEX);<a name="line.1156"></a> |
| <span class="sourceLineNo">1157</span> indexBlockEncoder.encode(curChunk, false, dos);<a name="line.1157"></a> |
| <span class="sourceLineNo">1158</span> byte[] curFirstKey = curChunk.getBlockKey(0);<a name="line.1158"></a> |
| <span class="sourceLineNo">1159</span> blockWriter.writeHeaderAndData(out);<a name="line.1159"></a> |
| <span class="sourceLineNo">1160</span><a name="line.1160"></a> |
| <span class="sourceLineNo">1161</span> if (getCacheOnWrite()) {<a name="line.1161"></a> |
| <span class="sourceLineNo">1162</span> cacheConf.getBlockCache().ifPresent(cache -> {<a name="line.1162"></a> |
| <span class="sourceLineNo">1163</span> HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);<a name="line.1163"></a> |
| <span class="sourceLineNo">1164</span> cache.cacheBlock(<a name="line.1164"></a> |
| <span class="sourceLineNo">1165</span> new BlockCacheKey(nameForCaching, beginOffset, true, blockForCaching.getBlockType()),<a name="line.1165"></a> |
| <span class="sourceLineNo">1166</span> blockForCaching);<a name="line.1166"></a> |
| <span class="sourceLineNo">1167</span> });<a name="line.1167"></a> |
| <span class="sourceLineNo">1168</span> }<a name="line.1168"></a> |
| <span class="sourceLineNo">1169</span><a name="line.1169"></a> |
| <span class="sourceLineNo">1170</span> // Add intermediate index block size<a name="line.1170"></a> |
| <span class="sourceLineNo">1171</span> totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();<a name="line.1171"></a> |
| <span class="sourceLineNo">1172</span> totalBlockUncompressedSize += blockWriter.getUncompressedSizeWithoutHeader();<a name="line.1172"></a> |
| <span class="sourceLineNo">1173</span><a name="line.1173"></a> |
| <span class="sourceLineNo">1174</span> // OFFSET is the beginning offset of the chunk of block index entries.<a name="line.1174"></a> |
| <span class="sourceLineNo">1175</span> // SIZE is the total byte size of the chunk of block index entries<a name="line.1175"></a> |
| <span class="sourceLineNo">1176</span> // + the secondary index size<a name="line.1176"></a> |
| <span class="sourceLineNo">1177</span> // FIRST_KEY is the first key in the chunk of block index<a name="line.1177"></a> |
| <span class="sourceLineNo">1178</span> // entries.<a name="line.1178"></a> |
| <span class="sourceLineNo">1179</span> parent.add(curFirstKey, beginOffset, blockWriter.getOnDiskSizeWithHeader());<a name="line.1179"></a> |
| <span class="sourceLineNo">1180</span><a name="line.1180"></a> |
| <span class="sourceLineNo">1181</span> // clear current block index chunk<a name="line.1181"></a> |
| <span class="sourceLineNo">1182</span> curChunk.clear();<a name="line.1182"></a> |
| <span class="sourceLineNo">1183</span> curFirstKey = null;<a name="line.1183"></a> |
| <span class="sourceLineNo">1184</span> }<a name="line.1184"></a> |
| <span class="sourceLineNo">1185</span><a name="line.1185"></a> |
| <span class="sourceLineNo">1186</span> /** Returns how many block index entries there are in the root level */<a name="line.1186"></a> |
| <span class="sourceLineNo">1187</span> public final int getNumRootEntries() {<a name="line.1187"></a> |
| <span class="sourceLineNo">1188</span> return rootChunk.getNumEntries();<a name="line.1188"></a> |
| <span class="sourceLineNo">1189</span> }<a name="line.1189"></a> |
| <span class="sourceLineNo">1190</span><a name="line.1190"></a> |
| <span class="sourceLineNo">1191</span> /** Returns the number of levels in this block index. */<a name="line.1191"></a> |
| <span class="sourceLineNo">1192</span> public int getNumLevels() {<a name="line.1192"></a> |
| <span class="sourceLineNo">1193</span> return numLevels;<a name="line.1193"></a> |
| <span class="sourceLineNo">1194</span> }<a name="line.1194"></a> |
| <span class="sourceLineNo">1195</span><a name="line.1195"></a> |
| <span class="sourceLineNo">1196</span> private void expectNumLevels(int expectedNumLevels) {<a name="line.1196"></a> |
| <span class="sourceLineNo">1197</span> if (numLevels != expectedNumLevels) {<a name="line.1197"></a> |
| <span class="sourceLineNo">1198</span> throw new IllegalStateException("Number of block index levels is " + numLevels<a name="line.1198"></a> |
| <span class="sourceLineNo">1199</span> + "but is expected to be " + expectedNumLevels);<a name="line.1199"></a> |
| <span class="sourceLineNo">1200</span> }<a name="line.1200"></a> |
| <span class="sourceLineNo">1201</span> }<a name="line.1201"></a> |
| <span class="sourceLineNo">1202</span><a name="line.1202"></a> |
| <span class="sourceLineNo">1203</span> /**<a name="line.1203"></a> |
| <span class="sourceLineNo">1204</span> * Whether there is an inline block ready to be written. In general, we write a leaf-level<a name="line.1204"></a> |
| <span class="sourceLineNo">1205</span> * index block as an inline block as soon as its size as serialized in the non-root format<a name="line.1205"></a> |
| <span class="sourceLineNo">1206</span> * reaches a certain threshold.<a name="line.1206"></a> |
| <span class="sourceLineNo">1207</span> */<a name="line.1207"></a> |
| <span class="sourceLineNo">1208</span> @Override<a name="line.1208"></a> |
| <span class="sourceLineNo">1209</span> public boolean shouldWriteBlock(boolean closing) {<a name="line.1209"></a> |
| <span class="sourceLineNo">1210</span> if (singleLevelOnly) {<a name="line.1210"></a> |
| <span class="sourceLineNo">1211</span> throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);<a name="line.1211"></a> |
| <span class="sourceLineNo">1212</span> }<a name="line.1212"></a> |
| <span class="sourceLineNo">1213</span><a name="line.1213"></a> |
| <span class="sourceLineNo">1214</span> if (curInlineChunk == null) {<a name="line.1214"></a> |
| <span class="sourceLineNo">1215</span> throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been "<a name="line.1215"></a> |
| <span class="sourceLineNo">1216</span> + "called with closing=true and then called again?");<a name="line.1216"></a> |
| <span class="sourceLineNo">1217</span> }<a name="line.1217"></a> |
| <span class="sourceLineNo">1218</span><a name="line.1218"></a> |
| <span class="sourceLineNo">1219</span> if (curInlineChunk.getNumEntries() == 0) {<a name="line.1219"></a> |
| <span class="sourceLineNo">1220</span> return false;<a name="line.1220"></a> |
| <span class="sourceLineNo">1221</span> }<a name="line.1221"></a> |
| <span class="sourceLineNo">1222</span><a name="line.1222"></a> |
| <span class="sourceLineNo">1223</span> // We do have some entries in the current inline chunk.<a name="line.1223"></a> |
| <span class="sourceLineNo">1224</span> if (closing) {<a name="line.1224"></a> |
| <span class="sourceLineNo">1225</span> if (rootChunk.getNumEntries() == 0) {<a name="line.1225"></a> |
| <span class="sourceLineNo">1226</span> // We did not add any leaf-level blocks yet. Instead of creating a<a name="line.1226"></a> |
| <span class="sourceLineNo">1227</span> // leaf level with one block, move these entries to the root level.<a name="line.1227"></a> |
| <span class="sourceLineNo">1228</span><a name="line.1228"></a> |
| <span class="sourceLineNo">1229</span> expectNumLevels(1);<a name="line.1229"></a> |
| <span class="sourceLineNo">1230</span> rootChunk = curInlineChunk;<a name="line.1230"></a> |
| <span class="sourceLineNo">1231</span> curInlineChunk = null; // Disallow adding any more index entries.<a name="line.1231"></a> |
| <span class="sourceLineNo">1232</span> return false;<a name="line.1232"></a> |
| <span class="sourceLineNo">1233</span> }<a name="line.1233"></a> |
| <span class="sourceLineNo">1234</span><a name="line.1234"></a> |
| <span class="sourceLineNo">1235</span> return true;<a name="line.1235"></a> |
| <span class="sourceLineNo">1236</span> } else {<a name="line.1236"></a> |
| <span class="sourceLineNo">1237</span> return curInlineChunk.getNonRootSize() >= maxChunkSize;<a name="line.1237"></a> |
| <span class="sourceLineNo">1238</span> }<a name="line.1238"></a> |
| <span class="sourceLineNo">1239</span> }<a name="line.1239"></a> |
| <span class="sourceLineNo">1240</span><a name="line.1240"></a> |
| <span class="sourceLineNo">1241</span> /**<a name="line.1241"></a> |
| <span class="sourceLineNo">1242</span> * Write out the current inline index block. Inline blocks are non-root blocks, so the non-root<a name="line.1242"></a> |
| <span class="sourceLineNo">1243</span> * index format is used.<a name="line.1243"></a> |
| <span class="sourceLineNo">1244</span> */<a name="line.1244"></a> |
| <span class="sourceLineNo">1245</span> @Override<a name="line.1245"></a> |
| <span class="sourceLineNo">1246</span> public void writeInlineBlock(DataOutput out) throws IOException {<a name="line.1246"></a> |
| <span class="sourceLineNo">1247</span> if (singleLevelOnly) throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);<a name="line.1247"></a> |
| <span class="sourceLineNo">1248</span><a name="line.1248"></a> |
| <span class="sourceLineNo">1249</span> // Write the inline block index to the output stream in the non-root<a name="line.1249"></a> |
| <span class="sourceLineNo">1250</span> // index block format.<a name="line.1250"></a> |
| <span class="sourceLineNo">1251</span> indexBlockEncoder.encode(curInlineChunk, false, out);<a name="line.1251"></a> |
| <span class="sourceLineNo">1252</span><a name="line.1252"></a> |
| <span class="sourceLineNo">1253</span> // Save the first key of the inline block so that we can add it to the<a name="line.1253"></a> |
| <span class="sourceLineNo">1254</span> // parent-level index.<a name="line.1254"></a> |
| <span class="sourceLineNo">1255</span> firstKey = curInlineChunk.getBlockKey(0);<a name="line.1255"></a> |
| <span class="sourceLineNo">1256</span><a name="line.1256"></a> |
| <span class="sourceLineNo">1257</span> // Start a new inline index block<a name="line.1257"></a> |
| <span class="sourceLineNo">1258</span> curInlineChunk.clear();<a name="line.1258"></a> |
| <span class="sourceLineNo">1259</span> }<a name="line.1259"></a> |
| <span class="sourceLineNo">1260</span><a name="line.1260"></a> |
| <span class="sourceLineNo">1261</span> /**<a name="line.1261"></a> |
| <span class="sourceLineNo">1262</span> * Called after an inline block has been written so that we can add an entry referring to that<a name="line.1262"></a> |
| <span class="sourceLineNo">1263</span> * block to the parent-level index.<a name="line.1263"></a> |
| <span class="sourceLineNo">1264</span> */<a name="line.1264"></a> |
| <span class="sourceLineNo">1265</span> @Override<a name="line.1265"></a> |
| <span class="sourceLineNo">1266</span> public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {<a name="line.1266"></a> |
| <span class="sourceLineNo">1267</span> // Add leaf index block size<a name="line.1267"></a> |
| <span class="sourceLineNo">1268</span> totalBlockOnDiskSize += onDiskSize;<a name="line.1268"></a> |
| <span class="sourceLineNo">1269</span> totalBlockUncompressedSize += uncompressedSize;<a name="line.1269"></a> |
| <span class="sourceLineNo">1270</span><a name="line.1270"></a> |
| <span class="sourceLineNo">1271</span> if (singleLevelOnly) throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);<a name="line.1271"></a> |
| <span class="sourceLineNo">1272</span><a name="line.1272"></a> |
| <span class="sourceLineNo">1273</span> if (firstKey == null) {<a name="line.1273"></a> |
| <span class="sourceLineNo">1274</span> throw new IllegalStateException(<a name="line.1274"></a> |
| <span class="sourceLineNo">1275</span> "Trying to add second-level index " + "entry with offset=" + offset + " and onDiskSize="<a name="line.1275"></a> |
| <span class="sourceLineNo">1276</span> + onDiskSize + "but the first key was not set in writeInlineBlock");<a name="line.1276"></a> |
| <span class="sourceLineNo">1277</span> }<a name="line.1277"></a> |
| <span class="sourceLineNo">1278</span><a name="line.1278"></a> |
| <span class="sourceLineNo">1279</span> if (rootChunk.getNumEntries() == 0) {<a name="line.1279"></a> |
| <span class="sourceLineNo">1280</span> // We are writing the first leaf block, so increase index level.<a name="line.1280"></a> |
| <span class="sourceLineNo">1281</span> expectNumLevels(1);<a name="line.1281"></a> |
| <span class="sourceLineNo">1282</span> numLevels = 2;<a name="line.1282"></a> |
| <span class="sourceLineNo">1283</span> }<a name="line.1283"></a> |
| <span class="sourceLineNo">1284</span><a name="line.1284"></a> |
| <span class="sourceLineNo">1285</span> // Add another entry to the second-level index. Include the number of<a name="line.1285"></a> |
| <span class="sourceLineNo">1286</span> // entries in all previous leaf-level chunks for mid-key calculation.<a name="line.1286"></a> |
| <span class="sourceLineNo">1287</span> rootChunk.add(firstKey, offset, onDiskSize, totalNumEntries);<a name="line.1287"></a> |
| <span class="sourceLineNo">1288</span> firstKey = null;<a name="line.1288"></a> |
| <span class="sourceLineNo">1289</span> }<a name="line.1289"></a> |
| <span class="sourceLineNo">1290</span><a name="line.1290"></a> |
| <span class="sourceLineNo">1291</span> @Override<a name="line.1291"></a> |
| <span class="sourceLineNo">1292</span> public BlockType getInlineBlockType() {<a name="line.1292"></a> |
| <span class="sourceLineNo">1293</span> return BlockType.LEAF_INDEX;<a name="line.1293"></a> |
| <span class="sourceLineNo">1294</span> }<a name="line.1294"></a> |
| <span class="sourceLineNo">1295</span><a name="line.1295"></a> |
| <span class="sourceLineNo">1296</span> /**<a name="line.1296"></a> |
| <span class="sourceLineNo">1297</span> * Add one index entry to the current leaf-level block. When the leaf-level block gets large<a name="line.1297"></a> |
| <span class="sourceLineNo">1298</span> * enough, it will be flushed to disk as an inline block.<a name="line.1298"></a> |
| <span class="sourceLineNo">1299</span> * @param firstKey the first key of the data block<a name="line.1299"></a> |
| <span class="sourceLineNo">1300</span> * @param blockOffset the offset of the data block<a name="line.1300"></a> |
| <span class="sourceLineNo">1301</span> * @param blockDataSize the on-disk size of the data block ({@link HFile} format version 2), or<a name="line.1301"></a> |
| <span class="sourceLineNo">1302</span> * the uncompressed size of the data block ( {@link HFile} format version<a name="line.1302"></a> |
| <span class="sourceLineNo">1303</span> * 1).<a name="line.1303"></a> |
| <span class="sourceLineNo">1304</span> */<a name="line.1304"></a> |
| <span class="sourceLineNo">1305</span> public void addEntry(byte[] firstKey, long blockOffset, int blockDataSize) {<a name="line.1305"></a> |
| <span class="sourceLineNo">1306</span> curInlineChunk.add(firstKey, blockOffset, blockDataSize);<a name="line.1306"></a> |
| <span class="sourceLineNo">1307</span> ++totalNumEntries;<a name="line.1307"></a> |
| <span class="sourceLineNo">1308</span> }<a name="line.1308"></a> |
| <span class="sourceLineNo">1309</span><a name="line.1309"></a> |
| <span class="sourceLineNo">1310</span> /**<a name="line.1310"></a> |
| <span class="sourceLineNo">1311</span> * @throws IOException if we happened to write a multi-level index.<a name="line.1311"></a> |
| <span class="sourceLineNo">1312</span> */<a name="line.1312"></a> |
| <span class="sourceLineNo">1313</span> public void ensureSingleLevel() throws IOException {<a name="line.1313"></a> |
| <span class="sourceLineNo">1314</span> if (numLevels > 1) {<a name="line.1314"></a> |
| <span class="sourceLineNo">1315</span> throw new IOException(<a name="line.1315"></a> |
| <span class="sourceLineNo">1316</span> "Wrote a " + numLevels + "-level index with " + rootChunk.getNumEntries()<a name="line.1316"></a> |
| <span class="sourceLineNo">1317</span> + " root-level entries, but " + "this is expected to be a single-level block index.");<a name="line.1317"></a> |
| <span class="sourceLineNo">1318</span> }<a name="line.1318"></a> |
| <span class="sourceLineNo">1319</span> }<a name="line.1319"></a> |
| <span class="sourceLineNo">1320</span><a name="line.1320"></a> |
| <span class="sourceLineNo">1321</span> /**<a name="line.1321"></a> |
| <span class="sourceLineNo">1322</span> * @return true if we are using cache-on-write. This is configured by the caller of the<a name="line.1322"></a> |
| <span class="sourceLineNo">1323</span> * constructor by either passing a valid block cache or null.<a name="line.1323"></a> |
| <span class="sourceLineNo">1324</span> */<a name="line.1324"></a> |
| <span class="sourceLineNo">1325</span> @Override<a name="line.1325"></a> |
| <span class="sourceLineNo">1326</span> public boolean getCacheOnWrite() {<a name="line.1326"></a> |
| <span class="sourceLineNo">1327</span> return cacheConf != null && cacheConf.shouldCacheIndexesOnWrite();<a name="line.1327"></a> |
| <span class="sourceLineNo">1328</span> }<a name="line.1328"></a> |
| <span class="sourceLineNo">1329</span><a name="line.1329"></a> |
| <span class="sourceLineNo">1330</span> /**<a name="line.1330"></a> |
| <span class="sourceLineNo">1331</span> * The total uncompressed size of the root index block, intermediate-level index blocks, and<a name="line.1331"></a> |
| <span class="sourceLineNo">1332</span> * leaf-level index blocks.<a name="line.1332"></a> |
| <span class="sourceLineNo">1333</span> * @return the total uncompressed size of all index blocks<a name="line.1333"></a> |
| <span class="sourceLineNo">1334</span> */<a name="line.1334"></a> |
| <span class="sourceLineNo">1335</span> public long getTotalUncompressedSize() {<a name="line.1335"></a> |
| <span class="sourceLineNo">1336</span> return totalBlockUncompressedSize;<a name="line.1336"></a> |
| <span class="sourceLineNo">1337</span> }<a name="line.1337"></a> |
| <span class="sourceLineNo">1338</span><a name="line.1338"></a> |
| <span class="sourceLineNo">1339</span> }<a name="line.1339"></a> |
| <span class="sourceLineNo">1340</span><a name="line.1340"></a> |
| <span class="sourceLineNo">1341</span> /**<a name="line.1341"></a> |
| <span class="sourceLineNo">1342</span> * A single chunk of the block index in the process of writing. The data in this chunk can become<a name="line.1342"></a> |
| <span class="sourceLineNo">1343</span> * a leaf-level, intermediate-level, or root index block.<a name="line.1343"></a> |
| <span class="sourceLineNo">1344</span> */<a name="line.1344"></a> |
| <span class="sourceLineNo">1345</span> static class BlockIndexChunkImpl implements BlockIndexChunk {<a name="line.1345"></a> |
| <span class="sourceLineNo">1346</span><a name="line.1346"></a> |
| <span class="sourceLineNo">1347</span> /** First keys of the key range corresponding to each index entry. */<a name="line.1347"></a> |
| <span class="sourceLineNo">1348</span> private final List<byte[]> blockKeys = new ArrayList<>();<a name="line.1348"></a> |
| <span class="sourceLineNo">1349</span><a name="line.1349"></a> |
| <span class="sourceLineNo">1350</span> /** Block offset in backing stream. */<a name="line.1350"></a> |
| <span class="sourceLineNo">1351</span> private final List<Long> blockOffsets = new ArrayList<>();<a name="line.1351"></a> |
| <span class="sourceLineNo">1352</span><a name="line.1352"></a> |
| <span class="sourceLineNo">1353</span> /** On-disk data sizes of lower-level data or index blocks. */<a name="line.1353"></a> |
| <span class="sourceLineNo">1354</span> private final List<Integer> onDiskDataSizes = new ArrayList<>();<a name="line.1354"></a> |
| <span class="sourceLineNo">1355</span><a name="line.1355"></a> |
| <span class="sourceLineNo">1356</span> /**<a name="line.1356"></a> |
| <span class="sourceLineNo">1357</span> * The cumulative number of sub-entries, i.e. entries on deeper-level block index entries.<a name="line.1357"></a> |
| <span class="sourceLineNo">1358</span> * numSubEntriesAt[i] is the number of sub-entries in the blocks corresponding to this chunk's<a name="line.1358"></a> |
| <span class="sourceLineNo">1359</span> * entries #0 through #i inclusively.<a name="line.1359"></a> |
| <span class="sourceLineNo">1360</span> */<a name="line.1360"></a> |
| <span class="sourceLineNo">1361</span> private final List<Long> numSubEntriesAt = new ArrayList<>();<a name="line.1361"></a> |
| <span class="sourceLineNo">1362</span><a name="line.1362"></a> |
| <span class="sourceLineNo">1363</span> /**<a name="line.1363"></a> |
| <span class="sourceLineNo">1364</span> * The offset of the next entry to be added, relative to the end of the "secondary index" in the<a name="line.1364"></a> |
| <span class="sourceLineNo">1365</span> * "non-root" format representation of this index chunk. This is the next value to be added to<a name="line.1365"></a> |
| <span class="sourceLineNo">1366</span> * the secondary index.<a name="line.1366"></a> |
| <span class="sourceLineNo">1367</span> */<a name="line.1367"></a> |
| <span class="sourceLineNo">1368</span> private int curTotalNonRootEntrySize = 0;<a name="line.1368"></a> |
| <span class="sourceLineNo">1369</span><a name="line.1369"></a> |
| <span class="sourceLineNo">1370</span> /**<a name="line.1370"></a> |
| <span class="sourceLineNo">1371</span> * The accumulated size of this chunk if stored in the root index format.<a name="line.1371"></a> |
| <span class="sourceLineNo">1372</span> */<a name="line.1372"></a> |
| <span class="sourceLineNo">1373</span> private int curTotalRootSize = 0;<a name="line.1373"></a> |
| <span class="sourceLineNo">1374</span><a name="line.1374"></a> |
| <span class="sourceLineNo">1375</span> /**<a name="line.1375"></a> |
| <span class="sourceLineNo">1376</span> * The "secondary index" used for binary search over variable-length records in a "non-root"<a name="line.1376"></a> |
| <span class="sourceLineNo">1377</span> * format block. These offsets are relative to the end of this secondary index.<a name="line.1377"></a> |
| <span class="sourceLineNo">1378</span> */<a name="line.1378"></a> |
| <span class="sourceLineNo">1379</span> private final List<Integer> secondaryIndexOffsetMarks = new ArrayList<>();<a name="line.1379"></a> |
| <span class="sourceLineNo">1380</span><a name="line.1380"></a> |
| <span class="sourceLineNo">1381</span> /**<a name="line.1381"></a> |
| <span class="sourceLineNo">1382</span> * Adds a new entry to this block index chunk.<a name="line.1382"></a> |
| <span class="sourceLineNo">1383</span> * @param firstKey the first key in the block pointed to by this entry<a name="line.1383"></a> |
| <span class="sourceLineNo">1384</span> * @param blockOffset the offset of the next-level block pointed to by this entry<a name="line.1384"></a> |
| <span class="sourceLineNo">1385</span> * @param onDiskDataSize the on-disk data of the block pointed to by this entry,<a name="line.1385"></a> |
| <span class="sourceLineNo">1386</span> * including header size<a name="line.1386"></a> |
| <span class="sourceLineNo">1387</span> * @param curTotalNumSubEntries if this chunk is the root index chunk under construction, this<a name="line.1387"></a> |
| <span class="sourceLineNo">1388</span> * specifies the current total number of sub-entries in all<a name="line.1388"></a> |
| <span class="sourceLineNo">1389</span> * leaf-level chunks, including the one corresponding to the<a name="line.1389"></a> |
| <span class="sourceLineNo">1390</span> * second-level entry being added.<a name="line.1390"></a> |
| <span class="sourceLineNo">1391</span> */<a name="line.1391"></a> |
| <span class="sourceLineNo">1392</span> @Override<a name="line.1392"></a> |
| <span class="sourceLineNo">1393</span> public void add(byte[] firstKey, long blockOffset, int onDiskDataSize,<a name="line.1393"></a> |
| <span class="sourceLineNo">1394</span> long curTotalNumSubEntries) {<a name="line.1394"></a> |
| <span class="sourceLineNo">1395</span> // Record the offset for the secondary index<a name="line.1395"></a> |
| <span class="sourceLineNo">1396</span> secondaryIndexOffsetMarks.add(curTotalNonRootEntrySize);<a name="line.1396"></a> |
| <span class="sourceLineNo">1397</span> curTotalNonRootEntrySize += SECONDARY_INDEX_ENTRY_OVERHEAD + firstKey.length;<a name="line.1397"></a> |
| <span class="sourceLineNo">1398</span><a name="line.1398"></a> |
| <span class="sourceLineNo">1399</span> curTotalRootSize += Bytes.SIZEOF_LONG + Bytes.SIZEOF_INT<a name="line.1399"></a> |
| <span class="sourceLineNo">1400</span> + WritableUtils.getVIntSize(firstKey.length) + firstKey.length;<a name="line.1400"></a> |
| <span class="sourceLineNo">1401</span><a name="line.1401"></a> |
| <span class="sourceLineNo">1402</span> blockKeys.add(firstKey);<a name="line.1402"></a> |
| <span class="sourceLineNo">1403</span> blockOffsets.add(blockOffset);<a name="line.1403"></a> |
| <span class="sourceLineNo">1404</span> onDiskDataSizes.add(onDiskDataSize);<a name="line.1404"></a> |
| <span class="sourceLineNo">1405</span><a name="line.1405"></a> |
| <span class="sourceLineNo">1406</span> if (curTotalNumSubEntries != -1) {<a name="line.1406"></a> |
| <span class="sourceLineNo">1407</span> numSubEntriesAt.add(curTotalNumSubEntries);<a name="line.1407"></a> |
| <span class="sourceLineNo">1408</span><a name="line.1408"></a> |
| <span class="sourceLineNo">1409</span> // Make sure the parallel arrays are in sync.<a name="line.1409"></a> |
| <span class="sourceLineNo">1410</span> if (numSubEntriesAt.size() != blockKeys.size()) {<a name="line.1410"></a> |
| <span class="sourceLineNo">1411</span> throw new IllegalStateException("Only have key/value count " + "stats for "<a name="line.1411"></a> |
| <span class="sourceLineNo">1412</span> + numSubEntriesAt.size() + " block index " + "entries out of " + blockKeys.size());<a name="line.1412"></a> |
| <span class="sourceLineNo">1413</span> }<a name="line.1413"></a> |
| <span class="sourceLineNo">1414</span> }<a name="line.1414"></a> |
| <span class="sourceLineNo">1415</span> }<a name="line.1415"></a> |
| <span class="sourceLineNo">1416</span><a name="line.1416"></a> |
| <span class="sourceLineNo">1417</span> /**<a name="line.1417"></a> |
| <span class="sourceLineNo">1418</span> * The same as {@link #add(byte[], long, int, long)} but does not take the key/value into<a name="line.1418"></a> |
| <span class="sourceLineNo">1419</span> * account. Used for single-level indexes.<a name="line.1419"></a> |
| <span class="sourceLineNo">1420</span> * @see #add(byte[], long, int, long)<a name="line.1420"></a> |
| <span class="sourceLineNo">1421</span> */<a name="line.1421"></a> |
| <span class="sourceLineNo">1422</span> @Override<a name="line.1422"></a> |
| <span class="sourceLineNo">1423</span> public void add(byte[] firstKey, long blockOffset, int onDiskDataSize) {<a name="line.1423"></a> |
| <span class="sourceLineNo">1424</span> add(firstKey, blockOffset, onDiskDataSize, -1);<a name="line.1424"></a> |
| <span class="sourceLineNo">1425</span> }<a name="line.1425"></a> |
| <span class="sourceLineNo">1426</span><a name="line.1426"></a> |
| <span class="sourceLineNo">1427</span> @Override<a name="line.1427"></a> |
| <span class="sourceLineNo">1428</span> public void clear() {<a name="line.1428"></a> |
| <span class="sourceLineNo">1429</span> blockKeys.clear();<a name="line.1429"></a> |
| <span class="sourceLineNo">1430</span> blockOffsets.clear();<a name="line.1430"></a> |
| <span class="sourceLineNo">1431</span> onDiskDataSizes.clear();<a name="line.1431"></a> |
| <span class="sourceLineNo">1432</span> secondaryIndexOffsetMarks.clear();<a name="line.1432"></a> |
| <span class="sourceLineNo">1433</span> numSubEntriesAt.clear();<a name="line.1433"></a> |
| <span class="sourceLineNo">1434</span> curTotalNonRootEntrySize = 0;<a name="line.1434"></a> |
| <span class="sourceLineNo">1435</span> curTotalRootSize = 0;<a name="line.1435"></a> |
| <span class="sourceLineNo">1436</span> }<a name="line.1436"></a> |
| <span class="sourceLineNo">1437</span><a name="line.1437"></a> |
| <span class="sourceLineNo">1438</span> /**<a name="line.1438"></a> |
| <span class="sourceLineNo">1439</span> * Finds the entry corresponding to the deeper-level index block containing the given<a name="line.1439"></a> |
| <span class="sourceLineNo">1440</span> * deeper-level entry (a "sub-entry"), assuming a global 0-based ordering of sub-entries.<a name="line.1440"></a> |
| <span class="sourceLineNo">1441</span> * <p><a name="line.1441"></a> |
| <span class="sourceLineNo">1442</span> * <i> Implementation note. </i> We are looking for i such that numSubEntriesAt[i - 1] <= k <<a name="line.1442"></a> |
| <span class="sourceLineNo">1443</span> * numSubEntriesAt[i], because a deeper-level block #i (0-based) contains sub-entries #<a name="line.1443"></a> |
| <span class="sourceLineNo">1444</span> * numSubEntriesAt[i - 1]'th through numSubEntriesAt[i] - 1, assuming a global 0-based ordering<a name="line.1444"></a> |
| <span class="sourceLineNo">1445</span> * of sub-entries. i is by definition the insertion point of k in numSubEntriesAt.<a name="line.1445"></a> |
| <span class="sourceLineNo">1446</span> * @param k sub-entry index, from 0 to the total number sub-entries - 1<a name="line.1446"></a> |
| <span class="sourceLineNo">1447</span> * @return the 0-based index of the entry corresponding to the given sub-entry<a name="line.1447"></a> |
| <span class="sourceLineNo">1448</span> */<a name="line.1448"></a> |
| <span class="sourceLineNo">1449</span> @Override<a name="line.1449"></a> |
| <span class="sourceLineNo">1450</span> public int getEntryBySubEntry(long k) {<a name="line.1450"></a> |
| <span class="sourceLineNo">1451</span> // We define mid-key as the key corresponding to k'th sub-entry<a name="line.1451"></a> |
| <span class="sourceLineNo">1452</span> // (0-based).<a name="line.1452"></a> |
| <span class="sourceLineNo">1453</span><a name="line.1453"></a> |
| <span class="sourceLineNo">1454</span> int i = Collections.binarySearch(numSubEntriesAt, k);<a name="line.1454"></a> |
| <span class="sourceLineNo">1455</span><a name="line.1455"></a> |
| <span class="sourceLineNo">1456</span> // Exact match: cumulativeWeight[i] = k. This means chunks #0 through<a name="line.1456"></a> |
| <span class="sourceLineNo">1457</span> // #i contain exactly k sub-entries, and the sub-entry #k (0-based)<a name="line.1457"></a> |
| <span class="sourceLineNo">1458</span> // is in the (i + 1)'th chunk.<a name="line.1458"></a> |
| <span class="sourceLineNo">1459</span> if (i >= 0) return i + 1;<a name="line.1459"></a> |
| <span class="sourceLineNo">1460</span><a name="line.1460"></a> |
| <span class="sourceLineNo">1461</span> // Inexact match. Return the insertion point.<a name="line.1461"></a> |
| <span class="sourceLineNo">1462</span> return -i - 1;<a name="line.1462"></a> |
| <span class="sourceLineNo">1463</span> }<a name="line.1463"></a> |
| <span class="sourceLineNo">1464</span><a name="line.1464"></a> |
| <span class="sourceLineNo">1465</span> /**<a name="line.1465"></a> |
| <span class="sourceLineNo">1466</span> * Used when writing the root block index of a multi-level block index. Serializes additional<a name="line.1466"></a> |
| <span class="sourceLineNo">1467</span> * information allowing to efficiently identify the mid-key.<a name="line.1467"></a> |
| <span class="sourceLineNo">1468</span> * @return a few serialized fields for finding the mid-key<a name="line.1468"></a> |
| <span class="sourceLineNo">1469</span> * @throws IOException if could not create metadata for computing mid-key<a name="line.1469"></a> |
| <span class="sourceLineNo">1470</span> */<a name="line.1470"></a> |
| <span class="sourceLineNo">1471</span> @Override<a name="line.1471"></a> |
| <span class="sourceLineNo">1472</span> public byte[] getMidKeyMetadata() throws IOException {<a name="line.1472"></a> |
| <span class="sourceLineNo">1473</span> ByteArrayOutputStream baos = new ByteArrayOutputStream(MID_KEY_METADATA_SIZE);<a name="line.1473"></a> |
| <span class="sourceLineNo">1474</span> DataOutputStream baosDos = new DataOutputStream(baos);<a name="line.1474"></a> |
| <span class="sourceLineNo">1475</span> long totalNumSubEntries = numSubEntriesAt.get(blockKeys.size() - 1);<a name="line.1475"></a> |
| <span class="sourceLineNo">1476</span> if (totalNumSubEntries == 0) {<a name="line.1476"></a> |
| <span class="sourceLineNo">1477</span> throw new IOException("No leaf-level entries, mid-key unavailable");<a name="line.1477"></a> |
| <span class="sourceLineNo">1478</span> }<a name="line.1478"></a> |
| <span class="sourceLineNo">1479</span> long midKeySubEntry = (totalNumSubEntries - 1) / 2;<a name="line.1479"></a> |
| <span class="sourceLineNo">1480</span> int midKeyEntry = getEntryBySubEntry(midKeySubEntry);<a name="line.1480"></a> |
| <span class="sourceLineNo">1481</span><a name="line.1481"></a> |
| <span class="sourceLineNo">1482</span> baosDos.writeLong(blockOffsets.get(midKeyEntry));<a name="line.1482"></a> |
| <span class="sourceLineNo">1483</span> baosDos.writeInt(onDiskDataSizes.get(midKeyEntry));<a name="line.1483"></a> |
| <span class="sourceLineNo">1484</span><a name="line.1484"></a> |
| <span class="sourceLineNo">1485</span> long numSubEntriesBefore = midKeyEntry > 0 ? numSubEntriesAt.get(midKeyEntry - 1) : 0;<a name="line.1485"></a> |
| <span class="sourceLineNo">1486</span> long subEntryWithinEntry = midKeySubEntry - numSubEntriesBefore;<a name="line.1486"></a> |
| <span class="sourceLineNo">1487</span> if (subEntryWithinEntry < 0 || subEntryWithinEntry > Integer.MAX_VALUE) {<a name="line.1487"></a> |
| <span class="sourceLineNo">1488</span> throw new IOException("Could not identify mid-key index within the "<a name="line.1488"></a> |
| <span class="sourceLineNo">1489</span> + "leaf-level block containing mid-key: out of range (" + subEntryWithinEntry<a name="line.1489"></a> |
| <span class="sourceLineNo">1490</span> + ", numSubEntriesBefore=" + numSubEntriesBefore + ", midKeySubEntry=" + midKeySubEntry<a name="line.1490"></a> |
| <span class="sourceLineNo">1491</span> + ")");<a name="line.1491"></a> |
| <span class="sourceLineNo">1492</span> }<a name="line.1492"></a> |
| <span class="sourceLineNo">1493</span><a name="line.1493"></a> |
| <span class="sourceLineNo">1494</span> baosDos.writeInt((int) subEntryWithinEntry);<a name="line.1494"></a> |
| <span class="sourceLineNo">1495</span><a name="line.1495"></a> |
| <span class="sourceLineNo">1496</span> if (baosDos.size() != MID_KEY_METADATA_SIZE) {<a name="line.1496"></a> |
| <span class="sourceLineNo">1497</span> throw new IOException("Could not write mid-key metadata: size=" + baosDos.size()<a name="line.1497"></a> |
| <span class="sourceLineNo">1498</span> + ", correct size: " + MID_KEY_METADATA_SIZE);<a name="line.1498"></a> |
| <span class="sourceLineNo">1499</span> }<a name="line.1499"></a> |
| <span class="sourceLineNo">1500</span><a name="line.1500"></a> |
| <span class="sourceLineNo">1501</span> // Close just to be good citizens, although this has no effect.<a name="line.1501"></a> |
| <span class="sourceLineNo">1502</span> baos.close();<a name="line.1502"></a> |
| <span class="sourceLineNo">1503</span><a name="line.1503"></a> |
| <span class="sourceLineNo">1504</span> return baos.toByteArray();<a name="line.1504"></a> |
| <span class="sourceLineNo">1505</span> }<a name="line.1505"></a> |
| <span class="sourceLineNo">1506</span><a name="line.1506"></a> |
| <span class="sourceLineNo">1507</span> /** Returns the size of this chunk if stored in the non-root index block format */<a name="line.1507"></a> |
| <span class="sourceLineNo">1508</span> @Override<a name="line.1508"></a> |
| <span class="sourceLineNo">1509</span> public int getNonRootSize() {<a name="line.1509"></a> |
| <span class="sourceLineNo">1510</span> return Bytes.SIZEOF_INT // Number of entries<a name="line.1510"></a> |
| <span class="sourceLineNo">1511</span> + Bytes.SIZEOF_INT * (blockKeys.size() + 1) // Secondary index<a name="line.1511"></a> |
| <span class="sourceLineNo">1512</span> + curTotalNonRootEntrySize; // All entries<a name="line.1512"></a> |
| <span class="sourceLineNo">1513</span> }<a name="line.1513"></a> |
| <span class="sourceLineNo">1514</span><a name="line.1514"></a> |
| <span class="sourceLineNo">1515</span> @Override<a name="line.1515"></a> |
| <span class="sourceLineNo">1516</span> public int getCurTotalNonRootEntrySize() {<a name="line.1516"></a> |
| <span class="sourceLineNo">1517</span> return curTotalNonRootEntrySize;<a name="line.1517"></a> |
| <span class="sourceLineNo">1518</span> }<a name="line.1518"></a> |
| <span class="sourceLineNo">1519</span><a name="line.1519"></a> |
| <span class="sourceLineNo">1520</span> @Override<a name="line.1520"></a> |
| <span class="sourceLineNo">1521</span> public List<byte[]> getBlockKeys() {<a name="line.1521"></a> |
| <span class="sourceLineNo">1522</span> return blockKeys;<a name="line.1522"></a> |
| <span class="sourceLineNo">1523</span> }<a name="line.1523"></a> |
| <span class="sourceLineNo">1524</span><a name="line.1524"></a> |
| <span class="sourceLineNo">1525</span> @Override<a name="line.1525"></a> |
| <span class="sourceLineNo">1526</span> public List<Integer> getSecondaryIndexOffsetMarks() {<a name="line.1526"></a> |
| <span class="sourceLineNo">1527</span> return secondaryIndexOffsetMarks;<a name="line.1527"></a> |
| <span class="sourceLineNo">1528</span> }<a name="line.1528"></a> |
| <span class="sourceLineNo">1529</span><a name="line.1529"></a> |
| <span class="sourceLineNo">1530</span> /** Returns the size of this chunk if stored in the root index block format */<a name="line.1530"></a> |
| <span class="sourceLineNo">1531</span> @Override<a name="line.1531"></a> |
| <span class="sourceLineNo">1532</span> public int getRootSize() {<a name="line.1532"></a> |
| <span class="sourceLineNo">1533</span> return curTotalRootSize;<a name="line.1533"></a> |
| <span class="sourceLineNo">1534</span> }<a name="line.1534"></a> |
| <span class="sourceLineNo">1535</span><a name="line.1535"></a> |
| <span class="sourceLineNo">1536</span> /** Returns the number of entries in this block index chunk */<a name="line.1536"></a> |
| <span class="sourceLineNo">1537</span> public int getNumEntries() {<a name="line.1537"></a> |
| <span class="sourceLineNo">1538</span> return blockKeys.size();<a name="line.1538"></a> |
| <span class="sourceLineNo">1539</span> }<a name="line.1539"></a> |
| <span class="sourceLineNo">1540</span><a name="line.1540"></a> |
| <span class="sourceLineNo">1541</span> public byte[] getBlockKey(int i) {<a name="line.1541"></a> |
| <span class="sourceLineNo">1542</span> return blockKeys.get(i);<a name="line.1542"></a> |
| <span class="sourceLineNo">1543</span> }<a name="line.1543"></a> |
| <span class="sourceLineNo">1544</span><a name="line.1544"></a> |
| <span class="sourceLineNo">1545</span> public long getBlockOffset(int i) {<a name="line.1545"></a> |
| <span class="sourceLineNo">1546</span> return blockOffsets.get(i);<a name="line.1546"></a> |
| <span class="sourceLineNo">1547</span> }<a name="line.1547"></a> |
| <span class="sourceLineNo">1548</span><a name="line.1548"></a> |
| <span class="sourceLineNo">1549</span> public int getOnDiskDataSize(int i) {<a name="line.1549"></a> |
| <span class="sourceLineNo">1550</span> return onDiskDataSizes.get(i);<a name="line.1550"></a> |
| <span class="sourceLineNo">1551</span> }<a name="line.1551"></a> |
| <span class="sourceLineNo">1552</span><a name="line.1552"></a> |
| <span class="sourceLineNo">1553</span> public long getCumulativeNumKV(int i) {<a name="line.1553"></a> |
| <span class="sourceLineNo">1554</span> if (i < 0) return 0;<a name="line.1554"></a> |
| <span class="sourceLineNo">1555</span> return numSubEntriesAt.get(i);<a name="line.1555"></a> |
| <span class="sourceLineNo">1556</span> }<a name="line.1556"></a> |
| <span class="sourceLineNo">1557</span><a name="line.1557"></a> |
| <span class="sourceLineNo">1558</span> }<a name="line.1558"></a> |
| <span class="sourceLineNo">1559</span><a name="line.1559"></a> |
| <span class="sourceLineNo">1560</span> public static int getMaxChunkSize(Configuration conf) {<a name="line.1560"></a> |
| <span class="sourceLineNo">1561</span> return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);<a name="line.1561"></a> |
| <span class="sourceLineNo">1562</span> }<a name="line.1562"></a> |
| <span class="sourceLineNo">1563</span><a name="line.1563"></a> |
| <span class="sourceLineNo">1564</span> public static int getMinIndexNumEntries(Configuration conf) {<a name="line.1564"></a> |
| <span class="sourceLineNo">1565</span> return conf.getInt(MIN_INDEX_NUM_ENTRIES_KEY, DEFAULT_MIN_INDEX_NUM_ENTRIES);<a name="line.1565"></a> |
| <span class="sourceLineNo">1566</span> }<a name="line.1566"></a> |
| <span class="sourceLineNo">1567</span>}<a name="line.1567"></a> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </body> |
| </html> |