| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <meta http-equiv="X-UA-Compatible" content="IE=9"/> |
| <meta name="generator" content="Doxygen 1.8.6"/> |
| <title>Impala: be/src/exec/incr-stats-util.cc Source File</title> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(initResizable); |
| $(window).load(resizeHeight); |
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div, it is closed by doxygen! --> |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">Impala |
| </div> |
| <div id="projectbrief">Impala is the open source, native analytic database for Apache Hadoop.</div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <!-- end header part --> |
| <!-- Generated by Doxygen 1.8.6 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| <div id="navrow1" class="tabs"> |
| <ul class="tablist"> |
| <li><a href="index.html"><span>Main Page</span></a></li> |
| <li><a href="namespaces.html"><span>Namespaces</span></a></li> |
| <li><a href="annotated.html"><span>Classes</span></a></li> |
| <li class="current"><a href="files.html"><span>Files</span></a></li> |
| <li> |
| <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </li> |
| </ul> |
| </div> |
| <div id="navrow2" class="tabs2"> |
| <ul class="tablist"> |
| <li><a href="files.html"><span>File List</span></a></li> |
| <li><a href="globals.html"><span>File Members</span></a></li> |
| </ul> |
| </div> |
| </div><!-- top --> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| <div id="nav-sync" class="sync"></div> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <script type="text/javascript"> |
| $(document).ready(function(){initNavTree('incr-stats-util_8cc_source.html','');}); |
| </script> |
| <div id="doc-content"> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark"> </span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark"> </span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark"> </span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark"> </span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark"> </span>Macros</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">incr-stats-util.cc</div> </div> |
| </div><!--header--> |
| <div class="contents"> |
| <a href="incr-stats-util_8cc.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno"> 1</span> <span class="comment">// Copyright 2014 Cloudera Inc.</span></div> |
| <div class="line"><a name="l00002"></a><span class="lineno"> 2</span> <span class="comment">//</span></div> |
| <div class="line"><a name="l00003"></a><span class="lineno"> 3</span> <span class="comment">// Licensed under the Apache License, Version 2.0 (the "License");</span></div> |
| <div class="line"><a name="l00004"></a><span class="lineno"> 4</span> <span class="comment">// you may not use this file except in compliance with the License.</span></div> |
| <div class="line"><a name="l00005"></a><span class="lineno"> 5</span> <span class="comment">// You may obtain a copy of the License at</span></div> |
| <div class="line"><a name="l00006"></a><span class="lineno"> 6</span> <span class="comment">//</span></div> |
| <div class="line"><a name="l00007"></a><span class="lineno"> 7</span> <span class="comment">// http://www.apache.org/licenses/LICENSE-2.0</span></div> |
| <div class="line"><a name="l00008"></a><span class="lineno"> 8</span> <span class="comment">//</span></div> |
| <div class="line"><a name="l00009"></a><span class="lineno"> 9</span> <span class="comment">// Unless required by applicable law or agreed to in writing, software</span></div> |
| <div class="line"><a name="l00010"></a><span class="lineno"> 10</span> <span class="comment">// distributed under the License is distributed on an "AS IS" BASIS,</span></div> |
| <div class="line"><a name="l00011"></a><span class="lineno"> 11</span> <span class="comment">// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span></div> |
| <div class="line"><a name="l00012"></a><span class="lineno"> 12</span> <span class="comment">// See the License for the specific language governing permissions and</span></div> |
| <div class="line"><a name="l00013"></a><span class="lineno"> 13</span> <span class="comment">// limitations under the License.</span></div> |
| <div class="line"><a name="l00014"></a><span class="lineno"> 14</span> </div> |
| <div class="line"><a name="l00015"></a><span class="lineno"> 15</span> <span class="preprocessor">#include "<a class="code" href="incr-stats-util_8h.html">incr-stats-util.h</a>"</span></div> |
| <div class="line"><a name="l00016"></a><span class="lineno"> 16</span> </div> |
| <div class="line"><a name="l00017"></a><span class="lineno"> 17</span> <span class="preprocessor">#include &lt;boost/foreach.hpp&gt;</span></div> |
| <div class="line"><a name="l00018"></a><span class="lineno"> 18</span> <span class="preprocessor">#include &lt;boost/unordered_set.hpp&gt;</span></div> |
| <div class="line"><a name="l00019"></a><span class="lineno"> 19</span> <span class="preprocessor">#include &lt;gutil/strings/substitute.h&gt;</span></div> |
| <div class="line"><a name="l00020"></a><span class="lineno"> 20</span> <span class="preprocessor">#include &lt;cmath&gt;</span></div> |
| <div class="line"><a name="l00021"></a><span class="lineno"> 21</span> <span class="preprocessor">#include &lt;sstream&gt;</span></div> |
| <div class="line"><a name="l00022"></a><span class="lineno"> 22</span> </div> |
| <div class="line"><a name="l00023"></a><span class="lineno"> 23</span> <span class="preprocessor">#include "<a class="code" href="logging_8h.html">common/logging.h</a>"</span></div> |
| <div class="line"><a name="l00024"></a><span class="lineno"> 24</span> <span class="preprocessor">#include "<a class="code" href="hs2-util_8h.html">service/hs2-util.h</a>"</span></div> |
| <div class="line"><a name="l00025"></a><span class="lineno"> 25</span> <span class="preprocessor">#include "<a class="code" href="udf_8h.html">udf/udf.h</a>"</span></div> |
| <div class="line"><a name="l00026"></a><span class="lineno"> 26</span> <span class="preprocessor">#include "gen-cpp/CatalogService_types.h"</span></div> |
| <div class="line"><a name="l00027"></a><span class="lineno"> 27</span> <span class="preprocessor">#include "gen-cpp/CatalogObjects_types.h"</span></div> |
| <div class="line"><a name="l00028"></a><span class="lineno"> 28</span> <span class="preprocessor">#include "<a class="code" href="aggregate-functions_8h.html">exprs/aggregate-functions.h</a>"</span></div> |
| <div class="line"><a name="l00029"></a><span class="lineno"> 29</span> </div> |
| <div class="line"><a name="l00030"></a><span class="lineno"> 30</span> <span class="preprocessor">#include "<a class="code" href="names_8h.html">common/names.h</a>"</span></div> |
| <div class="line"><a name="l00031"></a><span class="lineno"> 31</span> </div> |
| <div class="line"><a name="l00032"></a><span class="lineno"> 32</span> <span class="keyword">using namespace </span>apache::hive::service::cli::thrift;</div> |
| <div class="line"><a name="l00033"></a><span class="lineno"> 33</span> <span class="keyword">using namespace </span>impala;</div> |
| <div class="line"><a name="l00034"></a><span class="lineno"> 34</span> <span class="keyword">using namespace </span>impala_udf;</div> |
| <div class="line"><a name="l00035"></a><span class="lineno"> 35</span> <span class="keyword">using namespace </span>strings;</div> |
| <div class="line"><a name="l00036"></a><span class="lineno"> 36</span> </div> |
| <div class="line"><a name="l00037"></a><span class="lineno"> 37</span> <span class="comment">// Finalize method for the NDV_NO_FINALIZE() UDA, which only copies the intermediate state</span></div> |
| <div class="line"><a name="l00038"></a><span class="lineno"> 38</span> <span class="comment">// of the NDV computation into its output StringVal.</span></div> |
| <div class="line"><a name="l00039"></a><span class="lineno"><a class="line" href="incr-stats-util_8cc.html#af13a2b8fc5ed714abb9ce44f58edc2db"> 39</a></span> <a class="code" href="structimpala__udf_1_1StringVal.html">StringVal</a> <a class="code" href="incr-stats-util_8cc.html#af13a2b8fc5ed714abb9ce44f58edc2db">IncrementNdvFinalize</a>(<a class="code" href="classimpala__udf_1_1FunctionContext.html">FunctionContext</a>* ctx, <span class="keyword">const</span> <a class="code" href="structimpala__udf_1_1StringVal.html">StringVal</a>&amp; src) {</div> |
| <div class="line"><a name="l00040"></a><span class="lineno"> 40</span>  DCHECK(!src.<a class="code" href="structimpala__udf_1_1AnyVal.html#aded62c6b250c62814c94271856b06cbe">is_null</a>);</div> |
| <div class="line"><a name="l00041"></a><span class="lineno"> 41</span>  DCHECK_EQ(src.<a class="code" href="structimpala__udf_1_1StringVal.html#a72a872d5acebca872db6bd37eb4ae184">len</a>, <a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>);</div> |
| <div class="line"><a name="l00042"></a><span class="lineno"> 42</span>  <a class="code" href="structimpala__udf_1_1StringVal.html">StringVal</a> result_str(ctx, src.<a class="code" href="structimpala__udf_1_1StringVal.html#a72a872d5acebca872db6bd37eb4ae184">len</a>);</div> |
| <div class="line"><a name="l00043"></a><span class="lineno"> 43</span>  memcpy(result_str.<a class="code" href="structimpala__udf_1_1StringVal.html#a1152442079d2471ec396076f37bf6c35">ptr</a>, src.<a class="code" href="structimpala__udf_1_1StringVal.html#a1152442079d2471ec396076f37bf6c35">ptr</a>, src.<a class="code" href="structimpala__udf_1_1StringVal.html#a72a872d5acebca872db6bd37eb4ae184">len</a>);</div> |
| <div class="line"><a name="l00044"></a><span class="lineno"> 44</span>  ctx-><a class="code" href="classimpala__udf_1_1FunctionContext.html#a14904de2d63cac14b1a1ef09b0653a14">Free</a>(src.<a class="code" href="structimpala__udf_1_1StringVal.html#a1152442079d2471ec396076f37bf6c35">ptr</a>);</div> |
| <div class="line"><a name="l00045"></a><span class="lineno"> 45</span>  <span class="keywordflow">return</span> result_str;</div> |
| <div class="line"><a name="l00046"></a><span class="lineno"> 46</span> }</div> |
| <div class="line"><a name="l00047"></a><span class="lineno"> 47</span> </div> |
| <div class="line"><a name="l00048"></a><span class="lineno"> 48</span> <span class="comment">// To save space when sending NDV estimates around the cluster, we compress them using</span></div> |
| <div class="line"><a name="l00049"></a><span class="lineno"> 49</span> <span class="comment">// RLE, since they are often sparse. The resulting string has the form CVCVCVCV where C is</span></div> |
| <div class="line"><a name="l00050"></a><span class="lineno"> 50</span> <span class="comment">// the count, i.e. the number of times the subsequent V (value) should be repeated in the</span></div> |
| <div class="line"><a name="l00051"></a><span class="lineno"> 51</span> <span class="comment">// output string. C is between 0 and 255 inclusive, the count it represents is one more</span></div> |
| <div class="line"><a name="l00052"></a><span class="lineno"> 52</span> <span class="comment">// than the absolute value of C (since we never have a 0 count, and want to use the full</span></div> |
| <div class="line"><a name="l00053"></a><span class="lineno"> 53</span> <span class="comment">// range available to us).</span></div> |
| <div class="line"><a name="l00054"></a><span class="lineno"> 54</span> <span class="comment">//</span></div> |
| <div class="line"><a name="l00055"></a><span class="lineno"> 55</span> <span class="comment">// The output parameter is_encoded is set to true only if the RLE-compressed string is</span></div> |
| <div class="line"><a name="l00056"></a><span class="lineno"> 56</span> <span class="comment">// shorter than the input. Otherwise it is set to false, and the input is returned</span></div> |
| <div class="line"><a name="l00057"></a><span class="lineno"> 57</span> <span class="comment">// unencoded.</span></div> |
| <div class="line"><a name="l00058"></a><span class="lineno"><a class="line" href="incr-stats-util_8cc.html#ac119443ab23486b522706d2460885a97"> 58</a></span> <span class="keywordtype">string</span> <a class="code" href="incr-stats-util_8cc.html#ac119443ab23486b522706d2460885a97">EncodeNdv</a>(<span class="keyword">const</span> <span class="keywordtype">string</span>&amp; ndv, <span class="keywordtype">bool</span>* is_encoded) {</div> |
| <div class="line"><a name="l00059"></a><span class="lineno"> 59</span>  DCHECK_EQ(ndv.size(), <a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>);</div> |
| <div class="line"><a name="l00060"></a><span class="lineno"> 60</span>  <span class="keywordtype">string</span> encoded_ndv(<a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>, 0);</div> |
| <div class="line"><a name="l00061"></a><span class="lineno"> 61</span>  <span class="keywordtype">int</span> <a class="code" href="namespacegen__ir__descriptions.html#a0d6572990c8dd576a55a21ff11d6deb7">idx</a> = 0;</div> |
| <div class="line"><a name="l00062"></a><span class="lineno"> 62</span>  <span class="keywordtype">char</span> last = ndv[0];</div> |
| <div class="line"><a name="l00063"></a><span class="lineno"> 63</span> </div> |
| <div class="line"><a name="l00064"></a><span class="lineno"> 64</span>  <span class="comment">// Keep a count of how many times a value appears in succession. We encode this count as</span></div> |
| <div class="line"><a name="l00065"></a><span class="lineno"> 65</span>  <span class="comment">// a byte 0-255, but the actual count is always one more than the encoded value</span></div> |
| <div class="line"><a name="l00066"></a><span class="lineno"> 66</span>  <span class="comment">// (i.e. in the range 1-256 inclusive).</span></div> |
| <div class="line"><a name="l00067"></a><span class="lineno"> 67</span>  uint8_t <a class="code" href="partitioning-throughput-test_8cc.html#ad43c3812e6d13e0518d9f8b8f463ffcf">count</a> = 0;</div> |
| <div class="line"><a name="l00068"></a><span class="lineno"> 68</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 1; i &lt; <a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>; ++i) {</div> |
| <div class="line"><a name="l00069"></a><span class="lineno"> 69</span>  <span class="keywordflow">if</span> (ndv[i] != last || count == numeric_limits&lt;uint8_t&gt;::max()) {</div> |
| <div class="line"><a name="l00070"></a><span class="lineno"> 70</span>  <span class="keywordflow">if</span> (idx + 2 > AggregateFunctions::HLL_LEN) <span class="keywordflow">break</span>;</div> |
| <div class="line"><a name="l00071"></a><span class="lineno"> 71</span>  <span class="comment">// Write a (count, value) pair to two successive bytes</span></div> |
| <div class="line"><a name="l00072"></a><span class="lineno"> 72</span>  encoded_ndv[idx++] = <a class="code" href="partitioning-throughput-test_8cc.html#ad43c3812e6d13e0518d9f8b8f463ffcf">count</a>;</div> |
| <div class="line"><a name="l00073"></a><span class="lineno"> 73</span>  count = 0;</div> |
| <div class="line"><a name="l00074"></a><span class="lineno"> 74</span>  encoded_ndv[idx++] = last;</div> |
| <div class="line"><a name="l00075"></a><span class="lineno"> 75</span>  last = ndv[i];</div> |
| <div class="line"><a name="l00076"></a><span class="lineno"> 76</span>  } <span class="keywordflow">else</span> {</div> |
| <div class="line"><a name="l00077"></a><span class="lineno"> 77</span>  ++<a class="code" href="partitioning-throughput-test_8cc.html#ad43c3812e6d13e0518d9f8b8f463ffcf">count</a>;</div> |
| <div class="line"><a name="l00078"></a><span class="lineno"> 78</span>  }</div> |
| <div class="line"><a name="l00079"></a><span class="lineno"> 79</span>  }</div> |
| <div class="line"><a name="l00080"></a><span class="lineno"> 80</span> </div> |
| <div class="line"><a name="l00081"></a><span class="lineno"> 81</span>  <span class="comment">// +2 for the remaining two bytes written below</span></div> |
| <div class="line"><a name="l00082"></a><span class="lineno"> 82</span>  <span class="keywordflow">if</span> (idx + 2 > AggregateFunctions::HLL_LEN) {</div> |
| <div class="line"><a name="l00083"></a><span class="lineno"> 83</span>  *is_encoded = <span class="keyword">false</span>;</div> |
| <div class="line"><a name="l00084"></a><span class="lineno"> 84</span>  <span class="keywordflow">return</span> ndv;</div> |
| <div class="line"><a name="l00085"></a><span class="lineno"> 85</span>  }</div> |
| <div class="line"><a name="l00086"></a><span class="lineno"> 86</span> </div> |
| <div class="line"><a name="l00087"></a><span class="lineno"> 87</span>  encoded_ndv[idx++] = <a class="code" href="partitioning-throughput-test_8cc.html#ad43c3812e6d13e0518d9f8b8f463ffcf">count</a>;</div> |
| <div class="line"><a name="l00088"></a><span class="lineno"> 88</span>  encoded_ndv[idx++] = last;</div> |
| <div class="line"><a name="l00089"></a><span class="lineno"> 89</span> </div> |
| <div class="line"><a name="l00090"></a><span class="lineno"> 90</span>  *is_encoded = <span class="keyword">true</span>;</div> |
| <div class="line"><a name="l00091"></a><span class="lineno"> 91</span>  encoded_ndv.resize(idx);</div> |
| <div class="line"><a name="l00092"></a><span class="lineno"> 92</span>  DCHECK_GT(encoded_ndv.size(), 0);</div> |
| <div class="line"><a name="l00093"></a><span class="lineno"> 93</span>  DCHECK_LE(encoded_ndv.size(), <a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>);</div> |
| <div class="line"><a name="l00094"></a><span class="lineno"> 94</span>  <span class="keywordflow">return</span> encoded_ndv;</div> |
| <div class="line"><a name="l00095"></a><span class="lineno"> 95</span> }</div> |
| <div class="line"><a name="l00096"></a><span class="lineno"> 96</span> </div> |
| <div class="line"><a name="l00097"></a><span class="lineno"><a class="line" href="incr-stats-util_8cc.html#a89c8672b1593c5cb907376e88dfa053a"> 97</a></span> <span class="keywordtype">string</span> <a class="code" href="incr-stats-util_8cc.html#a89c8672b1593c5cb907376e88dfa053a">DecodeNdv</a>(<span class="keyword">const</span> <span class="keywordtype">string</span>&amp; ndv, <span class="keywordtype">bool</span> is_encoded) {</div> |
| <div class="line"><a name="l00098"></a><span class="lineno"> 98</span>  <span class="keywordflow">if</span> (!is_encoded) <span class="keywordflow">return</span> ndv;</div> |
| <div class="line"><a name="l00099"></a><span class="lineno"> 99</span>  DCHECK_EQ(ndv.size() % 2, 0);</div> |
| <div class="line"><a name="l00100"></a><span class="lineno"> 100</span>  <span class="keywordtype">string</span> decoded_ndv(<a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>, 0);</div> |
| <div class="line"><a name="l00101"></a><span class="lineno"> 101</span>  <span class="keywordtype">int</span> <a class="code" href="namespacegen__ir__descriptions.html#a0d6572990c8dd576a55a21ff11d6deb7">idx</a> = 0;</div> |
| <div class="line"><a name="l00102"></a><span class="lineno"> 102</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; ndv.size(); i += 2) {</div> |
| <div class="line"><a name="l00103"></a><span class="lineno"> 103</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> j = 0; j &lt; (static_cast&lt;uint8_t&gt;(ndv[i])) + 1; ++j) {</div> |
| <div class="line"><a name="l00104"></a><span class="lineno"> 104</span>  decoded_ndv[idx++] = ndv[i+1];</div> |
| <div class="line"><a name="l00105"></a><span class="lineno"> 105</span>  }</div> |
| <div class="line"><a name="l00106"></a><span class="lineno"> 106</span>  }</div> |
| <div class="line"><a name="l00107"></a><span class="lineno"> 107</span>  DCHECK_EQ(idx, <a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>);</div> |
| <div class="line"><a name="l00108"></a><span class="lineno"> 108</span>  <span class="keywordflow">return</span> decoded_ndv;</div> |
| <div class="line"><a name="l00109"></a><span class="lineno"> 109</span> }</div> |
| <div class="line"><a name="l00110"></a><span class="lineno"> 110</span> </div> |
| <div class="line"><a name="l00111"></a><span class="lineno"> 111</span> <span class="comment">// A container for statistics for a single column that are aggregated partition by</span></div> |
| <div class="line"><a name="l00112"></a><span class="lineno"> 112</span> <span class="comment">// partition during the incremental computation of column stats. The aggregations are</span></div> |
| <div class="line"><a name="l00113"></a><span class="lineno"> 113</span> <span class="comment">// updated during Update(), and the final statistics are computed by Finalize().</span></div> |
| <div class="line"><a name="l00114"></a><span class="lineno"><a class="line" href="structPerColumnStats.html"> 114</a></span> <span class="keyword">struct </span><a class="code" href="structPerColumnStats.html">PerColumnStats</a> {</div> |
| <div class="line"><a name="l00115"></a><span class="lineno"> 115</span>  <span class="comment">// Should have length AggregateFunctions::HLL_PRECISION. Intermediate buckets for the</span></div> |
| <div class="line"><a name="l00116"></a><span class="lineno"> 116</span>  <span class="comment">// HLL calculation.</span></div> |
| <div class="line"><a name="l00117"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a3be87c212b5835a98bd102ea350110d3"> 117</a></span>  <span class="keywordtype">string</span> <a class="code" href="structPerColumnStats.html#a3be87c212b5835a98bd102ea350110d3">intermediate_ndv</a>;</div> |
| <div class="line"><a name="l00118"></a><span class="lineno"> 118</span> </div> |
| <div class="line"><a name="l00119"></a><span class="lineno"> 119</span>  <span class="comment">// The total number of nulls counted, or -1 for no sample.</span></div> |
| <div class="line"><a name="l00120"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a1fff46d1bf99c8664c68ba2a06705f4c"> 120</a></span>  int64_t <a class="code" href="structPerColumnStats.html#a1fff46d1bf99c8664c68ba2a06705f4c">num_nulls</a>;</div> |
| <div class="line"><a name="l00121"></a><span class="lineno"> 121</span> </div> |
| <div class="line"><a name="l00122"></a><span class="lineno"> 122</span>  <span class="comment">// The maximum width of the column, in bytes.</span></div> |
| <div class="line"><a name="l00123"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a3cbc36f0eeca7cf06bbd9a52a986efc7"> 123</a></span>  int32_t <a class="code" href="structPerColumnStats.html#a3cbc36f0eeca7cf06bbd9a52a986efc7">max_width</a>;</div> |
| <div class="line"><a name="l00124"></a><span class="lineno"> 124</span> </div> |
| <div class="line"><a name="l00125"></a><span class="lineno"> 125</span>  <span class="comment">// The total number of rows</span></div> |
| <div class="line"><a name="l00126"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a7148386c16fd51493de77d905b2fba0e"> 126</a></span>  int64_t <a class="code" href="structPerColumnStats.html#a7148386c16fd51493de77d905b2fba0e">num_rows</a>;</div> |
| <div class="line"><a name="l00127"></a><span class="lineno"> 127</span> </div> |
| <div class="line"><a name="l00128"></a><span class="lineno"> 128</span>  <span class="comment">// The sum of avg_width * num_rows for each partition, so that avg_width can be</span></div> |
| <div class="line"><a name="l00129"></a><span class="lineno"> 129</span>  <span class="comment">// correctly computed during Finalize()</span></div> |
| <div class="line"><a name="l00130"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#adada0352408dc6820c88d75c5baa5c60"> 130</a></span>  <span class="keywordtype">double</span> <a class="code" href="structPerColumnStats.html#adada0352408dc6820c88d75c5baa5c60">total_width</a>;</div> |
| <div class="line"><a name="l00131"></a><span class="lineno"> 131</span> </div> |
| <div class="line"><a name="l00132"></a><span class="lineno"> 132</span>  <span class="comment">// Populated after Finalize(), the result of the HLL computation</span></div> |
| <div class="line"><a name="l00133"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a0c35cdf9bede903799a2d60a4b69a01e"> 133</a></span>  int64_t <a class="code" href="structPerColumnStats.html#a0c35cdf9bede903799a2d60a4b69a01e">ndv_estimate</a>;</div> |
| <div class="line"><a name="l00134"></a><span class="lineno"> 134</span> </div> |
| <div class="line"><a name="l00135"></a><span class="lineno"> 135</span>  <span class="comment">// The average column width, in bytes (but may have non-integer value)</span></div> |
| <div class="line"><a name="l00136"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a93cbe26df3eb6decba12dcbdd5b39a90"> 136</a></span>  <span class="keywordtype">double</span> <a class="code" href="structPerColumnStats.html#a93cbe26df3eb6decba12dcbdd5b39a90">avg_width</a>;</div> |
| <div class="line"><a name="l00137"></a><span class="lineno"> 137</span> </div> |
| <div class="line"><a name="l00138"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a9e4141bc73e1656c823d547368d2ee74"> 138</a></span>  <a class="code" href="structPerColumnStats.html#a9e4141bc73e1656c823d547368d2ee74">PerColumnStats</a>()</div> |
| <div class="line"><a name="l00139"></a><span class="lineno"> 139</span>  : intermediate_ndv(<a class="code" href="classimpala_1_1AggregateFunctions.html">AggregateFunctions</a>::<a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">HLL_LEN</a>, 0), num_nulls(-1),</div> |
| <div class="line"><a name="l00140"></a><span class="lineno"> 140</span>  max_width(0), num_rows(0), avg_width(0) { }</div> |
| <div class="line"><a name="l00141"></a><span class="lineno"> 141</span> </div> |
| <div class="line"><a name="l00142"></a><span class="lineno"> 142</span>  <span class="comment">// Updates all aggregate statistics with a new set of measurements.</span></div> |
| <div class="line"><a name="l00143"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a9db29e9f39c9df9a93d345172d6882e1"> 143</a></span>  <span class="keywordtype">void</span> <a class="code" href="structPerColumnStats.html#a9db29e9f39c9df9a93d345172d6882e1">Update</a>(<span class="keyword">const</span> <span class="keywordtype">string</span>&amp; ndv, int64_t num_new_rows, <span class="keywordtype">double</span> new_avg_width,</div> |
| <div class="line"><a name="l00144"></a><span class="lineno"> 144</span>  int32_t max_new_width, int64_t num_new_nulls) {</div> |
| <div class="line"><a name="l00145"></a><span class="lineno"> 145</span>  DCHECK_EQ(intermediate_ndv.size(), ndv.size()) &lt;&lt; <span class="stringliteral">"Incompatible intermediate NDVs"</span>;</div> |
| <div class="line"><a name="l00146"></a><span class="lineno"> 146</span>  DCHECK_GE(num_new_rows, 0);</div> |
| <div class="line"><a name="l00147"></a><span class="lineno"> 147</span>  DCHECK_GE(max_new_width, 0);</div> |
| <div class="line"><a name="l00148"></a><span class="lineno"> 148</span>  DCHECK_GE(new_avg_width, 0);</div> |
| <div class="line"><a name="l00149"></a><span class="lineno"> 149</span>  DCHECK_GE(num_new_nulls, -1);</div> |
| <div class="line"><a name="l00150"></a><span class="lineno"> 150</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> j = 0; j &lt; ndv.size(); ++j) {</div> |
| <div class="line"><a name="l00151"></a><span class="lineno"> 151</span>  intermediate_ndv[j] = ::max(intermediate_ndv[j], ndv[j]);</div> |
| <div class="line"><a name="l00152"></a><span class="lineno"> 152</span>  }</div> |
| <div class="line"><a name="l00153"></a><span class="lineno"> 153</span>  <span class="keywordflow">if</span> (num_new_nulls >= 0) num_nulls += num_new_nulls;</div> |
| <div class="line"><a name="l00154"></a><span class="lineno"> 154</span>  max_width = ::max(max_width, max_new_width);</div> |
| <div class="line"><a name="l00155"></a><span class="lineno"> 155</span>  avg_width += (new_avg_width * num_new_rows);</div> |
| <div class="line"><a name="l00156"></a><span class="lineno"> 156</span>  num_rows += num_new_rows;</div> |
| <div class="line"><a name="l00157"></a><span class="lineno"> 157</span>  }</div> |
| <div class="line"><a name="l00158"></a><span class="lineno"> 158</span> </div> |
| <div class="line"><a name="l00159"></a><span class="lineno"> 159</span>  <span class="comment">// Performs any stats computations that are not distributive, that is they may not be</span></div> |
| <div class="line"><a name="l00160"></a><span class="lineno"> 160</span>  <span class="comment">// computed in part during Update(). After this method returns, ndv_estimate and</span></div> |
| <div class="line"><a name="l00161"></a><span class="lineno"> 161</span>  <span class="comment">// avg_width contain valid values.</span></div> |
| <div class="line"><a name="l00162"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#af24afd73513060d9989bf20d22d6064e"> 162</a></span>  <span class="keywordtype">void</span> <a class="code" href="structPerColumnStats.html#af24afd73513060d9989bf20d22d6064e">Finalize</a>() {</div> |
| <div class="line"><a name="l00163"></a><span class="lineno"> 163</span>  ndv_estimate = AggregateFunctions::HllFinalEstimate(</div> |
| <div class="line"><a name="l00164"></a><span class="lineno"> 164</span>  reinterpret_cast&lt;const uint8_t*&gt;(intermediate_ndv.data()),</div> |
| <div class="line"><a name="l00165"></a><span class="lineno"> 165</span>  intermediate_ndv.size());</div> |
| <div class="line"><a name="l00166"></a><span class="lineno"> 166</span>  avg_width = num_rows == 0 ? 0 : avg_width / num_rows;</div> |
| <div class="line"><a name="l00167"></a><span class="lineno"> 167</span>  }</div> |
| <div class="line"><a name="l00168"></a><span class="lineno"> 168</span> </div> |
| <div class="line"><a name="l00169"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#a1347d5dc153e58257792f28122f44d13"> 169</a></span>  TColumnStats <a class="code" href="structPerColumnStats.html#a1347d5dc153e58257792f28122f44d13">ToTColumnStats</a>()<span class="keyword"> const </span>{</div> |
| <div class="line"><a name="l00170"></a><span class="lineno"> 170</span>  TColumnStats col_stats;</div> |
| <div class="line"><a name="l00171"></a><span class="lineno"> 171</span>  col_stats.__set_num_distinct_values(ndv_estimate);</div> |
| <div class="line"><a name="l00172"></a><span class="lineno"> 172</span>  col_stats.__set_num_nulls(num_nulls);</div> |
| <div class="line"><a name="l00173"></a><span class="lineno"> 173</span>  col_stats.__set_max_size(max_width);</div> |
| <div class="line"><a name="l00174"></a><span class="lineno"> 174</span>  col_stats.__set_avg_size(avg_width);</div> |
| <div class="line"><a name="l00175"></a><span class="lineno"> 175</span>  <span class="keywordflow">return</span> col_stats;</div> |
| <div class="line"><a name="l00176"></a><span class="lineno"> 176</span>  }</div> |
| <div class="line"><a name="l00177"></a><span class="lineno"> 177</span> </div> |
| <div class="line"><a name="l00178"></a><span class="lineno"> 178</span>  <span class="comment">// Returns a string with debug information for this</span></div> |
| <div class="line"><a name="l00179"></a><span class="lineno"><a class="line" href="structPerColumnStats.html#afadcf8c66ab7baaa6610774fcf2245de"> 179</a></span>  <span class="keywordtype">string</span> <a class="code" href="structPerColumnStats.html#afadcf8c66ab7baaa6610774fcf2245de">DebugString</a>()<span class="keyword"> const </span>{</div> |
| <div class="line"><a name="l00180"></a><span class="lineno"> 180</span>  <span class="keywordflow">return</span> Substitute(</div> |
| <div class="line"><a name="l00181"></a><span class="lineno"> 181</span>  <span class="stringliteral">"ndv: $0, num_nulls: $1, max_width: $2, avg_width: $3, num_rows: $4"</span>,</div> |
| <div class="line"><a name="l00182"></a><span class="lineno"> 182</span>  ndv_estimate, num_nulls, max_width, avg_width, num_rows);</div> |
| <div class="line"><a name="l00183"></a><span class="lineno"> 183</span>  }</div> |
| <div class="line"><a name="l00184"></a><span class="lineno"> 184</span> };</div> |
| <div class="line"><a name="l00185"></a><span class="lineno"> 185</span> </div> |
| <div class="line"><a name="l00186"></a><span class="lineno"> 186</span> <span class="keyword">namespace </span>impala {</div> |
| <div class="line"><a name="l00187"></a><span class="lineno"> 187</span> </div> |
| <div class="line"><a name="l00188"></a><span class="lineno"><a class="line" href="namespaceimpala.html#a1a02780a0ef5ef9c20f452b39db5f446"> 188</a></span> <span class="keywordtype">void</span> <a class="code" href="namespaceimpala.html#a1a02780a0ef5ef9c20f452b39db5f446">FinalizePartitionedColumnStats</a>(<span class="keyword">const</span> TTableSchema& col_stats_schema,</div> |
| <div class="line"><a name="l00189"></a><span class="lineno"> 189</span>  <span class="keyword">const</span> vector&lt;TPartitionStats&gt;&amp; existing_part_stats,</div> |
| <div class="line"><a name="l00190"></a><span class="lineno"> 190</span>  <span class="keyword">const</span> vector&lt;vector&lt;string&gt; &gt;&amp; expected_partitions, <span class="keyword">const</span> TRowSet&amp; rowset,</div> |
| <div class="line"><a name="l00191"></a><span class="lineno"> 191</span>  int32_t num_partition_cols, TAlterTableUpdateStatsParams* params) {</div> |
| <div class="line"><a name="l00192"></a><span class="lineno"> 192</span>  <span class="comment">// The rowset should have the following schema: for every column in the source table,</span></div> |
| <div class="line"><a name="l00193"></a><span class="lineno"> 193</span>  <span class="comment">// five columns are produced, one row per partition.</span></div> |
| <div class="line"><a name="l00194"></a><span class="lineno"> 194</span>  <span class="comment">// &lt;ndv buckets&gt;, &lt;num nulls&gt;, &lt;max width&gt;, &lt;avg width&gt;, &lt;count rows&gt;</span></div> |
| <div class="line"><a name="l00195"></a><span class="lineno"> 195</span>  <span class="keyword">static</span> <span class="keyword">const</span> <span class="keywordtype">int</span> COLUMNS_PER_STAT = 5;</div> |
| <div class="line"><a name="l00196"></a><span class="lineno"> 196</span> </div> |
| <div class="line"><a name="l00197"></a><span class="lineno"> 197</span>  <span class="keyword">const</span> <span class="keywordtype">int</span> num_cols =</div> |
| <div class="line"><a name="l00198"></a><span class="lineno"> 198</span>  (col_stats_schema.columns.size() - num_partition_cols) / COLUMNS_PER_STAT;</div> |
| <div class="line"><a name="l00199"></a><span class="lineno"> 199</span>  unordered_set&lt;vector&lt;string&gt; &gt; seen_partitions;</div> |
| <div class="line"><a name="l00200"></a><span class="lineno"> 200</span>  vector&lt;PerColumnStats&gt; stats(num_cols);</div> |
| <div class="line"><a name="l00201"></a><span class="lineno"> 201</span> </div> |
| <div class="line"><a name="l00202"></a><span class="lineno"> 202</span>  <span class="keywordflow">if</span> (rowset.rows.size() > 0) {</div> |
| <div class="line"><a name="l00203"></a><span class="lineno"> 203</span>  DCHECK_GE(rowset.rows[0].colVals.size(), COLUMNS_PER_STAT);</div> |
| <div class="line"><a name="l00204"></a><span class="lineno"> 204</span>  params->__isset.partition_stats = <span class="keyword">true</span>;</div> |
| <div class="line"><a name="l00205"></a><span class="lineno"> 205</span>  BOOST_FOREACH(<span class="keyword">const</span> TRow& col_stats_row, rowset.rows) {</div> |
| <div class="line"><a name="l00206"></a><span class="lineno"> 206</span>  <span class="comment">// The last few columns are partition columns that the results are grouped by, and</span></div> |
| <div class="line"><a name="l00207"></a><span class="lineno"> 207</span>  <span class="comment">// so uniquely identify the partition that these stats belong to.</span></div> |
| <div class="line"><a name="l00208"></a><span class="lineno"> 208</span>  vector&lt;string&gt; partition_key_vals;</div> |
| <div class="line"><a name="l00209"></a><span class="lineno"> 209</span>  partition_key_vals.reserve(col_stats_row.colVals.size());</div> |
| <div class="line"><a name="l00210"></a><span class="lineno"> 210</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> j = num_cols * COLUMNS_PER_STAT; j < col_stats_row.colVals.size(); ++j) {</div> |
| <div class="line"><a name="l00211"></a><span class="lineno"> 211</span>  stringstream ss;</div> |
| <div class="line"><a name="l00212"></a><span class="lineno"> 212</span>  <a class="code" href="namespaceimpala.html#a3d439da3c73d5f3a86b0c7df3436eda5">PrintTColumnValue</a>(col_stats_row.colVals[j], &ss);</div> |
| <div class="line"><a name="l00213"></a><span class="lineno"> 213</span>  partition_key_vals.push_back(ss.str());</div> |
| <div class="line"><a name="l00214"></a><span class="lineno"> 214</span>  }</div> |
| <div class="line"><a name="l00215"></a><span class="lineno"> 215</span>  seen_partitions.insert(partition_key_vals);</div> |
| <div class="line"><a name="l00216"></a><span class="lineno"> 216</span> </div> |
| <div class="line"><a name="l00217"></a><span class="lineno"> 217</span>  TPartitionStats* part_stat = &amp;params-&gt;partition_stats[partition_key_vals];</div> |
| <div class="line"><a name="l00218"></a><span class="lineno"> 218</span>  part_stat->__isset.intermediate_col_stats = <span class="keyword">true</span>;</div> |
| <div class="line"><a name="l00219"></a><span class="lineno"> 219</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < num_cols * COLUMNS_PER_STAT; i += COLUMNS_PER_STAT) {</div> |
| <div class="line"><a name="l00220"></a><span class="lineno"> 220</span>  <a class="code" href="structPerColumnStats.html">PerColumnStats</a>* stat = &stats[i / COLUMNS_PER_STAT];</div> |
| <div class="line"><a name="l00221"></a><span class="lineno"> 221</span>  <span class="keyword">const</span> <span class="keywordtype">string</span>& ndv = col_stats_row.colVals[i].stringVal.value;</div> |
| <div class="line"><a name="l00222"></a><span class="lineno"> 222</span>  int64_t num_rows = col_stats_row.colVals[i + 4].i64Val.value;</div> |
| <div class="line"><a name="l00223"></a><span class="lineno"> 223</span>  <span class="keywordtype">double</span> avg_width = col_stats_row.colVals[i + 3].doubleVal.value;</div> |
| <div class="line"><a name="l00224"></a><span class="lineno"> 224</span>  int32_t max_width = col_stats_row.colVals[i + 2].i32Val.value;</div> |
| <div class="line"><a name="l00225"></a><span class="lineno"> 225</span>  int64_t num_nulls = col_stats_row.colVals[i + 1].i64Val.value;</div> |
| <div class="line"><a name="l00226"></a><span class="lineno"> 226</span> </div> |
| <div class="line"><a name="l00227"></a><span class="lineno"> 227</span>  stat-><a class="code" href="structPerColumnStats.html#a9db29e9f39c9df9a93d345172d6882e1">Update</a>(ndv, num_rows, avg_width, max_width, num_nulls);</div> |
| <div class="line"><a name="l00228"></a><span class="lineno"> 228</span> </div> |
| <div class="line"><a name="l00229"></a><span class="lineno"> 229</span>  <span class="comment">// Save the intermediate state per-column, per-partition</span></div> |
| <div class="line"><a name="l00230"></a><span class="lineno"> 230</span>  TIntermediateColumnStats int_stats;</div> |
| <div class="line"><a name="l00231"></a><span class="lineno"> 231</span>  <span class="keywordtype">bool</span> is_encoded;</div> |
| <div class="line"><a name="l00232"></a><span class="lineno"> 232</span>  int_stats.__set_intermediate_ndv(<a class="code" href="incr-stats-util_8cc.html#ac119443ab23486b522706d2460885a97">EncodeNdv</a>(ndv, &is_encoded));</div> |
| <div class="line"><a name="l00233"></a><span class="lineno"> 233</span>  int_stats.__set_is_ndv_encoded(is_encoded);</div> |
| <div class="line"><a name="l00234"></a><span class="lineno"> 234</span>  int_stats.__set_num_nulls(num_nulls);</div> |
| <div class="line"><a name="l00235"></a><span class="lineno"> 235</span>  int_stats.__set_max_width(max_width);</div> |
| <div class="line"><a name="l00236"></a><span class="lineno"> 236</span>  int_stats.__set_avg_width(avg_width);</div> |
| <div class="line"><a name="l00237"></a><span class="lineno"> 237</span>  int_stats.__set_num_rows(num_rows);</div> |
| <div class="line"><a name="l00238"></a><span class="lineno"> 238</span> </div> |
| <div class="line"><a name="l00239"></a><span class="lineno"> 239</span>  part_stat->intermediate_col_stats[col_stats_schema.columns[i].columnName] =</div> |
| <div class="line"><a name="l00240"></a><span class="lineno"> 240</span>  int_stats;</div> |
| <div class="line"><a name="l00241"></a><span class="lineno"> 241</span>  }</div> |
| <div class="line"><a name="l00242"></a><span class="lineno"> 242</span>  }</div> |
| <div class="line"><a name="l00243"></a><span class="lineno"> 243</span>  }</div> |
| <div class="line"><a name="l00244"></a><span class="lineno"> 244</span> </div> |
| <div class="line"><a name="l00245"></a><span class="lineno"> 245</span>  <span class="comment">// Make sure there's a zeroed entry for all partitions that were included in the query -</span></div> |
| <div class="line"><a name="l00246"></a><span class="lineno"> 246</span>  <span class="comment">// empty partitions will not have a row in the GROUP BY, but should still emit a</span></div> |
| <div class="line"><a name="l00247"></a><span class="lineno"> 247</span>  <span class="comment">// TPartitionStats.</span></div> |
| <div class="line"><a name="l00248"></a><span class="lineno"> 248</span>  TIntermediateColumnStats empty_column_stats;</div> |
| <div class="line"><a name="l00249"></a><span class="lineno"> 249</span>  <span class="keywordtype">bool</span> is_encoded;</div> |
| <div class="line"><a name="l00250"></a><span class="lineno"> 250</span>  empty_column_stats.__set_intermediate_ndv(</div> |
| <div class="line"><a name="l00251"></a><span class="lineno"> 251</span>  <a class="code" href="incr-stats-util_8cc.html#ac119443ab23486b522706d2460885a97">EncodeNdv</a>(<span class="keywordtype">string</span>(<a class="code" href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">AggregateFunctions::HLL_LEN</a>, 0), &is_encoded));</div> |
| <div class="line"><a name="l00252"></a><span class="lineno"> 252</span>  empty_column_stats.__set_is_ndv_encoded(is_encoded);</div> |
| <div class="line"><a name="l00253"></a><span class="lineno"> 253</span>  empty_column_stats.__set_num_nulls(0);</div> |
| <div class="line"><a name="l00254"></a><span class="lineno"> 254</span>  empty_column_stats.__set_max_width(0);</div> |
| <div class="line"><a name="l00255"></a><span class="lineno"> 255</span>  empty_column_stats.__set_avg_width(0);</div> |
| <div class="line"><a name="l00256"></a><span class="lineno"> 256</span>  empty_column_stats.__set_num_rows(0);</div> |
| <div class="line"><a name="l00257"></a><span class="lineno"> 257</span>  TPartitionStats empty_part_stats;</div> |
| <div class="line"><a name="l00258"></a><span class="lineno"> 258</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < num_cols * COLUMNS_PER_STAT; i += COLUMNS_PER_STAT) {</div> |
| <div class="line"><a name="l00259"></a><span class="lineno"> 259</span>  empty_part_stats.intermediate_col_stats[col_stats_schema.columns[i].columnName] =</div> |
| <div class="line"><a name="l00260"></a><span class="lineno"> 260</span>  empty_column_stats;</div> |
| <div class="line"><a name="l00261"></a><span class="lineno"> 261</span>  }</div> |
| <div class="line"><a name="l00262"></a><span class="lineno"> 262</span>  empty_part_stats.__isset.intermediate_col_stats = <span class="keyword">true</span>;</div> |
| <div class="line"><a name="l00263"></a><span class="lineno"> 263</span>  TTableStats empty_table_stats;</div> |
| <div class="line"><a name="l00264"></a><span class="lineno"> 264</span>  empty_table_stats.__set_num_rows(0);</div> |
| <div class="line"><a name="l00265"></a><span class="lineno"> 265</span>  empty_part_stats.stats = empty_table_stats;</div> |
| <div class="line"><a name="l00266"></a><span class="lineno"> 266</span>  BOOST_FOREACH(<span class="keyword">const</span> vector&lt;string&gt;&amp; part_key_vals, expected_partitions) {</div> |
| <div class="line"><a name="l00267"></a><span class="lineno"> 267</span>  DCHECK_EQ(part_key_vals.size(), num_partition_cols);</div> |
| <div class="line"><a name="l00268"></a><span class="lineno"> 268</span>  <span class="keywordflow">if</span> (seen_partitions.find(part_key_vals) != seen_partitions.end()) <span class="keywordflow">continue</span>;</div> |
| <div class="line"><a name="l00269"></a><span class="lineno"> 269</span>  params->partition_stats[part_key_vals] = empty_part_stats;</div> |
| <div class="line"><a name="l00270"></a><span class="lineno"> 270</span>  }</div> |
| <div class="line"><a name="l00271"></a><span class="lineno"> 271</span> </div> |
| <div class="line"><a name="l00272"></a><span class="lineno"> 272</span>  <span class="comment">// Now aggregate the existing statistics. The FE will ensure that the set of</span></div> |
| <div class="line"><a name="l00273"></a><span class="lineno"> 273</span>  <span class="comment">// partitions accessed by the query and this list are disjoint and cover the entire</span></div> |
| <div class="line"><a name="l00274"></a><span class="lineno"> 274</span>  <span class="comment">// set of partitions.</span></div> |
| <div class="line"><a name="l00275"></a><span class="lineno"> 275</span>  BOOST_FOREACH(<span class="keyword">const</span> TPartitionStats& existing_stats, existing_part_stats) {</div> |
| <div class="line"><a name="l00276"></a><span class="lineno"> 276</span>  DCHECK_LE(existing_stats.intermediate_col_stats.size(),</div> |
| <div class="line"><a name="l00277"></a><span class="lineno"> 277</span>  col_stats_schema.columns.size());</div> |
| <div class="line"><a name="l00278"></a><span class="lineno"> 278</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < num_cols; ++i) {</div> |
| <div class="line"><a name="l00279"></a><span class="lineno"> 279</span>  <span class="keyword">const</span> <span class="keywordtype">string</span>& col_name = col_stats_schema.columns[i * COLUMNS_PER_STAT].columnName;</div> |
| <div class="line"><a name="l00280"></a><span class="lineno"> 280</span>  map&lt;string, TIntermediateColumnStats&gt;::const_iterator it =</div> |
| <div class="line"><a name="l00281"></a><span class="lineno"> 281</span>  existing_stats.intermediate_col_stats.find(col_name);</div> |
| <div class="line"><a name="l00282"></a><span class="lineno"> 282</span>  <span class="keywordflow">if</span> (it == existing_stats.intermediate_col_stats.end()) {</div> |
| <div class="line"><a name="l00283"></a><span class="lineno"> 283</span>  VLOG(2) << <span class="stringliteral">"Could not find column in existing column stat state: "</span> << col_name;</div> |
| <div class="line"><a name="l00284"></a><span class="lineno"> 284</span>  <span class="keywordflow">continue</span>;</div> |
| <div class="line"><a name="l00285"></a><span class="lineno"> 285</span>  }</div> |
| <div class="line"><a name="l00286"></a><span class="lineno"> 286</span> </div> |
| <div class="line"><a name="l00287"></a><span class="lineno"> 287</span>  <span class="keyword">const</span> TIntermediateColumnStats& int_stats = it->second;</div> |
| <div class="line"><a name="l00288"></a><span class="lineno"> 288</span>  stats[i].Update(<a class="code" href="incr-stats-util_8cc.html#a89c8672b1593c5cb907376e88dfa053a">DecodeNdv</a>(int_stats.intermediate_ndv, int_stats.is_ndv_encoded),</div> |
| <div class="line"><a name="l00289"></a><span class="lineno"> 289</span>  int_stats.num_rows, int_stats.avg_width, int_stats.max_width,</div> |
| <div class="line"><a name="l00290"></a><span class="lineno"> 290</span>  int_stats.num_nulls);</div> |
| <div class="line"><a name="l00291"></a><span class="lineno"> 291</span>  }</div> |
| <div class="line"><a name="l00292"></a><span class="lineno"> 292</span>  }</div> |
| <div class="line"><a name="l00293"></a><span class="lineno"> 293</span> </div> |
| <div class="line"><a name="l00294"></a><span class="lineno"> 294</span>  <span class="comment">// Compute the final results now that all aggregations are done, and save those as</span></div> |
| <div class="line"><a name="l00295"></a><span class="lineno"> 295</span>  <span class="comment">// column stats for each column in turn.</span></div> |
| <div class="line"><a name="l00296"></a><span class="lineno"> 296</span>  <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i < stats.size(); ++i) {</div> |
| <div class="line"><a name="l00297"></a><span class="lineno"> 297</span>  stats[i].Finalize();</div> |
| <div class="line"><a name="l00298"></a><span class="lineno"> 298</span>  <span class="keyword">const</span> <span class="keywordtype">string</span>& col_name = col_stats_schema.columns[i * COLUMNS_PER_STAT].columnName;</div> |
| <div class="line"><a name="l00299"></a><span class="lineno"> 299</span>  params->column_stats[col_name] = stats[i].ToTColumnStats();</div> |
| <div class="line"><a name="l00300"></a><span class="lineno"> 300</span> </div> |
| <div class="line"><a name="l00301"></a><span class="lineno"> 301</span>  VLOG(3) << <span class="stringliteral">"Incremental stats result for column: "</span> << col_name << <span class="stringliteral">": "</span></div> |
| <div class="line"><a name="l00302"></a><span class="lineno"> 302</span>  << stats[i].DebugString();</div> |
| <div class="line"><a name="l00303"></a><span class="lineno"> 303</span>  }</div> |
| <div class="line"><a name="l00304"></a><span class="lineno"> 304</span> </div> |
| <div class="line"><a name="l00305"></a><span class="lineno"> 305</span>  params->__isset.column_stats = <span class="keyword">true</span>;</div> |
| <div class="line"><a name="l00306"></a><span class="lineno"> 306</span> }</div> |
| <div class="line"><a name="l00307"></a><span class="lineno"> 307</span> </div> |
| <div class="line"><a name="l00308"></a><span class="lineno"> 308</span> }</div> |
| <div class="ttc" id="structPerColumnStats_html"><div class="ttname"><a href="structPerColumnStats.html">PerColumnStats</a></div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00114">incr-stats-util.cc:114</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a3cbc36f0eeca7cf06bbd9a52a986efc7"><div class="ttname"><a href="structPerColumnStats.html#a3cbc36f0eeca7cf06bbd9a52a986efc7">PerColumnStats::max_width</a></div><div class="ttdeci">int32_t max_width</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00123">incr-stats-util.cc:123</a></div></div> |
| <div class="ttc" id="classimpala_1_1AggregateFunctions_html"><div class="ttname"><a href="classimpala_1_1AggregateFunctions.html">impala::AggregateFunctions</a></div><div class="ttdef"><b>Definition:</b> <a href="aggregate-functions_8h_source.html#l00030">aggregate-functions.h:30</a></div></div> |
| <div class="ttc" id="aggregate-functions_8h_html"><div class="ttname"><a href="aggregate-functions_8h.html">aggregate-functions.h</a></div></div> |
| <div class="ttc" id="udf_8h_html"><div class="ttname"><a href="udf_8h.html">udf.h</a></div></div> |
| <div class="ttc" id="incr-stats-util_8cc_html_ac119443ab23486b522706d2460885a97"><div class="ttname"><a href="incr-stats-util_8cc.html#ac119443ab23486b522706d2460885a97">EncodeNdv</a></div><div class="ttdeci">string EncodeNdv(const string &ndv, bool *is_encoded)</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00058">incr-stats-util.cc:58</a></div></div> |
| <div class="ttc" id="namespaceimpala_html_a3d439da3c73d5f3a86b0c7df3436eda5"><div class="ttname"><a href="namespaceimpala.html#a3d439da3c73d5f3a86b0c7df3436eda5">impala::PrintTColumnValue</a></div><div class="ttdeci">void PrintTColumnValue(const apache::hive::service::cli::thrift::TColumnValue &colval, std::stringstream *out)</div></div> |
| <div class="ttc" id="incr-stats-util_8cc_html_a89c8672b1593c5cb907376e88dfa053a"><div class="ttname"><a href="incr-stats-util_8cc.html#a89c8672b1593c5cb907376e88dfa053a">DecodeNdv</a></div><div class="ttdeci">string DecodeNdv(const string &ndv, bool is_encoded)</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00097">incr-stats-util.cc:97</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_af24afd73513060d9989bf20d22d6064e"><div class="ttname"><a href="structPerColumnStats.html#af24afd73513060d9989bf20d22d6064e">PerColumnStats::Finalize</a></div><div class="ttdeci">void Finalize()</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00162">incr-stats-util.cc:162</a></div></div> |
| <div class="ttc" id="logging_8h_html"><div class="ttname"><a href="logging_8h.html">logging.h</a></div></div> |
| <div class="ttc" id="structimpala__udf_1_1StringVal_html_a1152442079d2471ec396076f37bf6c35"><div class="ttname"><a href="structimpala__udf_1_1StringVal.html#a1152442079d2471ec396076f37bf6c35">impala_udf::StringVal::ptr</a></div><div class="ttdeci">uint8_t * ptr</div><div class="ttdef"><b>Definition:</b> <a href="udf_8h_source.html#l00523">udf.h:523</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a3be87c212b5835a98bd102ea350110d3"><div class="ttname"><a href="structPerColumnStats.html#a3be87c212b5835a98bd102ea350110d3">PerColumnStats::intermediate_ndv</a></div><div class="ttdeci">string intermediate_ndv</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00117">incr-stats-util.cc:117</a></div></div> |
| <div class="ttc" id="classimpala__udf_1_1FunctionContext_html"><div class="ttname"><a href="classimpala__udf_1_1FunctionContext.html">impala_udf::FunctionContext</a></div><div class="ttdef"><b>Definition:</b> <a href="udf_8h_source.html#l00047">udf.h:47</a></div></div> |
| <div class="ttc" id="structimpala__udf_1_1AnyVal_html_aded62c6b250c62814c94271856b06cbe"><div class="ttname"><a href="structimpala__udf_1_1AnyVal.html#aded62c6b250c62814c94271856b06cbe">impala_udf::AnyVal::is_null</a></div><div class="ttdeci">bool is_null</div><div class="ttdef"><b>Definition:</b> <a href="udf_8h_source.html#l00359">udf.h:359</a></div></div> |
| <div class="ttc" id="incr-stats-util_8h_html"><div class="ttname"><a href="incr-stats-util_8h.html">incr-stats-util.h</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a9e4141bc73e1656c823d547368d2ee74"><div class="ttname"><a href="structPerColumnStats.html#a9e4141bc73e1656c823d547368d2ee74">PerColumnStats::PerColumnStats</a></div><div class="ttdeci">PerColumnStats()</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00138">incr-stats-util.cc:138</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a1fff46d1bf99c8664c68ba2a06705f4c"><div class="ttname"><a href="structPerColumnStats.html#a1fff46d1bf99c8664c68ba2a06705f4c">PerColumnStats::num_nulls</a></div><div class="ttdeci">int64_t num_nulls</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00120">incr-stats-util.cc:120</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a9db29e9f39c9df9a93d345172d6882e1"><div class="ttname"><a href="structPerColumnStats.html#a9db29e9f39c9df9a93d345172d6882e1">PerColumnStats::Update</a></div><div class="ttdeci">void Update(const string &ndv, int64_t num_new_rows, double new_avg_width, int32_t max_new_width, int64_t num_new_nulls)</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00143">incr-stats-util.cc:143</a></div></div> |
| <div class="ttc" id="classimpala__udf_1_1FunctionContext_html_a14904de2d63cac14b1a1ef09b0653a14"><div class="ttname"><a href="classimpala__udf_1_1FunctionContext.html#a14904de2d63cac14b1a1ef09b0653a14">impala_udf::FunctionContext::Free</a></div><div class="ttdeci">void Free(uint8_t *buffer)</div><div class="ttdoc">Frees a buffer returned from Allocate() or Reallocate() </div><div class="ttdef"><b>Definition:</b> <a href="udf_8cc_source.html#l00291">udf.cc:291</a></div></div> |
| <div class="ttc" id="structimpala__udf_1_1StringVal_html"><div class="ttname"><a href="structimpala__udf_1_1StringVal.html">impala_udf::StringVal</a></div><div class="ttdef"><b>Definition:</b> <a href="udf_8h_source.html#l00521">udf.h:521</a></div></div> |
| <div class="ttc" id="incr-stats-util_8cc_html_af13a2b8fc5ed714abb9ce44f58edc2db"><div class="ttname"><a href="incr-stats-util_8cc.html#af13a2b8fc5ed714abb9ce44f58edc2db">IncrementNdvFinalize</a></div><div class="ttdeci">StringVal IncrementNdvFinalize(FunctionContext *ctx, const StringVal &src)</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00039">incr-stats-util.cc:39</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a93cbe26df3eb6decba12dcbdd5b39a90"><div class="ttname"><a href="structPerColumnStats.html#a93cbe26df3eb6decba12dcbdd5b39a90">PerColumnStats::avg_width</a></div><div class="ttdeci">double avg_width</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00136">incr-stats-util.cc:136</a></div></div> |
| <div class="ttc" id="incr-stats-util-test_8cc_html_a4a1314e36faa5381e72ffd752d5c3975"><div class="ttname"><a href="incr-stats-util-test_8cc.html#a4a1314e36faa5381e72ffd752d5c3975">HLL_LEN</a></div><div class="ttdeci">static const int HLL_LEN</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util-test_8cc_source.html#l00030">incr-stats-util-test.cc:30</a></div></div> |
| <div class="ttc" id="partitioning-throughput-test_8cc_html_ad43c3812e6d13e0518d9f8b8f463ffcf"><div class="ttname"><a href="partitioning-throughput-test_8cc.html#ad43c3812e6d13e0518d9f8b8f463ffcf">count</a></div><div class="ttdeci">uint64_t count</div><div class="ttdef"><b>Definition:</b> <a href="partitioning-throughput-test_8cc_source.html#l00035">partitioning-throughput-test.cc:35</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_adada0352408dc6820c88d75c5baa5c60"><div class="ttname"><a href="structPerColumnStats.html#adada0352408dc6820c88d75c5baa5c60">PerColumnStats::total_width</a></div><div class="ttdeci">double total_width</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00130">incr-stats-util.cc:130</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_afadcf8c66ab7baaa6610774fcf2245de"><div class="ttname"><a href="structPerColumnStats.html#afadcf8c66ab7baaa6610774fcf2245de">PerColumnStats::DebugString</a></div><div class="ttdeci">string DebugString() const </div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00179">incr-stats-util.cc:179</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a1347d5dc153e58257792f28122f44d13"><div class="ttname"><a href="structPerColumnStats.html#a1347d5dc153e58257792f28122f44d13">PerColumnStats::ToTColumnStats</a></div><div class="ttdeci">TColumnStats ToTColumnStats() const </div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00169">incr-stats-util.cc:169</a></div></div> |
| <div class="ttc" id="hs2-util_8h_html"><div class="ttname"><a href="hs2-util_8h.html">hs2-util.h</a></div></div> |
| <div class="ttc" id="names_8h_html"><div class="ttname"><a href="names_8h.html">names.h</a></div></div> |
| <div class="ttc" id="namespacegen__ir__descriptions_html_a0d6572990c8dd576a55a21ff11d6deb7"><div class="ttname"><a href="namespacegen__ir__descriptions.html#a0d6572990c8dd576a55a21ff11d6deb7">gen_ir_descriptions.idx</a></div><div class="ttdeci">int idx</div><div class="ttdef"><b>Definition:</b> <a href="gen__ir__descriptions_8py_source.html#l00215">gen_ir_descriptions.py:215</a></div></div> |
| <div class="ttc" id="structimpala__udf_1_1StringVal_html_a72a872d5acebca872db6bd37eb4ae184"><div class="ttname"><a href="structimpala__udf_1_1StringVal.html#a72a872d5acebca872db6bd37eb4ae184">impala_udf::StringVal::len</a></div><div class="ttdeci">int len</div><div class="ttdef"><b>Definition:</b> <a href="udf_8h_source.html#l00522">udf.h:522</a></div></div> |
| <!-- Reference entry for impala::FinalizePartitionedColumnStats. The C++ signature in ttdeci is entity-escaped (&amp;amp;, &amp;lt;, &amp;gt;) so that reference and template punctuation is character data rather than markup. --> |
| <div class="ttc" id="namespaceimpala_html_a1a02780a0ef5ef9c20f452b39db5f446"><div class="ttname"><a href="namespaceimpala.html#a1a02780a0ef5ef9c20f452b39db5f446">impala::FinalizePartitionedColumnStats</a></div><div class="ttdeci">void FinalizePartitionedColumnStats(const TTableSchema &amp;col_stats_schema, const vector&lt; TPartitionStats &gt; &amp;existing_part_stats, const vector&lt; vector&lt; string &gt; &gt; &amp;expected_partitions, const TRowSet &amp;rowset, int32_t num_partition_cols, TAlterTableUpdateStatsParams *params)</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00188">incr-stats-util.cc:188</a></div></div> |
| <!-- Reference entries (class "ttc") for two int64_t members of PerColumnStats: linked name (ttname), declaration text (ttdeci) and a link to the definition line in incr-stats-util.cc (ttdef). --> |
| <div class="ttc" id="structPerColumnStats_html_a0c35cdf9bede903799a2d60a4b69a01e"><div class="ttname"><a href="structPerColumnStats.html#a0c35cdf9bede903799a2d60a4b69a01e">PerColumnStats::ndv_estimate</a></div><div class="ttdeci">int64_t ndv_estimate</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00133">incr-stats-util.cc:133</a></div></div> |
| <div class="ttc" id="structPerColumnStats_html_a7148386c16fd51493de77d905b2fba0e"><div class="ttname"><a href="structPerColumnStats.html#a7148386c16fd51493de77d905b2fba0e">PerColumnStats::num_rows</a></div><div class="ttdeci">int64_t num_rows</div><div class="ttdef"><b>Definition:</b> <a href="incr-stats-util_8cc_source.html#l00126">incr-stats-util.cc:126</a></div></div> |
| </div><!-- fragment --></div><!-- contents --> |
| </div><!-- doc-content --> |
| <!-- start footer part --> |
| <!-- Breadcrumb trail for this file (be / src / exec / incr-stats-util.cc), followed by the generator credit. The external Doxygen link uses https. --> |
| <div id="nav-path" class="navpath"><!-- id is needed for treeview function! --> |
| <ul> |
| <li class="navelem"><a class="el" href="dir_e5d120be6b5e8a44336cbfd013b25604.html">be</a></li><li class="navelem"><a class="el" href="dir_68cf7cafb51a962d5bc4848b83cab0de.html">src</a></li><li class="navelem"><a class="el" href="dir_479336c0a15f2c0737bcafcf969f884c.html">exec</a></li><li class="navelem"><a class="el" href="incr-stats-util_8cc.html">incr-stats-util.cc</a></li> |
| <li class="footer">Generated on Thu May 7 2015 16:10:35 for Impala by |
| <a href="https://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.6 </li> |
| </ul> |
| </div> |
| </body> |
| </html> |