blob: 096e0f6730162dc9dedbee1802dc2849623d0c4d [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-development/extensions-core/bloom-filter">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.1">
<title data-rh="true">Bloom Filter | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/latest/development/extensions-core/bloom-filter"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Bloom Filter | Apache® Druid"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/latest/development/extensions-core/bloom-filter"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/development/extensions-core/bloom-filter" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/development/extensions-core/bloom-filter" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","UA-131010415-1",{})</script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.546f39eb.css">
<link rel="preload" href="/assets/js/runtime~main.4c9a7172.js" as="script">
<link rel="preload" href="/assets/js/main.3a5ab01b.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/latest/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><main class="docMainContainer_gTbr docMainContainerEnhanced_Uz_u"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Bloom Filter</h1></header><p>To use this Apache Druid extension, <a href="/docs/latest/configuration/extensions#loading-extensions">include</a> <code>druid-bloom-filter</code> in the extensions load list.</p><p>This extension adds the ability to both construct bloom filters from query results, and filter query results by testing
against a bloom filter. A Bloom filter is a probabilistic data structure for performing a set membership check. A bloom
filter is a good candidate to use with Druid for cases where an explicit filter is impossible, e.g. filtering a query
against a set of millions of values.</p><p>Following are some characteristics of Bloom filters:</p><ul><li>Bloom filters are highly space efficient when compared to using a HashSet.</li><li>Because of the probabilistic nature of bloom filters, false positive results are possible (element was not actually
inserted into a bloom filter during construction, but <code>test()</code> says true)</li><li>False negatives are not possible (if element is present then <code>test()</code> will never say false).</li><li>The false positive probability of this implementation is currently fixed at 5%, but increasing the number of entries
that the filter can hold can decrease this false positive rate in exchange for overall size.</li><li>Bloom filters are sensitive to number of elements that will be inserted in the bloom filter. During the creation of bloom filter expected number of entries must be specified. If the number of insertions exceed
the specified initial number of entries then false positive probability will increase accordingly.</li></ul><p>This extension is currently based on <code>org.apache.hive.common.util.BloomKFilter</code> from <code>hive-storage-api</code>. Internally,
this implementation uses Murmur3 as the hash algorithm.</p><p>To construct a BloomKFilter externally with Java to use as a filter in a Druid query:</p><div class="language-java codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-java codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">BloomKFilter bloomFilter = new BloomKFilter(1500);</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">bloomFilter.addString(&quot;value 1&quot;);</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">bloomFilter.addString(&quot;value 2&quot;);</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">bloomFilter.addString(&quot;value 3&quot;);</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">BloomKFilter.serialize(byteArrayOutputStream, bloomFilter);</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">String base64Serialized = Base64.encodeBase64String(byteArrayOutputStream.toByteArray());</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>This string can then be used in the native or SQL Druid query.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="filtering-queries-with-a-bloom-filter">Filtering queries with a Bloom Filter<a href="#filtering-queries-with-a-bloom-filter" class="hash-link" aria-label="Direct link to Filtering queries with a Bloom Filter" title="Direct link to Filtering queries with a Bloom Filter"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="json-specification-of-bloom-filter">JSON Specification of Bloom Filter<a href="#json-specification-of-bloom-filter" class="hash-link" aria-label="Direct link to JSON Specification of Bloom Filter" title="Direct link to JSON Specification of Bloom Filter"></a></h3><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;bloom&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dimension&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;dimension_name&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;bloomKFilter&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;serialized_bytes_for_BloomKFilter&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;extractionFn&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;extraction_fn&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><table><thead><tr><th>Property</th><th>Description</th><th>required?</th></tr></thead><tbody><tr><td><code>type</code></td><td>Filter Type. Should always be <code>bloom</code></td><td>yes</td></tr><tr><td><code>dimension</code></td><td>The dimension to filter over.</td><td>yes</td></tr><tr><td><code>bloomKFilter</code></td><td>Base64 encoded Binary representation of <code>org.apache.hive.common.util.BloomKFilter</code></td><td>yes</td></tr><tr><td><code>extractionFn</code></td><td><a href="/docs/latest/querying/dimensionspecs#extraction-functions">Extraction function</a> to apply to the dimension values</td><td>no</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="serialized-format-for-bloomkfilter">Serialized Format for BloomKFilter<a href="#serialized-format-for-bloomkfilter" class="hash-link" aria-label="Direct link to Serialized Format for BloomKFilter" title="Direct link to Serialized Format for BloomKFilter"></a></h3><p> Serialized BloomKFilter format:</p><ul><li>1 byte for the number of hash functions.</li><li>1 big endian int(That is how OutputStream works) for the number of longs in the bitset</li><li>big endian longs in the BloomKFilter bitset</li></ul><p>Note: <code>org.apache.hive.common.util.BloomKFilter</code> provides a serialize method which can be used to serialize bloom filters to outputStream.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="filtering-sql-queries">Filtering SQL Queries<a href="#filtering-sql-queries" class="hash-link" aria-label="Direct link to Filtering SQL Queries" title="Direct link to Filtering SQL Queries"></a></h3><p>Bloom filters can be used in SQL <code>WHERE</code> clauses via the <code>bloom_filter_test</code> operator:</p><div class="language-sql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-sql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">SELECT</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">COUNT</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token operator" style="color:rgb(137, 221, 255)">*</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">FROM</span><span class="token plain"> druid</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">foo </span><span class="token keyword" style="font-style:italic">WHERE</span><span class="token plain"> bloom_filter_test</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token operator" style="color:rgb(137, 221, 255)">&lt;</span><span class="token plain">expr</span><span class="token operator" style="color:rgb(137, 221, 255)">&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;&lt;serialized_bytes_for_BloomKFilter&gt;&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="expression-and-virtual-column-support">Expression and Virtual Column Support<a href="#expression-and-virtual-column-support" class="hash-link" aria-label="Direct link to Expression and Virtual Column Support" title="Direct link to Expression and Virtual Column Support"></a></h3><p>The bloom filter extension also adds a bloom filter <a href="/docs/latest/querying/math-expr">Druid expression</a> which shares syntax
with the SQL operator.</p><div class="language-sql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-sql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">bloom_filter_test</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token operator" style="color:rgb(137, 221, 255)">&lt;</span><span class="token plain">expr</span><span class="token operator" style="color:rgb(137, 221, 255)">&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;&lt;serialized_bytes_for_BloomKFilter&gt;&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="bloom-filter-query-aggregator">Bloom Filter Query Aggregator<a href="#bloom-filter-query-aggregator" class="hash-link" aria-label="Direct link to Bloom Filter Query Aggregator" title="Direct link to Bloom Filter Query Aggregator"></a></h2><p>Input for a <code>bloomKFilter</code> can also be created from a druid query with the <code>bloom</code> aggregator. Note that it is very
important to set a reasonable value for the <code>maxNumEntries</code> parameter, which is the maximum number of distinct entries
that the bloom filter can represent without increasing the false positive rate. It may be worth performing a query using
one of the unique count sketches to calculate the value for this parameter in order to build a bloom filter appropriate
for the query.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="json-specification-of-bloom-filter-aggregator">JSON Specification of Bloom Filter Aggregator<a href="#json-specification-of-bloom-filter-aggregator" class="hash-link" aria-label="Direct link to JSON Specification of Bloom Filter Aggregator" title="Direct link to JSON Specification of Bloom Filter Aggregator"></a></h3><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;bloom&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;output_field_name&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;maxNumEntries&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;maximum_number_of_elements_for_BloomKFilter&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;field&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;dimension_spec&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><table><thead><tr><th>Property</th><th>Description</th><th>required?</th></tr></thead><tbody><tr><td><code>type</code></td><td>Aggregator Type. Should always be <code>bloom</code></td><td>yes</td></tr><tr><td><code>name</code></td><td>Output field name</td><td>yes</td></tr><tr><td><code>field</code></td><td><a href="/docs/latest/querying/dimensionspecs">DimensionSpec</a> to add to <code>org.apache.hive.common.util.BloomKFilter</code></td><td>yes</td></tr><tr><td><code>maxNumEntries</code></td><td>Maximum number of distinct values supported by <code>org.apache.hive.common.util.BloomKFilter</code>, default <code>1500</code></td><td>no</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="example">Example<a href="#example" class="hash-link" aria-label="Direct link to Example" title="Direct link to Example"></a></h3><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;queryType&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;timeseries&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dataSource&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;wikiticker&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;intervals&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2015-09-12T00:00:00.000/2015-09-13T00:00:00.000&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;granularity&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;day&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;aggregations&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;bloom&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;userBloom&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;maxNumEntries&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">100000</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;field&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;default&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dimension&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;user&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;outputType&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;STRING&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>response</p><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token property">&quot;timestamp&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2015-09-12T00:00:00.000Z&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token property">&quot;result&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token property">&quot;userBloom&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;BAAAJhAAAA...&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>These values can then be set in the filter specification described above.</p><p>Ordering results by a bloom filter aggregator, for example in a TopN query, will perform a comparatively expensive
linear scan <em>of the filter itself</em> to count the number of set bits as a means of approximating how many items have been
added to the set. As such, ordering by an alternate aggregation is recommended if possible.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="sql-bloom-filter-aggregator">SQL Bloom Filter Aggregator<a href="#sql-bloom-filter-aggregator" class="hash-link" aria-label="Direct link to SQL Bloom Filter Aggregator" title="Direct link to SQL Bloom Filter Aggregator"></a></h3><p>Bloom filters can be computed in SQL expressions with the <code>bloom_filter</code> aggregator:</p><div class="language-sql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-sql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">SELECT</span><span class="token plain"> BLOOM_FILTER</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token operator" style="color:rgb(137, 221, 255)">&lt;</span><span class="token plain">expression</span><span class="token operator" style="color:rgb(137, 221, 255)">&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">&lt;</span><span class="token plain">max number </span><span class="token keyword" style="font-style:italic">of</span><span class="token plain"> entries</span><span class="token operator" style="color:rgb(137, 221, 255)">&gt;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">FROM</span><span class="token plain"> druid</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">foo </span><span class="token keyword" style="font-style:italic">WHERE</span><span class="token plain"> dim2 </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;abc&#x27;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>but requires the setting <code>druid.sql.planner.serializeComplexValues</code> to be set to <code>true</code>. Bloom filter results in a SQL
response are serialized into a base64 string, which can then be used in subsequent queries as a filter.</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#filtering-queries-with-a-bloom-filter" class="table-of-contents__link toc-highlight">Filtering queries with a Bloom Filter</a><ul><li><a href="#json-specification-of-bloom-filter" class="table-of-contents__link toc-highlight">JSON Specification of Bloom Filter</a></li><li><a href="#serialized-format-for-bloomkfilter" class="table-of-contents__link toc-highlight">Serialized Format for BloomKFilter</a></li><li><a href="#filtering-sql-queries" class="table-of-contents__link toc-highlight">Filtering SQL Queries</a></li><li><a href="#expression-and-virtual-column-support" class="table-of-contents__link toc-highlight">Expression and Virtual Column Support</a></li></ul></li><li><a href="#bloom-filter-query-aggregator" class="table-of-contents__link toc-highlight">Bloom Filter Query Aggregator</a><ul><li><a href="#json-specification-of-bloom-filter-aggregator" class="table-of-contents__link toc-highlight">JSON Specification of Bloom Filter Aggregator</a></li><li><a href="#example" class="table-of-contents__link toc-highlight">Example</a></li><li><a href="#sql-bloom-filter-aggregator" class="table-of-contents__link toc-highlight">SQL Bloom Filter Aggregator</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.4c9a7172.js"></script>
<script src="/assets/js/main.3a5ab01b.js"></script>
</body>
</html>