blob: 888c5ad927d895966dd86b75a3c3fc061722b58d [file] [log] [blame]
"use strict";(self.webpackChunkdoris_website=self.webpackChunkdoris_website||[]).push([[36265],{15680:(e,t,n)=>{n.d(t,{xA:()=>p,yg:()=>h});var a=n(296540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t<arguments.length;t++){var n=null!=arguments[t]?arguments[t]:{};t%2?i(Object(n),!0).forEach((function(t){r(e,t,n[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):i(Object(n)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(n,t))}))}return e}function o(e,t){if(null==e)return{};var n,a,r=function(e,t){if(null==e)return{};var n,a,r={},i=Object.keys(e);for(a=0;a<i.length;a++)n=i[a],t.indexOf(n)>=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a<i.length;a++)n=i[a],t.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),u=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},y=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=u(n),y=r,h=m["".concat(s,".").concat(y)]||m[y]||c[y]||i;return n?a.createElement(h,l(l({ref:t},p),{},{components:n})):a.createElement(h,l({ref:t},p))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=y;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:r,l[1]=o;for(var u=2;u<i;u++)l[u]=n[u];return a.createElement.apply(null,l)}return a.createElement.apply(null,n)}y.displayName="MDXCreateElement"},795026:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>s,contentTitle:()=>l,default:()=>c,frontMatter:()=>i,metadata:()=>o,toc:()=>u});var a=n(58168),r=(n(296540),n(15680));const i={title:"bitmap_hash",language:"en"},l=void 0,o={unversionedId:"sql-manual/sql-functions/bitmap-functions/bitmap_hash",id:"version-1.2/sql-manual/sql-functions/bitmap-functions/bitmap_hash",title:"bitmap_hash",description:"\x3c!--",source:"@site/versioned_docs/version-1.2/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md",sourceDirName:"sql-manual/sql-functions/bitmap-functions",slug:"/sql-manual/sql-functions/bitmap-functions/bitmap_hash",permalink:"/docs/1.2/sql-manual/sql-functions/bitmap-functions/bitmap_hash",draft:!1,tags:[],version:"1.2",frontMatter:{title:"bitmap_hash",language:"en"},sidebar:"docs",previous:{title:"to_bitmap",permalink:"/docs/1.2/sql-manual/sql-functions/bitmap-functions/to_bitmap"},next:{title:"bitmap_from_string",permalink:"/docs/1.2/sql-manual/sql-functions/bitmap-functions/bitmap_from_string"}},s={},u=[{value:"bitmap_hash",id:"bitmap_hash",level:2},{value:"Name",id:"name",level:3},{value:"Description",id:"description",level:3},{value:"Syntax",id:"syntax",level:4},{value:"Arguments",id:"arguments",level:4},{value:"Return Type",id:"return-type",level:4},{value:"Remarks",id:"remarks",level:4},{value:"Example",id:"example",level:3},{value:"Keywords",id:"keywords",level:3},{value:"Best Practice",id:"best-practice",level:3}],p={toc:u},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.yg)(m,(0,a.A)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h2",{id:"bitmap_hash"},"bitmap_hash"),(0,r.yg)("h3",{id:"name"},"Name"),(0,r.yg)("p",null,"BITMAP_HASH"),(0,r.yg)("h3",{id:"description"},"Description"),(0,r.yg)("p",null,'Calculating hash value for what your input and return a BITMAP which contain the hash value. MurMur3 is used for this function because it is high-performance with low collision rate. More important, the MurMur3 distribution is "simili-random"; the Chi-Square distribution test is used to prove it. By the way, Different hardware platforms and different SEED may change the result of MurMur3. For more information about its performance, see ',(0,r.yg)("a",{parentName:"p",href:"http://rurban.github.io/smhasher/"},"Smhasher"),"."),(0,r.yg)("h4",{id:"syntax"},"Syntax"),(0,r.yg)("p",null,(0,r.yg)("inlineCode",{parentName:"p"},"BITMAP BITMAP_HASH(<any_value>)")),(0,r.yg)("h4",{id:"arguments"},"Arguments"),(0,r.yg)("p",null,(0,r.yg)("inlineCode",{parentName:"p"},"<any_value>"),"\nany value or expression. "),(0,r.yg)("h4",{id:"return-type"},"Return Type"),(0,r.yg)("p",null,"BITMAP"),(0,r.yg)("h4",{id:"remarks"},"Remarks"),(0,r.yg)("p",null,"Generally, MurMurHash 32 is friendly to random, short STRING with low collision rate about one-billionth. But for longer STRING, such as your path of system, can cause more frequent collision. If you indexed your system path, you will find a lot of collisions."),(0,r.yg)("p",null,"The following two values are the same."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-sql"},"SELECT bitmap_to_string(bitmap_hash('/System/Volumes/Data/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk/System/Library/Frameworks/KernelManagement.framework/KernelManagement.tbd')) AS a ,\n bitmap_to_string(bitmap_hash('/System/Library/PrivateFrameworks/Install.framework/Versions/Current/Resources/es_419.lproj/Architectures.strings')) AS b;\n")),(0,r.yg)("p",null,"Here is the result."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-text"},"+-----------+-----------+\n| a | b |\n+-----------+-----------+\n| 282251871 | 282251871 |\n+-----------+-----------+\n")),(0,r.yg)("h3",{id:"example"},"Example"),(0,r.yg)("p",null,"If you want to calculate MurMur3 of a certain value, you can"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"select bitmap_to_array(bitmap_hash('hello'))[1];\n")),(0,r.yg)("p",null,"Here is the result."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-text"},"+-------------------------------------------------------------+\n| %element_extract%(bitmap_to_array(bitmap_hash('hello')), 1) |\n+-------------------------------------------------------------+\n| 1321743225 |\n+-------------------------------------------------------------+\n")),(0,r.yg)("p",null,"If you want to ",(0,r.yg)("inlineCode",{parentName:"p"},"count distinct")," some columns, using bitmap has higher performance in some scenes. "),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-sql"},"select bitmap_count(bitmap_union(bitmap_hash(`word`))) from `words`;\n")),(0,r.yg)("p",null,"Here is the result."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-text"},"+-------------------------------------------------+\n| bitmap_count(bitmap_union(bitmap_hash(`word`))) |\n+-------------------------------------------------+\n| 33263478 |\n+-------------------------------------------------+\n")),(0,r.yg)("h3",{id:"keywords"},"Keywords"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"BITMAP_HASH,BITMAP\n")),(0,r.yg)("h3",{id:"best-practice"},"Best Practice"),(0,r.yg)("p",null,"For more information, see also:"),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("a",{parentName:"li",href:"/docs/1.2/sql-manual/sql-functions/bitmap-functions/bitmap_hash64"},"BITMAP_HASH64"))))}c.isMDXComponent=!0}}]);