| <!doctype html> |
| <html class="docs-version-0.12.1 docs-custom-styles" lang="cn" dir="ltr"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Docusaurus v2.0.0-beta.14"> |
| <link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed"> |
| <link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed"> |
| <link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed"> |
| <link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml"> |
| <link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed"> |
| <link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed"> |
| <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro"> |
| <link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Metadata Indexing | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/docs/0.12.1/metadata_indexing"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:version" content="0.12.1"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.12.1"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Metadata Indexing | Apache Hudi"><meta data-react-helmet="true" name="description" content="We can now create different metadata indexes, including files, bloom filters and column stats,"><meta data-react-helmet="true" property="og:description" content="We can now create different metadata indexes, including files, bloom filters and column stats,"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/docs/0.12.1/metadata_indexing"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.12.1/metadata_indexing" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/0.12.1/metadata_indexing" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.12.1/metadata_indexing" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css"> |
| <link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script"> |
| <link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script"> |
| </head> |
| <body> |
| <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus"> |
| <div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who's Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/0.12.1/overview"><div class="labelWrapperDropdown_Mqbj">0.12.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.1/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/cn/docs/0.12.1/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.1/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/overview"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/overview"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/overview"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/overview"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/0.12.1/metadata_indexing" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/0.12.1/metadata_indexing" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who's Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/overview">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/quick-start-guide">Quick Start</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/quick-start-guide">Spark Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/flink-quick-start-guide">Flink Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/docker_demo">Docker Demo</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/timeline">Concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/table_management">How To</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active hasHref_TwRn" href="/cn/docs/0.12.1/migration_guide">Services</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/migration_guide">Bootstrapping</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/compaction">Compaction</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/clustering">Clustering</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/cn/docs/0.12.1/metadata_indexing">Metadata Indexing</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/hoodie_cleaner">Cleaning</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/transforms">Transformers</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/markers">Marker Mechanism</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/file_sizing">File Sizing</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/disaster_recovery">Disaster Recovery</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/snapshot_exporter">Exporter</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/precommit_validator">Data Quality</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/basic_configurations">Configurations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/query_engine_setup">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/faq">FAQs</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/privacy">Privacy Policy</a></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.12.1</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/cn/docs/metadata_indexing">latest version</a></b> (<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.12.1</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Metadata Indexing</h1></header><p>We can now create different metadata indexes, including files, bloom filters and column stats, |
| asynchronously in Hudi, which are then used by queries and writing to improve performance. |
| Being able to index without blocking writing has two benefits,</p><ul><li>improved write latency</li><li>reduced resource wastage due to contention between writing and indexing.</li></ul><p>To learn more about the design of this feature, please check out <a href="https://github.com/apache/hudi/blob/master/rfc/rfc-45/rfc-45.md" target="_blank" rel="noopener noreferrer">RFC-45</a>.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="setup-async-indexing">Setup Async Indexing<a class="hash-link" href="#setup-async-indexing" title="Direct link to heading"></a></h2><p>First, we will generate a continuous workload. In the below example, we are going to start a <a href="/cn/docs/0.12.1/hoodie_deltastreamer#deltastreamer">deltastreamer</a> which will continuously write data |
| from raw parquet to Hudi table. We used the widely available <a href="https://registry.opendata.aws/nyc-tlc-trip-records-pds/" target="_blank" rel="noopener noreferrer">NY Taxi dataset</a>, whose setup details are as below:</p><details class="details_Q743 alert alert--info details_h+cY" data-collapsed="true"><summary>Ingestion write config</summary><div><div class="collapsibleContent_K5uX"><p></p><div class="codeBlockContainer_J+bg language-bash theme-code-block"><div class="codeBlockContent_csEI bash"><pre tabindex="0" class="prism-code language-bash codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.datasource.write.recordkey.field</span><span class="token operator">=</span><span class="token plain">VendorID</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.datasource.write.partitionpath.field</span><span class="token operator">=</span><span class="token plain">tpep_dropoff_datetime</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.datasource.write.precombine.field</span><span class="token operator">=</span><span class="token plain">tpep_dropoff_datetime</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.deltastreamer.source.dfs.root</span><span class="token operator">=</span><span class="token plain">/Users/home/path/to/data/parquet_files/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.deltastreamer.schemaprovider.target.schema.file</span><span class="token operator">=</span><span class="token plain">/Users/home/path/to/schema/schema.avsc</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.deltastreamer.schemaprovider.source.schema.file</span><span class="token operator">=</span><span class="token plain">/Users/home/path/to/schema/schema.avsc</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// </span><span class="token builtin class-name" style="color:rgb(189, 147, 249)">set</span><span class="token plain"> lock provider configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.provider</span><span class="token operator">=</span><span class="token plain">org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.url</span><span class="token operator">=</span><span class="token operator"><</span><span class="token plain">zk_url</span><span class="token operator">></span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.port</span><span class="token operator">=</span><span class="token operator"><</span><span class="token plain">zk_port</span><span class="token operator">></span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.lock_key</span><span class="token operator">=</span><span class="token operator"><</span><span class="token plain">zk_key</span><span class="token operator">></span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.base_path</span><span class="token operator">=</span><span class="token operator"><</span><span class="token plain">zk_base_path</span><span class="token operator">></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p></p></div></div></details><details class="details_Q743 alert alert--info details_h+cY" data-collapsed="true"><summary>Run deltastreamer</summary><div><div class="collapsibleContent_K5uX"><p></p><div class="codeBlockContainer_J+bg language-bash theme-code-block"><div class="codeBlockContent_csEI bash"><pre tabindex="0" class="prism-code language-bash codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer </span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token variable function" style="color:rgb(80, 250, 123);font-style:italic">ls</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic"> /Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props </span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token variable function" style="color:rgb(80, 250, 123);font-style:italic">ls</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic"> /Users/home/path/to/write/config.properties</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--source-class org.apache.hudi.utilities.sources.ParquetDFSSource --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--source-ordering-field tpep_dropoff_datetime </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-type COPY_ON_WRITE </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--target-base-path file:///tmp/hudi-ny-taxi/ </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--target-table ny_hudi_tbl </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--op UPSERT </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--continuous </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--source-limit </span><span class="token number">5000000</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--min-sync-interval-seconds </span><span class="token number">60</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p></p></div></div></details><p>From version 0.11.0 onwards, Hudi metadata table is enabled by default and the files index will be automatically created. While the deltastreamer is running in continuous mode, let |
| us schedule the indexing for COLUMN_STATS index. First we need to define a properties file for the indexer.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Enabling metadata table and configuring a lock provider are the prerequisites for using async indexer.</p></div></div><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain"># ensure that both metadata and async indexing is enabled as below two configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.enable=true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.index.async=true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># enable column_stats index config</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.index.column.stats.enable=true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># set concurrency mode and lock configs as this is a multi-writer scenario</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># check https://hudi.apache.org/docs/concurrency_control/ for differnt lock provider configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.concurrency.mode=optimistic_concurrency_control</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.url=<zk_url></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.port=<zk_port></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.lock_key=<zk_key></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.base_path=<zk_base_path></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="schedule-indexing">Schedule indexing<a class="hash-link" href="#schedule-indexing" title="Direct link to heading"></a></h3><p>Now, we can schedule indexing using <code>HoodieIndexer</code> in <code>schedule</code> mode as follows:</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.HoodieIndexer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props /Users/home/path/to/indexer.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--mode schedule \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--base-path /tmp/hudi-ny-taxi \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-name ny_hudi_tbl \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--index-types COLUMN_STATS \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--parallelism 1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--spark-memory 1g</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>This will write an <code>indexing.requested</code> instant to the timeline.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="execute-indexing">Execute Indexing<a class="hash-link" href="#execute-indexing" title="Direct link to heading"></a></h3><p>To execute indexing, run the indexer in <code>execute</code> mode as below.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.HoodieIndexer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props /Users/home/path/to/indexer.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--mode execute \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--base-path /tmp/hudi-ny-taxi \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-name ny_hudi_tbl \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--index-types COLUMN_STATS \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--parallelism 1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--spark-memory 1g</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>We can also run the indexer in <code>scheduleAndExecute</code> mode to do the above two steps in one shot. Doing it separately gives us better control over when we want to execute.</p><p>Let's look at the data timeline.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">ls -lrt /tmp/hudi-ny-taxi/.hoodie</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">total 1816</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 0 Apr 14 19:53 20220414195327683.commit.requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 153423 Apr 14 19:54 20220414195327683.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 207061 Apr 14 19:54 20220414195327683.commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 0 Apr 14 19:54 20220414195423420.commit.requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 659 Apr 14 19:54 20220414195437837.indexing.requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 323950 Apr 14 19:54 20220414195423420.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 0 Apr 14 19:55 20220414195437837.indexing.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 222920 Apr 14 19:55 20220414195423420.commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 734 Apr 14 19:55 hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 979 Apr 14 19:55 20220414195437837.indexing</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>In the data timeline, we can see that indexing was scheduled after one commit completed (<code>20220414195327683.commit</code>) and another was requested |
| (<code>20220414195423420.commit.requested</code>). This would have picked <code>20220414195327683</code> as the base instant. Indexing was inflight with an inflight writer as well. If we parse the |
| indexer logs, we would find that it indeed caught up with instant <code>20220414195423420</code> after indexing upto the base instant.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:22 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/hudi-ny-taxi/.hoodie/metadata</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:22 INFO RunIndexActionExecutor: Starting Index Building with base instant: 20220414195327683</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:22 INFO HoodieBackedTableMetadataWriter: Creating a new metadata index for partition 'column_stats' under path /tmp/hudi-ny-taxi/.hoodie/metadata upto instant 20220414195327683</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO RunIndexActionExecutor: Total remaining instants to index: 1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/hudi-ny-taxi/.hoodie/metadata</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieTableConfig: Loading table properties from /tmp/hudi-ny-taxi/.hoodie/metadata/.hoodie/hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/hudi-ny-taxi/.hoodie/metadata</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20220414195423420__deltacommit__COMPLETED]}</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO RunIndexActionExecutor: Starting index catchup task</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="drop-index">Drop Index<a class="hash-link" href="#drop-index" title="Direct link to heading"></a></h3><p>To drop an index, just run the index in <code>dropindex</code> mode.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.HoodieIndexer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props /Users/home/path/to/indexer.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--mode dropindex \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--base-path /tmp/hudi-ny-taxi \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-name ny_hudi_tbl \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--index-types COLUMN_STATS \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--parallelism 1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--spark-memory 2g</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="caveats">Caveats<a class="hash-link" href="#caveats" title="Direct link to heading"></a></h2><p>Asynchronous indexing feature is still evolving. Few points to note from deployment perspective while running the indexer:</p><ul><li>While an index can be created concurrently with ingestion, it cannot be dropped concurrently. Please stop all writers |
| before dropping an index.</li><li>Files index is created by default as long as the metadata table is enabled.</li><li>Trigger indexing for one metadata partition (or index type) at a time.</li><li>If an index is enabled via async HoodieIndexer, then ensure that index is also enabled in configs corresponding to regular ingestion writers. Otherwise, metadata writer will |
| think that particular index was disabled and cleanup the metadata partition.</li><li>In the case of multi-writers, enable async index and specific index config for all writers.</li><li>Unlike other table services like compaction and clustering, where we have a separate configuration to run inline, there is no such inline config here. |
| For example, if async indexing is disabled and metadata is enabled along with column stats index type, then both files and column stats index will be created synchronously with ingestion.</li></ul><p>Some of these limitations will be removed in the upcoming releases. Please |
| follow <a href="https://issues.apache.org/jira/browse/HUDI-2488" target="_blank" rel="noopener noreferrer">HUDI-2488</a> for developments on this feature.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.12.1/metadata_indexing.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/cn/docs/0.12.1/clustering"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Clustering</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/docs/0.12.1/hoodie_cleaner"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Cleaning</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#setup-async-indexing" class="table-of-contents__link toc-highlight">Setup Async Indexing</a><ul><li><a href="#schedule-indexing" class="table-of-contents__link toc-highlight">Schedule indexing</a></li><li><a href="#execute-indexing" class="table-of-contents__link toc-highlight">Execute Indexing</a></li><li><a href="#drop-index" class="table-of-contents__link toc-highlight">Drop Index</a></li></ul></li><li><a href="#caveats" class="table-of-contents__link toc-highlight">Caveats</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who's Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>. |
| Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/docs/privacy">Privacy Policy</a></div></div></div></footer></div> |
| <script src="/cn/assets/js/runtime~main.0acdb754.js"></script> |
| <script src="/cn/assets/js/main.6d6aa24f.js"></script> |
| </body> |
| </html> |