blob: 52057f9ff66f2481b80a55a358c485d367d4d2bc [file] [log] [blame]
<!doctype html>
<html class="docs-version-0.12.1 docs-custom-styles" lang="cn" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Metadata Indexing | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/docs/0.12.1/metadata_indexing"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:version" content="0.12.1"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.12.1"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Metadata Indexing | Apache Hudi"><meta data-react-helmet="true" name="description" content="We can now create different metadata indexes, including files, bloom filters and column stats,"><meta data-react-helmet="true" property="og:description" content="We can now create different metadata indexes, including files, bloom filters and column stats,"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/docs/0.12.1/metadata_indexing"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.12.1/metadata_indexing" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/0.12.1/metadata_indexing" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.12.1/metadata_indexing" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css">
<link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script">
<link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script">
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/0.12.1/overview"><div class="labelWrapperDropdown_Mqbj">0.12.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.1/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/cn/docs/0.12.1/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.1/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/metadata_indexing"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/overview"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/overview"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/overview"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/overview"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/0.12.1/metadata_indexing" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/0.12.1/metadata_indexing" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/overview">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/quick-start-guide">Quick Start</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/quick-start-guide">Spark Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/flink-quick-start-guide">Flink Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/docker_demo">Docker Demo</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/timeline">Concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/table_management">How To</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active hasHref_TwRn" href="/cn/docs/0.12.1/migration_guide">Services</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/migration_guide">Bootstrapping</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/compaction">Compaction</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/clustering">Clustering</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/cn/docs/0.12.1/metadata_indexing">Metadata Indexing</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/hoodie_cleaner">Cleaning</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/transforms">Transformers</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/markers">Marker Mechanism</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/file_sizing">File Sizing</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/disaster_recovery">Disaster Recovery</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/snapshot_exporter">Exporter</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.12.1/precommit_validator">Data Quality</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/basic_configurations">Configurations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.12.1/query_engine_setup">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/faq">FAQs</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.12.1/privacy">Privacy Policy</a></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.12.1</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/cn/docs/metadata_indexing">latest version</a></b> (<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.12.1</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Metadata Indexing</h1></header><p>We can now create different metadata indexes, including files, bloom filters and column stats,
asynchronously in Hudi, which are then used by queries and writing to improve performance.
Being able to index without blocking writing has two benefits,</p><ul><li>improved write latency</li><li>reduced resource wastage due to contention between writing and indexing.</li></ul><p>To learn more about the design of this feature, please check out <a href="https://github.com/apache/hudi/blob/master/rfc/rfc-45/rfc-45.md" target="_blank" rel="noopener noreferrer">RFC-45</a>.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="setup-async-indexing">Setup Async Indexing<a class="hash-link" href="#setup-async-indexing" title="Direct link to heading"></a></h2><p>First, we will generate a continuous workload. In the below example, we are going to start a <a href="/cn/docs/0.12.1/hoodie_deltastreamer#deltastreamer">deltastreamer</a> which will continuously write data
from raw parquet to Hudi table. We used the widely available <a href="https://registry.opendata.aws/nyc-tlc-trip-records-pds/" target="_blank" rel="noopener noreferrer">NY Taxi dataset</a>, whose setup details are as below:</p><details class="details_Q743 alert alert--info details_h+cY" data-collapsed="true"><summary>Ingestion write config</summary><div><div class="collapsibleContent_K5uX"><p></p><div class="codeBlockContainer_J+bg language-bash theme-code-block"><div class="codeBlockContent_csEI bash"><pre tabindex="0" class="prism-code language-bash codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.datasource.write.recordkey.field</span><span class="token operator">=</span><span class="token plain">VendorID</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.datasource.write.partitionpath.field</span><span class="token operator">=</span><span class="token plain">tpep_dropoff_datetime</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.datasource.write.precombine.field</span><span class="token operator">=</span><span class="token plain">tpep_dropoff_datetime</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.deltastreamer.source.dfs.root</span><span class="token operator">=</span><span class="token plain">/Users/home/path/to/data/parquet_files/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.deltastreamer.schemaprovider.target.schema.file</span><span class="token operator">=</span><span class="token plain">/Users/home/path/to/schema/schema.avsc</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.deltastreamer.schemaprovider.source.schema.file</span><span class="token operator">=</span><span class="token plain">/Users/home/path/to/schema/schema.avsc</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// </span><span class="token builtin class-name" style="color:rgb(189, 147, 249)">set</span><span class="token plain"> lock provider configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.provider</span><span class="token operator">=</span><span class="token plain">org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.url</span><span class="token operator">=</span><span class="token operator">&lt;</span><span class="token plain">zk_url</span><span class="token operator">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.port</span><span class="token operator">=</span><span class="token operator">&lt;</span><span class="token plain">zk_port</span><span class="token operator">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.lock_key</span><span class="token operator">=</span><span class="token operator">&lt;</span><span class="token plain">zk_key</span><span class="token operator">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.base_path</span><span class="token operator">=</span><span class="token operator">&lt;</span><span class="token plain">zk_base_path</span><span class="token operator">&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p></p></div></div></details><details class="details_Q743 alert alert--info details_h+cY" data-collapsed="true"><summary>Run deltastreamer</summary><div><div class="collapsibleContent_K5uX"><p></p><div class="codeBlockContainer_J+bg language-bash theme-code-block"><div class="codeBlockContent_csEI bash"><pre tabindex="0" class="prism-code language-bash codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer </span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token variable function" style="color:rgb(80, 250, 123);font-style:italic">ls</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic"> /Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props </span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token variable function" style="color:rgb(80, 250, 123);font-style:italic">ls</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic"> /Users/home/path/to/write/config.properties</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">`</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--source-class org.apache.hudi.utilities.sources.ParquetDFSSource --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--source-ordering-field tpep_dropoff_datetime </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-type COPY_ON_WRITE </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--target-base-path file:///tmp/hudi-ny-taxi/ </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--target-table ny_hudi_tbl </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--op UPSERT </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--continuous </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--source-limit </span><span class="token number">5000000</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--min-sync-interval-seconds </span><span class="token number">60</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p></p></div></div></details><p>From version 0.11.0 onwards, Hudi metadata table is enabled by default and the files index will be automatically created. While the deltastreamer is running in continuous mode, let
us schedule the indexing for COLUMN_STATS index. First we need to define a properties file for the indexer.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Enabling metadata table and configuring a lock provider are the prerequisites for using async indexer.</p></div></div><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain"># ensure that both metadata and async indexing is enabled as below two configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.enable=true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.index.async=true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># enable column_stats index config</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.index.column.stats.enable=true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># set concurrency mode and lock configs as this is a multi-writer scenario</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># check https://hudi.apache.org/docs/concurrency_control/ for differnt lock provider configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.concurrency.mode=optimistic_concurrency_control</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.url=&lt;zk_url&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.port=&lt;zk_port&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.lock_key=&lt;zk_key&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.write.lock.zookeeper.base_path=&lt;zk_base_path&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="schedule-indexing">Schedule indexing<a class="hash-link" href="#schedule-indexing" title="Direct link to heading"></a></h3><p>Now, we can schedule indexing using <code>HoodieIndexer</code> in <code>schedule</code> mode as follows:</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.HoodieIndexer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props /Users/home/path/to/indexer.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--mode schedule \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--base-path /tmp/hudi-ny-taxi \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-name ny_hudi_tbl \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--index-types COLUMN_STATS \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--parallelism 1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--spark-memory 1g</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>This will write an <code>indexing.requested</code> instant to the timeline.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="execute-indexing">Execute Indexing<a class="hash-link" href="#execute-indexing" title="Direct link to heading"></a></h3><p>To execute indexing, run the indexer in <code>execute</code> mode as below.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.HoodieIndexer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props /Users/home/path/to/indexer.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--mode execute \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--base-path /tmp/hudi-ny-taxi \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-name ny_hudi_tbl \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--index-types COLUMN_STATS \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--parallelism 1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--spark-memory 1g</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>We can also run the indexer in <code>scheduleAndExecute</code> mode to do the above two steps in one shot. Doing it separately gives us better control over when we want to execute.</p><p>Let&#x27;s look at the data timeline.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">ls -lrt /tmp/hudi-ny-taxi/.hoodie</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">total 1816</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 0 Apr 14 19:53 20220414195327683.commit.requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 153423 Apr 14 19:54 20220414195327683.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 207061 Apr 14 19:54 20220414195327683.commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 0 Apr 14 19:54 20220414195423420.commit.requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 659 Apr 14 19:54 20220414195437837.indexing.requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 323950 Apr 14 19:54 20220414195423420.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 0 Apr 14 19:55 20220414195437837.indexing.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 222920 Apr 14 19:55 20220414195423420.commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 734 Apr 14 19:55 hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 1 sagars wheel 979 Apr 14 19:55 20220414195437837.indexing</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>In the data timeline, we can see that indexing was scheduled after one commit completed (<code>20220414195327683.commit</code>) and another was requested
(<code>20220414195423420.commit.requested</code>). This would have picked <code>20220414195327683</code> as the base instant. Indexing was inflight with an inflight writer as well. If we parse the
indexer logs, we would find that it indeed caught up with instant <code>20220414195423420</code> after indexing upto the base instant.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:22 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/hudi-ny-taxi/.hoodie/metadata</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:22 INFO RunIndexActionExecutor: Starting Index Building with base instant: 20220414195327683</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:22 INFO HoodieBackedTableMetadataWriter: Creating a new metadata index for partition &#x27;column_stats&#x27; under path /tmp/hudi-ny-taxi/.hoodie/metadata upto instant 20220414195327683</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO RunIndexActionExecutor: Total remaining instants to index: 1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/hudi-ny-taxi/.hoodie/metadata</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieTableConfig: Loading table properties from /tmp/hudi-ny-taxi/.hoodie/metadata/.hoodie/hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/hudi-ny-taxi/.hoodie/metadata</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20220414195423420__deltacommit__COMPLETED]}</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">22/04/14 19:55:38 INFO RunIndexActionExecutor: Starting index catchup task</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="drop-index">Drop Index<a class="hash-link" href="#drop-index" title="Direct link to heading"></a></h3><p>To drop an index, just run the index in <code>dropindex</code> mode.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--class org.apache.hudi.utilities.HoodieIndexer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/Users/home/path/to/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.12.1-SNAPSHOT.jar \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--props /Users/home/path/to/indexer.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--mode dropindex \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--base-path /tmp/hudi-ny-taxi \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--table-name ny_hudi_tbl \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--index-types COLUMN_STATS \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--parallelism 1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--spark-memory 2g</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="caveats">Caveats<a class="hash-link" href="#caveats" title="Direct link to heading"></a></h2><p>Asynchronous indexing feature is still evolving. Few points to note from deployment perspective while running the indexer:</p><ul><li>While an index can be created concurrently with ingestion, it cannot be dropped concurrently. Please stop all writers
before dropping an index.</li><li>Files index is created by default as long as the metadata table is enabled.</li><li>Trigger indexing for one metadata partition (or index type) at a time.</li><li>If an index is enabled via async HoodieIndexer, then ensure that index is also enabled in configs corresponding to regular ingestion writers. Otherwise, metadata writer will
think that particular index was disabled and cleanup the metadata partition.</li><li>In the case of multi-writers, enable async index and specific index config for all writers.</li><li>Unlike other table services like compaction and clustering, where we have a separate configuration to run inline, there is no such inline config here.
For example, if async indexing is disabled and metadata is enabled along with column stats index type, then both files and column stats index will be created synchronously with ingestion.</li></ul><p>Some of these limitations will be removed in the upcoming releases. Please
follow <a href="https://issues.apache.org/jira/browse/HUDI-2488" target="_blank" rel="noopener noreferrer">HUDI-2488</a> for developments on this feature.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.12.1/metadata_indexing.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/cn/docs/0.12.1/clustering"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Clustering</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/docs/0.12.1/hoodie_cleaner"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Cleaning</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#setup-async-indexing" class="table-of-contents__link toc-highlight">Setup Async Indexing</a><ul><li><a href="#schedule-indexing" class="table-of-contents__link toc-highlight">Schedule indexing</a></li><li><a href="#execute-indexing" class="table-of-contents__link toc-highlight">Execute Indexing</a></li><li><a href="#drop-index" class="table-of-contents__link toc-highlight">Drop Index</a></li></ul></li><li><a href="#caveats" class="table-of-contents__link toc-highlight">Caveats</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>.
Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/docs/privacy">Privacy Policy</a></div></div></div></footer></div>
<script src="/cn/assets/js/runtime~main.0acdb754.js"></script>
<script src="/cn/assets/js/main.6d6aa24f.js"></script>
</body>
</html>