| <!doctype html> |
| <html lang="cn" dir="ltr"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Docusaurus v2.0.0-beta.14"> |
| <link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed"> |
| <link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed"> |
| <link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed"> |
| <link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml"> |
| <link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed"> |
| <link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed"> |
| <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro"> |
| <link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Record Level Index: Hudi's blazing fast indexing for large-scale datasets | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/blog/2023/11/01/record-level-index"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="default"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Record Level Index: Hudi's blazing fast indexing for large-scale datasets | Apache Hudi"><meta data-react-helmet="true" name="description" content="Introduction"><meta data-react-helmet="true" property="og:description" content="Introduction"><meta data-react-helmet="true" property="og:image" content="https://hudi.apache.org/cn/assets/images/blog/record-level-index/03.RLI_bulkinsert.png"><meta data-react-helmet="true" name="twitter:image" content="https://hudi.apache.org/cn/assets/images/blog/record-level-index/03.RLI_bulkinsert.png"><meta data-react-helmet="true" property="og:type" content="article"><meta data-react-helmet="true" property="article:published_time" content="2023-11-01T00:00:00.000Z"><meta data-react-helmet="true" property="article:tag" content="design,indexing,metadata,apache hudi,blog"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/blog/2023/11/01/record-level-index"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/blog/2023/11/01/record-level-index" hreflang="en"><link data-react-helmet="true" rel="alternate" 
href="https://hudi.apache.org/cn/blog/2023/11/01/record-level-index" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/blog/2023/11/01/record-level-index" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css"> |
| <link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script"> |
| <link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script"> |
| </head> |
| <body> |
| <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus"> |
| <div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div 
class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item 
navbar__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who's Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/overview"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/overview"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.1/overview"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.0/overview"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/overview"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/overview"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.1/overview"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/overview"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.1/overview"><div 
class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/overview"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/overview"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/overview"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/overview"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/overview"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 
3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 
1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/blog/2023/11/01/record-level-index" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/blog/2023/11/01/record-level-index" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" 
rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li 
class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who's Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 
5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div 
class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper blog-wrapper blog-post-page"><div class="container margin-vert--lg"><div class="row"><main class="col col--9 col--offset-2" itemscope="" itemtype="http://schema.org/Blog"><article itemprop="blogPost" itemscope="" itemtype="http://schema.org/BlogPosting"><header class="postHeader_Ipb1"><div><h1 class="blogPostTitle_RC3s" itemprop="headline"><h1 class="blogPostPageTitle_bKZt" itemprop="headline">Record Level Index: Hudi's blazing fast indexing for large-scale datasets</h1></h1><div class="blogInfo_1FPd margin-top--sm margin-bottom--sm"><div class="blogPostText_jBA8 row"><time datetime="2023-11-01T00:00:00.000Z" itemprop="datePublished">November 1, 2023</time><div><div><div><a itemprop="url"><span class="blogPostAuthorsList_dlEG" itemprop="name">Shiyan Xu and Sivabalan Narayanan</span></a></div></div></div></div><div class="blogPostData_A2Le">12 min read</div></div></div><ul class="authorTimeTags_oN88 padding--none margin-left--sm tagsWrapperPostPage_VdId"><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/design">design</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/indexing">indexing</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/metadata">metadata</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/apache-hudi">apache hudi</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/blog">blog</a></li></ul></header><div class="markdown" itemprop="articleBody"><h2 class="anchor anchorWithStickyNavbar_y2LR" id="introduction">Introduction<a class="hash-link" href="#introduction" title="Direct 
link to heading"></a></h2><p>An index is a critical component that facilitates quick updates and deletes for Hudi writers, and it plays a pivotal |
| role in boosting query executions as well. Hudi provides several index types, including the Bloom and Simple indexes with global |
| variations, the HBase Index that leverages an HBase server, the hash-based Bucket index, and the multi-modal index |
| realized through the metadata table. The choice of an index depends on factors such as table sizes, partition data distributions, |
| or traffic patterns, where a specific index may be more suitable for simpler operation or better performance<sup id="fnref-1"><a href="#fn-1" class="footnote-ref">1</a></sup>. |
| Users often face trade-offs when selecting index types for different tables, since there hasn't been |
| a generally performant index capable of facilitating both writes and reads with minimal operational overhead.</p><p>Starting from <a href="https://hudi.apache.org/releases/release-0.14.0" target="_blank" rel="noopener noreferrer">Hudi 0.14.0</a>, we are thrilled to announce a |
| general purpose index for Apache Hudi - the Record Level Index (RLI). This innovation not only dramatically boosts |
| write efficiency but also improves read efficiency for relevant queries. Integrated seamlessly within the table storage layer, |
| RLI can easily work without any additional operational effort.</p><p>In the subsequent sections of this blog, we will give a brief introduction to Hudi's metadata table, a prerequisite for discussing RLI. |
| Following that, we will delve into the design and workflows of RLI, and then show performance analysis and index type comparisons. The blog |
| will conclude with insights into future work for RLI.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="metadata-table">Metadata table<a class="hash-link" href="#metadata-table" title="Direct link to heading"></a></h2><p>A <a href="https://hudi.apache.org/docs/metadata" target="_blank" rel="noopener noreferrer">Hudi metadata table</a> is a Merge-on-Read (MoR) table within the <code>.hoodie/metadata/</code> directory. It contains various |
| metadata pertaining to records, seamlessly integrated into both the writer and reader paths to improve indexing efficiency. |
| The metadata is segregated into four partitions: <code>files</code>, <code>column stats</code>, <code>bloom filters</code>, and <code>record level index</code>.</p><img src="/assets/images/blog/record-level-index/01.metadatatable_layout.png" alt="Hudi metadata table layout" width="800" align="middle"><p>The metadata table is updated synchronously with each commit action on the Timeline; in other words, the commits to the |
| metadata table are part of the transactions to the Hudi data table. With four partitions containing different types of |
| metadata, this layout serves the purpose of a multi-modal index:</p><ul><li><code>files</code> partition keeps track of the Hudi data table’s partitions and the data files of each partition</li><li><code>column stats</code> partition records statistics about each column of the data table</li><li><code>bloom filters</code> partition stores serialized bloom filters for base files</li><li><code>record level index</code> partition contains mappings between individual record keys and their corresponding file group IDs</li></ul><p>Users can activate the metadata table by setting <code>hoodie.metadata.enable=true</code>. Once activated, the <code>files</code> partition |
| will always be enabled. Other partitions can be enabled and configured individually to harness additional indexing |
| capabilities.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="record-level-index">Record Level Index<a class="hash-link" href="#record-level-index" title="Direct link to heading"></a></h2><p>Starting from release 0.14.0, the Record Level Index (RLI) can be activated by setting <code>hoodie.metadata.record.index.enable=true</code> |
| and <code>hoodie.index.type=RECORD_INDEX</code>. The core concept behind RLI is the ability to determine the location of records, thus |
| reducing the number of files that need to be scanned to extract the desired data. This process is usually referred to as "index look-up". |
| Hudi employs a primary-key model, requiring each record to be associated with a key |
| to satisfy the uniqueness constraint. Consequently, we can establish one-to-one mappings between record keys and file groups, |
| precisely the data we intend to store within the <code>record level index</code> partition.</p><p>Performance is paramount when it comes to indexes. The metadata table, which includes the RLI partition, chooses <a href="https://hbase.apache.org/book.html#_hfile_format_2" target="_blank" rel="noopener noreferrer">HFile</a><sup id="fnref-2"><a href="#fn-2" class="footnote-ref">2</a></sup>, |
| HBase’s file format that utilizes B+ tree-like structures for fast look-up, as the file format. Real-world benchmarking |
| has shown that an HFile containing 1 million RLI mappings can look up a batch of 100k records in just 600 ms. |
| We will cover the performance topic in a later section with detailed analysis.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="initialization">Initialization<a class="hash-link" href="#initialization" title="Direct link to heading"></a></h3><p>Initializing the RLI partition for an existing Hudi table can be a laborious and time-consuming task, contingent on the number |
| of records. Just like with a typical database, building indexes takes time, but the investment ultimately pays off by speeding up |
| numerous queries in the future.</p><img src="/assets/images/blog/record-level-index/02.RLI_init_flow.png" alt="RLI init flow" width="800" align="middle"><p>The diagram above shows the high-level steps of RLI initialization. Since these jobs are all parallelizable, users can |
| scale the cluster and configure relevant parallelism settings (e.g., <code>hoodie.metadata.max.init.parallelism</code>) accordingly |
| to meet their time requirement.</p><p>Focusing on the final step, "Bulk insert to RLI partition," the metadata table writer employs a hash function to |
| partition the RLI records, ensuring that the number of resulting file groups aligns with the number of partitions. |
| This guarantees consistent record key look-ups.</p><img src="/assets/images/blog/record-level-index/03.RLI_bulkinsert.png" alt="RLI bulkinsert" width="800" align="middle"><p>It’s important to note that the current implementation fixes the number of file groups in the RLI partition once it’s initialized. |
| Therefore, users should lean towards over-provisioning the file groups and adjust these configurations accordingly.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.max.filegroup.count</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.min.filegroup.count</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.max.filegroup.size</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.growth.factor</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>In future development iterations, RLI should be able to overcome this limitation by dynamically rebalancing file groups to |
| accommodate the ever-increasing number of records.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="updating-rli-upon-data-table-writes">Updating RLI upon data table writes<a class="hash-link" href="#updating-rli-upon-data-table-writes" title="Direct link to heading"></a></h3><p>During regular writes, the RLI partition will be updated as part of the transactions. Metadata records will be generated |
| using the incoming record keys with their corresponding location info. Given that the RLI partition contains the exact |
| mappings of record keys and locations, upserts to the data table will result in upsertion of the corresponding keys to the |
| RLI partition. The hash function employed will guarantee that identical keys are routed to the same file group.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="writer-indexing">Writer Indexing<a class="hash-link" href="#writer-indexing" title="Direct link to heading"></a></h3><p>Being part of the write flow, RLI follows the high-level indexing flow, similar to any other global index: for a given |
| set of records, it tags each record with location information if the index finds them present in any existing file group. |
| The key distinction lies in the source of truth for the existence test—the RLI partition. The diagram below illustrates |
| the tagging flow with detailed steps.</p><img src="/assets/images/blog/record-level-index/04.RLI_tagging.png" alt="RLI tagging" width="800" align="middle"><p>The tagged records will be passed to Hudi write handles and will undergo write operations to their respective file groups. |
| The indexing process is a critical step in applying updates to the table, as its efficiency directly influences the write |
| latency. In a later section, we will demonstrate the Record Level Index performance using benchmarking results.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="read-flow">Read Flow<a class="hash-link" href="#read-flow" title="Direct link to heading"></a></h3><p>The Record Level Index is also integrated on the query side<sup id="fnref-3"><a href="#fn-3" class="footnote-ref">3</a></sup>. In queries that involve equality check (e.g., EqualTo or IN) |
| against the record key column, Hudi’s file index implementation optimizes the file pruning process. This optimization is |
| achieved by leveraging RLI to precisely locate the file groups that need to be read for completing the queries.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="storage">Storage<a class="hash-link" href="#storage" title="Direct link to heading"></a></h3><p>Storage efficiency is another vital aspect of the design. Each RLI mapping entry must include some necessary information |
| to precisely locate files, such as record key, partition path, file group id, etc. To optimize the storage, RLI adopts |
| some compression techniques such as encoding file group id (in the form of UUID) into 2 Longs to represent the high and |
| low bits. Using Gzip compression and a 4MB block size, an individual RLI record averages only 48 bytes in size. To |
| illustrate this more practically, let’s assume we have a table of 100TB data with about 1 billion records (average record size = 100KB). |
| The storage space required by the RLI partition will be approximately 48 GB, which is less than 0.05% of the total data size. |
| Since RLI contains the same number of entries as the data table, storage optimization is crucial to make RLI practical, |
| especially for tables of petabyte size and beyond.</p><p>RLI exploits the low cost of storage to enable the rapid look-up process similar to the HBase index, while avoiding the |
| operational overhead of running an extra server. In the next section, we will review some benchmarking results to demonstrate |
| its performance advantages.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="performance">Performance<a class="hash-link" href="#performance" title="Direct link to heading"></a></h3><p>We conducted a comprehensive benchmarking analysis of the Record Level Index evaluating aspects such as write latency, |
| index look-up latency, and data shuffling in comparison to existing indexing mechanisms in Hudi. In addition to the |
| benchmarks for write operations, we will also showcase the reduction in query latencies for point look-ups. Hudi 0.14.0 |
| and Spark 3.2.1 were used throughout the experiments.</p><p>In comparison to the Global Simple Index (GSI) in Hudi, Record Level Index (RLI) is crafted for significant performance |
| advantages stemming from a greatly reduced scan space and minimized data shuffling. GSI conducts join operations between |
| incoming records and existing data across all partitions of the data table, resulting in substantial data shuffling and |
| computational overhead to pinpoint the records. On the other hand, RLI efficiently extracts location info through a |
| hash function, leading to a considerably smaller amount of data shuffling by only loading the file groups of interest |
| from the metadata table.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="write-latency">Write latency<a class="hash-link" href="#write-latency" title="Direct link to heading"></a></h4><p>In the first set of experiments, we established two pipelines: one configured using GSI, and the other configured with RLI. |
| Each pipeline was executed on an EMR cluster of 10 m5.4xlarge core instances, and was set to ingest batches of 200MB of data |
| into a 1TB dataset of 2 billion records. The RLI partition was configured with 1000 file groups. For N batches of ingestion, |
| <strong>the average write latency using RLI showed a remarkable 72% improvement over GSI</strong>.</p><img src="/assets/images/blog/record-level-index/write-latency.png" alt="write-latency" width="600" align="middle"><p>Note: Between Global Simple Index and Global Bloom Index in Hudi, the former yielded better results due to the randomness |
| of record keys. Therefore, we omitted the presentation of the Global Bloom Index in the chart.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="index-look-up-latency">Index look-up latency<a class="hash-link" href="#index-look-up-latency" title="Direct link to heading"></a></h4><p>We also isolated the index look-up step using HoodieReadClient to accurately gauge indexing efficiency. Through |
| experiments involving the look-up of 400,000 records (0.02%) in a 1TB dataset of 2 billion records, <strong>RLI showcased a |
| 72% improvement over GSI, consistent with the end-to-end write latency results</strong>.</p><img src="/assets/images/blog/record-level-index/index-latency.png" alt="index-latency" width="600" align="middle"><h4 class="anchor anchorWithStickyNavbar_y2LR" id="data-shuffling">Data shuffling<a class="hash-link" href="#data-shuffling" title="Direct link to heading"></a></h4><p>In the index look-up experiments, we observed that around 85GB of data was shuffled for GSI, whereas only 700MB was shuffled |
| for RLI. <strong>This reflects an impressive 92% reduction in data shuffling when using RLI compared to GSI</strong>.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="query-latency">Query latency<a class="hash-link" href="#query-latency" title="Direct link to heading"></a></h4><p>The Record Level Index will greatly boost Spark queries with “EqualTo” and “IN” predicates on record key columns. |
| We created a 400GB Hudi table comprising 20,000 file groups. When we executed a query predicated on a single record key, |
| we observed a significant improvement in query time. <strong>With RLI enabled, the query time decreased from 977 seconds to just |
| 12 seconds, representing an impressive 98% reduction in latency</strong><sup id="fnref-4"><a href="#fn-4" class="footnote-ref">4</a></sup>.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="when-to-use">When to Use<a class="hash-link" href="#when-to-use" title="Direct link to heading"></a></h3><p>RLI demonstrates outstanding performance in general, elevating update and delete efficiency to a new level and |
| fast-tracking reads when executing key-matching queries. Enabling RLI is also as simple as setting some configuration flags. |
| Below, we have summarized a comparison table highlighting these important characteristics of RLI in contrast to other common Hudi index types.</p><table><thead><tr><th></th><th>Record Level Index</th><th>Global Simple Index</th><th>Global Bloom Index</th><th>HBase Index</th><th>Bucket Index</th></tr></thead><tbody><tr><td>Performant look-up in general</td><td>Yes</td><td>No</td><td>No</td><td>Yes, with possible throttling issues</td><td>Yes</td></tr><tr><td>Boost both writes and reads</td><td>Yes</td><td>No, write-only</td><td>No, write-only</td><td>No, write-only</td><td>No, write-only</td></tr><tr><td>Easy to enable</td><td>Yes</td><td>Yes</td><td>Yes</td><td>No, require HBase server</td><td>Yes</td></tr></tbody></table><p>Many real-world applications will significantly benefit from using RLI. A common example is fulfilling the GDPR requirements. |
| Typically, when users make requests, a set of IDs will be provided to identify the to-be-deleted records, |
| which will either be updated (columns being nullified) or permanently removed. |
| By enabling RLI, offline jobs performing such changes will become notably more efficient, resulting in cost savings. |
| On the read side, analysts or engineers collecting historical events through certain tracing IDs will also |
| experience blazing fast responses from the key-matching queries.</p><p>While RLI holds the above-mentioned advantages over all other index types, it is important to consider certain |
| aspects when using it. Similar to any other global index, RLI requires record-key uniqueness across all partitions in a table. |
| As RLI keeps track of all record keys and locations, the initialization process may take time for large tables. |
| In scenarios with extremely skewed large workloads, RLI might not achieve the desired performance due to limitations in the current design.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="future-work">Future Work<a class="hash-link" href="#future-work" title="Direct link to heading"></a></h2><p>In this initial version of the Record Level Index, certain limitations are acknowledged. As mentioned in the |
| "Initialization" section, the number of file groups must be predetermined during the creation of the RLI partition. |
| Hudi does use some heuristics and a growth factor for an existing table, but for a new table, it is recommended to |
| set appropriate file group configs for RLI. As the data volume increases, the RLI partition requires re-bootstrapping |
| when additional file groups are needed for scaling out. To address the need for rebalancing, a consistent hashing |
| technique could be employed.</p><p>Another valuable enhancement would involve supporting the indexing of secondary columns alongside the record key |
| fields, thus catering to a broader range of queries. On the reader side, there is a plan to integrate more query |
| engines, such as Presto and Trino, with the Record Level Index to fully leverage the performance benefits offered |
| by Hudi metadata tables.</p><hr><p><sup id="fnref-1"><a href="#fn-1" class="footnote-ref">1</a></sup> <a href="https://hudi.apache.org/blog/2020/11/11/hudi-indexing-mechanisms/" target="_blank" rel="noopener noreferrer">This blog</a> gives a good explanation of some best practices regarding index selection and configuration.</p><p><sup id="fnref-2"><a href="#fn-2" class="footnote-ref">2</a></sup> Other formats like Parquet can also be supported in the future.</p><p><sup id="fnref-3"><a href="#fn-3" class="footnote-ref">3</a></sup> As of now, query engine integration is only available for Spark, with plans to support additional engines in the future.</p><p><sup id="fnref-4"><a href="#fn-4" class="footnote-ref">4</a></sup> The query improvement is specific to record-key-matching queries and does not reflect a general reduction in latency by enabling RLI. In the case of the single record-key query, 99.995% of file groups (19999 out of 20000) were pruned during query execution.</p></div><footer class="row docusaurus-mt-lg blogPostDetailsFull_2lop"><div class="col margin-top--sm"><a href="https://github.com/apache/hudi/edit/asf-site/website/blog/blog/2023-11-01-record-level-index.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Blog post page navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/cn/blog/2023/11/13/Apache-Hudi-From-Zero-To-One-blog-6"><div class="pagination-nav__sublabel">Newer Post</div><div class="pagination-nav__label">« <!-- -->Apache Hudi: From Zero To One (6/10)</div></a></div><div class="pagination-nav__item 
pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/blog/2023/10/29/UPSERT-Performance-Evaluation-of-Hudi-0-14-and-Spark-3-4-1-Record-Level-Index-Global-Bloom-Global-Simple-Indexes"><div class="pagination-nav__sublabel">Older Post</div><div class="pagination-nav__label">UPSERT Performance Evaluation of Hudi 0.14 and Spark 3.4.1: Record Level Index vs. Global Bloom & Global Simple Indexes<!-- --> »</div></a></div></nav></main><div class="col col--2"><div class="tableOfContents_vrFS thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#introduction" class="table-of-contents__link toc-highlight">Introduction</a></li><li><a href="#metadata-table" class="table-of-contents__link toc-highlight">Metadata table</a></li><li><a href="#record-level-index" class="table-of-contents__link toc-highlight">Record Level Index</a><ul><li><a href="#initialization" class="table-of-contents__link toc-highlight">Initialization</a></li><li><a href="#updating-rli-upon-data-table-writes" class="table-of-contents__link toc-highlight">Updating RLI upon data table writes</a></li><li><a href="#writer-indexing" class="table-of-contents__link toc-highlight">Writer Indexing</a></li><li><a href="#read-flow" class="table-of-contents__link toc-highlight">Read Flow</a></li><li><a href="#storage" class="table-of-contents__link toc-highlight">Storage</a></li><li><a href="#performance" class="table-of-contents__link toc-highlight">Performance</a></li><li><a href="#when-to-use" class="table-of-contents__link toc-highlight">When to Use</a></li></ul></li><li><a href="#future-work" class="table-of-contents__link toc-highlight">Future Work</a></li></ul></div></div></div></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" 
href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who's Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li 
class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a 
href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a 
href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>. |
| Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/docs/privacy">Privacy Policy</a></div></div></div></footer></div> |
| <script src="/cn/assets/js/runtime~main.0acdb754.js"></script> |
| <script src="/cn/assets/js/main.6d6aa24f.js"></script> |
| </body> |
| </html> |