<!-- blob: 376915359f995fa60de4d1212edb33ac223e4e38 [file] [log] [blame] -->
<!doctype html>
<html lang="cn" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Record Level Index: Hudi&#x27;s blazing fast indexing for large-scale datasets | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/blog/2023/11/01/record-level-index"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="default"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Record Level Index: Hudi&#x27;s blazing fast indexing for large-scale datasets | Apache Hudi"><meta data-react-helmet="true" name="description" content="Introduction"><meta data-react-helmet="true" property="og:description" content="Introduction"><meta data-react-helmet="true" property="og:image" content="https://hudi.apache.org/cn/assets/images/blog/record-level-index/03.RLI_bulkinsert.png"><meta data-react-helmet="true" name="twitter:image" content="https://hudi.apache.org/cn/assets/images/blog/record-level-index/03.RLI_bulkinsert.png"><meta data-react-helmet="true" property="og:type" content="article"><meta data-react-helmet="true" property="article:published_time" content="2023-11-01T00:00:00.000Z"><meta data-react-helmet="true" property="article:tag" content="design,indexing,metadata,apache hudi,blog"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/blog/2023/11/01/record-level-index"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/blog/2023/11/01/record-level-index" hreflang="en"><link data-react-helmet="true" rel="alternate" 
href="https://hudi.apache.org/cn/blog/2023/11/01/record-level-index" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/blog/2023/11/01/record-level-index" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css">
<link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script">
<link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script">
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div 
class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item 
navbar__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/overview"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/overview"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.1/overview"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.0/overview"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/overview"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/overview"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.1/overview"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/overview"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" 
href="/cn/docs/0.11.1/overview"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/overview"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/overview"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/overview"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/overview"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/overview"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 
3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 
1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/blog/2023/11/01/record-level-index" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/blog/2023/11/01/record-level-index" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" 
rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li 
class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 
5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div 
class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper blog-wrapper blog-post-page"><div class="container margin-vert--lg"><div class="row"><main class="col col--9 col--offset-2" itemscope="" itemtype="http://schema.org/Blog"><article itemprop="blogPost" itemscope="" itemtype="http://schema.org/BlogPosting"><header class="postHeader_Ipb1"><div><h1 class="blogPostTitle_RC3s" itemprop="headline"><h1 class="blogPostPageTitle_bKZt" itemprop="headline">Record Level Index: Hudi&#x27;s blazing fast indexing for large-scale datasets</h1></h1><div class="blogInfo_1FPd margin-top--sm margin-bottom--sm"><div class="blogPostText_jBA8 row"><time datetime="2023-11-01T00:00:00.000Z" itemprop="datePublished">November 1, 2023</time><div><div><div><a itemprop="url"><span class="blogPostAuthorsList_dlEG" itemprop="name">Shiyan Xu and Sivabalan Narayanan</span></a></div></div></div></div><div class="blogPostData_A2Le">12 min read</div></div></div><ul class="authorTimeTags_oN88 padding--none margin-left--sm tagsWrapperPostPage_VdId"><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/design">design</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/indexing">indexing</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/metadata">metadata</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/apache-hudi">apache hudi</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/blog">blog</a></li></ul></header><div class="markdown" itemprop="articleBody"><h2 class="anchor anchorWithStickyNavbar_y2LR" id="introduction">Introduction<a class="hash-link" href="#introduction" 
title="Direct link to heading"></a></h2><p>Index is a critical component that facilitates quick updates and deletes for Hudi writers, and it plays a pivotal
role in boosting query executions as well. Hudi provides several index types, including the Bloom and Simple indexes with global
variations, the HBase Index that leverages an HBase server, the hash-based Bucket index, and the multi-modal index
realized through the metadata table. The choice of an index depends on factors such as table sizes, partition data distributions,
or traffic patterns, where a specific index may be more suitable for simpler operation or better performance<sup id="fnref-1"><a href="#fn-1" class="footnote-ref">1</a></sup>.
Users often face trade-offs when selecting index types for different tables, since there hasn&#x27;t been
a generally performant index capable of facilitating both writes and reads with minimal operational overhead.</p><p>Starting from <a href="https://hudi.apache.org/releases/release-0.14.0" target="_blank" rel="noopener noreferrer">Hudi 0.14.0</a>, we are thrilled to announce a
general purpose index for Apache Hudi - the Record Level Index (RLI). This innovation not only dramatically boosts
write efficiency but also improves read efficiency for relevant queries. Integrated seamlessly within the table storage layer,
RLI can easily work without any additional operational efforts.</p><p>In the subsequent sections of this blog, we will give a brief introduction to Hudi&#x27;s metadata table, a prerequisite for discussing RLI.
Following that, we will delve into the design and workflows of RLI, and then show performance analysis and index type comparisons. The blog
will conclude with insights into future work for RLI.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="metadata-table">Metadata table<a class="hash-link" href="#metadata-table" title="Direct link to heading"></a></h2><p>A <a href="https://hudi.apache.org/docs/metadata" target="_blank" rel="noopener noreferrer">Hudi metadata table</a> is a Merge-on-Read (MoR) table within the <code>.hoodie/metadata/</code> directory. It contains various
metadata pertaining to records, seamlessly integrated into both the writer and reader paths to improve indexing efficiency.
The metadata is segregated into four partitions: <code>files</code>, <code>column stats</code>, <code>bloom filters</code>, and <code>record level index</code>.</p><img src="/assets/images/blog/record-level-index/01.metadatatable_layout.png" alt="Hudi metadata table layout" width="800" align="middle"><p>The metadata table is updated synchronously with each commit action on the Timeline; in other words, the commits to the
metadata table are part of the transactions to the Hudi data table. With four partitions containing different types of
metadata, this layout serves the purpose of a multi-modal index:</p><ul><li><code>files</code> partition keeps track of the Hudi data table’s partitions, and data files of each partition</li><li><code>column stats</code> partition records statistics about each column of the data table</li><li><code>bloom filters</code> partition stores serialized bloom filters for base files</li><li><code>record level index</code> partition contains mappings between individual record keys and their corresponding file group IDs</li></ul><p>Users can activate the metadata table by setting <code>hoodie.metadata.enable=true</code>. Once activated, the <code>files</code> partition
will always be enabled. Other partitions can be enabled and configured individually to harness additional indexing
capabilities.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="record-level-index">Record Level Index<a class="hash-link" href="#record-level-index" title="Direct link to heading"></a></h2><p>Starting from release 0.14.0, the Record Level Index (RLI) can be activated by setting <code>hoodie.metadata.record.index.enable=true</code>
and <code>hoodie.index.type=RECORD_INDEX</code>. The core concept behind RLI is the ability to determine the location of records, thus
reducing the number of files that need to be scanned to extract the desired data. This process is usually referred to as &quot;index look-up&quot;.
Hudi employs a primary-key model, requiring each record to be associated with a key
to satisfy the uniqueness constraint. Consequently, we can establish one-to-one mappings between record keys and file groups,
precisely the data we intend to store within the <code>record level index</code> partition.</p><p>Performance is paramount when it comes to indexes. The metadata table, which includes the RLI partition, chooses <a href="https://hbase.apache.org/book.html#_hfile_format_2" target="_blank" rel="noopener noreferrer">HFile</a><sup id="fnref-2"><a href="#fn-2" class="footnote-ref">2</a></sup>,
HBase’s file format that utilizes B+ tree-like structures for fast look-up, as the file format. Real-world benchmarking
has shown that an HFile containing 1 million RLI mappings can look up a batch of 100k records in just 600 ms.
We will cover the performance topic in a later section with detailed analysis.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="initialization">Initialization<a class="hash-link" href="#initialization" title="Direct link to heading"></a></h3><p>Initializing the RLI partition for an existing Hudi table can be a laborious and time-consuming task, contingent on the number
of records. Just like with a typical database, building indexes takes time, but the investment ultimately pays off by speeding up
numerous queries in the future.</p><img src="/assets/images/blog/record-level-index/02.RLI_init_flow.png" alt="RLI init flow" width="800" align="middle"><p>The diagram above shows the high-level steps of RLI initialization. Since these jobs are all parallelizable, users can
scale the cluster and configure relevant parallelism settings (e.g., <code>hoodie.metadata.max.init.parallelism</code>) accordingly
to meet their time requirement.</p><p>Focusing on the final step, &quot;Bulk insert to RLI partition,&quot; the metadata table writer employs a hash function to
partition the RLI records, ensuring that the number of resulting file groups aligns with the number of partitions.
This guarantees consistent record key look-ups.</p><img src="/assets/images/blog/record-level-index/03.RLI_bulkinsert.png" alt="RLI bulkinsert" width="800" align="middle"><p>It’s important to note that the current implementation fixes the number of file groups in the RLI partition once it’s initialized.
Therefore, users should lean towards over-provisioning the file groups and adjust these configurations accordingly.</p><div class="codeBlockContainer_J+bg theme-code-block"><div class="codeBlockContent_csEI"><pre tabindex="0" class="prism-code language-undefined codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.max.filegroup.count</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.min.filegroup.count</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.max.filegroup.size</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie.metadata.record.index.growth.factor</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>In future development iterations, RLI should be able to overcome this limitation by dynamically rebalancing file groups to
accommodate the ever-increasing number of records.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="updating-rli-upon-data-table-writes">Updating RLI upon data table writes<a class="hash-link" href="#updating-rli-upon-data-table-writes" title="Direct link to heading"></a></h3><p>During regular writes, the RLI partition will be updated as part of the transactions. Metadata records will be generated
using the incoming record keys with their corresponding location info. Given that the RLI partition contains the exact
mappings of record keys and locations, upserts to the data table will result in upsertion of the corresponding keys to the
RLI partition. The hash function employed will guarantee that identical keys are routed to the same file group.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="writer-indexing">Writer Indexing<a class="hash-link" href="#writer-indexing" title="Direct link to heading"></a></h3><p>Being part of the write flow, RLI follows the high-level indexing flow, similar to any other global index: for a given
set of records, it tags each record with location information if the index finds them present in any existing file group.
The key distinction lies in the source of truth for the existence test—the RLI partition. The diagram below illustrates
the tagging flow with detailed steps.</p><img src="/assets/images/blog/record-level-index/04.RLI_tagging.png" alt="RLI tagging" width="800" align="middle"><p>The tagged records will be passed to Hudi write handles and will undergo write operations to their respective file groups.
The indexing process is a critical step in applying updates to the table, as its efficiency directly influences the write
latency. In a later section, we will demonstrate the Record Level Index performance using benchmarking results.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="read-flow">Read Flow<a class="hash-link" href="#read-flow" title="Direct link to heading"></a></h3><p>The Record Level Index is also integrated on the query side<sup id="fnref-3"><a href="#fn-3" class="footnote-ref">3</a></sup>. In queries that involve an equality check (e.g., EqualTo or IN)
against the record key column, Hudi’s file index implementation optimizes the file pruning process. This optimization is
achieved by leveraging RLI to precisely locate the file groups that need to be read for completing the queries.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="storage">Storage<a class="hash-link" href="#storage" title="Direct link to heading"></a></h3><p>Storage efficiency is another vital aspect of the design. Each RLI mapping entry must include some necessary information
to precisely locate files, such as record key, partition path, file group id, etc. To optimize the storage, RLI adopts
some compression techniques such as encoding file group id (in the form of UUID) into 2 Longs to represent the high and
low bits. Using Gzip compression and a 4MB block size, an individual RLI record averages only 48 bytes in size. To
illustrate this more practically, let’s assume we have a table of 100TB data with about 1 billion records (average record size = 100KB).
The storage space required by the RLI partition will be approximately 48GB, which is less than 0.05% of the total data size.
Since RLI contains the same number of entries as the data table, storage optimization is crucial to make RLI practical,
especially for tables of petabyte size and beyond.</p><p>RLI exploits the low cost of storage to enable the rapid look-up process similar to the HBase index, while avoiding the
operational overhead of running an extra server. In the next section, we will review some benchmarking results to demonstrate
its performance advantages.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="performance">Performance<a class="hash-link" href="#performance" title="Direct link to heading"></a></h3><p>We conducted a comprehensive benchmarking analysis of the Record Level Index evaluating aspects such as write latency,
index look-up latency, and data shuffling in comparison to existing indexing mechanisms in Hudi. In addition to the
benchmarks for write operations, we will also showcase the reduction in query latencies for point look-ups. Hudi 0.14.0
and Spark 3.2.1 were used throughout the experiments.</p><p>In comparison to the Global Simple Index (GSI) in Hudi, Record Level Index (RLI) is crafted for significant performance
advantages stemming from a greatly reduced scan space and minimized data shuffling. GSI conducts join operations between
incoming records and existing data across all partitions of the data table, resulting in substantial data shuffling and
computational overhead to pinpoint the records. On the other hand, RLI efficiently extracts location info through a
hash function, leading to a considerably smaller amount of data shuffling by only loading the file groups of interest
from the metadata table.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="write-latency">Write latency<a class="hash-link" href="#write-latency" title="Direct link to heading"></a></h4><p>In the first set of experiments, we established two pipelines: one configured using GSI, and the other configured with RLI.
Each pipeline was executed on an EMR cluster of 10 m5.4xlarge core instances, and was set to ingest batches of 200MB of data
into a 1TB dataset of 2 billion records. The RLI partition was configured with 1000 file groups. For N batches of ingestion,
<strong>the average write latency using RLI showed a remarkable 72% improvement over GSI</strong>.</p><img src="/assets/images/blog/record-level-index/write-latency.png" alt="Write latency comparison between GSI and RLI" width="600" align="middle"><p>Note: Between Global Simple Index and Global Bloom Index in Hudi, the former yielded better results due to the randomness
of record keys. Therefore, we omitted the presentation of the Global Bloom Index in the chart.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="index-look-up-latency">Index look-up latency<a class="hash-link" href="#index-look-up-latency" title="Direct link to heading"></a></h4><p>We also isolated the index look-up step using HoodieReadClient to accurately gauge indexing efficiency. Through
experiments involving the look-up of 400,000 records (0.02%) in a 1TB dataset of 2 billion records, <strong>RLI showcased a
72% improvement over GSI, consistent with the end-to-end write latency results</strong>.</p><img src="/assets/images/blog/record-level-index/index-latency.png" alt="index-latency" width="600" align="middle"><h4 class="anchor anchorWithStickyNavbar_y2LR" id="data-shuffling">Data shuffling<a class="hash-link" href="#data-shuffling" title="Direct link to heading"></a></h4><p>In the index look-up experiments, we observed that around 85GB of data was shuffled for GSI, whereas only 700MB was shuffled
for RLI. <strong>This reflects an impressive 92% reduction in data shuffling when using RLI compared to GSI</strong>.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="query-latency">Query latency<a class="hash-link" href="#query-latency" title="Direct link to heading"></a></h4><p>The Record Level Index will greatly boost Spark queries with “EqualTo” and “IN” predicates on record key columns.
We created a 400GB Hudi table comprising 20,000 file groups. When we executed a query predicated on a single record key,
we observed a significant improvement in query time. <strong>With RLI enabled, the query time decreased from 977 seconds to just
12 seconds, representing an impressive 98% reduction in latency</strong><sup id="fnref-4"><a href="#fn-4" class="footnote-ref">4</a></sup>.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="when-to-use">When to Use<a class="hash-link" href="#when-to-use" title="Direct link to heading"></a></h3><p>RLI demonstrates outstanding performance in general, elevating update and delete efficiency to a new level and
fast-tracking reads when executing key-matching queries. Enabling RLI is also as simple as setting some configuration flags.
Below, we have summarized a comparison table highlighting these important characteristics of RLI in contrast to other common Hudi index types.</p><table><thead><tr><th></th><th>Record Level Index</th><th>Global Simple Index</th><th>Global Bloom Index</th><th>HBase Index</th><th>Bucket Index</th></tr></thead><tbody><tr><td>Performant look-up in general</td><td>Yes</td><td>No</td><td>No</td><td>Yes, with possible throttling issues</td><td>Yes</td></tr><tr><td>Boost both writes and reads</td><td>Yes</td><td>No, write-only</td><td>No, write-only</td><td>No, write-only</td><td>No, write-only</td></tr><tr><td>Easy to enable</td><td>Yes</td><td>Yes</td><td>Yes</td><td>No, requires an HBase server</td><td>Yes</td></tr></tbody></table><p>Many real-world applications will significantly benefit from using RLI. A common example is fulfilling the GDPR requirements.
Typically, when users make requests, a set of IDs will be provided to identify the to-be-deleted records,
which will either be updated (columns being nullified) or permanently removed.
By enabling RLI, offline jobs performing such changes will become notably more efficient, resulting in cost savings.
On the read side, analysts or engineers collecting historical events through certain tracing IDs will also
experience blazing fast responses from the key-matching queries.</p><p>While RLI holds the above-mentioned advantages over all other index types, it is important to consider certain
aspects when using it. Similar to any other global index, RLI requires record-key uniqueness across all partitions in a table.
As RLI keeps track of all record keys and locations, the initialization process may take time for large tables.
In scenarios with extremely skewed large workloads, RLI might not achieve the desired performance due to limitations in the current design.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="future-work">Future Work<a class="hash-link" href="#future-work" title="Direct link to heading"></a></h2><p>In this initial version of the Record Level Index, certain limitations are acknowledged. As mentioned in the
&quot;Initialization&quot; section, the number of file groups must be predetermined during the creation of the RLI partition.
Hudi does use some heuristics and a growth factor for an existing table, but for a new table, it is recommended to
set appropriate file group configs for RLI. As the data volume increases, the RLI partition requires re-bootstrapping
when additional file groups are needed for scaling out. To address the need for rebalancing, a consistent hashing
technique could be employed.</p><p>Another valuable enhancement would involve supporting the indexing of secondary columns alongside the record key
fields, thus catering to a broader range of queries. On the reader side, there is a plan to integrate more query
engines, such as Presto and Trino, with the Record Level Index to fully leverage the performance benefits offered
by Hudi metadata tables.</p><hr><p><sup id="fn-1"><a href="#fnref-1" class="footnote-ref">1</a></sup> <a href="https://hudi.apache.org/blog/2020/11/11/hudi-indexing-mechanisms/" target="_blank" rel="noopener noreferrer">This blog</a> clearly explains some best practices regarding index selection and configuration.</p><p><sup id="fn-2"><a href="#fnref-2" class="footnote-ref">2</a></sup> Other formats like Parquet can also be supported in the future.</p><p><sup id="fn-3"><a href="#fnref-3" class="footnote-ref">3</a></sup> As of now, query engine integration is only available for Spark, with plans to support additional engines in the future.</p><p><sup id="fn-4"><a href="#fnref-4" class="footnote-ref">4</a></sup> The query improvement is specific to record-key-matching queries and does not reflect a general reduction in latency by enabling RLI. In the case of the single record-key query, 99.995% of file groups (19999 out of 20000) were pruned during query execution.</p></div><footer class="row docusaurus-mt-lg blogPostDetailsFull_2lop"><div class="col margin-top--sm"><a href="https://github.com/apache/hudi/edit/asf-site/website/blog/blog/2023-11-01-record-level-index.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Blog post page navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/cn/blog/2023/11/13/Apache-Hudi-From-Zero-To-One-blog-6"><div class="pagination-nav__sublabel">Newer Post</div><div class="pagination-nav__label">« <!-- -->Apache Hudi: From Zero To One (6/10)</div></a></div><div class="pagination-nav__item 
pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/blog/2023/10/29/UPSERT-Performance-Evaluation-of-Hudi-0-14-and-Spark-3-4-1-Record-Level-Index-Global-Bloom-Global-Simple-Indexes"><div class="pagination-nav__sublabel">Older Post</div><div class="pagination-nav__label">UPSERT Performance Evaluation of Hudi 0.14 and Spark 3.4.1: Record Level Index vs. Global Bloom &amp; Global Simple Indexes<!-- --> »</div></a></div></nav></main><div class="col col--2"><div class="tableOfContents_vrFS thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#introduction" class="table-of-contents__link toc-highlight">Introduction</a></li><li><a href="#metadata-table" class="table-of-contents__link toc-highlight">Metadata table</a></li><li><a href="#record-level-index" class="table-of-contents__link toc-highlight">Record Level Index</a><ul><li><a href="#initialization" class="table-of-contents__link toc-highlight">Initialization</a></li><li><a href="#updating-rli-upon-data-table-writes" class="table-of-contents__link toc-highlight">Updating RLI upon data table writes</a></li><li><a href="#writer-indexing" class="table-of-contents__link toc-highlight">Writer Indexing</a></li><li><a href="#read-flow" class="table-of-contents__link toc-highlight">Read Flow</a></li><li><a href="#storage" class="table-of-contents__link toc-highlight">Storage</a></li><li><a href="#performance" class="table-of-contents__link toc-highlight">Performance</a></li><li><a href="#when-to-use" class="table-of-contents__link toc-highlight">When to Use</a></li></ul></li><li><a href="#future-work" class="table-of-contents__link toc-highlight">Future Work</a></li></ul></div></div></div></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" 
href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li 
class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a 
href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a 
href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>.
Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/docs/privacy">Privacy Policy</a></div></div></div></footer></div>
<script src="/cn/assets/js/runtime~main.0acdb754.js"></script>
<script src="/cn/assets/js/main.6d6aa24f.js"></script>
</body>
</html>