class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" href="/docs/0.8.0/configurations">Configurations</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.8.0/performance">Performance</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.8.0/deployment">Deployment</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.8.0/cloud">Storage Configurations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.8.0/docker_demo">Resources</a></div></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.8.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/configurations">latest version</a></b> (<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.8.0</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Configurations</h1></header><p>This page covers the different ways of configuring your job to write/read Hudi tables.
At a high level, you can control behaviour at a few levels:

- **[Spark Datasource Configs](#spark-datasource-configs)**: These configs control the Hudi Spark Datasource, providing the ability to define keys/partitioning, pick the write operation, specify how to merge records, or choose the query type to read.
- **[Flink SQL Configs](#flink-sql-config-options)**: These configs control the Hudi Flink SQL source/sink connectors, providing the ability to define record keys, pick the write operation, specify how to merge records, enable/disable asynchronous compaction, or choose the query type to read.
- **[WriteClient Configs](#writeclient-configs)**: Internally, the Hudi datasource uses an RDD-based `HoodieWriteClient` API to actually perform writes to storage. These configs provide deep control over lower-level aspects like file sizing, compression, parallelism, compaction, write schema, cleaning, etc. Although Hudi provides sane defaults, from time to time these configs may need to be tweaked to optimize for specific workloads.
- **[RecordPayload Config](#payload_class_opt_key)**: This is the lowest level of customization offered by Hudi. Record payloads define how to produce new values to upsert, based on the incoming new record and the stored old record. Hudi provides default implementations such as `OverwriteWithLatestAvroPayload`, which simply updates the table with the latest/last-written record. This can be overridden with a custom class extending `HoodieRecordPayload`, at both the datasource and WriteClient levels.

## Spark Datasource Configs

Spark jobs using the datasource can be configured by passing the below options into the `option(k,v)` method as usual.
The actual datasource-level configs are listed below.

### Write Options

Additionally, you can pass down any of the WriteClient-level configs directly, using the `options()` or `option(k,v)` methods.

```java
inputDF.write()
    .format("org.apache.hudi")
    .options(clientOpts) // any of the Hudi client opts can be passed in as well
    .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
    .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
    .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
    .option(HoodieWriteConfig.TABLE_NAME, tableName)
    .mode(SaveMode.Append)
    .save(basePath);
```

Options useful for writing tables via `write.format.option(...)`:

#### TABLE_NAME_OPT_KEY
Property: `hoodie.datasource.write.table.name` [Required]
Hive table name to register the table into.

#### OPERATION_OPT_KEY
Property: `hoodie.datasource.write.operation`, Default: `upsert`
Whether to do an upsert, insert, or bulk insert for the write operation. Use `bulkinsert` to load new data into a table, and thereafter use `upsert`/`insert`. Bulk insert uses a disk-based write path to scale to large inputs without the need to cache them.

#### TABLE_TYPE_OPT_KEY
Property: `hoodie.datasource.write.table.type`, Default: `COPY_ON_WRITE`
The table type for the underlying data, for this write. This can't change between writes.

#### PRECOMBINE_FIELD_OPT_KEY
Property: `hoodie.datasource.write.precombine.field`, Default: `ts`
Field used in pre-combining before the actual write. When two records have the same key value, we pick the one with the largest value for the precombine field, as determined by `Object.compareTo(..)`.

#### PAYLOAD_CLASS_OPT_KEY
Property: `hoodie.datasource.write.payload.class`, Default: `org.apache.hudi.OverwriteWithLatestAvroPayload`
Payload class used. Override this if you want to roll your own merge logic when upserting/inserting. Doing so renders any value set for `PRECOMBINE_FIELD_OPT_VAL` ineffective.
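As a hedged illustration of rolling your own merge logic, here is a minimal sketch of a custom payload. The class name and the `_deleted` field are hypothetical, and the override assumes the 0.8.0 `HoodieRecordPayload` signatures; it behaves like `OverwriteWithLatestAvroPayload` except that it treats incoming records flagged as deleted as deletes.

```java
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.util.Option;

// Hypothetical payload: overwrites with the latest incoming record, but
// treats records whose (hypothetical) "_deleted" field is true as deletes,
// by returning an empty Option from the merge.
public class DeleteMarkerAwarePayload extends OverwriteWithLatestAvroPayload {

  public DeleteMarkerAwarePayload(GenericRecord record, Comparable orderingVal) {
    super(record, orderingVal);
  }

  @Override
  public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
      throws IOException {
    Option<IndexedRecord> incoming = getInsertValue(schema);
    if (!incoming.isPresent()) {
      return Option.empty();
    }
    GenericRecord record = (GenericRecord) incoming.get();
    if (Boolean.TRUE.equals(record.get("_deleted"))) {
      return Option.empty(); // an empty result deletes the stored record
    }
    return incoming; // otherwise overwrite with the latest incoming record
  }
}
```

Such a class would then be wired in by setting `hoodie.datasource.write.payload.class` to its fully-qualified name.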
#### RECORDKEY_FIELD_OPT_KEY
Property: `hoodie.datasource.write.recordkey.field`, Default: `uuid`
Record key field. Value to be used as the `recordKey` component of `HoodieKey`. The actual value is obtained by invoking `.toString()` on the field value. Nested fields can be specified using dot notation, e.g. `a.b.c`.

#### PARTITIONPATH_FIELD_OPT_KEY
Property: `hoodie.datasource.write.partitionpath.field`, Default: `partitionpath`
Partition path field. Value to be used as the `partitionPath` component of `HoodieKey`. The actual value is obtained by invoking `.toString()` on the field value.

#### HIVE_STYLE_PARTITIONING_OPT_KEY
Property: `hoodie.datasource.write.hive_style_partitioning`, Default: `false`
When set to true, partition folder names follow the format of Hive partitions: [partition_column_name]=[partition_value]

#### KEYGENERATOR_CLASS_OPT_KEY
Property: `hoodie.datasource.write.keygenerator.class`, Default: `org.apache.hudi.keygen.SimpleKeyGenerator`
Key generator class that extracts the key out of the incoming `Row` object.

#### COMMIT_METADATA_KEYPREFIX_OPT_KEY
Property: `hoodie.datasource.write.commitmeta.key.prefix`, Default: `_`
Option keys beginning with this prefix are automatically added to the commit/deltacommit metadata. This is useful for storing checkpointing information consistently with the Hudi timeline.

#### INSERT_DROP_DUPS_OPT_KEY
Property: `hoodie.datasource.write.insert.drop.duplicates`, Default: `false`
If set to true, filters out all duplicate records from the incoming DataFrame during insert operations.

#### ENABLE_ROW_WRITER_OPT_KEY
Property: `hoodie.datasource.write.row.writer.enable`, Default: `false`
When set to true, write operations are performed directly using the Spark-native `Row` representation. This is expected to be 20 to 30% faster than regular `bulk_insert`.
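For instance, a minimal sketch of a row-writer based bulk insert, combining the operation and row-writer options above (`inputDF`, `tableName`, and `basePath` as in the earlier snippet; the operation constants are from `DataSourceWriteOptions`):

```java
// A minimal sketch of a bulk insert using the row writer path; the row
// writer key is the string property documented above.
inputDF.write()
    .format("org.apache.hudi")
    .option(DataSourceWriteOptions.OPERATION_OPT_KEY(),
        DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())
    .option("hoodie.datasource.write.row.writer.enable", "true")
    .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
    .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
    .option(HoodieWriteConfig.TABLE_NAME, tableName)
    .mode(SaveMode.Append)
    .save(basePath);
```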
#### HIVE_SYNC_ENABLED_OPT_KEY
Property: `hoodie.datasource.hive_sync.enable`, Default: `false`
When set to true, registers/syncs the table to the Apache Hive metastore.

#### HIVE_DATABASE_OPT_KEY
Property: `hoodie.datasource.hive_sync.database`, Default: `default`
Database to sync to.

#### HIVE_TABLE_OPT_KEY
Property: `hoodie.datasource.hive_sync.table`, [Required]
Table to sync to.

#### HIVE_USER_OPT_KEY
Property: `hoodie.datasource.hive_sync.username`, Default: `hive`
Hive user name to use.

#### HIVE_PASS_OPT_KEY
Property: `hoodie.datasource.hive_sync.password`, Default: `hive`
Hive password to use.

#### HIVE_URL_OPT_KEY
Property: `hoodie.datasource.hive_sync.jdbcurl`, Default: `jdbc:hive2://localhost:10000`
Hive metastore URL.

#### HIVE_PARTITION_FIELDS_OPT_KEY
Property: `hoodie.datasource.hive_sync.partition_fields`, Default: empty
Field in the table to use for determining Hive partition columns.

#### HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY
Property: `hoodie.datasource.hive_sync.partition_extractor_class`, Default: `org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor`
Class used to extract partition field values into Hive partition columns.

#### HIVE_ASSUME_DATE_PARTITION_OPT_KEY
Property: `hoodie.datasource.hive_sync.assume_date_partitioning`, Default: `false`
Assume partitioning is yyyy/mm/dd.

#### HIVE_USE_JDBC_OPT_KEY
Property: `hoodie.datasource.hive_sync.use_jdbc`, Default: `true`
Use JDBC when Hive synchronization is enabled.

#### HIVE_AUTO_CREATE_DATABASE_OPT_KEY
Property: `hoodie.datasource.hive_sync.auto_create_database`, Default: `true`
Automatically create the Hive database if it does not exist.

#### HIVE_SKIP_RO_SUFFIX
Property: `hoodie.datasource.hive_sync.skip_ro_suffix`, Default: `false`
Skip the `_ro` suffix for the read-optimized table when registering.

#### HIVE_SUPPORT_TIMESTAMP
Property: `hoodie.datasource.hive_sync.support_timestamp`, Default: `false`
When enabled, INT64 columns with original type TIMESTAMP_MICROS are converted to Hive's `timestamp` type. Disabled by default for backward compatibility.
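Putting the Hive sync options together, a minimal sketch of a write that also registers the table in the metastore (the JDBC URL, field names, and the `MultiPartKeysValueExtractor` choice are placeholders for your environment):

```java
// A minimal sketch of a write with Hive sync enabled; keys are the string
// properties documented above.
inputDF.write()
    .format("org.apache.hudi")
    .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
    .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
    .option(HoodieWriteConfig.TABLE_NAME, tableName)
    .option("hoodie.datasource.hive_sync.enable", "true")
    .option("hoodie.datasource.hive_sync.database", "default")
    .option("hoodie.datasource.hive_sync.table", tableName)
    .option("hoodie.datasource.hive_sync.jdbcurl", "jdbc:hive2://hiveserver:10000")
    .option("hoodie.datasource.hive_sync.partition_fields", "partition")
    .option("hoodie.datasource.hive_sync.partition_extractor_class",
        "org.apache.hudi.hive.MultiPartKeysValueExtractor")
    .mode(SaveMode.Append)
    .save(basePath);
```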
### Read Options

Options useful for reading tables via `read.format.option(...)`:

#### QUERY_TYPE_OPT_KEY
Property: `hoodie.datasource.query.type`, Default: `snapshot`
Whether data needs to be read in incremental mode (new data since an instantTime), read-optimized mode (obtain the latest view, based on columnar data), or snapshot mode (obtain the latest view, based on row & columnar data).

#### BEGIN_INSTANTTIME_OPT_KEY
Property: `hoodie.datasource.read.begin.instanttime`, [Required in incremental mode]
Instant time to start incrementally pulling data from. The instant time here need not necessarily correspond to an instant on the timeline. New data written with an `instant_time > BEGIN_INSTANTTIME` is fetched. For example, '20170901080000' will get all new data written after Sep 1, 2017 08:00AM.

#### END_INSTANTTIME_OPT_KEY
Property: `hoodie.datasource.read.end.instanttime`, Default: latest instant (i.e. fetches all new data since the begin instant time)
Instant time to limit incrementally fetched data to. New data written with an `instant_time <= END_INSTANTTIME` is fetched.

#### INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME_OPT_KEY
Property: `hoodie.datasource.read.schema.use.end.instanttime`, Default: `false`
Uses the end instant's schema for incrementally fetched data. By default, the latest instant's schema is used.
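For example, a minimal sketch of an incremental pull using these read options (assuming a `SparkSession` named `spark`; `basePath` and the begin instant are placeholders, and the constants are from `org.apache.hudi.DataSourceReadOptions`):

```java
// A minimal sketch of an incremental pull: fetch everything written
// after the given begin instant time.
Dataset<Row> incrDF = spark.read()
    .format("org.apache.hudi")
    .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
        DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
    .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(), "20170901080000")
    .load(basePath);
```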
## Flink SQL Config Options

Flink jobs using SQL can be configured through the options in the `WITH` clause. The actual datasource-level configs are listed below.

### Write Options

| Option Name | Required | Default | Remarks |
| ----------- | -------- | ------- | ------- |
| `path` | Y | N/A | Base path for the target hoodie table. The path will be created if it does not exist; otherwise a Hudi table is expected to have been initialized there |
| `table.type` | N | COPY_ON_WRITE | Type of table to write: COPY_ON_WRITE (or) MERGE_ON_READ |
| `write.operation` | N | upsert | The write operation that this write should do (insert or upsert is supported) |
| `write.precombine.field` | N | ts | Field used in pre-combining before the actual write. When two records have the same key value, we pick the one with the largest value for the precombine field, determined by Object.compareTo(..) |
| `write.payload.class` | N | OverwriteWithLatestAvroPayload.class | Payload class used. Override this if you want to roll your own merge logic when upserting/inserting. This renders any value set for `write.precombine.field` ineffective |
| `write.insert.drop.duplicates` | N | false | Flag to indicate whether to drop duplicates upon insert. By default, inserts accept duplicates, to gain extra performance |
| `write.ignore.failed` | N | true | Flag to indicate whether to ignore any non-exception errors (e.g. write-status errors) within a checkpoint batch. Default true (in favor of streaming progressing over data integrity) |
| `hoodie.datasource.write.recordkey.field` | N | uuid | Record key field. Value to be used as the `recordKey` component of `HoodieKey`. The actual value is obtained by invoking .toString() on the field value. Nested fields can be specified using dot notation, e.g. `a.b.c` |
| `hoodie.datasource.write.keygenerator.class` | N | SimpleAvroKeyGenerator.class | Key generator class that extracts the key out of the incoming record |
| `write.tasks` | N | 4 | Parallelism of tasks that do the actual write, default 4 |
| `write.batch.size.MB` | N | 128 | Batch buffer size in MB used to flush data into the underlying filesystem |

If the table type is MERGE_ON_READ, you can also specify the asynchronous compaction strategy through options:

| Option Name | Required | Default | Remarks |
| ----------- | -------- | ------- | ------- |
| `compaction.async.enabled` | N | true | Async compaction, enabled by default for MOR |
| `compaction.trigger.strategy` | N | num_commits | Strategy to trigger compaction. Options are 'num_commits': trigger compaction when N delta commits are reached; 'time_elapsed': trigger compaction when time elapsed > N seconds since the last compaction; 'num_and_time': trigger compaction when both NUM_COMMITS and TIME_ELAPSED are satisfied; 'num_or_time': trigger compaction when either NUM_COMMITS or TIME_ELAPSED is satisfied. Default is 'num_commits' |
| `compaction.delta_commits` | N | 5 | Max delta commits needed to trigger compaction, default 5 commits |
| `compaction.delta_seconds` | N | 3600 | Max delta seconds needed to trigger compaction, default 1 hour |

### Read Options

| Option Name | Required | Default | Remarks |
| ----------- | -------- | ------- | ------- |
| `path` | Y | N/A | Base path for the target hoodie table. The path will be created if it does not exist; otherwise a Hudi table is expected to have been initialized there |
| `table.type` | N | COPY_ON_WRITE | Type of table to read: COPY_ON_WRITE (or) MERGE_ON_READ |
| `read.tasks` | N | 4 | Parallelism of tasks that do the actual read, default 4 |
| `read.avro-schema.path` | N | N/A | Avro schema file path; the parsed schema is used for deserialization. If not specified, the Avro schema is inferred from the table DDL |
| `read.avro-schema` | N | N/A | Avro schema string; the parsed schema is used for deserialization. If not specified, the Avro schema is inferred from the table DDL |
| `hoodie.datasource.query.type` | N | snapshot | Decides how data files are read: 1) snapshot mode (obtain the latest view, based on row & columnar data); 2) incremental mode (new data since an instantTime), not supported yet; 3) read-optimized mode (obtain the latest view, based on columnar data). Default: snapshot |
| `hoodie.datasource.merge.type` | N | payload_combine | For snapshot queries on merge-on-read tables. Use this key to define how the payloads are merged: 1) skip_merge: read the base file records plus the log file records; 2) payload_combine: read the base file records first and, for each record in the base file, check whether the key is in the log file records (combining the two records with the same key for the base and log files), then read the remaining log file records |
| `hoodie.datasource.hive_style_partition` | N | false | Whether the partition path uses Hive style, e.g. '{partition key}={partition value}', default false |
| `read.utc-timezone` | N | true | Use UTC timezone or local timezone for the conversion between epoch time and LocalDateTime. Hive 0.x/1.x/2.x use local timezone, but Hive 3.x uses UTC timezone. Default true |

If the table type is MERGE_ON_READ, streaming read is supported through options:

| Option Name | Required | Default | Remarks |
| ----------- | -------- | ------- | ------- |
| `read.streaming.enabled` | N | false | Whether to read as a streaming source, default false |
| `read.streaming.check-interval` | N | 60 | Check interval for streaming read, in seconds; default 1 minute |
| `read.streaming.start-commit` | N | N/A | Start commit instant for streaming read; the commit time format should be 'yyyyMMddHHmmss'. By default, reads from the latest instant |
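The options above map directly onto a `CREATE TABLE` DDL. A minimal sketch, issued from Java (the table schema and path are placeholders, and a `TableEnvironment` named `tEnv` with the hudi-flink bundle on the classpath is assumed):

```java
// A minimal sketch of declaring a MERGE_ON_READ Hudi table in Flink SQL;
// schema, path, and option values are illustrative placeholders.
tEnv.executeSql(
    "CREATE TABLE hudi_table ("
        + "  uuid VARCHAR(20),"
        + "  name VARCHAR(10),"
        + "  ts TIMESTAMP(3),"
        + "  `partition` VARCHAR(20)"
        + ") PARTITIONED BY (`partition`) WITH ("
        + "  'connector' = 'hudi',"
        + "  'path' = 'hdfs:///tmp/hudi_table',"
        + "  'table.type' = 'MERGE_ON_READ',"
        + "  'write.tasks' = '4',"
        + "  'compaction.delta_commits' = '5'"
        + ")");
```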
HoodieWriteConfig can be built using a builder pattern as below. </p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .withPath(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .forTable(tableName)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .withSchema(schemaStr)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .withProps(props) // pass raw k,v pairs from a property file.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .withCompactionConfig(HoodieCompactionConfig.newBuilder().withXXX(...).build())</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .withIndexConfig(HoodieIndexConfig.newBuilder().withXXX(...).build())</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .build();</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Following subsections go over different aspects of write configs, explaining most important configs with their property names, default values.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withpathhoodie_base_path">withPath(hoodie_base_path)<a class="hash-link" href="#withpathhoodie_base_path" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.base.path</code> <!-- -->[Required]<!-- --> <br>
Base DFS path under which all the data partitions are created. Always prefix it explicitly with the storage scheme (e.g. hdfs://, s3:// etc.). Hudi stores all the main metadata about commits, savepoints, cleaning audit logs etc. in the .hoodie directory under this base directory. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withschemaschema_str">withSchema(schema_str)<a class="hash-link" href="#withschemaschema_str" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.avro.schema</code> <!-- -->[Required]<br>
This is the current reader Avro schema for the table, as a string of the entire schema. HoodieWriteClient passes this schema on to implementations of HoodieRecordPayload to convert records from the source format to Avro. It is also used when re-writing records during an update. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="fortabletable_name">forTable(table_name)<a class="hash-link" href="#fortabletable_name" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.table.name</code> <!-- -->[Required]<!-- --> <br>
Table name that will be used for registering with Hive. Needs to be the same across runs.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbulkinsertparallelismbulk_insert_parallelism--1500">withBulkInsertParallelism(bulk_insert_parallelism = 1500)<a class="hash-link" href="#withbulkinsertparallelismbulk_insert_parallelism--1500" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bulkinsert.shuffle.parallelism</code><br>
Bulk insert is meant to be used for large initial imports, and this parallelism determines the initial number of files in your table. Tune this to achieve the desired optimal file size during the initial import.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withuserdefinedbulkinsertpartitionerclassclassname--xyzuserdefinedpatitionerclass">withUserDefinedBulkInsertPartitionerClass(className = x.y.z.UserDefinedPartitionerClass)<a class="hash-link" href="#withuserdefinedbulkinsertpartitionerclassclassname--xyzuserdefinedpatitionerclass" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bulkinsert.user.defined.partitioner.class</code><br>
If specified, this class will be used to re-partition input records before they are inserted.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbulkinsertsortmodemode--bulkinsertsortmodeglobal_sort">withBulkInsertSortMode(mode = BulkInsertSortMode.GLOBAL_SORT)<a class="hash-link" href="#withbulkinsertsortmodemode--bulkinsertsortmodeglobal_sort" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bulkinsert.sort.mode</code><br>
Sorting modes to use for sorting records for bulk insert. This is leveraged when a user-defined partitioner is not configured. Default is GLOBAL_SORT.
Available values are: <strong>GLOBAL_SORT</strong>: ensures the best file sizes, with the lowest memory overhead, at the cost of sorting.
<strong>PARTITION_SORT</strong>: strikes a balance by sorting only within a partition, still keeping the memory overhead of writing low with best-effort file sizing.
<strong>NONE</strong>: no sorting. Fastest, and matches <code>spark.write.parquet()</code> in terms of number of files and overheads. </p>
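<p>For example, a sketch of wiring these bulk insert knobs through the builder&#x27;s raw properties (the <code>basePath</code>, <code>tableName</code> and <code>schemaStr</code> variables are the same placeholders as in the builder example above):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
// Initial import: ~1500 output files, sorted only within each partition.
props.setProperty("hoodie.bulkinsert.shuffle.parallelism", "1500");
props.setProperty("hoodie.bulkinsert.sort.mode", "PARTITION_SORT");

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props) // raw k,v pairs, as documented for withProps(props)
    .build();</code></pre></div></div>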
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="withparallelisminsert_shuffle_parallelism--1500-upsert_shuffle_parallelism--1500">withParallelism(insert_shuffle_parallelism = 1500, upsert_shuffle_parallelism = 1500)<a class="hash-link" href="#withparallelisminsert_shuffle_parallelism--1500-upsert_shuffle_parallelism--1500" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.insert.shuffle.parallelism</code>, <code>hoodie.upsert.shuffle.parallelism</code><br>
Once data has been initially imported, this parallelism controls the initial parallelism for reading input records. Ensure this value is high enough, e.g. 1 partition for every 1 GB of input data.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withdeleteparallelismparallelism--1500">withDeleteParallelism(parallelism = 1500)<a class="hash-link" href="#withdeleteparallelismparallelism--1500" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.delete.shuffle.parallelism</code><br>
This parallelism is used for the &quot;delete&quot; operation while deduping or repartitioning. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="combineinputon_insert--false-on_updatetrue">combineInput(on_insert = false, on_update=true)<a class="hash-link" href="#combineinputon_insert--false-on_updatetrue" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.combine.before.insert</code>, <code>hoodie.combine.before.upsert</code><br>
Flag which first combines the input RDD and merges multiple partial records into a single record before inserting or updating in DFS</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="combinedeleteinputon_delete--true">combineDeleteInput(on_Delete = true)<a class="hash-link" href="#combinedeleteinputon_delete--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.combine.before.delete</code><br>
Flag which first combines the input RDD and merges multiple partial records into a single record before deleting in DFS</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withmergeallowduplicateoninsertsmergeallowduplicateoninserts--false">withMergeAllowDuplicateOnInserts(mergeAllowDuplicateOnInserts = false)<a class="hash-link" href="#withmergeallowduplicateoninsertsmergeallowduplicateoninserts--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.merge.allow.duplicate.on.inserts</code> <br>
When enabled, new records are routed as inserts and are not merged with existing records.
The result could contain duplicate entries. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withwritestatusstoragelevellevel--memory_and_disk_ser">withWriteStatusStorageLevel(level = MEMORY_AND_DISK_SER)<a class="hash-link" href="#withwritestatusstoragelevellevel--memory_and_disk_ser" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.status.storage.level</code><br>
HoodieWriteClient.insert and HoodieWriteClient.upsert return a persisted RDD<!-- -->[WriteStatus]<!-- -->, because the client can inspect the WriteStatus and choose whether or not to commit based on the failures. This configures the storage level for that RDD. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withautocommitautocommit--true">withAutoCommit(autoCommit = true)<a class="hash-link" href="#withautocommitautocommit--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.auto.commit</code><br>
Should HoodieWriteClient autoCommit after insert and upsert. The client can choose to turn off auto-commit and commit on a &quot;defined success condition&quot;</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withassumedatepartitioningassumedatepartitioning--false">withAssumeDatePartitioning(assumeDatePartitioning = false)<a class="hash-link" href="#withassumedatepartitioningassumedatepartitioning--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.assume.date.partitioning</code><br>
Should HoodieWriteClient assume the data is partitioned by dates, i.e. three levels from the base path. This is a stop-gap to support tables created by versions <!-- -->&lt;<!-- --> 0.3.1. Will be removed eventually. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withconsistencycheckenabledenabled--false">withConsistencyCheckEnabled(enabled = false)<a class="hash-link" href="#withconsistencycheckenabledenabled--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.consistency.check.enabled</code><br>
Should HoodieWriteClient perform additional checks to ensure written files are listable on the underlying filesystem/storage. Set this to true to work around S3&#x27;s eventual consistency model and ensure all data written as part of a commit is faithfully available for queries. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withrollbackparallelismrollbackparallelism--100">withRollbackParallelism(rollbackParallelism = 100)<a class="hash-link" href="#withrollbackparallelismrollbackparallelism--100" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.rollback.parallelism</code><br>
Determines the parallelism for rollback of commits.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withrollbackusingmarkersrollbackusingmarkers--false">withRollbackUsingMarkers(rollbackUsingMarkers = false)<a class="hash-link" href="#withrollbackusingmarkersrollbackusingmarkers--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.rollback.using.markers</code><br>
Enables a more efficient mechanism for rollbacks based on the marker files generated during the writes. Turned off by default.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withmarkersdeleteparallelismparallelism--100">withMarkersDeleteParallelism(parallelism = 100)<a class="hash-link" href="#withmarkersdeleteparallelismparallelism--100" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.markers.delete.parallelism</code><br>
Determines the parallelism for deleting marker files.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="index-configs">Index configs<a class="hash-link" href="#index-configs" title="Direct link to heading"></a></h3><p>The following configs control indexing behavior, which tags incoming records as either inserts or updates to older records. </p><p><a href="#index-configs">withIndexConfig</a> (HoodieIndexConfig) <br>
This is pluggable, to either use an external index (HBase) or the default bloom filter stored in the Parquet files</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withindexclassindexclass--xyzuserdefinedindex">withIndexClass(indexClass = &quot;x.y.z.UserDefinedIndex&quot;)<a class="hash-link" href="#withindexclassindexclass--xyzuserdefinedindex" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.class</code> <br>
Full path of a user-defined index class; must be a subclass of HoodieIndex. If specified, it takes precedence over the <code>hoodie.index.type</code> configuration</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withindextypeindextype--bloom">withIndexType(indexType = BLOOM)<a class="hash-link" href="#withindextypeindextype--bloom" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.type</code> <br>
Type of index to use. Default is Bloom filter. Possible options are <!-- -->[BLOOM | GLOBAL_BLOOM | SIMPLE | GLOBAL_SIMPLE | INMEMORY | HBASE]<!-- -->. Bloom filters remove the dependency on an external system and are stored in the footer of the Parquet data files</p>
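<p>As an illustrative sketch, assuming the <code>HoodieIndexConfig</code> builder methods match the headings on this page (the <code>HoodieIndex.IndexType</code> enum and package paths are assumptions, and the placeholder variables are reused from the builder example above), an explicit bloom index could be configured like this:</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.index.HoodieIndex;

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withIndexConfig(HoodieIndexConfig.newBuilder()
        .withIndexType(HoodieIndex.IndexType.BLOOM) // hoodie.index.type
        .build())
    .build();</code></pre></div></div>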
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloom-index-configs">Bloom Index configs<a class="hash-link" href="#bloom-index-configs" title="Direct link to heading"></a></h4><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexfiltertypebucketizedchecking--bloomfiltertypecodesimple">bloomIndexFilterType(bucketizedChecking = BloomFilterTypeCode.SIMPLE)<a class="hash-link" href="#bloomindexfiltertypebucketizedchecking--bloomfiltertypecodesimple" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.filter.type</code> <br>
Filter type used. Default is BloomFilterTypeCode.SIMPLE. Available values are <!-- -->[BloomFilterTypeCode.SIMPLE , BloomFilterTypeCode.DYNAMIC_V0]<!-- -->. Dynamic bloom filters auto-size themselves based on the number of keys.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomfilternumentriesnumentries--60000">bloomFilterNumEntries(numEntries = 60000)<a class="hash-link" href="#bloomfilternumentriesnumentries--60000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.bloom.num_entries</code> <br>
Only applies if index type is BLOOM. <br>This is the number of entries to be stored in the bloom filter. We assume the maxParquetFileSize is 128MB and the averageRecordSize is 1024B, and hence approximate a total of 130K records in a file. The default (60000) is roughly half of this approximation. <a href="https://issues.apache.org/jira/browse/HUDI-56" target="_blank" rel="noopener noreferrer">HUDI-56</a> tracks computing this dynamically. Warning: Setting this very low will generate a lot of false positives, and index lookup will have to scan many more files than it has to; setting this to a very high number will increase the size of every data file linearly (roughly 4KB for every 50000 entries). This config is also used with the DYNAMIC_V0 bloom filter, where it determines the initial size of the bloom filter. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomfilterfppfpp--0000000001">bloomFilterFPP(fpp = 0.000000001)<a class="hash-link" href="#bloomfilterfppfpp--0000000001" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.bloom.fpp</code> <br>
Only applies if index type is BLOOM. <br> Error rate allowed given the number of entries. This is used to calculate how many bits should be assigned for the bloom filter and the number of hash functions. This is usually set very low (default: 0.000000001), since we prefer to trade off disk space for fewer false positives. If the number of entries added to the bloom filter exceeds the configured value (<code>hoodie.index.bloom.num_entries</code>), then this fpp may not be honored.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexparallelism0">bloomIndexParallelism(0)<a class="hash-link" href="#bloomindexparallelism0" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.parallelism</code> <br>
Only applies if index type is BLOOM. <br> This is the amount of parallelism for index lookup, which involves a Spark Shuffle. By default, this is auto computed based on input workload characteristics</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexprunebyrangespruneranges--true">bloomIndexPruneByRanges(pruneRanges = true)<a class="hash-link" href="#bloomindexprunebyrangespruneranges--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.prune.by.ranges</code> <br>
Only applies if index type is BLOOM. <br> When true, range information from files is leveraged to speed up index lookups. Particularly helpful if the key has a monotonically increasing prefix, such as a timestamp. If the record key is completely random, it is better to turn this off.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexusecachingusecaching--true">bloomIndexUseCaching(useCaching = true)<a class="hash-link" href="#bloomindexusecachingusecaching--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.use.caching</code> <br>
Only applies if index type is BLOOM. <br> When true, the input RDD will be cached to speed up index lookup by reducing IO for computing parallelism or affected partitions</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindextreebasedfilterusetreefilter--true">bloomIndexTreebasedFilter(useTreeFilter = true)<a class="hash-link" href="#bloomindextreebasedfilterusetreefilter--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.use.treebased.filter</code> <br>
Only applies if index type is BLOOM. <br> When true, interval tree based file pruning optimization is enabled. This mode speeds up file pruning based on key ranges when compared with the brute-force mode</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexbucketizedcheckingbucketizedchecking--true">bloomIndexBucketizedChecking(bucketizedChecking = true)<a class="hash-link" href="#bloomindexbucketizedcheckingbucketizedchecking--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.bucketized.checking</code> <br>
Only applies if index type is BLOOM. <br> When true, bucketized bloom filtering is enabled. This reduces skew seen in sort-based bloom index lookup</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexfilterdynamicmaxentriesmaxnumberofentries--100000">bloomIndexFilterDynamicMaxEntries(maxNumberOfEntries = 100000)<a class="hash-link" href="#bloomindexfilterdynamicmaxentriesmaxnumberofentries--100000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.filter.dynamic.max.entries</code> <br>
The threshold for the maximum number of keys to record in a dynamic Bloom filter row. Only applies if filter type is BloomFilterTypeCode.DYNAMIC_V0.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexkeysperbucketkeysperbucket--10000000">bloomIndexKeysPerBucket(keysPerBucket = 10000000)<a class="hash-link" href="#bloomindexkeysperbucketkeysperbucket--10000000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bloom.index.keys.per.bucket</code> <br>
Only applies if bloomIndexBucketizedChecking is enabled and index type is BLOOM. <br> This configuration controls the &quot;bucket&quot; size, which tracks the number of record-key checks made against a single file and is the unit of work allocated to each partition performing bloom filter lookup. A higher value would amortize the fixed cost of reading a bloom filter into memory. </p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withbloomindexinputstoragelevellevel--memory_and_disk_ser">withBloomIndexInputStorageLevel(level = MEMORY_AND_DISK_SER)<a class="hash-link" href="#withbloomindexinputstoragelevellevel--memory_and_disk_ser" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.bloom.index.input.storage.level</code> <br>
Only applies when <a href="#bloomIndexUseCaching">#bloomIndexUseCaching</a> is set. Determines what level of persistence is used to cache input RDDs.<br> Refer to org.apache.spark.storage.StorageLevel for the different values</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="bloomindexupdatepartitionpathupdatepartitionpath--false">bloomIndexUpdatePartitionPath(updatePartitionPath = false)<a class="hash-link" href="#bloomindexupdatepartitionpathupdatepartitionpath--false" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.bloom.index.update.partition.path</code> <br>
Only applies if index type is GLOBAL_BLOOM. <br>When set to true, an update including the partition path of a record that already exists will result in inserting the incoming record into the new partition and deleting the original record in the old partition. When set to false, the original record will only be updated in the old partition.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hbase-index-configs">HBase Index configs<a class="hash-link" href="#hbase-index-configs" title="Direct link to heading"></a></h4><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hbasezkquorumzkstring-required">hbaseZkQuorum(zkString) <!-- -->[Required]<a class="hash-link" href="#hbasezkquorumzkstring-required" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.hbase.zkquorum</code> <br>
Only applies if index type is HBASE. HBase ZK Quorum url to connect to.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hbasezkportport-required">hbaseZkPort(port) <!-- -->[Required]<a class="hash-link" href="#hbasezkportport-required" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.hbase.zkport</code> <br>
Only applies if index type is HBASE. HBase ZK Quorum port to connect to.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hbasezkznodeparentzkznodeparent--required">hbaseZkZnodeParent(zkZnodeParent) <!-- -->[Required]<a class="hash-link" href="#hbasezkznodeparentzkznodeparent--required" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.hbase.zknode.path</code> <br>
Only applies if index type is HBASE. This is the root znode that will contain all the znodes created/used by HBase.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hbasetablenametablename--required">hbaseTableName(tableName) <!-- -->[Required]<a class="hash-link" href="#hbasetablenametablename--required" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.index.hbase.table</code> <br>
Only applies if index type is HBASE. HBase Table name to use as the index. Hudi stores the row_key and <!-- -->[partition_path, fileID, commitTime]<!-- --> mapping in the table.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hbaseindexupdatepartitionpathupdatepartitionpath">hbaseIndexUpdatePartitionPath(updatePartitionPath)<a class="hash-link" href="#hbaseindexupdatepartitionpathupdatepartitionpath" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.hbase.index.update.partition.path</code> <br>
Only applies if index type is HBASE. When an existing record is upserted into a new partition compared to what is in storage, setting this config will delete the old record in the old partition and insert it as a new record in the new partition. </p>
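<p>For example, a sketch of pointing the index at an HBase cluster through the raw properties (the quorum, port, znode and table values are placeholders, and the other variables are reused from the builder example above):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
props.setProperty("hoodie.index.type", "HBASE");
props.setProperty("hoodie.index.hbase.zkquorum", "zk1,zk2,zk3");    // placeholder ZK quorum
props.setProperty("hoodie.index.hbase.zkport", "2181");             // placeholder ZK port
props.setProperty("hoodie.index.hbase.zknode.path", "/hbase");      // placeholder root znode
props.setProperty("hoodie.index.hbase.table", "hudi_record_index"); // placeholder index table

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props)
    .build();</code></pre></div></div>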
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="simple-index-configs">Simple Index configs<a class="hash-link" href="#simple-index-configs" title="Direct link to heading"></a></h4><h4 class="anchor anchorWithStickyNavbar_y2LR" id="simpleindexusecachingusecaching--true">simpleIndexUseCaching(useCaching = true)<a class="hash-link" href="#simpleindexusecachingusecaching--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.simple.index.use.caching</code> <br>
Only applies if index type is SIMPLE. <br> When true, the input RDD will be cached to speed up index lookup by reducing IO for computing parallelism or affected partitions</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withsimpleindexinputstoragelevellevel--memory_and_disk_ser">withSimpleIndexInputStorageLevel(level = MEMORY_AND_DISK_SER)<a class="hash-link" href="#withsimpleindexinputstoragelevellevel--memory_and_disk_ser" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.simple.index.input.storage.level</code> <br>
Only applies when <a href="#simpleIndexUseCaching">#simpleIndexUseCaching</a> is set. Determines what level of persistence is used to cache input RDDs.<br> Refer to org.apache.spark.storage.StorageLevel for the different values</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withsimpleindexparallelismparallelism--50">withSimpleIndexParallelism(parallelism = 50)<a class="hash-link" href="#withsimpleindexparallelismparallelism--50" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.simple.index.parallelism</code> <br>
Only applies if index type is SIMPLE. <br> This is the amount of parallelism for index lookup, which involves a Spark Shuffle.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withglobalsimpleindexparallelismparallelism--100">withGlobalSimpleIndexParallelism(parallelism = 100)<a class="hash-link" href="#withglobalsimpleindexparallelismparallelism--100" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.global.simple.index.parallelism</code> <br>
Only applies if index type is GLOBAL_SIMPLE. <br> This is the amount of parallelism for index lookup, which involves a Spark Shuffle.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="storage-configs">Storage configs<a class="hash-link" href="#storage-configs" title="Direct link to heading"></a></h3><p>Controls aspects around sizing parquet and log files.</p><p><a href="#withStorageConfig">withStorageConfig</a> (HoodieStorageConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="limitfilesize-size--120mb">limitFileSize (size = 120MB)<a class="hash-link" href="#limitfilesize-size--120mb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.parquet.max.file.size</code> <br>
Target size for parquet files produced by Hudi write phases. For DFS, this needs to be aligned with the underlying filesystem block size for optimal performance. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="parquetblocksizerowgroupsize--120mb">parquetBlockSize(rowgroupsize = 120MB)<a class="hash-link" href="#parquetblocksizerowgroupsize--120mb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.parquet.block.size</code> <br>
Parquet RowGroup size. It&#x27;s better for this to be the same as the file size, so that a single column within a file is stored contiguously on disk</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="parquetpagesizepagesize--1mb">parquetPageSize(pagesize = 1MB)<a class="hash-link" href="#parquetpagesizepagesize--1mb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.parquet.page.size</code> <br>
Parquet page size. The page is the unit of read within a parquet file. Within a block, pages are compressed separately. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="parquetcompressionratioparquetcompressionratio--01">parquetCompressionRatio(parquetCompressionRatio = 0.1)<a class="hash-link" href="#parquetcompressionratioparquetcompressionratio--01" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.parquet.compression.ratio</code> <br>
Expected compression of parquet data used by Hudi when it tries to size new parquet files. Increase this value if bulk_insert is producing smaller-than-expected files</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="parquetcompressioncodecparquetcompressioncodec--gzip">parquetCompressionCodec(parquetCompressionCodec = gzip)<a class="hash-link" href="#parquetcompressioncodecparquetcompressioncodec--gzip" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.parquet.compression.codec</code> <br>
Parquet compression codec name. Default is gzip. Possible options are <!-- -->[gzip | snappy | uncompressed | lzo]</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="logfilemaxsizelogfilesize--1gb">logFileMaxSize(logFileSize = 1GB)<a class="hash-link" href="#logfilemaxsizelogfilesize--1gb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.logfile.max.size</code> <br>
LogFile max size. This is the maximum size allowed for a log file before it is rolled over to the next version. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="logfiledatablockmaxsizedatablocksize--256mb">logFileDataBlockMaxSize(dataBlockSize = 256MB)<a class="hash-link" href="#logfiledatablockmaxsizedatablocksize--256mb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.logfile.data.block.max.size</code> <br>
LogFile data block max size. This is the maximum size allowed for a single data block to be appended to a log file. This helps to make sure the data appended to the log file is broken up into sizable blocks to prevent OOM errors. This size should be greater than the JVM memory. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="logfiletoparquetcompressionratiologfiletoparquetcompressionratio--035">logFileToParquetCompressionRatio(logFileToParquetCompressionRatio = 0.35)<a class="hash-link" href="#logfiletoparquetcompressionratiologfiletoparquetcompressionratio--035" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.logfile.to.parquet.compression.ratio</code> <br>
Expected additional compression as records move from log files to parquet. Used for merge_on_read tables to send inserts into log files and control the size of the compacted parquet file.</p>
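<p>For example, a sketch of sizing base files through the raw properties (the sizes match the defaults discussed above; the codec is switched to snappy purely as an example, and the other variables are reused from the builder example above):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
props.setProperty("hoodie.parquet.max.file.size", String.valueOf(120 * 1024 * 1024)); // 120MB target
props.setProperty("hoodie.parquet.block.size", String.valueOf(120 * 1024 * 1024));    // row group = file size
props.setProperty("hoodie.parquet.compression.codec", "snappy");

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props)
    .build();</code></pre></div></div>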
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="compaction-configs">Compaction configs<a class="hash-link" href="#compaction-configs" title="Direct link to heading"></a></h3><p>Configs that control compaction (merging of log files onto a new parquet base file) and cleaning (reclamation of older/unused file groups).
<a href="#withCompactionConfig">withCompactionConfig</a> (HoodieCompactionConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withcleanerpolicypolicy--keep_latest_commits">withCleanerPolicy(policy = KEEP_LATEST_COMMITS)<a class="hash-link" href="#withcleanerpolicypolicy--keep_latest_commits" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.cleaner.policy</code> <br>
Cleaning policy to be used. Hudi will delete older versions of parquet files to reclaim space. Any query/computation referring to a deleted version of a file will fail. It is good to make sure that the data is retained for more than the maximum query execution time.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withfailedwritescleaningpolicypolicy--hoodiefailedwritescleaningpolicyeager">withFailedWritesCleaningPolicy(policy = HoodieFailedWritesCleaningPolicy.EAGER)<a class="hash-link" href="#withfailedwritescleaningpolicypolicy--hoodiefailedwritescleaningpolicyeager" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.cleaner.policy.failed.writes</code> <br>
Cleaning policy for failed writes to be used. Hudi will delete any files written by failed writes to re-claim space. Choose to perform this rollback of failed writes <code>eagerly</code> before every writer starts (only supported for single writer) or <code>lazily</code> by the cleaner (required for multi-writers)</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="retaincommitsno_of_commits_to_retain--24">retainCommits(no_of_commits_to_retain = 24)<a class="hash-link" href="#retaincommitsno_of_commits_to_retain--24" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.cleaner.commits.retained</code> <br>
Number of commits to retain. So data will be retained for num_of_commits * time_between_commits (scheduled). This also directly translates into how much you can incrementally pull on this table</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withautocleanautoclean--true">withAutoClean(autoClean = true)<a class="hash-link" href="#withautocleanautoclean--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clean.automatic</code> <br>
Whether to clean up immediately after each commit, if there is anything to clean up</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withasynccleanasyncclean--false">withAsyncClean(asyncClean = false)<a class="hash-link" href="#withasynccleanasyncclean--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clean.async</code> <br>
Only applies when <a href="#withAutoClean">#withAutoClean</a> is turned on. When turned on, runs the cleaner asynchronously alongside writing. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="archivecommitswithmincommits--96-maxcommits--128">archiveCommitsWith(minCommits = 96, maxCommits = 128)<a class="hash-link" href="#archivecommitswithmincommits--96-maxcommits--128" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.keep.min.commits</code>, <code>hoodie.keep.max.commits</code> <br>
Each commit is a small file in the <code>.hoodie</code> directory. Since DFS typically does not favor lots of small files, Hudi archives older commits into a sequential log. A commit is published atomically by a rename of the commit file.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withcommitsarchivalbatchsizebatch--10">withCommitsArchivalBatchSize(batch = 10)<a class="hash-link" href="#withcommitsarchivalbatchsizebatch--10" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.commits.archival.batch</code> <br>
This controls the number of commit instants read in memory as a batch and archived together.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="compactionsmallfilesizesize--100mb">compactionSmallFileSize(size = 100MB)<a class="hash-link" href="#compactionsmallfilesizesize--100mb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.parquet.small.file.limit</code> <br>
This should be less than maxFileSize; setting it to 0 turns off this feature. Small files can always happen because of the number of insert records in a partition in a batch. Hudi has an option to auto-resolve small files by masking inserts into such a partition as updates to existing small files. The size here is the minimum file size considered a &quot;small file&quot;.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="insertsplitsizesize--500000">insertSplitSize(size = 500000)<a class="hash-link" href="#insertsplitsizesize--500000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.copyonwrite.insert.split.size</code> <br>
Insert write parallelism. Number of inserts grouped for a single partition. Writing out 100MB files, with at least 1KB records, means 100K records per file. Default is to overprovision to 500K. To improve insert latency, tune this to match the number of records in a single file. Setting this to a low number will result in small files (particularly when compactionSmallFileSize is 0)</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="autotuneinsertsplitstrue">autoTuneInsertSplits(true)<a class="hash-link" href="#autotuneinsertsplitstrue" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.copyonwrite.insert.auto.split</code> <br>
Should hudi dynamically compute the insertSplitSize based on the last 24 commits&#x27; metadata. Turned on by default. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="approxrecordsizesize--1024">approxRecordSize(size = 1024)<a class="hash-link" href="#approxrecordsizesize--1024" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.copyonwrite.record.size.estimate</code> <br>
The average record size. If specified, hudi will use this and not compute it dynamically based on the last 24 commits&#x27; metadata. No value is set by default. This is critical in computing the insert parallelism and bin-packing inserts into small files. See above.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withinlinecompactioninlinecompaction--false">withInlineCompaction(inlineCompaction = false)<a class="hash-link" href="#withinlinecompactioninlinecompaction--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compact.inline</code> <br>
When set to true, compaction is triggered by the ingestion itself, right after a commit/deltacommit action as part of insert/upsert/bulk_insert</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withmaxnumdeltacommitsbeforecompactionmaxnumdeltacommitsbeforecompaction--10">withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltaCommitsBeforeCompaction = 10)<a class="hash-link" href="#withmaxnumdeltacommitsbeforecompactionmaxnumdeltacommitsbeforecompaction--10" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compact.inline.max.delta.commits</code> <br>
Maximum number of delta commits to keep before triggering an inline compaction</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withcompactionlazyblockreadenabledtrue">withCompactionLazyBlockReadEnabled(true)<a class="hash-link" href="#withcompactionlazyblockreadenabledtrue" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compaction.lazy.block.read</code> <br>
When a CompactedLogScanner merges all log files, this config helps to choose whether the log blocks should be read lazily or not. Choose true to use I/O-intensive lazy block reading (low memory usage) or false for memory-intensive immediate block reading (high memory usage)</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withcompactionreverselogreadenabledfalse">withCompactionReverseLogReadEnabled(false)<a class="hash-link" href="#withcompactionreverselogreadenabledfalse" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compaction.reverse.log.read</code> <br>
HoodieLogFormatReader reads a logfile in the forward direction starting from pos=0 to pos=file_length. If this config is set to true, the Reader reads the logfile in reverse direction, from pos=file_length to pos=0</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withcleanerparallelismcleanerparallelism--200">withCleanerParallelism(cleanerParallelism = 200)<a class="hash-link" href="#withcleanerparallelismcleanerparallelism--200" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.cleaner.parallelism</code> <br>
Increase this if cleaning becomes slow.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withcompactionstrategycompactionstrategy--orgapachehudiiocompactstrategylogfilesizebasedcompactionstrategy">withCompactionStrategy(compactionStrategy = org.apache.hudi.io.compact.strategy.LogFileSizeBasedCompactionStrategy)<a class="hash-link" href="#withcompactionstrategycompactionstrategy--orgapachehudiiocompactstrategylogfilesizebasedcompactionstrategy" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compaction.strategy</code> <br>
Compaction strategy decides which file groups are picked up for compaction during each compaction run. By default, Hudi picks the log file with the most accumulated unmerged data</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withtargetiopercompactioninmbtargetiopercompactioninmb--500000">withTargetIOPerCompactionInMB(targetIOPerCompactionInMB = 500000)<a class="hash-link" href="#withtargetiopercompactioninmbtargetiopercompactioninmb--500000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compaction.target.io</code> <br>
Amount of MBs to spend during a compaction run for the LogFileSizeBasedCompactionStrategy. This value helps bound ingestion latency while compaction runs in inline mode.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withtargetpartitionsperdaybasedcompactiontargetpartitionspercompaction--10">withTargetPartitionsPerDayBasedCompaction(targetPartitionsPerCompaction = 10)<a class="hash-link" href="#withtargetpartitionsperdaybasedcompactiontargetpartitionspercompaction--10" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compaction.daybased.target</code> <br>
Used by org.apache.hudi.io.compact.strategy.DayBasedCompactionStrategy to denote the number of latest partitions to compact during a compaction run. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withpayloadclasspayloadclassname--orgapachehudicommonmodelhoodieavropayload">withPayloadClass(payloadClassName = org.apache.hudi.common.model.HoodieAvroPayload)<a class="hash-link" href="#withpayloadclasspayloadclassname--orgapachehudicommonmodelhoodieavropayload" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.compaction.payload.class</code> <br>
This needs to be the same as the class used during inserts/upserts. Just like writing, compaction also uses the record payload class to merge records in the log against each other, merge again with the base file and produce the final record to be written after compaction.</p>
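<p>Putting a few of these together, a sketch of a compaction/cleaning config built via <code>HoodieCompactionConfig</code> (assuming the builder methods match the headings above, and reusing the earlier placeholder variables):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .retainCommits(24)                          // hoodie.cleaner.commits.retained
        .archiveCommitsWith(96, 128)                // hoodie.keep.min.commits / hoodie.keep.max.commits
        .withInlineCompaction(false)                // hoodie.compact.inline
        .withMaxNumDeltaCommitsBeforeCompaction(10) // hoodie.compact.inline.max.delta.commits
        .build())
    .build();</code></pre></div></div>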
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="bootstrap-configs">Bootstrap Configs<a class="hash-link" href="#bootstrap-configs" title="Direct link to heading"></a></h3><p>Controls bootstrap related configs. If you want to bootstrap your existing data into Hudi for the first time, this bootstrap operation comes in handy, as you don&#x27;t need to wait for the entire dataset to be loaded into Hudi before you start leveraging it. </p><p><a href="#withBootstrapConfig">withBootstrapConfig</a> (HoodieBootstrapConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrapbasepathbasepath">withBootstrapBasePath(basePath)<a class="hash-link" href="#withbootstrapbasepathbasepath" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.base.path</code> <br>
Base path of the dataset that needs to be bootstrapped as a Hudi table </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrapparallelismparallelism--1500">withBootstrapParallelism(parallelism = 1500)<a class="hash-link" href="#withbootstrapparallelismparallelism--1500" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.parallelism</code> <br>
Parallelism value to be used to bootstrap data into hudi </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrapkeygenclasskeygenclass-withbootstrapkeygenclass">withBootstrapKeyGenClass(keyGenClass)<a class="hash-link" href="#withbootstrapkeygenclasskeygenclass-withbootstrapkeygenclass" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.keygen.class</code> <br>
Key generator implementation to be used for generating keys from the bootstrapped dataset </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrapmodeselectorpartitionselectorclass--orgapachehudiclientbootstrapselectormetadataonlybootstrapmodeselector">withBootstrapModeSelector(partitionSelectorClass = org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector)<a class="hash-link" href="#withbootstrapmodeselectorpartitionselectorclass--orgapachehudiclientbootstrapselectormetadataonlybootstrapmodeselector" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.mode.selector</code> <br>
Selects the mode in which each file/partition in the bootstrapped dataset gets bootstrapped</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrappartitionpathtranslatorclasspartitionpathtranslatorclass--orgapachehudiclientbootstraptranslatoridentitybootstrappartitionpathtranslator">withBootstrapPartitionPathTranslatorClass(partitionPathTranslatorClass = org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator)<a class="hash-link" href="#withbootstrappartitionpathtranslatorclasspartitionpathtranslatorclass--orgapachehudiclientbootstraptranslatoridentitybootstrappartitionpathtranslator" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.partitionpath.translator.class</code> <br>
Translates the partition paths from the bootstrapped data into how they are laid out as a Hudi table. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withfullbootstrapinputproviderpartitionselectorclass--orgapachehudibootstrapsparkparquetbootstrapdataprovider">withFullBootstrapInputProvider(partitionSelectorClass = org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider)<a class="hash-link" href="#withfullbootstrapinputproviderpartitionselectorclass--orgapachehudibootstrapsparkparquetbootstrapdataprovider" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.full.input.provider</code> <br>
Class to use for reading the bootstrap dataset partitions/files, for Bootstrap mode <code>FULL_RECORD</code> </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrapmodeselectorregexregex--">withBootstrapModeSelectorRegex(regex = &quot;.*&quot;)<a class="hash-link" href="#withbootstrapmodeselectorregexregex--" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.mode.selector.regex</code> <br>
Matches each bootstrap dataset partition against this regex and applies the mode below to it. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withbootstrapmodeforregexmatchmodeforregexmatch--orgapachehudiclientbootstrapmetadata_only">withBootstrapModeForRegexMatch(modeForRegexMatch = org.apache.hudi.client.bootstrap.METADATA_ONLY)<a class="hash-link" href="#withbootstrapmodeforregexmatchmodeforregexmatch--orgapachehudiclientbootstrapmetadata_only" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.bootstrap.mode.selector.regex.mode</code> <br>
Bootstrap mode to apply for partition paths that match the regex above. <code>METADATA_ONLY</code> will generate just skeleton base files
with keys/footers, avoiding the full cost of rewriting the dataset. <code>FULL_RECORD</code> will perform a full copy/rewrite of the data as a Hudi table. </p>
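<p>A sketch of a bootstrap setup via the raw properties (the source path and partition regex are placeholders, and the key generator class name is an assumption; the other variables are reused from the builder example above):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
props.setProperty("hoodie.bootstrap.base.path", "s3://bucket/legacy_table"); // placeholder source path
props.setProperty("hoodie.bootstrap.keygen.class",
    "org.apache.hudi.keygen.SimpleKeyGenerator");                            // assumed key generator class
props.setProperty("hoodie.bootstrap.mode.selector.regex", "2021/.*");        // placeholder partition regex

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props)
    .build();</code></pre></div></div>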
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="metadata-config">Metadata Config<a class="hash-link" href="#metadata-config" title="Direct link to heading"></a></h3><p>Configurations used by the HUDI Metadata Table. This table maintains meta information about the hudi dataset so that file listing can be avoided during queries. </p><p><a href="#withMetadataConfig">withMetadataConfig</a> (HoodieMetadataConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="enableenable--false">enable(enable = false)<a class="hash-link" href="#enableenable--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.enable</code> <br>
Enable the internal Metadata Table, which stores table-level metadata such as file listings </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="enablereuseenable--true">enableReuse(enable = true)<a class="hash-link" href="#enablereuseenable--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.reuse.enable</code> <br>
Enable reusing of opened file handles/merged logs, across multiple fetches from metadata table. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="enablefallbackenable--true">enableFallback(enable = true)<a class="hash-link" href="#enablefallbackenable--true" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.fallback.enable</code> <br>
Fallback to listing from DFS, if there are any errors in fetching from metadata table </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="validatevalidate--false">validate(validate = false)<a class="hash-link" href="#validatevalidate--false" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.validate</code> <br>
Validate contents of Metadata Table on each access against the actual listings from DFS</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withinsertparallelismparallelism--1">withInsertParallelism(parallelism = 1)<a class="hash-link" href="#withinsertparallelismparallelism--1" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.insert.parallelism</code> <br>
Parallelism to use when writing to the metadata table </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withmaxnumdeltacommitsbeforecompactionmaxnumdeltacommitsbeforecompaction--24">withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltaCommitsBeforeCompaction = 24)<a class="hash-link" href="#withmaxnumdeltacommitsbeforecompactionmaxnumdeltacommitsbeforecompaction--24" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.compact.max.delta.commits</code> <br>
Controls how often the metadata table is compacted.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="archivecommitswithmintokeep--30-maxtokeep--20">archiveCommitsWith(minToKeep = 20, maxToKeep = 30)<a class="hash-link" href="#archivecommitswithmintokeep--30-maxtokeep--20" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.metadata.keep.min.commits</code>, <code>hoodie.metadata.keep.max.commits</code> <br>
Controls the archival of the metadata table&#x27;s timeline </p>
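<p>For example, a sketch of turning the metadata table on through the raw properties (reusing the earlier placeholder variables):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
props.setProperty("hoodie.metadata.enable", "true");    // maintain file listings in the metadata table
props.setProperty("hoodie.metadata.validate", "false"); // skip validation against actual DFS listings

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props)
    .build();</code></pre></div></div>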
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="clustering-configs">Clustering Configs<a class="hash-link" href="#clustering-configs" title="Direct link to heading"></a></h3><p>Controls clustering operations in hudi. Each clustering operation has to be configured with its strategy and config params; the following configs drive that. </p><p><a href="#withClusteringConfig">withClusteringConfig</a> (HoodieClusteringConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringplanstrategyclassclusteringstrategyclass--orgapachehudiclientclusteringplanstrategysparkrecentdaysclusteringplanstrategy">withClusteringPlanStrategyClass(clusteringStrategyClass = org.apache.hudi.client.clustering.plan.strategy.SparkRecentDaysClusteringPlanStrategy)<a class="hash-link" href="#withclusteringplanstrategyclassclusteringstrategyclass--orgapachehudiclientclusteringplanstrategysparkrecentdaysclusteringplanstrategy" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.plan.strategy.class</code> <br>
Config to provide a strategy class to create a ClusteringPlan. The class has to be a subclass of ClusteringPlanStrategy </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringexecutionstrategyclassrunclusteringstrategyclass--orgapachehudiclientclusteringrunstrategysparksortandsizeexecutionstrategy">withClusteringExecutionStrategyClass(runClusteringStrategyClass = org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy)<a class="hash-link" href="#withclusteringexecutionstrategyclassrunclusteringstrategyclass--orgapachehudiclientclusteringrunstrategysparksortandsizeexecutionstrategy" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.execution.strategy.class</code> <br>
Config to provide a strategy class to execute a ClusteringPlan. Class has to be subclass of RunClusteringStrategy </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringtargetpartitionsclusteringtargetpartitions--2">withClusteringTargetPartitions(clusteringTargetPartitions = 2)<a class="hash-link" href="#withclusteringtargetpartitionsclusteringtargetpartitions--2" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.plan.strategy.daybased.lookback.partitions</code> <br>
Number of partitions to list to create ClusteringPlan </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringplansmallfilelimitclusteringsmallfilelimit--600mb">withClusteringPlanSmallFileLimit(clusteringSmallFileLimit = 600Mb)<a class="hash-link" href="#withclusteringplansmallfilelimitclusteringsmallfilelimit--600mb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.plan.strategy.small.file.limit</code> <br>
Files smaller than the size specified here are candidates for clustering </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringmaxbytesingroupclusteringmaxgroupsize--2gb">withClusteringMaxBytesInGroup(clusteringMaxGroupSize = 2Gb)<a class="hash-link" href="#withclusteringmaxbytesingroupclusteringmaxgroupsize--2gb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.plan.strategy.max.bytes.per.group</code> <br>
Max amount of data to be included in one group.
Each clustering operation can create multiple groups; the total amount of data processed by a clustering operation is bounded by the product of the two properties (CLUSTERING_MAX_BYTES_PER_GROUP * CLUSTERING_MAX_NUM_GROUPS). </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringmaxnumgroupsmaxnumgroups--30">withClusteringMaxNumGroups(maxNumGroups = 30)<a class="hash-link" href="#withclusteringmaxnumgroupsmaxnumgroups--30" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.plan.strategy.max.num.groups</code> <br>
Maximum number of groups to create as part of ClusteringPlan. Increasing groups will increase parallelism. </p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclusteringtargetfilemaxbytestargetfilesize--1gb-">withClusteringTargetFileMaxBytes(targetFileSize = 1Gb )<a class="hash-link" href="#withclusteringtargetfilemaxbytestargetfilesize--1gb-" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.clustering.plan.strategy.target.file.max.bytes</code> <br>
Each group can produce &#x27;N&#x27; (CLUSTERING_MAX_GROUP_SIZE/CLUSTERING_TARGET_FILE_SIZE) output file groups </p>
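<p>A sketch of clustering sizing via the raw properties (the byte values are the defaults described above; the other variables are reused from the builder example above):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
// Files under 600MB are clustering candidates; each group is capped at 2GB and aims for 1GB output files.
props.setProperty("hoodie.clustering.plan.strategy.small.file.limit", String.valueOf(600L * 1024 * 1024));
props.setProperty("hoodie.clustering.plan.strategy.max.bytes.per.group", String.valueOf(2L * 1024 * 1024 * 1024));
props.setProperty("hoodie.clustering.plan.strategy.target.file.max.bytes", String.valueOf(1024L * 1024 * 1024));

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props)
    .build();</code></pre></div></div>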
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="payload-configs">Payload Configs<a class="hash-link" href="#payload-configs" title="Direct link to heading"></a></h3><p>Payload related configs. These can be leveraged by payload implementations to determine their business logic. </p><p><a href="#withPayloadConfig">withPayloadConfig</a> (HoodiePayloadConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withpayloadorderingfieldpayloadorderingfield--ts">withPayloadOrderingField(payloadOrderingField = &quot;ts&quot;)<a class="hash-link" href="#withpayloadorderingfieldpayloadorderingfield--ts" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.payload.ordering.field</code> <br>
Property to hold the payload ordering field name. </p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="metrics-configs">Metrics configs<a class="hash-link" href="#metrics-configs" title="Direct link to heading"></a></h3><p>Enables reporting on Hudi metrics.
<a href="#withMetricsConfig">withMetricsConfig</a> (HoodieMetricsConfig) <br>
Hudi publishes metrics on every commit, clean, rollback etc. The following sections list the supported reporters.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="graphite">GRAPHITE<a class="hash-link" href="#graphite" title="Direct link to heading"></a></h4><h5 class="anchor anchorWithStickyNavbar_y2LR" id="onmetricson--false">on(metricsOn = false)<a class="hash-link" href="#onmetricson--false" title="Direct link to heading"></a></h5><p><code>hoodie.metrics.on</code> <br>
Turn on/off metrics reporting. Off by default.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withreportertypereportertype--graphite">withReporterType(reporterType = GRAPHITE)<a class="hash-link" href="#withreportertypereportertype--graphite" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.reporter.type</code> <br>
Type of metrics reporter.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="tographitehosthost--localhost">toGraphiteHost(host = localhost)<a class="hash-link" href="#tographitehosthost--localhost" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.graphite.host</code> <br>
Graphite host to connect to</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="ongraphiteportport--4756">onGraphitePort(port = 4756)<a class="hash-link" href="#ongraphiteportport--4756" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.graphite.port</code> <br>
Graphite port to connect to</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="useprefixprefix--">usePrefix(prefix = &quot;&quot;)<a class="hash-link" href="#useprefixprefix--" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.graphite.metric.prefix</code> <br>
Standard prefix applied to all metrics. This helps to add datacenter or environment information, for example. </p>
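<p>For example, a sketch of enabling Graphite reporting via the raw properties (the host and prefix are placeholders, and the other variables are reused from the builder example above):</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ">import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

Properties props = new Properties();
props.setProperty("hoodie.metrics.on", "true");
props.setProperty("hoodie.metrics.reporter.type", "GRAPHITE");
props.setProperty("hoodie.metrics.graphite.host", "graphite.prod.example.com"); // placeholder host
props.setProperty("hoodie.metrics.graphite.port", "4756");
props.setProperty("hoodie.metrics.graphite.metric.prefix", "dc1.prod");         // placeholder prefix

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .forTable(tableName)
    .withSchema(schemaStr)
    .withProps(props)
    .build();</code></pre></div></div>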
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="jmx">JMX<a class="hash-link" href="#jmx" title="Direct link to heading"></a></h4><h5 class="anchor anchorWithStickyNavbar_y2LR" id="onmetricson--false-1">on(metricsOn = false)<a class="hash-link" href="#onmetricson--false-1" title="Direct link to heading"></a></h5><p><code>hoodie.metrics.on</code> <br>
Turn on/off metrics reporting. Off by default.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withreportertypereportertype--jmx">withReporterType(reporterType = JMX)<a class="hash-link" href="#withreportertypereportertype--jmx" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.reporter.type</code> <br>
Type of metrics reporter.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="tojmxhosthost--localhost">toJmxHost(host = localhost)<a class="hash-link" href="#tojmxhosthost--localhost" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.jmx.host</code> <br>
JMX host to connect to</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="onjmxportport--1000-5000">onJmxPort(port = 1000-5000)<a class="hash-link" href="#onjmxportport--1000-5000" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.jmx.port</code> <br>
JMX port to connect to</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="datadog">DATADOG<a class="hash-link" href="#datadog" title="Direct link to heading"></a></h4><h5 class="anchor anchorWithStickyNavbar_y2LR" id="onmetricson--false-2">on(metricsOn = false)<a class="hash-link" href="#onmetricson--false-2" title="Direct link to heading"></a></h5><p><code>hoodie.metrics.on</code> <br>
Turn on/off metrics reporting. Off by default.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withreportertypereportertype--datadog">withReporterType(reporterType = DATADOG)<a class="hash-link" href="#withreportertypereportertype--datadog" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.reporter.type</code> <br>
Type of metrics reporter.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogreportperiodsecondsperiod--30">withDatadogReportPeriodSeconds(period = 30)<a class="hash-link" href="#withdatadogreportperiodsecondsperiod--30" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.report.period.seconds</code> <br>
Datadog report period in seconds. Defaults to 30.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogapisiteapisite">withDatadogApiSite(apiSite)<a class="hash-link" href="#withdatadogapisiteapisite" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.api.site</code> <br>
Datadog API site: EU or US</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogapikeyapikey">withDatadogApiKey(apiKey)<a class="hash-link" href="#withdatadogapikeyapikey" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.api.key</code> <br>
Datadog API key</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogapikeyskipvalidationskip--false">withDatadogApiKeySkipValidation(skip = false)<a class="hash-link" href="#withdatadogapikeyskipvalidationskip--false" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.api.key.skip.validation</code> <br>
Whether to skip validating the Datadog API key before sending metrics. Defaults to false.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogapikeysupplierapikeysupplier">withDatadogApiKeySupplier(apiKeySupplier)<a class="hash-link" href="#withdatadogapikeysupplierapikeysupplier" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.api.key.supplier</code> <br>
Datadog API key supplier to supply the API key at runtime. This will take effect if <code>hoodie.metrics.datadog.api.key</code> is not set.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogapitimeoutsecondstimeout--3">withDatadogApiTimeoutSeconds(timeout = 3)<a class="hash-link" href="#withdatadogapitimeoutsecondstimeout--3" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.api.timeout.seconds</code> <br>
Datadog API timeout in seconds. Defaults to 3.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogprefixprefix">withDatadogPrefix(prefix)<a class="hash-link" href="#withdatadogprefixprefix" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.metric.prefix</code> <br>
Datadog metric prefix to be prepended to each metric name with a dot as delimiter. For example, if it is set to <code>foo</code>, <code>foo.</code> will be prepended.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadoghosthost">withDatadogHost(host)<a class="hash-link" href="#withdatadoghosthost" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.metric.host</code> <br>
Datadog metric host to be sent along with metrics data.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withdatadogtagstags">withDatadogTags(tags)<a class="hash-link" href="#withdatadogtagstags" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.datadog.metric.tags</code> <br>
Datadog metric tags (comma-delimited) to be sent along with metrics data.</p>
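<p>For illustration, a minimal sketch passing the Datadog settings as Spark datasource write options; <code>df</code> and <code>basePath</code> are assumed to exist, and all values are illustrative.</p>
<pre><code class="language-java">// Sketch: report metrics to Datadog from a Spark write (keys per this section).
df.write().format("hudi")
    .option("hoodie.metrics.on", "true")
    .option("hoodie.metrics.reporter.type", "DATADOG")
    .option("hoodie.metrics.datadog.api.site", "US")
    .option("hoodie.metrics.datadog.api.key", "YOUR_API_KEY")
    .option("hoodie.metrics.datadog.metric.prefix", "hudi")   // metrics arrive as hudi.*
    .option("hoodie.metrics.datadog.metric.tags", "env:prod,team:data")
    .mode("append")
    .save(basePath);
</code></pre>
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="user-defined-reporter">USER DEFINED REPORTER<a class="hash-link" href="#user-defined-reporter" title="Direct link to heading"></a></h4><h5 class="anchor anchorWithStickyNavbar_y2LR" id="onmetricson--false-3">on(metricsOn = false)<a class="hash-link" href="#onmetricson--false-3" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.on</code> <br>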
Turn on/off metrics reporting. off by default.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withreporterclassclassname--">withReporterClass(className = &quot;&quot;)<a class="hash-link" href="#withreporterclassclassname--" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.metrics.reporter.class</code> <br>
User-defined class used to report metrics; must be a subclass of AbstractUserDefinedMetricsReporter.</p>
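<p>For illustration, a minimal sketch; <code>com.example.MyMetricsReporter</code> is a hypothetical placeholder for a class extending AbstractUserDefinedMetricsReporter.</p>
<pre><code class="language-java">import java.util.Properties;

// Sketch: plug in a custom metrics reporter (class name is a placeholder).
Properties props = new Properties();
props.setProperty("hoodie.metrics.on", "true");
props.setProperty("hoodie.metrics.reporter.class", "com.example.MyMetricsReporter");
</code></pre>
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="memory-configs">Memory configs<a class="hash-link" href="#memory-configs" title="Direct link to heading"></a></h3><p>Controls memory usage for compaction and merges, performed internally by Hudi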
<a href="#withMemoryConfig">withMemoryConfig</a> (HoodieMemoryConfig) <br>
Memory related configs</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withmaxmemoryfractionperpartitionmergemaxmemoryfractionperpartitionmerge--06">withMaxMemoryFractionPerPartitionMerge(maxMemoryFractionPerPartitionMerge = 0.6)<a class="hash-link" href="#withmaxmemoryfractionperpartitionmergemaxmemoryfractionperpartitionmerge--06" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.memory.merge.fraction</code> <br>
This fraction is multiplied with the user memory fraction (1 - spark.memory.fraction) to get a final fraction of heap space to use during merge.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withmaxmemorysizepercompactioninbytesmaxmemorysizepercompactioninbytes--1gb">withMaxMemorySizePerCompactionInBytes(maxMemorySizePerCompactionInBytes = 1GB)<a class="hash-link" href="#withmaxmemorysizepercompactioninbytesmaxmemorysizepercompactioninbytes--1gb" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.memory.compaction.max.size</code> <br>
HoodieCompactedLogScanner reads log blocks, converts records to HoodieRecords and then merges these log blocks and records. At any point, the number of entries in a log block can be less than or equal to the number of entries in the corresponding parquet file. This can lead to OOM in the Scanner. Hence, a spillable map helps alleviate the memory pressure. Use this config to set the max allowable in-memory footprint of the spillable map.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withwritestatusfailurefractionfailurefraction--01">withWriteStatusFailureFraction(failureFraction = 0.1)<a class="hash-link" href="#withwritestatusfailurefractionfailurefraction--01" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.memory.writestatus.failure.fraction</code> <br>
This property controls what fraction of failed records and exceptions we report back to the driver.</p>
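<p>For illustration, a minimal sketch of the memory-related properties (keys per this section; values are illustrative):</p>
<pre><code class="language-java">import java.util.Properties;

// Sketch: bound memory used by merges, compaction and failure reporting.
Properties props = new Properties();
// Fraction of user heap available for merging records against base files.
props.setProperty("hoodie.memory.merge.fraction", "0.5");
// Max in-memory footprint of the spillable map used during compaction (here 512 MB).
props.setProperty("hoodie.memory.compaction.max.size", String.valueOf(512L * 1024 * 1024));
// Report 5% of failed records/exceptions back to the driver.
props.setProperty("hoodie.memory.writestatus.failure.fraction", "0.05");
</code></pre>
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="write-commit-callback-configs">Write commit callback configs<a class="hash-link" href="#write-commit-callback-configs" title="Direct link to heading"></a></h3><p>Controls callback behavior on write commit. An exception will be thrown if the callback service is enabled and errors occur while processing the callback. HTTP and Kafka callbacks are currently supported.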
<a href="#withCallbackConfig">withCallbackConfig</a> (HoodieWriteCommitCallbackConfig) <br>
Callback related configs</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="writecommitcallbackoncallbackon--false">writeCommitCallbackOn(callbackOn = false)<a class="hash-link" href="#writecommitcallbackoncallbackon--false" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.on</code> <br>
Turn callback on/off. off by default.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withcallbackclasscallbackclass">withCallbackClass(callbackClass)<a class="hash-link" href="#withcallbackclasscallbackclass" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.class</code> <br>
Fully qualified name of the callback class; must be a subclass of HoodieWriteCommitCallback. Defaults to org.apache.hudi.callback.impl.HoodieWriteCommitHttpCallback.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="http-callback">HTTP CALLBACK<a class="hash-link" href="#http-callback" title="Direct link to heading"></a></h4><p>Callback via HTTP. Users do not need to specify this type explicitly; it is the default.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withcallbackhttpurlurl">withCallbackHttpUrl(url)<a class="hash-link" href="#withcallbackhttpurlurl" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.http.url</code> <br>
Callback HTTP URL to which callback messages are sent.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withcallbackhttptimeoutsecondstimeoutseconds--3">withCallbackHttpTimeoutSeconds(timeoutSeconds = 3)<a class="hash-link" href="#withcallbackhttptimeoutsecondstimeoutseconds--3" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.http.timeout.seconds</code> <br>
Callback timeout in seconds. 3 by default</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="withcallbackhttpapikeyapikey">withCallbackHttpApiKey(apiKey)<a class="hash-link" href="#withcallbackhttpapikeyapikey" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.http.api.key</code> <br>
HTTP callback API key. hudi_write_commit_http_callback by default.</p>
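<p>For illustration, a minimal sketch enabling the default HTTP callback; the endpoint URL is hypothetical.</p>
<pre><code class="language-java">import java.util.Properties;

// Sketch: send a callback message to an HTTP endpoint after each commit.
Properties props = new Properties();
props.setProperty("hoodie.write.commit.callback.on", "true");
props.setProperty("hoodie.write.commit.callback.http.url", "https://callbacks.example.com/hudi");
props.setProperty("hoodie.write.commit.callback.http.timeout.seconds", "5");
</code></pre>
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="kafka-callback">KAFKA CALLBACK<a class="hash-link" href="#kafka-callback" title="Direct link to heading"></a></h4><p>To use the Kafka callback, set <code>hoodie.write.commit.callback.class</code> = <code>org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallback</code>.</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="callback_kafka_bootstrap_servers">CALLBACK_KAFKA_BOOTSTRAP_SERVERS<a class="hash-link" href="#callback_kafka_bootstrap_servers" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.kafka.bootstrap.servers</code> <br>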
Bootstrap servers of kafka callback cluster</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="callback_kafka_topic">CALLBACK_KAFKA_TOPIC<a class="hash-link" href="#callback_kafka_topic" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.kafka.topic</code> <br>
Kafka topic to be sent along with callback messages</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="callback_kafka_partition">CALLBACK_KAFKA_PARTITION<a class="hash-link" href="#callback_kafka_partition" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.kafka.partition</code> <br>
Partition of <code>CALLBACK_KAFKA_TOPIC</code>, 0 by default</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="callback_kafka_acks">CALLBACK_KAFKA_ACKS<a class="hash-link" href="#callback_kafka_acks" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.kafka.acks</code> <br>
Kafka acks level, <code>all</code> by default</p><h5 class="anchor anchorWithStickyNavbar_y2LR" id="callback_kafka_retries">CALLBACK_KAFKA_RETRIES<a class="hash-link" href="#callback_kafka_retries" title="Direct link to heading"></a></h5><p>Property: <code>hoodie.write.commit.callback.kafka.retries</code> <br>
Number of times to retry. 3 by default.</p>
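<p>For illustration, a minimal sketch of the Kafka callback; broker and topic names are hypothetical.</p>
<pre><code class="language-java">import java.util.Properties;

// Sketch: publish commit callback messages to a Kafka topic.
Properties props = new Properties();
props.setProperty("hoodie.write.commit.callback.on", "true");
props.setProperty("hoodie.write.commit.callback.class",
    "org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallback");
props.setProperty("hoodie.write.commit.callback.kafka.bootstrap.servers", "broker1:9092,broker2:9092");
props.setProperty("hoodie.write.commit.callback.kafka.topic", "hudi-commit-events");
props.setProperty("hoodie.write.commit.callback.kafka.acks", "all");
props.setProperty("hoodie.write.commit.callback.kafka.retries", "3");
</code></pre>
<h3 class="anchor anchorWithStickyNavbar_y2LR" id="locking-configs">Locking configs<a class="hash-link" href="#locking-configs" title="Direct link to heading"></a></h3><p>Configs that control locking mechanisms if <a href="#WriteConcurrencyMode">WriteConcurrencyMode=optimistic_concurrency_control</a> is enabled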
<a href="#withLockConfig">withLockConfig</a> (HoodieLockConfig) <br></p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withlockproviderlockprovider--orgapachehudiclienttransactionlockzookeeperbasedlockprovider">withLockProvider(lockProvider = org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider)<a class="hash-link" href="#withlockproviderlockprovider--orgapachehudiclienttransactionlockzookeeperbasedlockprovider" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.provider</code> <br>
Lock provider class name. Users can provide their own implementation of LockProvider, which must be a subclass of org.apache.hudi.common.lock.LockProvider.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withzkquorumzkquorum">withZkQuorum(zkQuorum)<a class="hash-link" href="#withzkquorumzkquorum" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.zookeeper.url</code> <br>
Comma-separated list of ZooKeeper servers to connect to.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withzkbasepathzkbasepath">withZkBasePath(zkBasePath)<a class="hash-link" href="#withzkbasepathzkbasepath" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.zookeeper.base_path</code> <!-- -->[Required]<!-- --> <br>
The base path on Zookeeper under which to create a ZNode to acquire the lock. This should be common for all jobs writing to the same table</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withzkportzkport">withZkPort(zkPort)<a class="hash-link" href="#withzkportzkport" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.zookeeper.port</code> <!-- -->[Required]<!-- --> <br>
The connection port to be used for Zookeeper</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withzklockkeyzklockkey">withZkLockKey(zkLockKey)<a class="hash-link" href="#withzklockkeyzklockkey" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.zookeeper.lock_key</code> <!-- -->[Required]<!-- --> <br>
Key name under base_path at which to create a ZNode and acquire lock. Final path on zk will look like base_path/lock_key. We recommend setting this to the table name</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withzkconnectiontimeoutinmsconnectiontimeoutinms--15000">withZkConnectionTimeoutInMs(connectionTimeoutInMs = 15000)<a class="hash-link" href="#withzkconnectiontimeoutinmsconnectiontimeoutinms--15000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.zookeeper.connection_timeout_ms</code> <br>
How long to wait when connecting to ZooKeeper before considering the connection a failure</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withzksessiontimeoutinmssessiontimeoutinms--60000">withZkSessionTimeoutInMs(sessionTimeoutInMs = 60000)<a class="hash-link" href="#withzksessiontimeoutinmssessiontimeoutinms--60000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.zookeeper.session_timeout_ms</code> <br>
How long to wait after losing a connection to ZooKeeper before the session is expired</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withnumretriesnum_retries--3">withNumRetries(num_retries = 3)<a class="hash-link" href="#withnumretriesnum_retries--3" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.num_retries</code> <br>
Maximum number of times to retry by lock provider client</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withretrywaittimeinmillisretrywaittimeinmillis--5000">withRetryWaitTimeInMillis(retryWaitTimeInMillis = 5000)<a class="hash-link" href="#withretrywaittimeinmillisretrywaittimeinmillis--5000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.wait_time_ms_between_retry</code> <br>
Initial amount of time to wait between retries by lock provider client</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withhivedatabasenamehivedatabasename">withHiveDatabaseName(hiveDatabaseName)<a class="hash-link" href="#withhivedatabasenamehivedatabasename" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.hivemetastore.database</code> <!-- -->[Required]<!-- --> <br>
The Hive database to acquire lock against</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withhivetablenamehivetablename">withHiveTableName(hiveTableName)<a class="hash-link" href="#withhivetablenamehivetablename" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.hivemetastore.table</code> <!-- -->[Required]<!-- --> <br>
The Hive table under the Hive database to acquire the lock against.</p>
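<p>For illustration, a minimal sketch of a Hive-metastore-based lock. The provider class name is an assumption (it is not listed on this page), and the database and table values are hypothetical.</p>
<pre><code class="language-java">import java.util.Properties;

// Sketch: acquire the table lock against the Hive metastore.
Properties props = new Properties();
props.setProperty("hoodie.write.lock.provider",
    "org.apache.hudi.hive.HiveMetastoreBasedLockProvider"); // assumed class name
props.setProperty("hoodie.write.lock.hivemetastore.database", "analytics");
props.setProperty("hoodie.write.lock.hivemetastore.table", "trips");
</code></pre>
<h4 class="anchor anchorWithStickyNavbar_y2LR" id="withclientnumretriesclientnumretries--0">withClientNumRetries(clientNumRetries = 0)<a class="hash-link" href="#withclientnumretriesclientnumretries--0" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.client.num_retries</code> <br>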
Maximum number of additional times the Hudi client will retry acquiring the lock</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withretrywaittimeinmillisretrywaittimeinmillis--10000">withRetryWaitTimeInMillis(retryWaitTimeInMillis = 10000)<a class="hash-link" href="#withretrywaittimeinmillisretrywaittimeinmillis--10000" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.client.wait_time_ms_between_retry</code> <br>
Amount of time to wait between retries from the Hudi client</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="withconflictresolutionstrategylockprovider--orgapachehudiclienttransactionsimpleconcurrentfilewritesconflictresolutionstrategy">withConflictResolutionStrategy(lockProvider = org.apache.hudi.client.transaction.SimpleConcurrentFileWritesConflictResolutionStrategy)<a class="hash-link" href="#withconflictresolutionstrategylockprovider--orgapachehudiclienttransactionsimpleconcurrentfilewritesconflictresolutionstrategy" title="Direct link to heading"></a></h4><p>Property: <code>hoodie.write.lock.conflict.resolution.strategy</code> <br>
Conflict resolution strategy class name; this must be a subclass of org.apache.hudi.client.transaction.ConflictResolutionStrategy.</p>
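<p>For illustration, a minimal sketch tying the locking configs together for optimistic concurrency control with the ZooKeeper lock provider. The <code>hoodie.write.concurrency.mode</code> key is an assumption based on the WriteConcurrencyMode reference above; hosts and paths are hypothetical.</p>
<pre><code class="language-java">import java.util.Properties;

// Sketch: multi-writer setup with the ZooKeeper-based lock provider.
Properties props = new Properties();
props.setProperty("hoodie.write.concurrency.mode", "optimistic_concurrency_control"); // assumed key
props.setProperty("hoodie.write.lock.provider",
    "org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider");
props.setProperty("hoodie.write.lock.zookeeper.url", "zk1,zk2,zk3"); // comma-separated servers
props.setProperty("hoodie.write.lock.zookeeper.port", "2181");
props.setProperty("hoodie.write.lock.zookeeper.base_path", "/hudi/locks");
props.setProperty("hoodie.write.lock.zookeeper.lock_key", "trips"); // table name, as recommended above
</code></pre>
</div>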
class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 
4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>. <br>Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.2cab5691.js"></script>
<script src="/assets/js/main.bd020950.js"></script>
</body>
</html>