blob: 79b33e483245d4c2eec199a61bfc8d262bea916b [file] [log] [blame]
<!doctype html>
<html class="docs-version-0.13.0 docs-custom-styles" lang="cn" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Basic Configurations | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/docs/0.13.0/basic_configurations"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:version" content="0.13.0"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.13.0"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Basic Configurations | Apache Hudi"><meta data-react-helmet="true" name="description" content="This page covers the basic configurations you may use to write/read Hudi tables. This page only features a subset of the"><meta data-react-helmet="true" property="og:description" content="This page covers the basic configurations you may use to write/read Hudi tables. This page only features a subset of the"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/docs/0.13.0/basic_configurations"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.13.0/basic_configurations" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/0.13.0/basic_configurations" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.13.0/basic_configurations" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css">
<link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script">
<link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script">
</head>
<body>
<script>
// Inline bootstrap executed while <body> is being parsed: it copies two
// persisted UI preferences from localStorage onto the <html> element as
// attributes, falling back to defaults when storage is missing or unreadable.
(function () {
  var root = document.documentElement;

  // Colour theme: use the stored value when one exists, otherwise "light".
  var storedTheme = null;
  try {
    storedTheme = localStorage.getItem("theme");
  } catch (storageError) {
    // Storage access can throw; keep the null fallback.
  }
  root.setAttribute("data-theme", storedTheme !== null ? storedTheme : "light");

  // Announcement bar: flag whether it was already dismissed. Only the exact
  // string "true" counts; any storage failure is treated as "not dismissed".
  var wasDismissed = false;
  try {
    wasDismissed = localStorage.getItem("docusaurus.announcement.dismiss") === "true";
  } catch (storageError) {
    // Keep the false fallback.
  }
  root.setAttribute("data-announcement-bar-initially-dismissed", wasDismissed);
})();
</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div 
class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/cn/powered-by"><div 
class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/0.13.0/overview"><div class="labelWrapperDropdown_Mqbj">0.13.0<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/basic_configurations"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.1/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/cn/docs/0.13.0/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.1/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/basic_configurations"><div 
class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.1/basic_configurations"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/overview"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/overview"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/overview"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/overview"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/overview"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 
3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 
1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/0.13.0/basic_configurations" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/0.13.0/basic_configurations" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener 
noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li 
class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 
3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" 
class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.0/overview">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.0/quick-start-guide">Quick Start</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.13.0/quick-start-guide">Spark Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.13.0/flink-quick-start-guide">Flink Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.13.0/docker_demo">Docker Demo</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.0/timeline">Concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a 
class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.0/table_management">How To</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.0/migration_guide">Services</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active hasHref_TwRn" href="/cn/docs/0.13.0/basic_configurations">Configurations</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/cn/docs/0.13.0/basic_configurations">Basic Configurations</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.13.0/configurations">All Configurations</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.0/query_engine_setup">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.0/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.0/faq">FAQs</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.0/privacy">Privacy Policy</a></li></ul></nav></div></aside><main 
class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.13.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/cn/docs/basic_configurations">latest version</a></b> (<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.13.0</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Basic Configurations</h1></header><p>This page covers the basic configurations you may use to write/read Hudi tables. This page only features a subset of the
most frequently used configurations. For a full list of all configs, please visit the <a href="/cn/docs/0.13.0/configurations">All Configurations</a> page.</p><ul><li><a href="#SPARK_DATASOURCE"><strong>Spark Datasource Configs</strong></a>: These configs control the Hudi Spark Datasource, providing the ability to define keys/partitioning, pick out the write operation, specify how to merge records, or choose the query type to read.</li><li><a href="#FLINK_SQL"><strong>Flink Sql Configs</strong></a>: These configs control the Hudi Flink SQL source/sink connectors, providing the ability to define record keys, pick out the write operation, specify how to merge records, enable/disable asynchronous compaction, or choose the query type to read.</li><li><a href="#WRITE_CLIENT"><strong>Write Client Configs</strong></a>: Internally, the Hudi datasource uses an RDD-based HoodieWriteClient API to actually perform writes to storage. These configs provide deep control over lower-level aspects like file sizing, compression, parallelism, compaction, write schema, cleaning, etc. Although Hudi provides sane defaults, from time to time these configs may need to be tweaked to optimize for specific workloads.</li><li><a href="#METRICS"><strong>Metrics Configs</strong></a>: This set of configs is used to enable monitoring and reporting of key Hudi stats and metrics.</li><li><a href="#RECORD_PAYLOAD"><strong>Record Payload Config</strong></a>: This is the lowest level of customization offered by Hudi. Record payloads define how to produce new values to upsert based on the incoming new record and the stored old record. Hudi provides default implementations such as OverwriteWithLatestAvroPayload, which simply updates the table with the latest/last-written record. 
This can be overridden with a custom class extending the HoodieRecordPayload class, on both datasource and WriteClient levels.</li></ul><h2 class="anchor anchorWithStickyNavbar_y2LR" id="SPARK_DATASOURCE">Spark Datasource Configs<a class="hash-link" href="#SPARK_DATASOURCE" title="Direct link to heading"></a></h2><p>These configs control the Hudi Spark Datasource, providing the ability to define keys/partitioning, pick out the write operation, specify how to merge records, or choose the query type to read.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Read-Options">Read Options<a class="hash-link" href="#Read-Options" title="Direct link to heading"></a></h3><p>Options useful for reading tables via <code>read.format.option(...)</code></p><p><code>Config Class</code>: org.apache.hudi.DataSourceOptions.scala<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcequerytype">hoodie.datasource.query.type<a class="hash-link" href="#hoodiedatasourcequerytype" title="Direct link to heading"></a></h4><p>Whether data needs to be read in Incremental mode (new data since an instantTime), Read Optimized mode (obtain the latest view, based on base files), or Snapshot mode (obtain the latest view, by merging base and, if any, log files)<br>
<strong>Default Value</strong>: snapshot (Optional)<br>
<code>Config Param: QUERY_TYPE</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Write-Options">Write Options<a class="hash-link" href="#Write-Options" title="Direct link to heading"></a></h3><p>You can pass down any of the WriteClient level configs directly using <code>options()</code> or <code>option(k,v)</code> methods.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">inputDF.write()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.format(&quot;org.apache.hudi&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.options(clientOpts) // any of the Hudi client opts can be passed in as well</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), &quot;_row_key&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), &quot;partition&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), &quot;timestamp&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.option(HoodieWriteConfig.TABLE_NAME, tableName)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.mode(SaveMode.Append)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.save(basePath);</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB 
clean-btn">Copy</button></div></div><p>Options useful for writing tables via <code>write.format.option(...)</code></p><p><code>Config Class</code>: org.apache.hudi.DataSourceOptions.scala<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewriteoperation">hoodie.datasource.write.operation<a class="hash-link" href="#hoodiedatasourcewriteoperation" title="Direct link to heading"></a></h4><p>Whether to do upsert, insert or bulkinsert for the write operation. Use bulkinsert to load new data into a table, and thereafter use upsert/insert. Bulk insert uses a disk based write path to scale to load large inputs without the need to cache it.<br>
<strong>Default Value</strong>: upsert (Optional)<br>
<code>Config Param: OPERATION</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritetabletype">hoodie.datasource.write.table.type<a class="hash-link" href="#hoodiedatasourcewritetabletype" title="Direct link to heading"></a></h4><p>The table type for the underlying data, for this write. This can’t change between writes.<br>
<strong>Default Value</strong>: COPY_ON_WRITE (Optional)<br>
<code>Config Param: TABLE_TYPE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritetablename">hoodie.datasource.write.table.name<a class="hash-link" href="#hoodiedatasourcewritetablename" title="Direct link to heading"></a></h4><p>Table name for the datasource write. Also used to register the table into meta stores.<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: TABLE_NAME</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewriterecordkeyfield">hoodie.datasource.write.recordkey.field<a class="hash-link" href="#hoodiedatasourcewriterecordkeyfield" title="Direct link to heading"></a></h4><p>Record key field. Value to be used as the <code>recordKey</code> component of <code>HoodieKey</code>.
Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using
the dot notation, e.g. <code>a.b.c</code><br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: RECORDKEY_FIELD</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritepartitionpathfield">hoodie.datasource.write.partitionpath.field<a class="hash-link" href="#hoodiedatasourcewritepartitionpathfield" title="Direct link to heading"></a></h4><p>Partition path field. Value to be used as the partitionPath component of HoodieKey. Actual value is obtained by invoking .toString()<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: PARTITIONPATH_FIELD</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritekeygeneratorclass">hoodie.datasource.write.keygenerator.class<a class="hash-link" href="#hoodiedatasourcewritekeygeneratorclass" title="Direct link to heading"></a></h4><p>Key generator class, that implements <code>org.apache.hudi.keygen.KeyGenerator</code><br>
<strong>Default Value</strong>: org.apache.hudi.keygen.SimpleKeyGenerator (Optional)<br>
<code>Config Param: KEYGENERATOR_CLASS_NAME</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewriteprecombinefield">hoodie.datasource.write.precombine.field<a class="hash-link" href="#hoodiedatasourcewriteprecombinefield" title="Direct link to heading"></a></h4><p>Field used in preCombining before actual write. When two records have the same key value, we will pick the one with the largest value for the precombine field, determined by Object.compareTo(..)<br>
<strong>Default Value</strong>: ts (Optional)<br>
<code>Config Param: PRECOMBINE_FIELD</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritepayloadclass">hoodie.datasource.write.payload.class<a class="hash-link" href="#hoodiedatasourcewritepayloadclass" title="Direct link to heading"></a></h4><p>Payload class used. Override this, if you like to roll your own merge logic, when upserting/inserting. This will render any value set for PRECOMBINE_FIELD_OPT_VAL ineffective<br>
<strong>Default Value</strong>: org.apache.hudi.common.model.OverwriteWithLatestAvroPayload (Optional)<br>
<code>Config Param: PAYLOAD_CLASS_NAME</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritepartitionpathurlencode">hoodie.datasource.write.partitionpath.urlencode<a class="hash-link" href="#hoodiedatasourcewritepartitionpathurlencode" title="Direct link to heading"></a></h4><p>Should we url encode the partition path value, before creating the folder structure.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: URL_ENCODE_PARTITIONING</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcehive_syncenable">hoodie.datasource.hive_sync.enable<a class="hash-link" href="#hoodiedatasourcehive_syncenable" title="Direct link to heading"></a></h4><p>When set to true, register/sync the table to Apache Hive metastore<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: HIVE_SYNC_ENABLED</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcehive_syncmode">hoodie.datasource.hive_sync.mode<a class="hash-link" href="#hoodiedatasourcehive_syncmode" title="Direct link to heading"></a></h4><p>Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: HIVE_SYNC_MODE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritehive_style_partitioning">hoodie.datasource.write.hive_style_partitioning<a class="hash-link" href="#hoodiedatasourcewritehive_style_partitioning" title="Direct link to heading"></a></h4><p>Flag to indicate whether to use Hive style partitioning.
If set true, the names of partition folders follow &lt;partition_column_name&gt;=&lt;partition_value&gt; format.
By default false (the names of partition folders are only partition values)<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: HIVE_STYLE_PARTITIONING</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcehive_syncpartition_fields">hoodie.datasource.hive_sync.partition_fields<a class="hash-link" href="#hoodiedatasourcehive_syncpartition_fields" title="Direct link to heading"></a></h4><p>Field in the table to use for determining hive partition columns.<br>
<strong>Default Value</strong>: (Optional)<br>
<code>Config Param: HIVE_PARTITION_FIELDS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcehive_syncpartition_extractor_class">hoodie.datasource.hive_sync.partition_extractor_class<a class="hash-link" href="#hoodiedatasourcehive_syncpartition_extractor_class" title="Direct link to heading"></a></h4><p>Class which implements PartitionValueExtractor to extract the partition values, default &#x27;MultiPartKeysValueExtractor&#x27;.<br>
<strong>Default Value</strong>: org.apache.hudi.hive.MultiPartKeysValueExtractor (Optional)<br>
<code>Config Param: HIVE_PARTITION_EXTRACTOR_CLASS</code><br></p></blockquote><hr><h2 class="anchor anchorWithStickyNavbar_y2LR" id="FLINK_SQL">Flink Sql Configs<a class="hash-link" href="#FLINK_SQL" title="Direct link to heading"></a></h2><p>These configs control the Hudi Flink SQL source/sink connectors, providing ability to define record keys, pick out the write operation, specify how to merge records, enable/disable asynchronous compaction or choosing query type to read.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Flink-Options">Flink Options<a class="hash-link" href="#Flink-Options" title="Direct link to heading"></a></h3><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="path">path<a class="hash-link" href="#path" title="Direct link to heading"></a></h4><p>Base path for the target hoodie table.
The path would be created if it does not exist,
otherwise it is expected to contain a successfully initialized Hoodie table<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: PATH</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodietablename">hoodie.table.name<a class="hash-link" href="#hoodietablename" title="Direct link to heading"></a></h4><p>Table name to register to Hive metastore<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: TABLE_NAME</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="tabletype">table.type<a class="hash-link" href="#tabletype" title="Direct link to heading"></a></h4><p>Type of table to write. COPY_ON_WRITE (or) MERGE_ON_READ<br>
<strong>Default Value</strong>: COPY_ON_WRITE (Optional)<br>
<code>Config Param: TABLE_TYPE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="writeoperation">write.operation<a class="hash-link" href="#writeoperation" title="Direct link to heading"></a></h4><p>The write operation, that this write should do<br>
<strong>Default Value</strong>: upsert (Optional)<br>
<code>Config Param: OPERATION</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="writetasks">write.tasks<a class="hash-link" href="#writetasks" title="Direct link to heading"></a></h4><p>Parallelism of tasks that do actual write, default is 4<br>
<strong>Default Value</strong>: 4 (Optional)<br>
<code>Config Param: WRITE_TASKS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="writebucket_assigntasks">write.bucket_assign.tasks<a class="hash-link" href="#writebucket_assigntasks" title="Direct link to heading"></a></h4><p>Parallelism of tasks that do bucket assign, default is the parallelism of the execution environment<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: BUCKET_ASSIGN_TASKS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="writeprecombine">write.precombine<a class="hash-link" href="#writeprecombine" title="Direct link to heading"></a></h4><p>Flag to indicate whether to drop duplicates before insert/upsert.
By default these cases will accept duplicates, to gain extra performance:
1) insert operation;
2) upsert for MOR table, the MOR table deduplicates on reading<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: PRE_COMBINE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="readtasks">read.tasks<a class="hash-link" href="#readtasks" title="Direct link to heading"></a></h4><p>Parallelism of tasks that do actual read, default is 4<br>
<strong>Default Value</strong>: 4 (Optional)<br>
<code>Config Param: READ_TASKS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="readstart-commit">read.start-commit<a class="hash-link" href="#readstart-commit" title="Direct link to heading"></a></h4><p>Start commit instant for reading, the commit time format should be &#x27;yyyyMMddHHmmss&#x27;, by default reading from the latest instant for streaming read<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: READ_START_COMMIT</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="readstreamingenabled">read.streaming.enabled<a class="hash-link" href="#readstreamingenabled" title="Direct link to heading"></a></h4><p>Whether to read as streaming source, default false<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: READ_AS_STREAMING</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="compactiontasks">compaction.tasks<a class="hash-link" href="#compactiontasks" title="Direct link to heading"></a></h4><p>Parallelism of tasks that do actual compaction, default is 4<br>
<strong>Default Value</strong>: 4 (Optional)<br>
<code>Config Param: COMPACTION_TASKS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedatasourcewritehive_style_partitioning-1">hoodie.datasource.write.hive_style_partitioning<a class="hash-link" href="#hoodiedatasourcewritehive_style_partitioning-1" title="Direct link to heading"></a></h4><p>Whether to use Hive style partitioning.
If set true, the names of partition folders follow <!-- -->&lt;<!-- -->partition_column_name<!-- -->&gt;<!-- -->=<!-- -->&lt;<!-- -->partition_value<!-- -->&gt;<!-- --> format.
By default false (the names of partition folders are only partition values)<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: HIVE_STYLE_PARTITIONING</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hive_syncenable">hive_sync.enable<a class="hash-link" href="#hive_syncenable" title="Direct link to heading"></a></h4><p>Asynchronously sync Hive meta to HMS, default false<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: HIVE_SYNC_ENABLED</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hive_syncmode">hive_sync.mode<a class="hash-link" href="#hive_syncmode" title="Direct link to heading"></a></h4><p>Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql, default &#x27;jdbc&#x27;<br>
<strong>Default Value</strong>: jdbc (Optional)<br>
<code>Config Param: HIVE_SYNC_MODE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hive_synctable">hive_sync.table<a class="hash-link" href="#hive_synctable" title="Direct link to heading"></a></h4><p>Table name for hive sync, default &#x27;unknown&#x27;<br>
<strong>Default Value</strong>: unknown (Optional)<br>
<code>Config Param: HIVE_SYNC_TABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hive_syncdb">hive_sync.db<a class="hash-link" href="#hive_syncdb" title="Direct link to heading"></a></h4><p>Database name for hive sync, default &#x27;default&#x27;<br>
<strong>Default Value</strong>: default (Optional)<br>
<code>Config Param: HIVE_SYNC_DB</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hive_syncpartition_extractor_class">hive_sync.partition_extractor_class<a class="hash-link" href="#hive_syncpartition_extractor_class" title="Direct link to heading"></a></h4><p>Tool to extract the partition value from HDFS path, default &#x27;SlashEncodedDayPartitionValueExtractor&#x27;<br>
<strong>Default Value</strong>: org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor (Optional)<br>
<code>Config Param: HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hive_syncmetastoreuris">hive_sync.metastore.uris<a class="hash-link" href="#hive_syncmetastoreuris" title="Direct link to heading"></a></h4><p>Metastore uris for hive sync, default &#x27;&#x27;<br>
<strong>Default Value</strong>: (Optional)<br>
<code>Config Param: HIVE_SYNC_METASTORE_URIS</code><br></p></blockquote><hr><h2 class="anchor anchorWithStickyNavbar_y2LR" id="WRITE_CLIENT">Write Client Configs<a class="hash-link" href="#WRITE_CLIENT" title="Direct link to heading"></a></h2><p>Internally, the Hudi datasource uses an RDD-based HoodieWriteClient API to actually perform writes to storage. These configs provide deep control over lower level aspects like file sizing, compression, parallelism, compaction, write schema, cleaning etc. Although Hudi provides sane defaults, from time to time these configs may need to be tweaked to optimize for specific workloads.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="storage-configs">Storage Configs<a class="hash-link" href="#storage-configs" title="Direct link to heading"></a></h3><p>Configurations that control aspects around writing, sizing, reading base and log files.</p><p><code>Config Class</code>: org.apache.hudi.config.HoodieStorageConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="writeparquetblocksize">write.parquet.block.size<a class="hash-link" href="#writeparquetblocksize" title="Direct link to heading"></a></h4><p>Parquet RowGroup size. It&#x27;s recommended to make this large enough that scan costs can be amortized by packing enough column values into a single row group.<br>
<strong>Default Value</strong>: 120 (Optional)<br>
<code>Config Param: WRITE_PARQUET_BLOCK_SIZE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="writeparquetmaxfilesize">write.parquet.max.file.size<a class="hash-link" href="#writeparquetmaxfilesize" title="Direct link to heading"></a></h4><p>Target size for parquet files produced by Hudi write phases. For DFS, this needs to be aligned with the underlying filesystem block size for optimal performance.<br>
<strong>Default Value</strong>: 120 (Optional)<br>
<code>Config Param: WRITE_PARQUET_MAX_FILE_SIZE</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="metadata-configs">Metadata Configs<a class="hash-link" href="#metadata-configs" title="Direct link to heading"></a></h3><p>Configurations used by the Hudi Metadata Table. This table maintains the metadata about a given Hudi table (e.g file listings) to avoid overhead of accessing cloud storage, during queries.</p><p><code>Config Class</code>: org.apache.hudi.common.config.HoodieMetadataConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiemetadataenable">hoodie.metadata.enable<a class="hash-link" href="#hoodiemetadataenable" title="Direct link to heading"></a></h4><p>Enable the internal metadata table which serves table metadata like level file listings<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: ENABLE</code><br>
<code>Since Version: 0.7.0</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="write-configurations">Write Configurations<a class="hash-link" href="#write-configurations" title="Direct link to heading"></a></h3><p>Configurations that control write behavior on Hudi tables. These can be directly passed down from even higher level frameworks (e.g Spark datasources, Flink sink) and utilities (e.g DeltaStreamer).</p><p><code>Config Class</code>: org.apache.hudi.config.HoodieWriteConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecombinebeforeupsert">hoodie.combine.before.upsert<a class="hash-link" href="#hoodiecombinebeforeupsert" title="Direct link to heading"></a></h4><p>When upserted records share same key, controls whether they should be first combined (i.e de-duplicated) before writing to storage. This should be turned off only if you are absolutely certain that there are no duplicates incoming, otherwise it can lead to duplicate keys and violate the uniqueness guarantees.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: COMBINE_BEFORE_UPSERT</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiewritemarkerstype">hoodie.write.markers.type<a class="hash-link" href="#hoodiewritemarkerstype" title="Direct link to heading"></a></h4><p>Marker type to use. Two modes are supported: - DIRECT: individual marker file corresponding to each data file is directly created by the writer. - TIMELINE_SERVER_BASED: marker operations are all handled at the timeline service which serves as a proxy. New marker entries are batch processed and stored in a limited number of underlying files for efficiency. If HDFS is used or timeline server is disabled, DIRECT markers are used as fallback even if this is configured. For Spark structured streaming, this configuration does not take effect, i.e., DIRECT markers are always used for Spark structured streaming.<br>
<strong>Default Value</strong>: TIMELINE_SERVER_BASED (Optional)<br>
<code>Config Param: MARKERS_TYPE</code><br>
<code>Since Version: 0.9.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieinsertshuffleparallelism">hoodie.insert.shuffle.parallelism<a class="hash-link" href="#hoodieinsertshuffleparallelism" title="Direct link to heading"></a></h4><p>Parallelism for inserting records into the table. Inserts can shuffle data before writing to tune file sizes and optimize the storage layout.<br>
<strong>Default Value</strong>: 200 (Optional)<br>
<code>Config Param: INSERT_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodierollbackparallelism">hoodie.rollback.parallelism<a class="hash-link" href="#hoodierollbackparallelism" title="Direct link to heading"></a></h4><p>Parallelism for rollback of commits. Rollbacks perform delete of files or logging delete blocks to file groups on storage in parallel.<br>
<strong>Default Value</strong>: 100 (Optional)<br>
<code>Config Param: ROLLBACK_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecombinebeforedelete">hoodie.combine.before.delete<a class="hash-link" href="#hoodiecombinebeforedelete" title="Direct link to heading"></a></h4><p>During delete operations, controls whether we should combine deletes (and potentially also upserts) before writing to storage.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: COMBINE_BEFORE_DELETE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecombinebeforeinsert">hoodie.combine.before.insert<a class="hash-link" href="#hoodiecombinebeforeinsert" title="Direct link to heading"></a></h4><p>When inserted records share same key, controls whether they should be first combined (i.e de-duplicated) before writing to storage. When set to true the
precombine field value is used to reduce all records that share the same key. <br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: COMBINE_BEFORE_INSERT</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebulkinsertshuffleparallelism">hoodie.bulkinsert.shuffle.parallelism<a class="hash-link" href="#hoodiebulkinsertshuffleparallelism" title="Direct link to heading"></a></h4><p>For large initial imports using bulk_insert operation, controls the parallelism to use for sort modes or custom partitioning done before writing records to the table.<br>
<strong>Default Value</strong>: 200 (Optional)<br>
<code>Config Param: BULKINSERT_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiedeleteshuffleparallelism">hoodie.delete.shuffle.parallelism<a class="hash-link" href="#hoodiedeleteshuffleparallelism" title="Direct link to heading"></a></h4><p>Parallelism used for “delete” operation. Delete operations also perform shuffles, similar to upsert operation.<br>
<strong>Default Value</strong>: 200 (Optional)<br>
<code>Config Param: DELETE_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebulkinsertsortmode">hoodie.bulkinsert.sort.mode<a class="hash-link" href="#hoodiebulkinsertsortmode" title="Direct link to heading"></a></h4><p>Sorting modes to use for sorting records for bulk insert. This is used when user hoodie.bulkinsert.user.defined.partitioner.class is not configured. Available values are - GLOBAL_SORT: this ensures best file sizes, with lowest memory overhead at cost of sorting. PARTITION_SORT: Strikes a balance by only sorting within a partition, still keeping the memory overhead of writing lowest and best effort file sizing. NONE: No sorting. Fastest and matches <code>spark.write.parquet()</code> in terms of number of files, overheads<br>
<strong>Default Value</strong>: GLOBAL_SORT (Optional)<br>
<code>Config Param: BULK_INSERT_SORT_MODE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieembedtimelineserver">hoodie.embed.timeline.server<a class="hash-link" href="#hoodieembedtimelineserver" title="Direct link to heading"></a></h4><p>When true, spins up an instance of the timeline server (meta server that serves cached file listings, statistics), running on each writer&#x27;s driver process, accepting requests during the write from executors.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: EMBEDDED_TIMELINE_SERVER_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieupsertshuffleparallelism">hoodie.upsert.shuffle.parallelism<a class="hash-link" href="#hoodieupsertshuffleparallelism" title="Direct link to heading"></a></h4><p>Parallelism to use for upsert operation on the table. Upserts can shuffle data to perform index lookups, file sizing, bin packing records optimally into file groups.<br>
<strong>Default Value</strong>: 200 (Optional)<br>
<code>Config Param: UPSERT_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodierollbackusingmarkers">hoodie.rollback.using.markers<a class="hash-link" href="#hoodierollbackusingmarkers" title="Direct link to heading"></a></h4><p>Enables a more efficient mechanism for rollbacks based on the marker files generated during the writes. Turned on by default.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: ROLLBACK_USING_MARKERS_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiefinalizewriteparallelism">hoodie.finalize.write.parallelism<a class="hash-link" href="#hoodiefinalizewriteparallelism" title="Direct link to heading"></a></h4><p>Parallelism for the write finalization internal operation, which involves removing any partially written files from lake storage, before committing the write. Reduce this value, if the high number of tasks incur delays for smaller tables or low latency writes.<br>
<strong>Default Value</strong>: 200 (Optional)<br>
<code>Config Param: FINALIZE_WRITE_PARALLELISM_VALUE</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Compaction-Configs">Compaction Configs<a class="hash-link" href="#Compaction-Configs" title="Direct link to heading"></a></h3><p>Configurations that control compaction (merging of log files onto a new base files).</p><p><code>Config Class</code>: org.apache.hudi.config.HoodieCompactionConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactionlazyblockread">hoodie.compaction.lazy.block.read<a class="hash-link" href="#hoodiecompactionlazyblockread" title="Direct link to heading"></a></h4><p>When merging the delta log files, this config helps to choose whether the log blocks should be read lazily or not. Choose true to use lazy block reading (low memory usage, but incurs seeks to each block header) or false for immediate block read (higher memory usage)<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: COMPACTION_LAZY_BLOCK_READ_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieparquetsmallfilelimit">hoodie.parquet.small.file.limit<a class="hash-link" href="#hoodieparquetsmallfilelimit" title="Direct link to heading"></a></h4><p>During upsert operation, we opportunistically expand existing small files on storage, instead of writing new files, to keep number of files to an optimum. This config sets the file size limit below which a file on storage becomes a candidate to be selected as such a <code>small file</code>. By default, treat any file &lt;= 100MB as a small file. Also note that if this is set &lt;= 0, Hudi will not try to get small files and will directly write new files<br>
<strong>Default Value</strong>: 104857600 (Optional)<br>
<code>Config Param: PARQUET_SMALL_FILE_LIMIT</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactionstrategy">hoodie.compaction.strategy<a class="hash-link" href="#hoodiecompactionstrategy" title="Direct link to heading"></a></h4><p>Compaction strategy decides which file groups are picked up for compaction during each compaction run. By default, Hudi picks the log file with most accumulated unmerged data<br>
<strong>Default Value</strong>: org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy (Optional)<br>
<code>Config Param: COMPACTION_STRATEGY</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecopyonwriterecordsizeestimate">hoodie.copyonwrite.record.size.estimate<a class="hash-link" href="#hoodiecopyonwriterecordsizeestimate" title="Direct link to heading"></a></h4><p>The average record size. If not explicitly specified, Hudi will compute the record size estimate dynamically based on commit metadata. This is critical in computing the insert parallelism and bin-packing inserts into small files.<br>
<strong>Default Value</strong>: 1024 (Optional)<br>
<code>Config Param: COPY_ON_WRITE_RECORD_SIZE_ESTIMATE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactinlinemaxdeltaseconds">hoodie.compact.inline.max.delta.seconds<a class="hash-link" href="#hoodiecompactinlinemaxdeltaseconds" title="Direct link to heading"></a></h4><p>Number of elapsed seconds after the last compaction, before scheduling a new one.<br>
<strong>Default Value</strong>: 3600 (Optional)<br>
<code>Config Param: INLINE_COMPACT_TIME_DELTA_SECONDS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactiontargetio">hoodie.compaction.target.io<a class="hash-link" href="#hoodiecompactiontargetio" title="Direct link to heading"></a></h4><p>Amount of MBs to spend during compaction run for the LogFileSizeBasedCompactionStrategy. This value helps bound ingestion latency while compaction is run in inline mode.<br>
<strong>Default Value</strong>: 512000 (Optional)<br>
<code>Config Param: TARGET_IO_PER_COMPACTION_IN_MB</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactionlogfilesizethreshold">hoodie.compaction.logfile.size.threshold<a class="hash-link" href="#hoodiecompactionlogfilesizethreshold" title="Direct link to heading"></a></h4><p>Only if the log file size is greater than the threshold in bytes, the file group will be compacted.<br>
<strong>Default Value</strong>: 0 (Optional)<br>
<code>Config Param: COMPACTION_LOG_FILE_SIZE_THRESHOLD</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactionpreservecommitmetadata">hoodie.compaction.preserve.commit.metadata<a class="hash-link" href="#hoodiecompactionpreservecommitmetadata" title="Direct link to heading"></a></h4><p>When rewriting data, preserves existing hoodie_commit_time<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: PRESERVE_COMMIT_METADATA</code><br>
<code>Since Version: 0.11.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecopyonwriteinsertautosplit">hoodie.copyonwrite.insert.auto.split<a class="hash-link" href="#hoodiecopyonwriteinsertautosplit" title="Direct link to heading"></a></h4><p>Config to control whether we control insert split sizes automatically based on average record sizes. It&#x27;s recommended to keep this turned on, since hand tuning is otherwise extremely cumbersome.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: COPY_ON_WRITE_AUTO_SPLIT_INSERTS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactinlinemaxdeltacommits">hoodie.compact.inline.max.delta.commits<a class="hash-link" href="#hoodiecompactinlinemaxdeltacommits" title="Direct link to heading"></a></h4><p>Number of delta commits after the last compaction, before scheduling of a new compaction is attempted.<br>
<strong>Default Value</strong>: 5 (Optional)<br>
<code>Config Param: INLINE_COMPACT_NUM_DELTA_COMMITS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodierecordsizeestimationthreshold">hoodie.record.size.estimation.threshold<a class="hash-link" href="#hoodierecordsizeestimationthreshold" title="Direct link to heading"></a></h4><p>We use the previous commits&#x27; metadata to calculate the estimated record size and use it to bin pack records into partitions. If the previous commit is too small to make an accurate estimation, Hudi will search commits in the reverse order, until we find a commit that has totalBytesWritten larger than (PARQUET_SMALL_FILE_LIMIT_BYTES * this_threshold)<br>
<strong>Default Value</strong>: 1.0 (Optional)<br>
<code>Config Param: RECORD_SIZE_ESTIMATION_THRESHOLD</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactinlinetriggerstrategy">hoodie.compact.inline.trigger.strategy<a class="hash-link" href="#hoodiecompactinlinetriggerstrategy" title="Direct link to heading"></a></h4><p>Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. Valid options: NUM_COMMITS,NUM_COMMITS_AFTER_LAST_REQUEST,TIME_ELAPSED,NUM_AND_TIME,NUM_OR_TIME<br>
<strong>Default Value</strong>: NUM_COMMITS (Optional)<br>
<code>Config Param: INLINE_COMPACT_TRIGGER_STRATEGY</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactionreverselogread">hoodie.compaction.reverse.log.read<a class="hash-link" href="#hoodiecompactionreverselogread" title="Direct link to heading"></a></h4><p>HoodieLogFormatReader reads a logfile in the forward direction starting from pos=0 to pos=file_length. If this config is set to true, the reader reads the logfile in reverse direction, from pos=file_length to pos=0<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: COMPACTION_REVERSE_LOG_READ_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecopyonwriteinsertsplitsize">hoodie.copyonwrite.insert.split.size<a class="hash-link" href="#hoodiecopyonwriteinsertsplitsize" title="Direct link to heading"></a></h4><p>Number of inserts assigned for each partition/bucket for writing. We based the default on writing out 100MB files, with at least 1kb records (100K records per file), and over provision to 500K. As long as auto-tuning of splits is turned on, this only affects the first write, where there is no history to learn record sizes from.<br>
<strong>Default Value</strong>: 500000 (Optional)<br>
<code>Config Param: COPY_ON_WRITE_INSERT_SPLIT_SIZE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactscheduleinline">hoodie.compact.schedule.inline<a class="hash-link" href="#hoodiecompactscheduleinline" title="Direct link to heading"></a></h4><p>When set to true, compaction service will be attempted for inline scheduling after each write. Users have to ensure they have a separate job to run async compaction(execution) for the one scheduled by this writer. Users can choose to set both <code>hoodie.compact.inline</code> and <code>hoodie.compact.schedule.inline</code> to false and have both scheduling and execution triggered by any async process. But if <code>hoodie.compact.inline</code> is set to false, and <code>hoodie.compact.schedule.inline</code> is set to true, regular writers will schedule compaction inline, but users are expected to trigger async job for execution. If <code>hoodie.compact.inline</code> is set to true, regular writers will do both scheduling and execution inline for compaction<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: SCHEDULE_INLINE_COMPACT</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactiondaybasedtargetpartitions">hoodie.compaction.daybased.target.partitions<a class="hash-link" href="#hoodiecompactiondaybasedtargetpartitions" title="Direct link to heading"></a></h4><p>Used by org.apache.hudi.io.compact.strategy.DayBasedCompactionStrategy to denote the number of latest partitions to compact during a compaction run.<br>
<strong>Default Value</strong>: 10 (Optional)<br>
<code>Config Param: TARGET_PARTITIONS_PER_DAYBASED_COMPACTION</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecompactinline">hoodie.compact.inline<a class="hash-link" href="#hoodiecompactinline" title="Direct link to heading"></a></h4><p>When set to true, compaction service is triggered after each write. While being simpler operationally, this adds extra latency on the write path.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: INLINE_COMPACT</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Clean-Configs">Clean Configs<a class="hash-link" href="#Clean-Configs" title="Direct link to heading"></a></h3><p>Cleaning (reclamation of older/unused file groups/slices).</p><p><code>Config Class</code>: org.apache.hudi.config.HoodieCleanConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerfileversionsretained">hoodie.cleaner.fileversions.retained<a class="hash-link" href="#hoodiecleanerfileversionsretained" title="Direct link to heading"></a></h4><p>When KEEP_LATEST_FILE_VERSIONS cleaning policy is used, the minimum number of file slices to retain in each file group, during cleaning.<br>
<strong>Default Value</strong>: 3 (Optional)<br>
<code>Config Param: CLEANER_FILE_VERSIONS_RETAINED</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanmaxcommits">hoodie.clean.max.commits<a class="hash-link" href="#hoodiecleanmaxcommits" title="Direct link to heading"></a></h4><p>Number of commits after the last clean operation, before scheduling of a new clean is attempted.<br>
<strong>Default Value</strong>: 1 (Optional)<br>
<code>Config Param: CLEAN_MAX_COMMITS</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanallowmultiple">hoodie.clean.allow.multiple<a class="hash-link" href="#hoodiecleanallowmultiple" title="Direct link to heading"></a></h4><p>Allows scheduling/executing multiple cleans by enabling this config. If users prefer to strictly ensure clean requests should be mutually exclusive, i.e. a 2nd clean will not be scheduled if another clean is not yet completed to avoid repeat cleaning of same files, they might want to disable this config.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: ALLOW_MULTIPLE_CLEANS</code><br>
<code>Since Version: 0.11.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanautomatic">hoodie.clean.automatic<a class="hash-link" href="#hoodiecleanautomatic" title="Direct link to heading"></a></h4><p>When enabled, the cleaner table service is invoked immediately after each commit, to delete older file slices. It&#x27;s recommended to enable this, to ensure metadata and data storage growth is bounded.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: AUTO_CLEAN</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerparallelism">hoodie.cleaner.parallelism<a class="hash-link" href="#hoodiecleanerparallelism" title="Direct link to heading"></a></h4><p>Parallelism for the cleaning operation. Increase this if cleaning becomes slow.<br>
<strong>Default Value</strong>: 200 (Optional)<br>
<code>Config Param: CLEANER_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerincrementalmode">hoodie.cleaner.incremental.mode<a class="hash-link" href="#hoodiecleanerincrementalmode" title="Direct link to heading"></a></h4><p>When enabled, the plans for each cleaner service run is computed incrementally off the events in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full table for each planning (even with a metadata table).<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: CLEANER_INCREMENTAL_MODE_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanasync">hoodie.clean.async<a class="hash-link" href="#hoodiecleanasync" title="Direct link to heading"></a></h4><p>Only applies when hoodie.clean.automatic is turned on. When turned on runs cleaner async with writing, which can speed up overall write performance.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: ASYNC_CLEAN</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleantriggerstrategy">hoodie.clean.trigger.strategy<a class="hash-link" href="#hoodiecleantriggerstrategy" title="Direct link to heading"></a></h4><p>Controls how cleaning is scheduled. Valid options: NUM_COMMITS<br>
<strong>Default Value</strong>: NUM_COMMITS (Optional)<br>
<code>Config Param: CLEAN_TRIGGER_STRATEGY</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerdeletebootstrapbasefile">hoodie.cleaner.delete.bootstrap.base.file<a class="hash-link" href="#hoodiecleanerdeletebootstrapbasefile" title="Direct link to heading"></a></h4><p>When set to true, cleaner also deletes the bootstrap base file when its skeleton base file is cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap base files are also physically deleted, to comply with data privacy enforcement processes.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: CLEANER_BOOTSTRAP_BASE_FILE_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerhoursretained">hoodie.cleaner.hours.retained<a class="hash-link" href="#hoodiecleanerhoursretained" title="Direct link to heading"></a></h4><p>Number of hours for which commits need to be retained. This config provides a more flexible option as compared to number of commits retained for cleaning service. Setting this property ensures all the files, but the latest in a file group, corresponding to commits with commit times older than the configured number of hours to be retained are cleaned.<br>
<strong>Default Value</strong>: 24 (Optional)<br>
<code>Config Param: CLEANER_HOURS_RETAINED</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanercommitsretained">hoodie.cleaner.commits.retained<a class="hash-link" href="#hoodiecleanercommitsretained" title="Direct link to heading"></a></h4><p>Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits (scheduled). This also directly translates into how much data retention the table supports for incremental queries.<br>
<strong>Default Value</strong>: 10 (Optional)<br>
<code>Config Param: CLEANER_COMMITS_RETAINED</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerpolicyfailedwrites">hoodie.cleaner.policy.failed.writes<a class="hash-link" href="#hoodiecleanerpolicyfailedwrites" title="Direct link to heading"></a></h4><p>Cleaning policy for failed writes to be used. Hudi will delete any files written by failed writes to re-claim space. Choose to perform this rollback of failed writes eagerly before every writer starts (only supported for single writer) or lazily by the cleaner (required for multi-writers)<br>
<strong>Default Value</strong>: EAGER (Optional)<br>
<code>Config Param: FAILED_WRITES_CLEANER_POLICY</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecleanerpolicy">hoodie.cleaner.policy<a class="hash-link" href="#hoodiecleanerpolicy" title="Direct link to heading"></a></h4><p>Cleaning policy to be used. The cleaner service deletes older file slices files to re-claim space. By default, cleaner spares the file slices written by the last N commits, determined by hoodie.cleaner.commits.retained. Long running query plans may often refer to older file slices and will break if those are cleaned, before the query has had a chance to run. So, it is good to make sure that the data is retained for more than the maximum query execution time<br>
<strong>Default Value</strong>: KEEP_LATEST_COMMITS (Optional)<br>
<code>Config Param: CLEANER_POLICY</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Archival-Configs">Archival Configs<a class="hash-link" href="#Archival-Configs" title="Direct link to heading"></a></h3><p>Configurations that control archival.</p><p><code>Config Class</code>: org.apache.hudi.config.HoodieArchivalConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchivemergesmallfilelimitbytes">hoodie.archive.merge.small.file.limit.bytes<a class="hash-link" href="#hoodiearchivemergesmallfilelimitbytes" title="Direct link to heading"></a></h4><p>This config sets the archive file size limit below which an archive file becomes a candidate to be selected as such a small file.<br>
<strong>Default Value</strong>: 20971520 (Optional)<br>
<code>Config Param: ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiekeepmaxcommits">hoodie.keep.max.commits<a class="hash-link" href="#hoodiekeepmaxcommits" title="Direct link to heading"></a></h4><p>Archiving service moves older entries from timeline into an archived log after each write, to keep the metadata overhead constant, even as the table size grows. This config controls the maximum number of instants to retain in the active timeline. <br>
<strong>Default Value</strong>: 30 (Optional)<br>
<code>Config Param: MAX_COMMITS_TO_KEEP</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchivemergeenable">hoodie.archive.merge.enable<a class="hash-link" href="#hoodiearchivemergeenable" title="Direct link to heading"></a></h4><p>When enabled, hoodie will auto merge several small archive files into a larger one. It&#x27;s useful when storage scheme doesn&#x27;t support append operation.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: ARCHIVE_MERGE_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchiveautomatic">hoodie.archive.automatic<a class="hash-link" href="#hoodiearchiveautomatic" title="Direct link to heading"></a></h4><p>When enabled, the archival table service is invoked immediately after each commit, to archive commits if we cross a maximum value of commits. It&#x27;s recommended to enable this, to ensure number of active commits is bounded.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: AUTO_ARCHIVE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchivedeleteparallelism">hoodie.archive.delete.parallelism<a class="hash-link" href="#hoodiearchivedeleteparallelism" title="Direct link to heading"></a></h4><p>Parallelism for deleting archived hoodie commits.<br>
<strong>Default Value</strong>: 100 (Optional)<br>
<code>Config Param: DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchivebeyondsavepoint">hoodie.archive.beyond.savepoint<a class="hash-link" href="#hoodiearchivebeyondsavepoint" title="Direct link to heading"></a></h4><p>If enabled, archival will proceed beyond savepoint, skipping savepoint commits. If disabled, archival will stop at the earliest savepoint commit.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: ARCHIVE_BEYOND_SAVEPOINT</code><br>
<code>Since Version: 0.12.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecommitsarchivalbatch">hoodie.commits.archival.batch<a class="hash-link" href="#hoodiecommitsarchivalbatch" title="Direct link to heading"></a></h4><p>Archiving of instants is batched in best-effort manner, to pack more instants into a single archive log. This config controls such archival batch size.<br>
<strong>Default Value</strong>: 10 (Optional)<br>
<code>Config Param: COMMITS_ARCHIVAL_BATCH_SIZE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchiveasync">hoodie.archive.async<a class="hash-link" href="#hoodiearchiveasync" title="Direct link to heading"></a></h4><p>Only applies when hoodie.archive.automatic is turned on. When turned on runs archiver async with writing, which can speed up overall write performance.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: ASYNC_ARCHIVE</code><br>
<code>Since Version: 0.11.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiekeepmincommits">hoodie.keep.min.commits<a class="hash-link" href="#hoodiekeepmincommits" title="Direct link to heading"></a></h4><p>Similar to hoodie.keep.max.commits, but controls the minimum number of instants to retain in the active timeline.<br>
<strong>Default Value</strong>: 20 (Optional)<br>
<code>Config Param: MIN_COMMITS_TO_KEEP</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiearchivemergefilesbatchsize">hoodie.archive.merge.files.batch.size<a class="hash-link" href="#hoodiearchivemergefilesbatchsize" title="Direct link to heading"></a></h4><p>The number of small archive files to be merged at once.<br>
<strong>Default Value</strong>: 10 (Optional)<br>
<code>Config Param: ARCHIVE_MERGE_FILES_BATCH_SIZE</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="index-configs">Index Configs<a class="hash-link" href="#index-configs" title="Direct link to heading"></a></h3><p>Configurations that control indexing behavior, which tags incoming records as either inserts or updates to older records.</p><p><code>Config Class</code>: org.apache.hudi.config.HoodieIndexConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieindextype">hoodie.index.type<a class="hash-link" href="#hoodieindextype" title="Direct link to heading"></a></h4><p>Type of index to use. Default is Bloom filter. Possible options are <!-- -->[BLOOM | GLOBAL_BLOOM |SIMPLE | GLOBAL_SIMPLE | INMEMORY | HBASE | BUCKET]<!-- -->. Bloom filters removes the dependency on a external system and is stored in the footer of the Parquet Data Files<br>
<strong>Default Value</strong>: N/A (Required)<br>
<code>Config Param: INDEX_TYPE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieindexbloomfpp">hoodie.index.bloom.fpp<a class="hash-link" href="#hoodieindexbloomfpp" title="Direct link to heading"></a></h4><p>Only applies if index type is BLOOM. Error rate allowed given the number of entries. This is used to calculate how many bits should be assigned for the bloom filter and the number of hash functions. This is usually set very low (default: 0.000000001), we like to tradeoff disk space for lower false positives. If the number of entries added to bloom filter exceeds the configured value (hoodie.index.bloom.num_entries), then this fpp may not be honored.<br>
<strong>Default Value</strong>: 0.000000001 (Optional)<br>
<code>Config Param: BLOOM_FILTER_FPP_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieindexbloomnum_entries">hoodie.index.bloom.num_entries<a class="hash-link" href="#hoodieindexbloomnum_entries" title="Direct link to heading"></a></h4><p>Only applies if index type is BLOOM. This is the number of entries to be stored in the bloom filter. The rationale for the default: Assume the maxParquetFileSize is 128MB and averageRecordSize is 1kb and hence we approx a total of 130K records in a file. The default (60000) is roughly half of this approximation. Warning: Setting this very low, will generate a lot of false positives and index lookup will have to scan a lot more files than it has to and setting this to a very high number will increase the size every base file linearly (roughly 4KB for every 50000 entries). This config is also used with DYNAMIC bloom filter which determines the initial size for the bloom.<br>
<strong>Default Value</strong>: 60000 (Optional)<br>
<code>Config Param: BLOOM_FILTER_NUM_ENTRIES_VALUE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebloomindexupdatepartitionpath">hoodie.bloom.index.update.partition.path<a class="hash-link" href="#hoodiebloomindexupdatepartitionpath" title="Direct link to heading"></a></h4><p>Only applies if index type is GLOBAL_BLOOM. When set to true, an update including the partition path of a record that already exists will result in inserting the incoming record into the new partition and deleting the original record in the old partition. When set to false, the original record will only be updated in the old partition<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebloomindexusecaching">hoodie.bloom.index.use.caching<a class="hash-link" href="#hoodiebloomindexusecaching" title="Direct link to heading"></a></h4><p>Only applies if index type is BLOOM. When true, the input RDD will be cached to speed up index lookup by reducing IO for computing parallelism or affected partitions<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: BLOOM_INDEX_USE_CACHING</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebloomindexparallelism">hoodie.bloom.index.parallelism<a class="hash-link" href="#hoodiebloomindexparallelism" title="Direct link to heading"></a></h4><p>Only applies if index type is BLOOM. This is the amount of parallelism for index lookup, which involves a shuffle. By default, this is auto computed based on input workload characteristics.<br>
<strong>Default Value</strong>: 0 (Optional)<br>
<code>Config Param: BLOOM_INDEX_PARALLELISM</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebloomindexprunebyranges">hoodie.bloom.index.prune.by.ranges<a class="hash-link" href="#hoodiebloomindexprunebyranges" title="Direct link to heading"></a></h4><p>Only applies if index type is BLOOM. When true, range information from files is leveraged to speed up index lookups. Particularly helpful, if the key has a monotonically increasing prefix, such as timestamp. If the record key is completely random, it is better to turn this off, since range pruning will only add extra overhead to the index lookup.<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: BLOOM_INDEX_PRUNE_BY_RANGES</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiebloomindexfiltertype">hoodie.bloom.index.filter.type<a class="hash-link" href="#hoodiebloomindexfiltertype" title="Direct link to heading"></a></h4><p>Filter type used. Default is BloomFilterTypeCode.DYNAMIC_V0. Available values are <!-- -->[BloomFilterTypeCode.SIMPLE , BloomFilterTypeCode.DYNAMIC_V0]<!-- -->. Dynamic bloom filters auto size themselves based on number of keys.<br>
<strong>Default Value</strong>: DYNAMIC_V0 (Optional)<br>
<code>Config Param: BLOOM_FILTER_TYPE</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiesimpleindexparallelism">hoodie.simple.index.parallelism<a class="hash-link" href="#hoodiesimpleindexparallelism" title="Direct link to heading"></a></h4><p>Only applies if index type is SIMPLE. This is the amount of parallelism for index lookup, which involves a Spark Shuffle<br>
<strong>Default Value</strong>: 50 (Optional)<br>
<code>Config Param: SIMPLE_INDEX_PARALLELISM</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiesimpleindexusecaching">hoodie.simple.index.use.caching<a class="hash-link" href="#hoodiesimpleindexusecaching" title="Direct link to heading"></a></h4><p>Only applies if index type is SIMPLE. When true, the incoming writes will be cached to speed up index lookup by reducing IO for computing parallelism or affected partitions<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: SIMPLE_INDEX_USE_CACHING</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodieglobalsimpleindexparallelism">hoodie.global.simple.index.parallelism<a class="hash-link" href="#hoodieglobalsimpleindexparallelism" title="Direct link to heading"></a></h4><p>Only applies if index type is GLOBAL_SIMPLE. This is the amount of parallelism for index lookup, which involves a Spark Shuffle<br>
<strong>Default Value</strong>: 100 (Optional)<br>
<code>Config Param: GLOBAL_SIMPLE_INDEX_PARALLELISM</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiesimpleindexupdatepartitionpath">hoodie.simple.index.update.partition.path<a class="hash-link" href="#hoodiesimpleindexupdatepartitionpath" title="Direct link to heading"></a></h4><p>Similar to Key: &#x27;hoodie.bloom.index.update.partition.path&#x27; , default: true but for simple index. Since version: 0.6.0<br>
<strong>Default Value</strong>: true (Optional)<br>
<code>Config Param: SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE</code><br></p></blockquote><hr><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Common-Configurations">Common Configurations<a class="hash-link" href="#Common-Configurations" title="Direct link to heading"></a></h3><p>The following set of configurations are common across Hudi.</p><p><code>Config Class</code>: org.apache.hudi.common.config.HoodieCommonConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiecommonspillablediskmaptype">hoodie.common.spillable.diskmap.type<a class="hash-link" href="#hoodiecommonspillablediskmaptype" title="Direct link to heading"></a></h4><p>When handling input data that cannot be held in memory, to merge with a file on storage, a spillable diskmap is employed. By default, we use a persistent hashmap based loosely on bitcask, that offers O(1) inserts, lookups. Change this to <code>ROCKS_DB</code> to prefer using rocksDB, for handling the spill.<br>
<strong>Default Value</strong>: BITCASK (Optional)<br>
<code>Config Param: SPILLABLE_DISK_MAP_TYPE</code><br></p></blockquote><hr><h2 class="anchor anchorWithStickyNavbar_y2LR" id="METRICS">Metrics Configs<a class="hash-link" href="#METRICS" title="Direct link to heading"></a></h2><p>This set of configs is used to enable monitoring and reporting of key Hudi stats and metrics.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Metrics-Configurations-for-Datadog-reporter">Metrics Configurations for Datadog reporter<a class="hash-link" href="#Metrics-Configurations-for-Datadog-reporter" title="Direct link to heading"></a></h3><p>Enables reporting on Hudi metrics using the Datadog reporter type. Hudi publishes metrics on every commit, clean, rollback etc.</p><p><code>Config Class</code>: org.apache.hudi.config.metrics.HoodieMetricsDatadogConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiemetricson">hoodie.metrics.on<a class="hash-link" href="#hoodiemetricson" title="Direct link to heading"></a></h4><p>Turn on/off metrics reporting. off by default.<br>
<strong>Default Value</strong>: false (Optional)<br>
<code>Config Param: TURN_METRICS_ON</code><br>
<code>Since Version: 0.5.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiemetricsreportertype">hoodie.metrics.reporter.type<a class="hash-link" href="#hoodiemetricsreportertype" title="Direct link to heading"></a></h4><p>Type of metrics reporter.<br>
<strong>Default Value</strong>: GRAPHITE (Optional)<br>
<code>Config Param: METRICS_REPORTER_TYPE_VALUE</code><br>
<code>Since Version: 0.5.0</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiemetricsreporterclass">hoodie.metrics.reporter.class<a class="hash-link" href="#hoodiemetricsreporterclass" title="Direct link to heading"></a></h4><p><strong>Default Value</strong>: (Optional)<br><code>Config Param: METRICS_REPORTER_CLASS_NAME</code><br><code>Since Version: 0.6.0</code><br></p></blockquote><hr><h2 class="anchor anchorWithStickyNavbar_y2LR" id="RECORD_PAYLOAD">Record Payload Config<a class="hash-link" href="#RECORD_PAYLOAD" title="Direct link to heading"></a></h2><p>This is the lowest level of customization offered by Hudi. Record payloads define how to produce new values to upsert based on incoming new record and stored old record. Hudi provides default implementations such as OverwriteWithLatestAvroPayload which simply update table with the latest/last-written record. This can be overridden to a custom class extending HoodieRecordPayload class, on both datasource and WriteClient levels.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="Payload-Configurations">Payload Configurations<a class="hash-link" href="#Payload-Configurations" title="Direct link to heading"></a></h3><p>Payload related configs, that can be leveraged to control merges based on specific business fields in the data.</p><p><code>Config Class</code>: org.apache.hudi.config.HoodiePayloadConfig<br></p><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiepayloadeventtimefield">hoodie.payload.event.time.field<a class="hash-link" href="#hoodiepayloadeventtimefield" title="Direct link to heading"></a></h4><p>Table column/field name to derive timestamp associated with the records. This can be useful for e.g, determining the freshness of the table.<br>
<strong>Default Value</strong>: ts (Optional)<br>
<code>Config Param: EVENT_TIME_FIELD</code><br></p></blockquote><hr><blockquote><h4 class="anchor anchorWithStickyNavbar_y2LR" id="hoodiepayloadorderingfield">hoodie.payload.ordering.field<a class="hash-link" href="#hoodiepayloadorderingfield" title="Direct link to heading"></a></h4><p>Table column/field name to order records that have the same key, before merging and writing to storage.<br>
<strong>Default Value</strong>: ts (Optional)<br>
<code>Config Param: ORDERING_FIELD</code><br></p></blockquote><hr></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.13.0/basic_configurations.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/cn/docs/0.13.0/precommit_validator"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Data Quality</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/docs/0.13.0/configurations"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">All Configurations</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#SPARK_DATASOURCE" class="table-of-contents__link toc-highlight">Spark Datasource Configs</a><ul><li><a href="#Read-Options" class="table-of-contents__link toc-highlight">Read Options</a></li><li><a href="#Write-Options" class="table-of-contents__link toc-highlight">Write Options</a></li></ul></li><li><a href="#FLINK_SQL" class="table-of-contents__link toc-highlight">Flink Sql Configs</a><ul><li><a href="#Flink-Options" class="table-of-contents__link toc-highlight">Flink Options</a></li></ul></li><li><a 
href="#WRITE_CLIENT" class="table-of-contents__link toc-highlight">Write Client Configs</a><ul><li><a href="#storage-configs" class="table-of-contents__link toc-highlight">Storage Configs</a></li><li><a href="#metadata-configs" class="table-of-contents__link toc-highlight">Metadata Configs</a></li><li><a href="#write-configurations" class="table-of-contents__link toc-highlight">Write Configurations</a></li><li><a href="#Compaction-Configs" class="table-of-contents__link toc-highlight">Compaction Configs</a></li><li><a href="#Clean-Configs" class="table-of-contents__link toc-highlight">Clean Configs</a></li><li><a href="#Archival-Configs" class="table-of-contents__link toc-highlight">Archival Configs</a></li><li><a href="#index-configs" class="table-of-contents__link toc-highlight">Index Configs</a></li><li><a href="#Common-Configurations" class="table-of-contents__link toc-highlight">Common Configurations</a></li></ul></li><li><a href="#METRICS" class="table-of-contents__link toc-highlight">Metrics Configs</a><ul><li><a href="#Metrics-Configurations-for-Datadog-reporter" class="table-of-contents__link toc-highlight">Metrics Configurations for Datadog reporter</a></li></ul></li><li><a href="#RECORD_PAYLOAD" class="table-of-contents__link toc-highlight">Record Payload Config</a><ul><li><a href="#Payload-Configurations" class="table-of-contents__link toc-highlight">Payload Configurations</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li 
class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" 
href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 
13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>LinkedIn<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" 
class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/cn/docs/privacy">Privacy Policy</a></div></div></div></footer></div>
<script src="/cn/assets/js/runtime~main.0acdb754.js"></script>
<script src="/cn/assets/js/main.6d6aa24f.js"></script>
</body>
</html>