blob: d5f3e8ea3c4f8fd518e949fe812c15ac01d7a8b2 [file] [log] [blame]
<!doctype html>
<html class="docs-version-0.14.1" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Writing Data | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/docs/writing_data"><meta data-react-helmet="true" name="docsearch:language" content="en"><meta data-react-helmet="true" name="docsearch:version" content="0.14.1"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.14.1"><meta data-react-helmet="true" property="og:title" content="Writing Data | Apache Hudi"><meta data-react-helmet="true" name="description" content="In this section, we will cover ways to ingest new changes from external sources or even other Hudi tables."><meta data-react-helmet="true" property="og:description" content="In this section, we will cover ways to ingest new changes from external sources or even other Hudi tables."><meta data-react-helmet="true" name="keywords" content="hudi,incremental,batch,stream,processing,Hive,ETL,Spark SQL"><link data-react-helmet="true" rel="icon" href="/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/docs/writing_data"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/writing_data" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/writing_data" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/writing_data" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/assets/css/styles.ea681a30.css">
<link rel="preload" href="/assets/js/runtime~main.2cab5691.js" as="script">
<link rel="preload" href="/assets/js/main.bd020950.js" as="script">
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/contribute/developer-setup"><div class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/writing_data"><div class="labelWrapperDropdown_Mqbj">Current</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/writing_data"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/docs/0.14.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/docs/0.13.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/docs/0.13.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/docs/0.12.3/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/docs/0.12.2/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/docs/0.12.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/docs/0.12.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/docs/0.11.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/docs/0.11.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/docs/0.10.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/docs/0.10.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/docs/0.9.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/docs/0.8.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/docs/0.7.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/docs/0.6.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/docs/0.5.3/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/docs/0.5.2/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/docs/0.5.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/docs/0.5.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>English</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/writing_data" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/writing_data" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/next/writing_data"><div class="labelWrapperDropdown_Mqbj">Current</div></a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/docs/writing_data"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.14.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.13.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.13.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.3/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.2/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.11.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.11.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.10.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.10.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.9.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.8.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.7.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.6.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.3/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.2/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.1/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.0/writing_data"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/overview">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/quick-start-guide">Quick Start</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/quick-start-guide">Spark Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/flink-quick-start-guide">Flink Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/docker_demo">Docker Demo</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/timeline">Concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active hasHref_TwRn" href="/docs/sql_ddl">How To</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" tabindex="0" href="/docs/sql_ddl">SQL</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/writing_data">Writing Data</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/hoodie_streaming_ingestion">Streaming Ingestion</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" tabindex="0" href="/docs/syncing_aws_glue_data_catalog">Syncing to Catalogs</a></div></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/migration_guide">Services</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/basic_configurations">Configurations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/performance">Guides</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/faq">Frequently Asked Questions(FAQs)</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/faq">Overview</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/privacy">Privacy Policy</a></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.14.1</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Writing Data</h1></header><p>In this section, we will cover ways to ingest new changes from external sources or even other Hudi tables.
Currently Hudi supports following ways to write the data.</p><ul><li><a href="/docs/hoodie_streaming_ingestion#hudi-streamer">Hudi Streamer</a></li><li><a href="#spark-datasource-writer">Spark Hudi Datasource</a></li><li><a href="/docs/hoodie_streaming_ingestion#structured-streaming">Spark Structured Streaming</a></li><li><a href="/docs/next/sql_ddl#spark-sql">Spark SQL</a></li><li><a href="/docs/next/hoodie_streaming_ingestion#flink-ingestion">Flink Writer</a></li><li><a href="/docs/next/sql_ddl#flink">Flink SQL</a></li><li><a href="#java-writer">Java Writer</a></li><li><a href="/docs/next/hoodie_streaming_ingestion#kafka-connect-sink">Kafka Connect</a></li></ul><h2 class="anchor anchorWithStickyNavbar_y2LR" id="spark-datasource-writer">Spark Datasource Writer<a class="hash-link" href="#spark-datasource-writer" title="Direct link to heading"></a></h2><p>The <code>hudi-spark</code> module offers the DataSource API to write (and read) a Spark DataFrame into a Hudi table. There are a number of options available:</p><p><strong><code>HoodieWriteConfig</code></strong>:</p><p><strong>TABLE_NAME</strong> (Required)<br></p><p><strong><code>DataSourceWriteOptions</code></strong>:</p><p><strong>RECORDKEY_FIELD_OPT_KEY</strong> (Required): Primary key field(s). Record keys uniquely identify a record/row within each partition. If one wants to have a global uniqueness, there are two options. You could either make the dataset non-partitioned, or, you can leverage Global indexes to ensure record keys are unique irrespective of the partition path. Record keys can either be a single column or refer to multiple columns. <code>KEYGENERATOR_CLASS_OPT_KEY</code> property should be set accordingly based on whether it is a simple or complex key. For eg: <code>&quot;col1&quot;</code> for simple field, <code>&quot;col1,col2,col3,etc&quot;</code> for complex field. Nested fields can be specified using the dot notation eg: <code>a.b.c</code>. <br>
Default value: <code>&quot;uuid&quot;</code><br></p><p><strong>PARTITIONPATH_FIELD_OPT_KEY</strong> (Required): Columns to be used for partitioning the table. To prevent partitioning, provide empty string as value eg: <code>&quot;&quot;</code>. Specify partitioning/no partitioning using <code>KEYGENERATOR_CLASS_OPT_KEY</code>. If partition path needs to be url encoded, you can set <code>URL_ENCODE_PARTITIONING_OPT_KEY</code>. If synchronizing to hive, also specify using <code>HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.</code><br>
Default value: <code>&quot;partitionpath&quot;</code><br></p><p><strong>PRECOMBINE_FIELD_OPT_KEY</strong> (Required): When two records within the same batch have the same key value, the record with the largest value from the field specified will be choosen. If you are using default payload of OverwriteWithLatestAvroPayload for HoodieRecordPayload (<code>WRITE_PAYLOAD_CLASS</code>), an incoming record will always takes precendence compared to the one in storage ignoring this <code>PRECOMBINE_FIELD_OPT_KEY</code>. <br>
Default value: <code>&quot;ts&quot;</code><br></p><p><strong>OPERATION_OPT_KEY</strong>: The <a href="/docs/write_operations">write operations</a> to use.<br>
Available values:<br>
<code>UPSERT_OPERATION_OPT_VAL</code> (default), <code>BULK_INSERT_OPERATION_OPT_VAL</code>, <code>INSERT_OPERATION_OPT_VAL</code>, <code>DELETE_OPERATION_OPT_VAL</code></p><p><strong>TABLE_TYPE_OPT_KEY</strong>: The <a href="/docs/concepts#table-types">type of table</a> to write to. Note: After the initial creation of a table, this value must stay consistent when writing to (updating) the table using the Spark <code>SaveMode.Append</code> mode.<br>
Available values:<br>
<a href="/docs/concepts#copy-on-write-table"><code>COW_TABLE_TYPE_OPT_VAL</code></a> (default), <a href="/docs/concepts#merge-on-read-table"><code>MOR_TABLE_TYPE_OPT_VAL</code></a></p><p><strong>KEYGENERATOR_CLASS_OPT_KEY</strong>: Refer to <a href="/docs/key_generation">Key Generation</a> section below.</p><p><strong>HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY</strong>: If using hive, specify if the table should or should not be partitioned.<br>
Available values:<br>
<code>classOf[MultiPartKeysValueExtractor].getCanonicalName</code> (default), <code>classOf[SlashEncodedDayPartitionValueExtractor].getCanonicalName</code>, <code>classOf[TimestampBasedKeyGenerator].getCanonicalName</code>, <code>classOf[NonPartitionedExtractor].getCanonicalName</code>, <code>classOf[GlobalDeleteKeyGenerator].getCanonicalName</code> (to be used when <code>OPERATION_OPT_KEY</code> is set to <code>DELETE_OPERATION_OPT_VAL</code>)</p><p>Example:
Upsert a DataFrame, specifying the necessary field names for <code>recordKey =&gt; _row_key</code>, <code>partitionPath =&gt; partition</code>, and <code>precombineKey =&gt; timestamp</code></p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">inputDF.write()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .format(&quot;org.apache.hudi&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .options(clientOpts) //Where clientOpts is of type Map[String, String]. clientOpts can include any other options necessary.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), &quot;_row_key&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), &quot;partition&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), &quot;timestamp&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(HoodieWriteConfig.TABLE_NAME, tableName)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .mode(SaveMode.Append)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .save(basePath);</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><p>Generate some new trips, load them into a DataFrame and write the DataFrame into the Hudi table as below.</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val inserts = convertToStringList(dataGen.generateInserts(10))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Overwrite).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p><code>mode(Overwrite)</code> overwrites and recreates the table if it already exists.
You can check the data generated under <code>/tmp/hudi_trips_cow/&lt;region&gt;/&lt;country&gt;/&lt;city&gt;/</code>. We provided a record key
(<code>uuid</code> in <a href="https://github.com/apache/hudi/blob/6f9b02decb5bb2b83709b1b6ec04a97e4d102c11/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L60" target="_blank" rel="noopener noreferrer">schema</a>), partition field (<code>region/country/city</code>) and combine logic (<code>ts</code> in
<a href="https://github.com/apache/hudi/blob/6f9b02decb5bb2b83709b1b6ec04a97e4d102c11/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L60" target="_blank" rel="noopener noreferrer">schema</a>) to ensure trip records are unique within each partition. For more info, refer to
<a href="/docs/next/faq_general/#how-do-i-model-the-data-stored-in-hudi">Modeling data stored in Hudi</a>
and for info on ways to ingest data into Hudi, refer to <a href="/docs/writing_data">Writing Hudi Tables</a>.
Here we are using the default write operation : <code>upsert</code>. If you have a workload without updates, you can also issue
<code>insert</code> or <code>bulk_insert</code> operations which could be faster. To know more, refer to <a href="/docs/write_operations">Write operations</a></p></div></div></div><div role="tabpanel" hidden="">Generate some new trips, load them into a DataFrame and write the DataFrame into the Hudi table as below.<div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">inserts </span><span class="token operator">=</span><span class="token plain"> sc</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">_jvm</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">org</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">apache</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hudi</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">QuickstartUtils</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">convertToStringList</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dataGen</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">generateInserts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">json</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sparkContext</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">parallelize</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">inserts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.recordkey.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.partitionpath.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.operation&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;upsert&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.precombine.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.upsert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.insert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">write</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;overwrite&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p><code>mode(Overwrite)</code> overwrites and recreates the table if it already exists.
You can check the data generated under <code>/tmp/hudi_trips_cow/&lt;region&gt;/&lt;country&gt;/&lt;city&gt;/</code>. We provided a record key
(<code>uuid</code> in <a href="https://github.com/apache/hudi/blob/2e6e302efec2fa848ded4f88a95540ad2adb7798/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L60" target="_blank" rel="noopener noreferrer">schema</a>), partition field (<code>region/country/city</code>) and combine logic (<code>ts</code> in
<a href="https://github.com/apache/hudi/blob/2e6e302efec2fa848ded4f88a95540ad2adb7798/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L60" target="_blank" rel="noopener noreferrer">schema</a>) to ensure trip records are unique within each partition. For more info, refer to
<a href="/docs/next/faq_general/#how-do-i-model-the-data-stored-in-hudi">Modeling data stored in Hudi</a>
and for info on ways to ingest data into Hudi, refer to <a href="/docs/writing_data">Writing Hudi Tables</a>.
Here we are using the default write operation : <code>upsert</code>. If you have a workload without updates, you can also issue
<code>insert</code> or <code>bulk_insert</code> operations which could be faster. To know more, refer to <a href="/docs/write_operations">Write operations</a></p></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> h0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert static partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> h_p0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">partition</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-01-02&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert dynamic partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> h_p0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert dynamic partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> h_p1 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-01-03&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;19&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert overwrite table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> h0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert overwrite table with static partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite h_p0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">partition</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-01-02&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert overwrite table with dynamic partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> h_p1 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a2&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-01-03&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;19&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>NOTICE</strong></p><ul><li><p>Insert mode : Hudi supports two insert modes when inserting data to a table with primary key(we call it pk-table as followed):<br>
Using <code>strict</code> mode, insert statement will keep the primary key uniqueness constraint for COW table which do not allow
duplicate records. If a record already exists during insert, a HoodieDuplicateKeyException will be thrown
for COW table. For MOR table, updates are allowed to existing record.<br>
Using <code>non-strict</code> mode, hudi uses the same code path used by <code>insert</code> operation in spark data source for the pk-table. <br>
One can set the insert mode by using the config: <strong>hoodie.sql.insert.mode</strong></p></li><li><p>Bulk Insert : By default, hudi uses the normal insert operation for insert statements. Users can set <strong>hoodie.sql.bulk.insert.enable</strong>
to true to enable the bulk insert for insert statement.</p></li></ul></div></div></div><p>Checkout <a href="https://hudi.apache.org/blog/2021/02/13/hudi-key-generators" target="_blank" rel="noopener noreferrer">https://hudi.apache.org/blog/2021/02/13/hudi-key-generators</a> for various key generator options, like Timestamp based,
complex, custom, NonPartitioned Key gen, etc.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="insert-overwrite-table">Insert Overwrite Table<a class="hash-link" href="#insert-overwrite-table" title="Direct link to heading"></a></h3><p>Generate some new trips, overwrite the table logically at the Hudi metadata level. The Hudi cleaner will eventually
clean up the previous table snapshot&#x27;s file groups. This can be faster than deleting the older table and recreating
in <code>Overwrite</code> mode.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> select(&quot;uuid&quot;,&quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> show(10, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val inserts = convertToStringList(dataGen.generateInserts(10))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(OPERATION_OPT_KEY,&quot;insert_overwrite_table&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// Should have different keys now, from query before.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> select(&quot;uuid&quot;,&quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> show(10, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><p>The insert overwrite non-partitioned table sql statement will convert to the <strong><em>insert_overwrite_table</em></strong> operation.
e.g.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> h0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="insert-overwrite">Insert Overwrite<a class="hash-link" href="#insert-overwrite" title="Direct link to heading"></a></h3><p>Generate some new trips, overwrite the all the partitions that are present in the input. This operation can be faster
than <code>upsert</code> for batch ETL jobs, that are recomputing entire target partitions at once (as opposed to incrementally
updating the target tables). This is because, we are able to bypass indexing, precombining and other repartitioning
steps in the upsert write path completely.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> select(&quot;uuid&quot;,&quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> sort(&quot;partitionpath&quot;,&quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val inserts = convertToStringList(dataGen.generateInserts(10))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.json(spark.sparkContext.parallelize(inserts, 2)).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> filter(&quot;partitionpath = &#x27;americas/united_states/san_francisco&#x27;&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(OPERATION_OPT_KEY,&quot;insert_overwrite&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// Should have different keys now for San Francisco alone, from query before.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> select(&quot;uuid&quot;,&quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> sort(&quot;partitionpath&quot;,&quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> show(100, false)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><p>The insert overwrite partitioned table sql statement will convert to the <strong><em>insert_overwrite</em></strong> operation.
e.g.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> h_p1 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a2&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-01-03&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;19&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="deletes">Deletes<a class="hash-link" href="#deletes" title="Direct link to heading"></a></h3><p>Hudi supports implementing two types of deletes on data stored in Hudi tables, by enabling the user to specify a different record payload implementation.
For more info refer to <a href="https://cwiki.apache.org/confluence/x/6IqvC" target="_blank" rel="noopener noreferrer">Delete support in Hudi</a>.</p><ul><li><strong>Soft Deletes</strong> : Retain the record key and just null out the values for all the other fields.
This can be achieved by ensuring the appropriate fields are nullable in the table schema and simply upserting the table after setting these fields to null.
Note that soft deletes are always persisted in storage and never removed, but all values are set to nulls.
So for GDPR or other compliance reasons, users should consider doing hard deletes if record key and partition path
contain PII.</li></ul><p>For example:</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// fetch two records for soft deletes</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val softDeleteDs = spark.sql(&quot;select * from hudi_trips_snapshot&quot;).limit(2)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// prepare the soft deletes by ensuring the appropriate fields are nullified</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val nullifyColumns = softDeleteDs.schema.fields.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> map(field =&gt; (field.name, field.dataType.typeName)).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> filter(pair =&gt; (!HoodieRecord.HOODIE_META_COLUMNS.contains(pair._1)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &amp;&amp; !Array(&quot;ts&quot;, &quot;uuid&quot;, &quot;partitionpath&quot;).contains(pair._1)))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val softDeleteDf = nullifyColumns.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> foldLeft(softDeleteDs.drop(HoodieRecord.HOODIE_META_COLUMNS: _*))(</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> (ds, col) =&gt; ds.withColumn(col._1, lit(null).cast(col._2)))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// simply upsert the table after setting these fields to null</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">softDeleteDf.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(OPERATION_OPT_KEY, &quot;upsert&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><ul><li><strong>Hard Deletes</strong> : A stronger form of deletion is to physically remove any trace of the record from the table. This can be achieved in 3 different ways. </li></ul><ol><li>Using Datasource, set <code>OPERATION_OPT_KEY</code> to <code>DELETE_OPERATION_OPT_VAL</code>. This will remove all the records in the DataSet being submitted.</li></ol><p>Example, first read in a dataset:</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">val roViewDF = spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;org.apache.hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath + &quot;/*/*/*/*&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">roViewDF.createOrReplaceTempView(&quot;hudi_ro_table&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select count(*) from hudi_ro_table&quot;).show() // should return 10 (number of records inserted above)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val riderValue = spark.sql(&quot;select distinct rider from hudi_ro_table&quot;).show()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// copy the value displayed to be used in next step</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Now write a query of which records you would like to delete:</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.sql(&quot;select uuid, partitionPath from hudi_ro_table where rider = &#x27;rider-213&#x27;&quot;)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Lastly, execute the deletion of these records:</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">val deletes = dataGen.generateDeletes(df.collectAsList())</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.read.json(spark.sparkContext.parallelize(deletes, 2));</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;org.apache.hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">option(OPERATION_OPT_KEY,&quot;delete&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">save(basePath);</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><ol start="2"><li>Using DataSource, set <code>PAYLOAD_CLASS_OPT_KEY</code> to <code>&quot;org.apache.hudi.EmptyHoodieRecordPayload&quot;</code>. This will remove all the records in the DataSet being submitted. </li></ol><p>This example will remove all the records from the table that exist in the DataSet <code>deleteDF</code>:</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain"> deleteDF // dataframe containing just records to be deleted</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .write().format(&quot;org.apache.hudi&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(...) // Add HUDI options like record-key, partition-path and others as needed for your setup</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> // specify record_key, partition_key, precombine_fieldkey &amp; usual params</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY, &quot;org.apache.hudi.EmptyHoodieRecordPayload&quot;)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><ol start="3"><li>Using DataSource or DeltaStreamer, add a column named <code>_hoodie_is_deleted</code> to DataSet. The value of this column must be set to <code>true</code> for all the records to be deleted and either <code>false</code> or left null for any records which are to be upserted.</li></ol><p>Let&#x27;s say the original schema is:</p><div class="codeBlockContainer_J+bg language-json theme-code-block"><div class="codeBlockContent_csEI json"><pre tabindex="0" class="prism-code language-json codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;record&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;example_tbl&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;fields&quot;</span><span class="token operator">:</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;uuid&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;String&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;ts&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;string&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;partitionPath&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;string&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;rank&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;long&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Make sure you add <code>_hoodie_is_deleted</code> column:</p><div class="codeBlockContainer_J+bg language-json theme-code-block"><div class="codeBlockContent_csEI json"><pre tabindex="0" class="prism-code language-json codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;record&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;example_tbl&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;fields&quot;</span><span class="token operator">:</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;uuid&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;String&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;ts&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;string&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;partitionPath&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;string&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;rank&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;long&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token plain"> </span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;_hoodie_is_deleted&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;boolean&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">&quot;default&quot;</span><span class="token plain"> </span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">false</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Then any record you want to delete you can mark <code>_hoodie_is_deleted</code> as true:</p><div class="codeBlockContainer_J+bg language-json theme-code-block"><div class="codeBlockContent_csEI json"><pre tabindex="0" class="prism-code language-json codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token property">&quot;ts&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">0.0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token property">&quot;uuid&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;19tdb048-c93e-4532-adf9-f61ce6afe10&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token property">&quot;rank&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">1045</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token property">&quot;partitionpath&quot;</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;americas/brazil/sao_paulo&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token property">&quot;_hoodie_is_deleted&quot;</span><span class="token plain"> </span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="concurrency-control">Concurrency Control<a class="hash-link" href="#concurrency-control" title="Direct link to heading"></a></h3><p>The <code>hudi-spark</code> module offers the DataSource API to write (and read) a Spark DataFrame into a Hudi table.</p><p>Following is an example of how to use optimistic_concurrency_control via spark datasource. Read more in depth about concurrency control in the <a href="https://hudi.apache.org/docs/concurrency_control" target="_blank" rel="noopener noreferrer">concurrency control concepts</a> section </p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">inputDF.write.format(&quot;hudi&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .options(getQuickstartWriteConfigs)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(&quot;hoodie.cleaner.policy.failed.writes&quot;, &quot;LAZY&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(&quot;hoodie.write.concurrency.mode&quot;, &quot;optimistic_concurrency_control&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(&quot;hoodie.write.lock.zookeeper.url&quot;, &quot;zookeeper&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(&quot;hoodie.write.lock.zookeeper.port&quot;, &quot;2181&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(&quot;hoodie.write.lock.zookeeper.lock_key&quot;, &quot;test_table&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(&quot;hoodie.write.lock.zookeeper.base_path&quot;, &quot;/test&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(TABLE_NAME, tableName)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .mode(Overwrite)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .save(basePath)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="commit-notifications">Commit Notifications<a class="hash-link" href="#commit-notifications" title="Direct link to heading"></a></h3><p>Apache Hudi provides the ability to post a callback notification about a write commit. This may be valuable if you need
an event notification stream to take actions with other services after a Hudi write commit.
You can push a write commit callback notification into HTTP endpoints or to a Kafka server.</p><h4 class="anchor anchorWithStickyNavbar_y2LR" id="http-endpoints">HTTP Endpoints<a class="hash-link" href="#http-endpoints" title="Direct link to heading"></a></h4><p>You can push a commit notification to an HTTP URL and can specify custom values by extending a callback class defined below.</p><table><thead><tr><th>Config</th><th>Description</th><th>Required</th><th>Default</th></tr></thead><tbody><tr><td>TURN_CALLBACK_ON</td><td>Turn commit callback on/off</td><td>optional</td><td>false (<em>callbacks off</em>)</td></tr><tr><td>CALLBACK_HTTP_URL</td><td>Callback host to be sent along with callback messages</td><td>required</td><td>N/A</td></tr><tr><td>CALLBACK_HTTP_TIMEOUT_IN_SECONDS</td><td>Callback timeout in seconds</td><td>optional</td><td>3</td></tr><tr><td>CALLBACK_CLASS_NAME</td><td>Full path of callback class and must be a subclass of HoodieWriteCommitCallback class, org.apache.hudi.callback.impl.HoodieWriteCommitHttpCallback by default</td><td>optional</td><td>org.apache.hudi.callback.impl.HoodieWriteCommitHttpCallback</td></tr><tr><td>CALLBACK_HTTP_API_KEY_VALUE</td><td>Http callback API key</td><td>optional</td><td>hudi_write_commit_http_callback</td></tr><tr><td></td><td></td><td></td><td></td></tr></tbody></table><h4 class="anchor anchorWithStickyNavbar_y2LR" id="kafka-endpoints">Kafka Endpoints<a class="hash-link" href="#kafka-endpoints" title="Direct link to heading"></a></h4><p>You can push a commit notification to a Kafka topic so it can be used by other real time systems.</p><table><thead><tr><th>Config</th><th>Description</th><th>Required</th><th>Default</th></tr></thead><tbody><tr><td>TOPIC</td><td>Kafka topic name to publish timeline activity into.</td><td>required</td><td>N/A</td></tr><tr><td>PARTITION</td><td>It may be desirable to serialize all changes into a single Kafka partition for providing strict ordering. By default, Kafka messages are keyed by table name, which guarantees ordering at the table level, but not globally (or when new partitions are added)</td><td>required</td><td>N/A</td></tr><tr><td>RETRIES</td><td>Times to retry the produce</td><td>optional</td><td>3</td></tr><tr><td>ACKS</td><td>kafka acks level, all by default to ensure strong durability</td><td>optional</td><td>all</td></tr><tr><td>BOOTSTRAP_SERVERS</td><td>Bootstrap servers of kafka cluster, to be used for publishing commit metadata</td><td>required</td><td>N/A</td></tr><tr><td></td><td></td><td></td><td></td></tr></tbody></table><h4 class="anchor anchorWithStickyNavbar_y2LR" id="pulsar-endpoints">Pulsar Endpoints<a class="hash-link" href="#pulsar-endpoints" title="Direct link to heading"></a></h4><p>You can push a commit notification to a Pulsar topic so it can be used by other real time systems. </p><table><thead><tr><th>Config</th><th>Description</th><th>Required</th><th>Default</th></tr></thead><tbody><tr><td>hoodie.write.commit.callback.pulsar.broker.service.url</td><td>Server&#x27;s Url of pulsar cluster to use to publish commit metadata.</td><td>required</td><td>N/A</td></tr><tr><td>hoodie.write.commit.callback.pulsar.topic</td><td>Pulsar topic name to publish timeline activity into</td><td>required</td><td>N/A</td></tr><tr><td>hoodie.write.commit.callback.pulsar.producer.route-mode</td><td>Message routing logic for producers on partitioned topics.</td><td>optional</td><td>RoundRobinPartition</td></tr><tr><td>hoodie.write.commit.callback.pulsar.producer.pending-queue-size</td><td>The maximum size of a queue holding pending messages.</td><td>optional</td><td>1000</td></tr><tr><td>hoodie.write.commit.callback.pulsar.producer.pending-total-size</td><td>The maximum number of pending messages across partitions.</td><td>required</td><td>50000</td></tr><tr><td>hoodie.write.commit.callback.pulsar.producer.block-if-queue-full</td><td>When the queue is full, the method is blocked instead of an exception is thrown.</td><td>optional</td><td>true</td></tr><tr><td>hoodie.write.commit.callback.pulsar.producer.send-timeout</td><td>The timeout in each sending to pulsar.</td><td>optional</td><td>30s</td></tr><tr><td>hoodie.write.commit.callback.pulsar.operation-timeout</td><td>Duration of waiting for completing an operation.</td><td>optional</td><td>30s</td></tr><tr><td>hoodie.write.commit.callback.pulsar.connection-timeout</td><td>Duration of waiting for a connection to a broker to be established.</td><td>optional</td><td>10s</td></tr><tr><td>hoodie.write.commit.callback.pulsar.request-timeout</td><td>Duration of waiting for completing a request.</td><td>optional</td><td>60s</td></tr><tr><td>hoodie.write.commit.callback.pulsar.keepalive-interval</td><td>Duration of keeping alive interval for each client broker connection.</td><td>optional</td><td>30s</td></tr><tr><td></td><td></td><td></td><td></td></tr></tbody></table><h4 class="anchor anchorWithStickyNavbar_y2LR" id="bring-your-own-implementation">Bring your own implementation<a class="hash-link" href="#bring-your-own-implementation" title="Direct link to heading"></a></h4><p>You can extend the HoodieWriteCommitCallback class to implement your own way to asynchronously handle the callback
of a successful write. Use this public API:</p><p><a href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/callback/HoodieWriteCommitCallback.java" target="_blank" rel="noopener noreferrer">https://github.com/apache/hudi/blob/master/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/callback/HoodieWriteCommitCallback.java</a></p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="flink-sql-writer">Flink SQL Writer<a class="hash-link" href="#flink-sql-writer" title="Direct link to heading"></a></h2><p>The hudi-flink module defines the Flink SQL connector for both hudi source and sink.
There are a number of options available for the sink table:</p><table><thead><tr><th>Option Name</th><th>Required</th><th>Default</th><th>Remarks</th></tr></thead><tbody><tr><td>path</td><td>Y</td><td>N/A</td><td>Base path for the target hoodie table. The path would be created if it does not exist, otherwise a hudi table expects to be initialized successfully</td></tr><tr><td>table.type</td><td>N</td><td>COPY_ON_WRITE</td><td>Type of table to write. COPY_ON_WRITE (or) MERGE_ON_READ</td></tr><tr><td>write.operation</td><td>N</td><td>upsert</td><td>The write operation, that this write should do (insert or upsert is supported)</td></tr><tr><td>write.precombine.field</td><td>N</td><td>ts</td><td>Field used in preCombining before actual write. When two records have the same key value, we will pick the one with the largest value for the precombine field, determined by Object.compareTo(..)</td></tr><tr><td>write.payload.class</td><td>N</td><td>OverwriteWithLatestAvroPayload.class</td><td>Payload class used. Override this, if you like to roll your own merge logic, when upserting/inserting. This will render any value set for the option in-effective</td></tr><tr><td>write.insert.drop.duplicates</td><td>N</td><td>false</td><td>Flag to indicate whether to drop duplicates upon insert. By default insert will accept duplicates, to gain extra performance</td></tr><tr><td>write.ignore.failed</td><td>N</td><td>true</td><td>Flag to indicate whether to ignore any non exception error (e.g. writestatus error). within a checkpoint batch. By default true (in favor of streaming progressing over data integrity)</td></tr><tr><td>hoodie.datasource.write.recordkey.field</td><td>N</td><td>uuid</td><td>Record key field. Value to be used as the <code>recordKey</code> component of <code>HoodieKey</code>. Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using the dot notation eg: <code>a.b.c</code></td></tr><tr><td>hoodie.datasource.write.keygenerator.class</td><td>N</td><td>SimpleAvroKeyGenerator.class</td><td>Key generator class, that implements will extract the key out of incoming record</td></tr><tr><td>write.tasks</td><td>N</td><td>4</td><td>Parallelism of tasks that do actual write, default is 4</td></tr><tr><td>write.batch.size.MB</td><td>N</td><td>128</td><td>Batch buffer size in MB to flush data into the underneath filesystem</td></tr></tbody></table><p>If the table type is MERGE_ON_READ, you can also specify the asynchronous compaction strategy through options:</p><table><thead><tr><th>Option Name</th><th>Required</th><th>Default</th><th>Remarks</th></tr></thead><tbody><tr><td>compaction.async.enabled</td><td>N</td><td>true</td><td>Async Compaction, enabled by default for MOR</td></tr><tr><td>compaction.trigger.strategy</td><td>N</td><td>num_commits</td><td>Strategy to trigger compaction, options are &#x27;num_commits&#x27;: trigger compaction when reach N delta commits; &#x27;time_elapsed&#x27;: trigger compaction when time elapsed &gt; N seconds since last compaction; &#x27;num_and_time&#x27;: trigger compaction when both NUM_COMMITS and TIME_ELAPSED are satisfied; &#x27;num_or_time&#x27;: trigger compaction when NUM_COMMITS or TIME_ELAPSED is satisfied. Default is &#x27;num_commits&#x27;</td></tr><tr><td>compaction.delta_commits</td><td>N</td><td>5</td><td>Max delta commits needed to trigger compaction, default 5 commits</td></tr><tr><td>compaction.delta_seconds</td><td>N</td><td>3600</td><td>Max delta seconds time needed to trigger compaction, default 1 hour</td></tr></tbody></table><p>You can write the data using the SQL <code>INSERT INTO</code> statements:</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">INSERT</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">INTO</span><span class="token plain"> hudi_table </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"> </span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Note</strong>: INSERT OVERWRITE is not supported yet but already on the roadmap.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="non-blocking-concurrency-control-experimental">Non-Blocking Concurrency Control (Experimental)<a class="hash-link" href="#non-blocking-concurrency-control-experimental" title="Direct link to heading"></a></h3><p>Hudi Flink supports a new non-blocking concurrency control mode, where multiple writer tasks can be executed
concurrently without blocking each other. One can read more about this mode in
the <a href="/docs/next/concurrency_control#model-c-multi-writer">concurrency control</a> docs. Let us see it in action here.</p><p>In the below example, we have two streaming ingestion pipelines that concurrently update the same table. One of the
pipeline is responsible for the compaction and cleaning table services, while the other pipeline is just for data
ingestion.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- set the interval as 30 seconds</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">execution</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">checkpointing</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">interval</span><span class="token plain">: </span><span class="token number">30000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">state</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">backend: rocksdb</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- This is a datagen source that can generates records continuously</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">CREATE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> sourceT </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> uuid </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> name </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> age </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">timestamp</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">3</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token identifier punctuation" style="color:rgb(248, 248, 242)">`</span><span class="token identifier">partition</span><span class="token identifier punctuation" style="color:rgb(248, 248, 242)">`</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;par1&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WITH</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;connector&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;datagen&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;rows-per-second&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;200&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- pipeline1: by default enable the compaction and cleaning services</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">CREATE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> t1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> uuid </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> name </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> age </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">timestamp</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">3</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token identifier punctuation" style="color:rgb(248, 248, 242)">`</span><span class="token identifier">partition</span><span class="token identifier punctuation" style="color:rgb(248, 248, 242)">`</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WITH</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;connector&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hudi&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;path&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;/Users/chenyuzhao/workspace/hudi-demo/t1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;table.type&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;MERGE_ON_READ&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;index.type&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;BUCKET&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.write.concurrency.mode&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;NON_BLOCKING_CONCURRENCY_CONTROL&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;write.tasks&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- pipeline2: disable the compaction and cleaning services manually</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">CREATE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> t1_2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> uuid </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> name </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> age </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">timestamp</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">3</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token identifier punctuation" style="color:rgb(248, 248, 242)">`</span><span class="token identifier">partition</span><span class="token identifier punctuation" style="color:rgb(248, 248, 242)">`</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">varchar</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WITH</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;connector&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hudi&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;path&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;/Users/chenyuzhao/workspace/hudi-demo/t1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;table.type&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;MERGE_ON_READ&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;index.type&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;BUCKET&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.write.concurrency.mode&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;NON_BLOCKING_CONCURRENCY_CONTROL&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;write.tasks&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;compaction.schedule.enabled&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;false&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;compaction.async.enabled&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;false&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;clean.async.enabled&#x27;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;false&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- submit the pipelines</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> t1 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> sourceT</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> t1_2 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> sourceT</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> t1 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">limit</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>As you can see from the above example, we have two pipelines with multiple tasks that concurrently write to the
same table. To use the new concurrency mode, all you need to do is set the <code>hoodie.write.concurrency.mode</code>
to <code>NON_BLOCKING_CONCURRENCY_CONTROL</code>. The <code>write.tasks</code> option is used to specify the number of write tasks that will
be used for writing to the table. The <code>compaction.schedule.enabled</code>, <code>compaction.async.enabled</code>
and <code>clean.async.enabled</code> options are used to disable the compaction and cleaning services for the second pipeline.
This is done to ensure that the compaction and cleaning services are not executed twice for the same table.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="java-writer">Java Writer<a class="hash-link" href="#java-writer" title="Direct link to heading"></a></h2><p>We can use plain java to write to hudi tables. To use Java client we can refere <a href="https://github.com/apache/hudi/blob/master/hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java" target="_blank" rel="noopener noreferrer">here</a></p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.14.1/writing_data.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/procedures"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Procedures</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/hoodie_streaming_ingestion"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Streaming Ingestion</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#spark-datasource-writer" class="table-of-contents__link toc-highlight">Spark Datasource Writer</a><ul><li><a href="#insert-overwrite-table" class="table-of-contents__link toc-highlight">Insert Overwrite Table</a></li><li><a href="#insert-overwrite" class="table-of-contents__link toc-highlight">Insert Overwrite</a></li><li><a href="#deletes" class="table-of-contents__link toc-highlight">Deletes</a></li><li><a href="#concurrency-control" class="table-of-contents__link toc-highlight">Concurrency Control</a></li><li><a href="#commit-notifications" class="table-of-contents__link toc-highlight">Commit Notifications</a></li></ul></li><li><a href="#flink-sql-writer" class="table-of-contents__link toc-highlight">Flink SQL Writer</a><ul><li><a href="#non-blocking-concurrency-control-experimental" class="table-of-contents__link toc-highlight">Non-Blocking Concurrency Control (Experimental)</a></li></ul></li><li><a href="#java-writer" class="table-of-contents__link toc-highlight">Java Writer</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>. <br>Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.2cab5691.js"></script>
<script src="/assets/js/main.bd020950.js"></script>
</body>
</html>