blob: fa7668eafc587c1bb5eae748c37dc09736a69bad [file] [log] [blame]
<!doctype html>
<html class="docs-version-0.10.0" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Spark Guide | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/docs/0.10.0/quick-start-guide"><meta data-react-helmet="true" name="docsearch:language" content="en"><meta data-react-helmet="true" name="docsearch:version" content="0.10.0"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.10.0"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Spark Guide | Apache Hudi"><meta data-react-helmet="true" name="description" content="This guide provides a quick peek at Hudi&#x27;s capabilities using spark-shell. Using Spark datasources, we will walk through"><meta data-react-helmet="true" property="og:description" content="This guide provides a quick peek at Hudi&#x27;s capabilities using spark-shell. Using Spark datasources, we will walk through"><link data-react-helmet="true" rel="icon" href="/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/docs/0.10.0/quick-start-guide"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.10.0/quick-start-guide" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/0.10.0/quick-start-guide" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.10.0/quick-start-guide" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/assets/css/styles.ea681a30.css">
<link rel="preload" href="/assets/js/runtime~main.2cab5691.js" as="script">
<link rel="preload" href="/assets/js/main.bd020950.js" as="script">
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/contribute/developer-setup"><div class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">Current</div></a></li><li><a class="dropdown__link" href="/docs/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/docs/0.14.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/docs/0.13.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/docs/0.13.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/docs/0.12.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/docs/0.12.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/docs/0.12.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/docs/0.12.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/docs/0.11.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/docs/0.11.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/docs/0.10.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/0.10.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/docs/0.9.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/docs/0.8.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/docs/0.7.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>English</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/0.10.0/quick-start-guide" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/0.10.0/quick-start-guide" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/next/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">Current</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.14.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.13.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.13.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.11.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.11.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.10.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/docs/0.10.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.9.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.8.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.7.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.10.0/overview">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active hasHref_TwRn" href="/docs/0.10.0/quick-start-guide">Quick Start</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/0.10.0/quick-start-guide">Spark Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.10.0/flink-quick-start-guide">Flink Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.10.0/docker_demo">Docker Demo</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.10.0/timeline">Concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.10.0/table_management">How To</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.10.0/migration_guide">Services</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.10.0/configurations">Configurations</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.10.0/query_engine_setup">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.10.0/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.10.0/faq">FAQs</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.10.0/privacy">Privacy Policy</a></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.10.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/quick-start-guide">latest version</a></b> (<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.10.0</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Spark Guide</h1></header><p>This guide provides a quick peek at Hudi&#x27;s capabilities using spark-shell. Using Spark datasources, we will walk through
code snippets that allows you to insert and update a Hudi table of default table type:
<a href="/docs/concepts#copy-on-write-table">Copy on Write</a>.
After each write operation we will also show how to read the data both snapshot and incrementally.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="setup">Setup<a class="hash-link" href="#setup" title="Direct link to heading"></a></h2><p>Hudi works with Spark-2.4.3+ &amp; Spark 3.x versions. You can follow instructions <a href="https://spark.apache.org/downloads" target="_blank" rel="noopener noreferrer">here</a> for setting up spark.</p><p><strong>Spark 3 Support Matrix</strong></p><table><thead><tr><th>Hudi</th><th>Supported Spark 3 version</th></tr></thead><tbody><tr><td>0.10.0</td><td>3.1.x (default build), 3.0.x</td></tr><tr><td>0.7.0 - 0.9.0</td><td>3.0.x</td></tr><tr><td>0.6.0 and prior</td><td>not supported</td></tr></tbody></table><p>As of 0.9.0 release, Spark SQL DML support has been added and is experimental.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><p>From the extracted directory run spark-shell with Hudi as:</p><div class="codeBlockContainer_J+bg language-shell theme-code-block"><div class="codeBlockContent_csEI shell"><pre tabindex="0" class="prism-code language-shell codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># spark-shell for spark 3</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-shell </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0,org.apache.spark:spark-avro_2.12:3.1.2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># spark-shell for spark 2 with scala 2.12</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-shell </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --packages org.apache.hudi:hudi-spark-bundle_2.12:0.10.0,org.apache.spark:spark-avro_2.12:2.4.4 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># spark-shell for spark 2 with scala 2.11</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-shell </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --packages org.apache.hudi:hudi-spark-bundle_2.11:0.10.0,org.apache.spark:spark-avro_2.11:2.4.4 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><p>Hudi support using Spark SQL to write and read data with the <strong>HoodieSparkSessionExtension</strong> sql extension.
From the extracted directory run Spark SQL with Hudi as:</p><div class="codeBlockContainer_J+bg language-shell theme-code-block"><div class="codeBlockContent_csEI shell"><pre tabindex="0" class="prism-code language-shell codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># Spark SQL for spark 3</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-sql --packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0,org.apache.spark:spark-avro_2.12:3.1.2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># Spark SQL for spark 2 with scala 2.11</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-sql --packages org.apache.hudi:hudi-spark-bundle_2.11:0.10.0,org.apache.spark:spark-avro_2.11:2.4.4 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># Spark SQL for spark 2 with scala 2.12</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-sql </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --packages org.apache.hudi:hudi-spark-bundle_2.12:0.10.0,org.apache.spark:spark-avro_2.12:2.4.4 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><p>From the extracted directory run pyspark with Hudi as:</p><div class="codeBlockContainer_J+bg language-shell theme-code-block"><div class="codeBlockContent_csEI shell"><pre tabindex="0" class="prism-code language-shell codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(189, 147, 249)">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:rgb(189, 147, 249);font-style:italic">PYSPARK_PYTHON</span><span class="token operator">=</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">$(</span><span class="token variable function" style="color:rgb(80, 250, 123);font-style:italic">which</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic"> python3</span><span class="token variable" style="color:rgb(189, 147, 249);font-style:italic">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># for spark3</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">pyspark </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0,org.apache.spark:spark-avro_2.12:3.1.2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># for spark2 with scala 2.12</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">pyspark </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--packages org.apache.hudi:hudi-spark-bundle_2.12:0.10.0,org.apache.spark:spark-avro_2.12:2.4.4 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># for spark2 with scala 2.11</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">pyspark </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--packages org.apache.hudi:hudi-spark-bundle_2.11:0.10.0,org.apache.spark:spark-avro_2.11:2.4.4 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.serializer=org.apache.spark.serializer.KryoSerializer&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--conf </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>Please note the following</h5></div><div class="admonition-content"><ul><li>spark-avro module needs to be specified in --packages as it is not included with spark-shell by default</li><li>spark-avro and spark versions must match (we have used 3.1.2 for both above)</li><li>we have used hudi-spark-bundle built for scala 2.12 since the spark-avro module used also depends on 2.12. If spark-avro_2.11 is used, correspondingly hudi-spark-bundle_2.11 needs to be used. </li></ul></div></div><p>Setup table name, base path and a data generator to generate records for this guide.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.hudi.QuickstartUtils._</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import scala.collection.JavaConversions._</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.spark.sql.SaveMode._</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.hudi.DataSourceReadOptions._</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.hudi.DataSourceWriteOptions._</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.hudi.config.HoodieWriteConfig._</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.hudi.common.model.HoodieRecord</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val tableName = &quot;hudi_trips_cow&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val basePath = &quot;file:///tmp/hudi_trips_cow&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val dataGen = new DataGenerator</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tableName </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_cow&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">basePath </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;file:///tmp/hudi_trips_cow&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">dataGen </span><span class="token operator">=</span><span class="token plain"> sc</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">_jvm</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">org</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">apache</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hudi</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">QuickstartUtils</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">DataGenerator</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><div class="admonition admonition-tip alert alert--success"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="12" height="16" viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tip</h5></div><div class="admonition-content"><p>The <a href="https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L51" target="_blank" rel="noopener noreferrer">DataGenerator</a>
can generate sample inserts and updates based on the the sample trip schema <a href="https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L58" target="_blank" rel="noopener noreferrer">here</a></p></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="spark-sql-type-support">Spark SQL Type Support<a class="hash-link" href="#spark-sql-type-support" title="Direct link to heading"></a></h2><table><thead><tr><th>Spark</th><th>Hudi</th><th>Notes</th></tr></thead><tbody><tr><td>boolean</td><td>boolean</td><td></td></tr><tr><td>byte</td><td>int</td><td></td></tr><tr><td>short</td><td>int</td><td></td></tr><tr><td>integer</td><td>int</td><td></td></tr><tr><td>long</td><td>long</td><td></td></tr><tr><td>date</td><td>date</td><td></td></tr><tr><td>timestamp</td><td>timestamp</td><td></td></tr><tr><td>float</td><td>float</td><td></td></tr><tr><td>double</td><td>double</td><td></td></tr><tr><td>string</td><td>string</td><td></td></tr><tr><td>decimal</td><td>decimal</td><td></td></tr><tr><td>binary</td><td>bytes</td><td></td></tr><tr><td>array</td><td>array</td><td></td></tr><tr><td>map</td><td>map</td><td></td></tr><tr><td>struct</td><td>struct</td><td></td></tr><tr><td>char</td><td></td><td>not supported</td></tr><tr><td>varchar</td><td></td><td>not supported</td></tr><tr><td>numeric</td><td></td><td>not supported</td></tr><tr><td>null</td><td></td><td>not supported</td></tr><tr><td>object</td><td></td><td>not supported</td></tr></tbody></table><h2 class="anchor anchorWithStickyNavbar_y2LR" id="create-table">Create Table<a class="hash-link" href="#create-table" title="Direct link to heading"></a></h2><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// scala</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// No separate create table command required in spark. First batch of write to a table will create the table if not exists. </span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># No separate create table command required in spark. First batch of write to a table will create the table if not exists.</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><p>Spark SQL needs an explicit create table command.</p><p><strong>Table Concepts</strong></p><ul><li><p>Table types:
Both of Hudi&#x27;s table types (Copy-On-Write (COW) and Merge-On-Read (MOR)) can be created using Spark SQL.</p><p>While creating the table, table type can be specified using <strong>type</strong> option. <strong>type = &#x27;cow&#x27;</strong> represents COW table, while <strong>type = &#x27;mor&#x27;</strong> represents MOR table.</p></li><li><p>Partitioned &amp; Non-Partitioned table:
Users can create a partitioned table or a non-partitioned table in Spark SQL.
To create a partitioned table, one needs to use <strong>partitioned by</strong> statement to specify the partition columns to create a partitioned table.
When there is no <strong>partitioned by</strong> statement with create table command, table is considered to be a non-partitioned table.</p></li><li><p>Managed &amp; External table:
In general, Spark SQL supports two kinds of tables, namely managed and external.
If one specifies a location using <strong>location</strong> statement or use <code>create external table</code> to create table explicitly, it is an external table, else its considered a managed table.
You can read more about external vs managed tables <a href="https://sparkbyexamples.com/apache-hive/difference-between-hive-internal-tables-and-external-tables/" target="_blank" rel="noopener noreferrer">here</a>.</p></li></ul><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><ol><li>Since hudi 0.10.0, <code>primaryKey</code> is required to specify. It can align with Hudi datasource writer’s and resolve many behavioural discrepancies reported in previous versions.
Non-primaryKey tables are no longer supported. Any hudi table created pre 0.10.0 without a <code>primaryKey</code> needs to be recreated with a <code>primaryKey</code> field with 0.10.0.
Same as <code>hoodie.datasource.write.recordkey.field</code>, hudi use <code>uuid</code> as the default primaryKey. So if you want to use <code>uuid</code> as your table&#x27;s <code>primaryKey</code>, you can omit the <code>primaryKey</code> config in <code>tblproperties</code>.</li><li><code>primaryKey</code>, <code>preCombineField</code>, <code>type</code> is case sensitive.</li><li>To specify <code>primaryKey</code>, <code>preCombineField</code>, <code>type</code> or other hudi configs, <code>tblproperties</code> is the preferred way than <code>options</code>. Spark SQL syntax is detailed here.</li><li>A new hudi table created by Spark SQL will set <code>hoodie.table.keygenerator.class</code> as <code>org.apache.hudi.keygen.ComplexKeyGenerator</code>, and
<code>hoodie.datasource.write.hive_style_partitioning</code> as <code>true</code> by default.
:::</li></ol><p>Let&#x27;s go over some of the create table commands.</p><p><strong>Create a Non-Partitioned Table</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- create a cow table, with default primaryKey &#x27;uuid&#x27; and without preCombineField provided</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_cow_nonpcf_tbl </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> uuid </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> name string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> price </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">double</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- create a mor non-partitioned table without preCombineField provided</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_mor_tbl </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> id </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> name string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> price </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">double</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bigint</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tblproperties </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">type</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;mor&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> primaryKey </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> preCombineField </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Here is an example of creating an external COW partitioned table.</p><p><strong>Create Partitioned Table</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- create a partitioned, preCombineField-provided cow table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> id </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bigint</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> name string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bigint</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> dt string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> hh string</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tblproperties </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">type</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;cow&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> primaryKey </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> preCombineField </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">partitioned </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">by</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">location </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;/tmp/hudi/hudi_cow_pt_tbl&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Create Table for an existing Hudi Table</strong></p><p>We can create a table on an existing hudi table(created with spark-shell or deltastreamer). This is useful to
read/write to/from a pre-existing hudi table.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- create an external hudi table based on an existing path</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- for non-partitioned table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_existing_tbl0 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">location </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;file:///tmp/hudi/dataframe_hudi_nonpt_table&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- for partitioned table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_existing_tbl1 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">partitioned </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">by</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">location </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;file:///tmp/hudi/dataframe_hudi_pt_table&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div><p>You don&#x27;t need to specify schema and any properties except the partitioned columns if existed. Hudi can automatically recognize the schema and configurations.
:::</p><p><strong>CTAS</strong></p><p>Hudi supports CTAS (Create Table As Select) on Spark SQL. <br>
Note: For better performance to load data to hudi table, CTAS uses the <strong>bulk insert</strong> as the write operation.</p><p>Example CTAS command to create a non-partitioned COW table without preCombineField.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- CTAS: create a non-partitioned cow table without preCombineField</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_ctas_cow_nonpcf_tbl</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tblproperties </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">primaryKey </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">10</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> price</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Example CTAS command to create a partitioned, primary key COW table.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- CTAS: create a partitioned, preCombineField-provided cow table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_ctas_cow_pt_tbl</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tblproperties </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">type</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;cow&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> primaryKey </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> preCombineField </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">partitioned </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">by</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">10</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> price</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1000</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> ts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-01&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Example CTAS command to load data from another table.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># create managed parquet table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> parquet_mngd </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> parquet location </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;file:///tmp/parquet_dataset/*.parquet&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># CTAS by loading data into hudi table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_ctas_cow_pt_tbl2 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi location </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;file:/tmp/hudi/hudi_tbl/&#x27;</span><span class="token plain"> options </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">type</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;cow&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> primaryKey </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> preCombineField </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">partitioned </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">by</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">datestr</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> parquet_mngd</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Create Table Properties</strong></p><p>Users can set table properties while creating a hudi table. Critical options are listed here.</p><table><thead><tr><th>Parameter Name</th><th>Default</th><th>Introduction</th></tr></thead><tbody><tr><td>primaryKey</td><td>uuid</td><td>The primary key names of the table, multiple fields separated by commas. Same as <code>hoodie.datasource.write.recordkey.field</code></td></tr><tr><td>preCombineField</td><td></td><td>The pre-combine field of the table. Same as <code>hoodie.datasource.write.precombine.field</code></td></tr><tr><td>type</td><td>cow</td><td>The table type to create. type = &#x27;cow&#x27; means a COPY-ON-WRITE table, while type = &#x27;mor&#x27; means a MERGE-ON-READ table. Same as <code>hoodie.datasource.write.table.type</code></td></tr></tbody></table><p>To set any custom hudi config(like index type, max parquet size, etc), see the &quot;Set hudi config section&quot; .</p></div></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="insert-data">Insert data<a class="hash-link" href="#insert-data" title="Direct link to heading"></a></h2><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><p>Generate some new trips, load them into a DataFrame and write the DataFrame into the Hudi table as below.</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val inserts = convertToStringList(dataGen.generateInserts(10))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Overwrite).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p><code>mode(Overwrite)</code> overwrites and recreates the table if it already exists.
You can check the data generated under <code>/tmp/hudi_trips_cow/&lt;region&gt;/&lt;country&gt;/&lt;city&gt;/</code>. We provided a record key
(<code>uuid</code> in <a href="https://github.com/apache/hudi/blob/master/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L58" target="_blank" rel="noopener noreferrer">schema</a>), partition field (<code>region/country/city</code>) and combine logic (<code>ts</code> in
<a href="https://github.com/apache/hudi/blob/master/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L58" target="_blank" rel="noopener noreferrer">schema</a>) to ensure trip records are unique within each partition. For more info, refer to
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113709185#FAQ-HowdoImodelthedatastoredinHudi" target="_blank" rel="noopener noreferrer">Modeling data stored in Hudi</a>
and for info on ways to ingest data into Hudi, refer to <a href="/docs/writing_data">Writing Hudi Tables</a>.
Here we are using the default write operation : <code>upsert</code>. If you have a workload without updates, you can also issue
<code>insert</code> or <code>bulk_insert</code> operations which could be faster. To know more, refer to <a href="/docs/writing_data#write-operations">Write operations</a></p></div></div></div><div role="tabpanel" hidden="">Generate some new trips, load them into a DataFrame and write the DataFrame into the Hudi table as below.<div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">inserts </span><span class="token operator">=</span><span class="token plain"> sc</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">_jvm</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">org</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">apache</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hudi</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">QuickstartUtils</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">convertToStringList</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dataGen</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">generateInserts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">json</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sparkContext</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">parallelize</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">inserts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.recordkey.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.partitionpath.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.operation&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;upsert&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.precombine.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.upsert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.insert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">write</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;overwrite&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p><code>mode(Overwrite)</code> overwrites and recreates the table if it already exists.
You can check the data generated under <code>/tmp/hudi_trips_cow/&lt;region&gt;/&lt;country&gt;/&lt;city&gt;/</code>. We provided a record key
(<code>uuid</code> in <a href="https://github.com/apache/hudi/blob/master/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L58" target="_blank" rel="noopener noreferrer">schema</a>), partition field (<code>region/country/city</code>) and combine logic (<code>ts</code> in
<a href="https://github.com/apache/hudi/blob/master/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java#L58" target="_blank" rel="noopener noreferrer">schema</a>) to ensure trip records are unique within each partition. For more info, refer to
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113709185#FAQ-HowdoImodelthedatastoredinHudi" target="_blank" rel="noopener noreferrer">Modeling data stored in Hudi</a>
and for info on ways to ingest data into Hudi, refer to <a href="/docs/writing_data">Writing Hudi Tables</a>.
Here we are using the default write operation : <code>upsert</code>. If you have a workload without updates, you can also issue
<code>insert</code> or <code>bulk_insert</code> operations which could be faster. To know more, refer to <a href="/docs/writing_data#write-operations">Write operations</a></p></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- insert into non-partitioned table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_cow_nonpcf_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1000</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert dynamic partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">partition</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1000</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> ts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;10&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert static partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">partition</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;11&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a2&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1000</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>NOTICE</strong></p><ul><li>By default, if <code>preCombineKey </code> is provided, <code>insert into</code> use <code>upsert</code> as the type of write operation, otherwise use <code>insert</code>.</li><li>We support to use <code>bulk_insert</code> as the type of write operation, just need to set two configs: <code>hoodie.sql.bulk.insert.enable</code> and <code>hoodie.sql.insert.mode</code>. Example as follow:</li></ul><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- upsert mode for preCombineField-provided table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1_1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1001</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> price</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> hudi_mor_tbl</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token number">1</span><span class="token plain"> a1_1 </span><span class="token number">20.0</span><span class="token plain"> </span><span class="token number">1001</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- bulk_insert mode for preCombineField-provided table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> hoodie</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bulk</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">enable</span><span class="token operator">=</span><span class="token boolean">true</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> hoodie</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">mode</span><span class="token operator">=</span><span class="token plain">non</span><span class="token operator">-</span><span class="token plain">strict</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1_2&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1002</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> price</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> hudi_mor_tbl</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token number">1</span><span class="token plain"> a1_1 </span><span class="token number">20.0</span><span class="token plain"> </span><span class="token number">1001</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token number">1</span><span class="token plain"> a1_2 </span><span class="token number">20.0</span><span class="token plain"> </span><span class="token number">1002</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><p>Checkout <a href="https://hudi.apache.org/blog/2021/02/13/hudi-key-generators" target="_blank" rel="noopener noreferrer">https://hudi.apache.org/blog/2021/02/13/hudi-key-generators</a> for various key generator options, like Timestamp based,
complex, custom, NonPartitioned Key gen, etc.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="query-data">Query data<a class="hash-link" href="#query-data" title="Direct link to heading"></a></h2><p>Load the data files into a DataFrame.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val tripsSnapshotDF = spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">//load(basePath) use &quot;/partitionKey=partitionValue&quot; folder structure for Spark auto partition discovery</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsSnapshotDF.createOrReplaceTempView(&quot;hudi_trips_snapshot&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select fare, begin_lon, begin_lat, ts from hudi_trips_snapshot where fare &gt; 20.0&quot;).show()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select _hoodie_commit_time, _hoodie_record_key, _hoodie_partition_path, rider, driver, fare from hudi_trips_snapshot&quot;).show()</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="time-travel-query">Time Travel Query<a class="hash-link" href="#time-travel-query" title="Direct link to heading"></a></h3><p>Hudi support time travel query since 0.9.0. Currently three query time formats are supported as given below.</p><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(&quot;as.of.instant&quot;, &quot;20210728141108100&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(&quot;as.of.instant&quot;, &quot;2021-07-28 14:11:08.200&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// It is equal to &quot;as.of.instant = 2021-07-28 00:00:00&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(&quot;as.of.instant&quot;, &quot;2021-07-28&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p>Since 0.9.0 hudi has support a hudi built-in FileIndex: <strong>HoodieFileIndex</strong> to query hudi table,
which supports partition pruning and metatable for query. This will help improve query performance.
It also supports non-global query path which means users can query the table by the base path without
specifing the &quot;*&quot; in the query path. This feature has enabled by default for the non-global query path.
For the global query path, hudi uses the old query path.
Refer to <a href="/docs/concepts#table-types--queries">Table types and queries</a> for more info on all table types and query types supported.</p></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> fare</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> begin_lon</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> begin_lat</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> hudi_trips_snapshot </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">where</span><span class="token plain"> fare </span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token number">20.0</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsSnapshotDF </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># load(basePath) use &quot;/partitionKey=partitionValue&quot; folder structure for Spark auto partition discovery</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsSnapshotDF</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">createOrReplaceTempView</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select fare, begin_lon, begin_lat, ts from hudi_trips_snapshot where fare &gt; 20.0&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">show</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select _hoodie_commit_time, _hoodie_record_key, _hoodie_partition_path, rider, driver, fare from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">show</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Time Travel Query</strong></p><p>Hudi support time travel query since 0.9.0. Currently three query time formats are supported as given below.</p><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">#pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;as.of.instant&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;20210728141108&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;as.of.instant&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;2021-07-28 14: 11: 08&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token operator">//</span><span class="token plain"> It </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">is</span><span class="token plain"> equal to </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;as.of.instant = 2021-07-28 00:00:00&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;as.of.instant&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;2021-07-28&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p>Since 0.9.0 hudi has support a hudi built-in FileIndex: <strong>HoodieFileIndex</strong> to query hudi table,
which supports partition pruning and metatable for query. This will help improve query performance.
It also supports non-global query path which means users can query the table by the base path without
specifing the &quot;*&quot; in the query path. This feature has enabled by default for the non-global query path.
For the global query path, hudi uses the old query path.
Refer to <a href="/docs/concepts#table-types--queries">Table types and queries</a> for more info on all table types and query types supported.</p></div></div></div></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="update-data">Update data<a class="hash-link" href="#update-data" title="Direct link to heading"></a></h2><p>This is similar to inserting new data. Generate updates to existing trips using the data generator, load into a DataFrame
and write DataFrame into the hudi table.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val updates = convertToStringList(dataGen.generateUpdates(10))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.read.json(spark.sparkContext.parallelize(updates, 2))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Notice that the save mode is now <code>Append</code>. In general, always use append mode unless you are trying to create the table for the first time.
<a href="#query-data">Querying</a> the data again will now show updated trips. Each write operation generates a new <a href="/docs/concepts">commit</a>
denoted by the timestamp. Look for changes in <code>_hoodie_commit_time</code>, <code>rider</code>, <code>driver</code> fields for the same <code>_hoodie_record_key</code>s in previous commit.</p></div></div></div><div role="tabpanel" hidden=""><p>Spark SQL supports two kinds of DML to update hudi table: Merge-Into and Update.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="update">Update<a class="hash-link" href="#update" title="Direct link to heading"></a></h3><p><strong>Syntax</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">UPDATE</span><span class="token plain"> tableIdentifier </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">SET</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">column</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> EXPRESSION</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">column</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> EXPRESSION</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WHERE</span><span class="token plain"> boolExpression</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Case</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">update</span><span class="token plain"> hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> price </span><span class="token operator">=</span><span class="token plain"> price </span><span class="token operator">*</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token operator">=</span><span class="token plain"> </span><span class="token number">1111</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">where</span><span class="token plain"> id </span><span class="token operator">=</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">update</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> name </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a1_1&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token operator">=</span><span class="token plain"> </span><span class="token number">1001</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">where</span><span class="token plain"> id </span><span class="token operator">=</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p><code>Update</code> operation requires <code>preCombineField</code> specified.</p></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="mergeinto">MergeInto<a class="hash-link" href="#mergeinto" title="Direct link to heading"></a></h3><p><strong>Syntax</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">MERGE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">INTO</span><span class="token plain"> tableIdentifier </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">AS</span><span class="token plain"> target_alias</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">USING</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">sub_query </span><span class="token operator">|</span><span class="token plain"> tableIdentifier</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">AS</span><span class="token plain"> source_alias</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ON</span><span class="token plain"> </span><span class="token operator">&lt;</span><span class="token plain">merge_condition</span><span class="token operator">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WHEN</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">MATCHED</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token operator">AND</span><span class="token plain"> </span><span class="token operator">&lt;</span><span class="token plain">condition</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">THEN</span><span class="token plain"> </span><span class="token operator">&lt;</span><span class="token plain">matched_action</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WHEN</span><span class="token plain"> </span><span class="token operator">NOT</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">MATCHED</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token operator">AND</span><span class="token plain"> </span><span class="token operator">&lt;</span><span class="token plain">condition</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">THEN</span><span class="token plain"> </span><span class="token operator">&lt;</span><span class="token plain">not_matched_action</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token operator">&lt;</span><span class="token plain">merge_condition</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain">A equal </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bool</span><span class="token plain"> condition </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token operator">&lt;</span><span class="token plain">matched_action</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">DELETE</span><span class="token plain"> </span><span class="token operator">|</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">UPDATE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">SET</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"> </span><span class="token operator">|</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">UPDATE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">SET</span><span class="token plain"> column1 </span><span class="token operator">=</span><span class="token plain"> expression1 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> column2 </span><span class="token operator">=</span><span class="token plain"> expression2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token operator">&lt;</span><span class="token plain">not_matched_action</span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">INSERT</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"> </span><span class="token operator">|</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">INSERT</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">column1 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> column2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">VALUES</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">value1 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> value2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Example</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- source table using hudi for testing merging into non-partitioned table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> merge_source </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">id </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> price </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">double</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bigint</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> hudi</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tblproperties </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">primaryKey </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> preCombineField </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> merge_source </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">values</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;old_a1&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">22.22</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">900</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;new_a2&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">33.33</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">2000</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">3</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;new_a3&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">44.44</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">2000</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">merge</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> target</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> merge_source </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> source</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">on</span><span class="token plain"> target</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">id </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">id</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">when</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">matched</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">then</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">update</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">when</span><span class="token plain"> </span><span class="token operator">not</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">matched</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">then</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- source table using parquet for testing merging into partitioned table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">create</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> merge_source2 </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">id </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">int</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> flag string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> dt string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> parquet</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> merge_source2 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">values</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;new_a1&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;update&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;10&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;new_a2&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;delete&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;11&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">3</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;new_a3&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;insert&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;12&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">merge</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">into</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> target</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">using</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;1000&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">as</span><span class="token plain"> ts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> flag</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> merge_source2</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> source</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">on</span><span class="token plain"> target</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">id </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">id</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">when</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">matched</span><span class="token plain"> </span><span class="token operator">and</span><span class="token plain"> flag </span><span class="token operator">!=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;delete&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">then</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">update</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> id </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">ts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> dt </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh </span><span class="token operator">=</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hh</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">when</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">matched</span><span class="token plain"> </span><span class="token operator">and</span><span class="token plain"> flag </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;delete&#x27;</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">then</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">delete</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">when</span><span class="token plain"> </span><span class="token operator">not</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">matched</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">then</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">values</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">id</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">ts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">dt</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> source</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hh</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">updates </span><span class="token operator">=</span><span class="token plain"> sc</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">_jvm</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">org</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">apache</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hudi</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">QuickstartUtils</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">convertToStringList</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dataGen</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">generateUpdates</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">json</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sparkContext</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">parallelize</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">updates</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">write</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;append&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Notice that the save mode is now <code>Append</code>. In general, always use append mode unless you are trying to create the table for the first time.
<a href="#query-data">Querying</a> the data again will now show updated trips. Each write operation generates a new <a href="/docs/concepts">commit</a>
denoted by the timestamp. Look for changes in <code>_hoodie_commit_time</code>, <code>rider</code>, <code>driver</code> fields for the same <code>_hoodie_record_key</code>s in previous commit.</p></div></div></div></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="incremental-query">Incremental query<a class="hash-link" href="#incremental-query" title="Direct link to heading"></a></h2><p>Hudi also provides capability to obtain a stream of records that changed since given commit timestamp.
This can be achieved using Hudi&#x27;s incremental querying and providing a begin time from which changes need to be streamed.
We do not need to specify endTime, if we want all changes after the given commit (as is the common case).</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// reload data</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> createOrReplaceTempView(&quot;hudi_trips_snapshot&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val commits = spark.sql(&quot;select distinct(_hoodie_commit_time) as commitTime from hudi_trips_snapshot order by commitTime&quot;).map(k =&gt; k.getString(0)).take(50)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val beginTime = commits(commits.length - 2) // commit time we are interested in</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// incrementally query data</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val tripsIncrementalDF = spark.read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(QUERY_TYPE_OPT_KEY, QUERY_TYPE_INCREMENTAL_OPT_VAL).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(BEGIN_INSTANTTIME_OPT_KEY, beginTime).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsIncrementalDF.createOrReplaceTempView(&quot;hudi_trips_incremental&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_trips_incremental where fare &gt; 20.0&quot;).show()</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># reload data</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> createOrReplaceTempView</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">commits </span><span class="token operator">=</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">list</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token builtin" style="color:rgb(189, 147, 249)">map</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">lambda</span><span class="token plain"> row</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> row</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select distinct(_hoodie_commit_time) as commitTime from hudi_trips_snapshot order by commitTime&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">limit</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">50</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">collect</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beginTime </span><span class="token operator">=</span><span class="token plain"> commits</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token builtin" style="color:rgb(189, 147, 249)">len</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">commits</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token operator">-</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"> </span><span class="token comment" style="color:rgb(98, 114, 164)"># commit time we are interested in</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># incrementally query data</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">incremental_read_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.query.type&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;incremental&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.read.begin.instanttime&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> beginTime</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsIncrementalDF </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">incremental_read_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsIncrementalDF</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">createOrReplaceTempView</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_incremental&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_trips_incremental where fare &gt; 20.0&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">show</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p>This will give all changes that happened after the beginTime commit with the filter of fare &gt; 20.0. The unique thing about this
feature is that it now lets you author streaming pipelines on batch data.</p></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="structured-streaming">Structured Streaming<a class="hash-link" href="#structured-streaming" title="Direct link to heading"></a></h2><p>Hudi supports Spark Structured Streaming reads and writes.
Structured Streaming reads are based on Hudi Incremental Query feature, therefore streaming read can return data for which commits and base files were not yet removed by the cleaner. You can control commits retention time.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="streaming-read">Streaming Read<a class="hash-link" href="#streaming-read" title="Direct link to heading"></a></h3><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// reload data</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Overwrite).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// read stream and output results to console</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.readStream.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> writeStream.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;console&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> start()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// read stream to streaming df</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.readStream.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># reload data</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">inserts </span><span class="token operator">=</span><span class="token plain"> sc</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">_jvm</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">org</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">apache</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">hudi</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">QuickstartUtils</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">convertToStringList</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> dataGen</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">generateInserts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">json</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sparkContext</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">parallelize</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">inserts</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.recordkey.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.partitionpath.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.operation&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;upsert&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.precombine.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.upsert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.insert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">write</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;overwrite&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># read stream to streaming df</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">readStream \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># read stream and output results to console</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">readStream \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">writeStream \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;console&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">start</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="streaming-write">Streaming Write<a class="hash-link" href="#streaming-write" title="Direct link to heading"></a></h3><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// prepare to stream write to new table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.spark.sql.streaming.Trigger</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val streamingTableName = &quot;hudi_trips_cow_streaming&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val baseStreamingPath = &quot;file:///tmp/hudi_trips_cow_streaming&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val checkpointLocation = &quot;file:///tmp/checkpoints/hudi_trips_cow_streaming&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// create streaming df</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.readStream.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// write stream to new hudi table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.writeStream.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, streamingTableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> outputMode(&quot;append&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(&quot;path&quot;, baseStreamingPath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(&quot;checkpointLocation&quot;, checkpointLocation).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> trigger(Trigger.Once()).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> start()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># prepare to stream write to new table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">streamingTableName </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_cow_streaming&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">baseStreamingPath </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;file:///tmp/hudi_trips_cow_streaming&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">checkpointLocation </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;file:///tmp/checkpoints/hudi_trips_cow_streaming&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi_streaming_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> streamingTableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.recordkey.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.partitionpath.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> streamingTableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.operation&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;upsert&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.precombine.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.upsert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.insert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># create streaming df</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">readStream \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># write stream to new hudi table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">writeStream</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_streaming_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">outputMode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;append&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">option</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;path&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> baseStreamingPath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">option</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;checkpointLocation&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> checkpointLocation</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">trigger</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">once</span><span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">start</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><div class="admonition admonition-info alert alert--info"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</h5></div><div class="admonition-content"><p>Spark SQL can be used within ForeachBatch sink to do INSERT, UPDATE, DELETE and MERGE INTO.
Target table must exist before write.</p></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="table-maintenance">Table maintenance<a class="hash-link" href="#table-maintenance" title="Direct link to heading"></a></h3><p>Hudi can run async or inline table services while running Structured Streaming query and takes care of cleaning, compaction and clustering. There&#x27;s no operational overhead for the user.<br>
<!-- -->For CoW tables, table services work in inline mode by default.<br>
<!-- -->For MoR tables, some async services are enabled by default.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>In Hudi 0.10 Metadata Table was introduced. When using async table services with Metadata Table enabled you must use Optimistic Concurrency Control to avoid the risk of data loss (even in single writer scenario). See <a href="/docs/0.10.0/metadata#deployment-considerations">Metadata Table deployment considerations</a> for detailed instructions.</p><p>If you&#x27;re using Foreach or ForeachBatch streaming sink you must use inline table services, async table services are not supported.</p></div></div><p>Hive Sync works with Structured Streaming, it will create table if not exists and synchronize table to metastore aftear each streaming write.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="point-in-time-query">Point in time query<a class="hash-link" href="#point-in-time-query" title="Direct link to heading"></a></h2><p>Lets look at how to query data as of a specific time. The specific time can be represented by pointing endTime to a
specific commit time and beginTime to &quot;000&quot; (denoting earliest possible commit time).</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val beginTime = &quot;000&quot; // Represents all commits &gt; this time.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val endTime = commits(commits.length - 2) // commit time we are interested in</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">//incrementally query data</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val tripsPointInTimeDF = spark.read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(QUERY_TYPE_OPT_KEY, QUERY_TYPE_INCREMENTAL_OPT_VAL).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(BEGIN_INSTANTTIME_OPT_KEY, beginTime).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(END_INSTANTTIME_OPT_KEY, endTime).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsPointInTimeDF.createOrReplaceTempView(&quot;hudi_trips_point_in_time&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_trips_point_in_time where fare &gt; 20.0&quot;).show()</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beginTime </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;000&quot;</span><span class="token plain"> </span><span class="token comment" style="color:rgb(98, 114, 164)"># Represents all commits &gt; this time.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">endTime </span><span class="token operator">=</span><span class="token plain"> commits</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token builtin" style="color:rgb(189, 147, 249)">len</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">commits</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token operator">-</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># query point in time data</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">point_in_time_read_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.query.type&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;incremental&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.read.end.instanttime&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> endTime</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.read.begin.instanttime&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> beginTime</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsPointInTimeDF </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">point_in_time_read_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">tripsPointInTimeDF</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">createOrReplaceTempView</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_point_in_time&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_trips_point_in_time where fare &gt; 20.0&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">show</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="deletes">Delete data<a class="hash-link" href="#deletes" title="Direct link to heading"></a></h2><p>Apache Hudi supports two types of deletes: (1) <strong>Soft Deletes</strong>: retaining the record key and just nulling out the values
for all the other fields (records with nulls in soft deletes are always persisted in storage and never removed);
(2) <strong>Hard Deletes</strong>: physically removing any trace of the record from the table. See the
<a href="/docs/writing_data#deletes">deletion section</a> of the writing data page for more details.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="soft-deletes">Soft Deletes<a class="hash-link" href="#soft-deletes" title="Direct link to heading"></a></h3><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> createOrReplaceTempView(&quot;hudi_trips_snapshot&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// fetch total records count</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;).count()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot where rider is not null&quot;).count()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// fetch two records for soft deletes</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val softDeleteDs = spark.sql(&quot;select * from hudi_trips_snapshot&quot;).limit(2)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// prepare the soft deletes by ensuring the appropriate fields are nullified</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val nullifyColumns = softDeleteDs.schema.fields.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> map(field =&gt; (field.name, field.dataType.typeName)).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> filter(pair =&gt; (!HoodieRecord.HOODIE_META_COLUMNS.contains(pair._1)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &amp;&amp; !Array(&quot;ts&quot;, &quot;uuid&quot;, &quot;partitionpath&quot;).contains(pair._1)))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val softDeleteDf = nullifyColumns.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> foldLeft(softDeleteDs.drop(HoodieRecord.HOODIE_META_COLUMNS: _*))(</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> (ds, col) =&gt; ds.withColumn(col._1, lit(null).cast(col._2)))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// simply upsert the table after setting these fields to null</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">softDeleteDf.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(OPERATION_OPT_KEY, &quot;upsert&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// reload data</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> createOrReplaceTempView(&quot;hudi_trips_snapshot&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// This should return the same total count as before</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;).count()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// This should return (total - 2) count as two records are updated with nulls</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot where rider is not null&quot;).count()</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Notice that the save mode is <code>Append</code>.</p></div></div></div><div role="tabpanel" hidden=""><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> pyspark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">functions </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">import</span><span class="token plain"> lit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> functools </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">import</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">reduce</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> createOrReplaceTempView</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># fetch total records count</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">count</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot where rider is not null&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">count</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># fetch two records for soft deletes</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">soft_delete_ds </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select * from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">limit</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># prepare the soft deletes by ensuring the appropriate fields are nullified</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">meta_columns </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;_hoodie_commit_time&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;_hoodie_commit_seqno&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;_hoodie_record_key&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;_hoodie_partition_path&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;_hoodie_file_name&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">excluded_columns </span><span class="token operator">=</span><span class="token plain"> meta_columns </span><span class="token operator">+</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;ts&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;uuid&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&quot;partitionpath&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">nullify_columns </span><span class="token operator">=</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">list</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token builtin" style="color:rgb(189, 147, 249)">filter</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">lambda</span><span class="token plain"> field</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> field</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">not</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">in</span><span class="token plain"> excluded_columns</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">list</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token builtin" style="color:rgb(189, 147, 249)">map</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">lambda</span><span class="token plain"> field</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">field</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">name</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> field</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">dataType</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> softDeleteDs</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">schema</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">fields</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi_soft_delete_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.recordkey.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.partitionpath.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.operation&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;upsert&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.precombine.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.upsert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.insert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">soft_delete_df </span><span class="token operator">=</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">reduce</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">lambda</span><span class="token plain"> df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain">col</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">withColumn</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">col</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> lit</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token boolean">None</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">cast</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">col</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> nullify_columns</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">reduce</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">lambda</span><span class="token plain"> df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain">col</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">drop</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">col</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> meta_columns</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> soft_delete_ds</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># simply upsert the table after setting these fields to null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">soft_delete_df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">write</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_soft_delete_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;append&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># reload data</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> createOrReplaceTempView</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># This should return the same total count as before</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">count</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># This should return (total - 2) count as two records are updated with nulls</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot where rider is not null&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">count</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Notice that the save mode is <code>Append</code>.</p></div></div></div></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="hard-deletes">Hard Deletes<a class="hash-link" href="#hard-deletes" title="Direct link to heading"></a></h3><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Python</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel">Delete records for the HoodieKeys passed in.<br><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// fetch total records count</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;).count()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// fetch two records to be deleted</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val ds = spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;).limit(2)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// issue deletes</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val deletes = dataGen.generateDeletes(ds.collectAsList())</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val hardDeleteDf = spark.read.json(spark.sparkContext.parallelize(deletes, 2))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hardDeleteDf.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(OPERATION_OPT_KEY,&quot;delete&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD_OPT_KEY, &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD_OPT_KEY, &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD_OPT_KEY, &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TABLE_NAME, tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// run the same read query as above.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val roAfterDeleteViewDF = spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">roAfterDeleteViewDF.registerTempTable(&quot;hudi_trips_snapshot&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// fetch should return (total - 2) records</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.sql(&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;).count()</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Only <code>Append</code> mode is supported for delete operation.</p></div></div></div><div role="tabpanel" hidden=""><p><strong>Syntax</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">DELETE</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">FROM</span><span class="token plain"> tableIdentifier </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">WHERE</span><span class="token plain"> BOOL_EXPRESSION</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Example</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">delete</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> hudi_cow_nonpcf_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">where</span><span class="token plain"> uuid </span><span class="token operator">=</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">delete</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">where</span><span class="token plain"> id </span><span class="token operator">%</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token number">0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden="">Delete records for the HoodieKeys passed in.<br><div class="codeBlockContainer_J+bg language-python theme-code-block"><div class="codeBlockContent_csEI python"><pre tabindex="0" class="prism-code language-python codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)"># pyspark</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># fetch total records count</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">count</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># fetch two records to be deleted</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">ds </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">limit</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># issue deletes</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi_hard_delete_options </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.recordkey.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.partitionpath.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.table.name&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> tableName</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.operation&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;delete&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.datasource.write.precombine.field&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.upsert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hoodie.insert.shuffle.parallelism&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token number">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> pyspark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">functions </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">import</span><span class="token plain"> lit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">deletes </span><span class="token operator">=</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">list</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token builtin" style="color:rgb(189, 147, 249)">map</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">lambda</span><span class="token plain"> row</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">row</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> row</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> ds</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">collect</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hard_delete_df </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sparkContext</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">parallelize</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">deletes</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">toDF</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;uuid&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;partitionpath&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">withColumn</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> lit</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token number">0.0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hard_delete_df</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">write</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token operator">**</span><span class="token plain">hudi_hard_delete_options</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;append&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># run the same read query as above.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">roAfterDeleteViewDF </span><span class="token operator">=</span><span class="token plain"> spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token builtin" style="color:rgb(189, 147, 249)">format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">basePath</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">roAfterDeleteViewDF</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">registerTempTable</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)"># fetch should return (total - 2) records</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">sql</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;select uuid, partitionpath from hudi_trips_snapshot&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">count</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Only <code>Append</code> mode is supported for delete operation.</p></div></div></div></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="insert-overwrite">Insert Overwrite<a class="hash-link" href="#insert-overwrite" title="Direct link to heading"></a></h2><p>Generate some new trips, overwrite the all the partitions that are present in the input. This operation can be faster
than <code>upsert</code> for batch ETL jobs, that are recomputing entire target partitions at once (as opposed to incrementally
updating the target tables). This is because, we are able to bypass indexing, precombining and other repartitioning
steps in the upsert write path completely.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Scala</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">SparkSQL</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-scala theme-code-block"><div class="codeBlockContent_csEI scala"><pre tabindex="0" class="prism-code language-scala codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">// spark-shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> select(&quot;uuid&quot;,&quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> sort(&quot;partitionpath&quot;,&quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val inserts = convertToStringList(dataGen.generateInserts(10))</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">val df = spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.json(spark.sparkContext.parallelize(inserts, 2)).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> filter(&quot;partitionpath = &#x27;americas/united_states/san_francisco&#x27;&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">df.write.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options(getQuickstartWriteConfigs).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(OPERATION.key(),&quot;insert_overwrite&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PRECOMBINE_FIELD.key(), &quot;ts&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(RECORDKEY_FIELD.key(), &quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(PARTITIONPATH_FIELD.key(), &quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> option(TBL_NAME.key(), tableName).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> mode(Append).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> save(basePath)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">// Should have different keys now for San Francisco alone, from query before.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> read.format(&quot;hudi&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> load(basePath).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> select(&quot;uuid&quot;,&quot;partitionpath&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> sort(&quot;partitionpath&quot;,&quot;uuid&quot;).</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> show(100, false)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><p><code>insert overwrite</code> a partitioned table use the <code>INSERT_OVERWRITE</code> type of write operation, while a non-partitioned table to <code>INSERT_OVERWRITE_TABLE</code>.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- insert overwrite non-partitioned table</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite hudi_mor_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">99</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a99&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20.0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">900</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite hudi_cow_nonpcf_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">99</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a99&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">20.0</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert overwrite partitioned table with dynamic partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">10</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a10&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1100</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;10&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- insert overwrite partitioned table with static partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">insert</span><span class="token plain"> overwrite hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">partition</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;12&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token number">13</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;a13&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token number">1100</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="more-spark-sql-commands">More Spark SQL Commands<a class="hash-link" href="#more-spark-sql-commands" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_y2LR" id="altertable">AlterTable<a class="hash-link" href="#altertable" title="Direct link to heading"></a></h3><p><strong>Syntax</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- Alter table name</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> oldTableName </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">RENAME</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TO</span><span class="token plain"> newTableName</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- Alter table add columns</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> tableIdentifier </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ADD</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">COLUMNS</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">colAndType </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain">colAndType</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token operator">*</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- Alter table column type</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> tableIdentifier CHANGE </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">COLUMN</span><span class="token plain"> colName colName colType</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- Alter table properties</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> tableIdentifier </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">SET</span><span class="token plain"> TBLPROPERTIES </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">key</span><span class="token plain"> </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;value&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Examples</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">--rename to:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> hudi_cow_nonpcf_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">RENAME</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TO</span><span class="token plain"> hudi_cow_nonpcf_tbl2</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">--add column:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> hudi_cow_nonpcf_tbl2 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">add</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">columns</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">remark string</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">--change column:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> hudi_cow_nonpcf_tbl2 change </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">column</span><span class="token plain"> uuid uuid </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">bigint</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">--set properties;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">alter</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_cow_nonpcf_tbl2 </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">set</span><span class="token plain"> tblproperties </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">hoodie</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">keep</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">max</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">commits </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;10&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="partition-sql-command">Partition SQL Command<a class="hash-link" href="#partition-sql-command" title="Direct link to heading"></a></h3><p><strong>Syntax</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">-- Drop Partition</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">ALTER</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">TABLE</span><span class="token plain"> tableIdentifier </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">DROP</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">PARTITION</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"> partition_col_name </span><span class="token operator">=</span><span class="token plain"> partition_col_val </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">-- Show Partitions</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">SHOW</span><span class="token plain"> PARTITIONS tableIdentifier</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>Examples</strong></p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token comment" style="color:rgb(98, 114, 164)">--show partition:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">show</span><span class="token plain"> partitions hudi_cow_pt_tbl</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token comment" style="color:rgb(98, 114, 164)">--drop partition:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">alter</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">table</span><span class="token plain"> hudi_cow_pt_tbl </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">drop</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">partition</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">dt</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;2021-12-09&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> hh</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;10&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token punctuation" style="color:rgb(248, 248, 242)">;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>Currently, the result of <code>show partitions</code> is based on the filesystem table path. It&#x27;s not precise when delete the whole partition data or drop certain partition directly.</p></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="where-to-go-from-here">Where to go from here?<a class="hash-link" href="#where-to-go-from-here" title="Direct link to heading"></a></h2><p>You can also do the quickstart by <a href="https://github.com/apache/hudi#building-apache-hudi-from-source" target="_blank" rel="noopener noreferrer">building hudi yourself</a>,
and using <code>--jars &lt;path to hudi_code&gt;/packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.1?-*.*.*-SNAPSHOT.jar</code> in the spark-shell command above
instead of <code>--packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0</code>. Hudi also supports scala 2.12. Refer <a href="https://github.com/apache/hudi#build-with-scala-212" target="_blank" rel="noopener noreferrer">build with scala 2.12</a>
for more info.</p><p>Also, we used Spark here to show case the capabilities of Hudi. However, Hudi can support multiple table types/query types and
Hudi tables can be queried from query engines like Hive, Spark, Presto and much more. We have put together a
<a href="https://www.youtube.com/watch?v=VhNgUsxdrD0" target="_blank" rel="noopener noreferrer">demo video</a> that show cases all of this on a docker based setup with all
dependent systems running locally. We recommend you replicate the same setup and run the demo yourself, by following
steps <a href="/docs/docker_demo">here</a> to get a taste for it. Also, if you are looking for ways to migrate your existing data
to Hudi, refer to <a href="/docs/migration_guide">migration guide</a>.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.10.0/quick-start-guide.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/0.10.0/overview"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Overview</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/0.10.0/flink-quick-start-guide"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Flink Guide</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#setup" class="table-of-contents__link toc-highlight">Setup</a></li><li><a href="#spark-sql-type-support" class="table-of-contents__link toc-highlight">Spark SQL Type Support</a></li><li><a href="#create-table" class="table-of-contents__link toc-highlight">Create Table</a></li><li><a href="#insert-data" class="table-of-contents__link toc-highlight">Insert data</a></li><li><a href="#query-data" class="table-of-contents__link toc-highlight">Query data</a><ul><li><a href="#time-travel-query" class="table-of-contents__link toc-highlight">Time Travel Query</a></li></ul></li><li><a href="#update-data" class="table-of-contents__link toc-highlight">Update data</a><ul><li><a href="#update" class="table-of-contents__link toc-highlight">Update</a></li><li><a href="#mergeinto" class="table-of-contents__link toc-highlight">MergeInto</a></li></ul></li><li><a href="#incremental-query" class="table-of-contents__link toc-highlight">Incremental query</a></li><li><a href="#structured-streaming" class="table-of-contents__link toc-highlight">Structured Streaming</a><ul><li><a href="#streaming-read" class="table-of-contents__link toc-highlight">Streaming Read</a></li><li><a href="#streaming-write" class="table-of-contents__link toc-highlight">Streaming Write</a></li><li><a href="#table-maintenance" class="table-of-contents__link toc-highlight">Table maintenance</a></li></ul></li><li><a href="#point-in-time-query" class="table-of-contents__link toc-highlight">Point in time query</a></li><li><a href="#deletes" class="table-of-contents__link toc-highlight">Delete data</a><ul><li><a href="#soft-deletes" class="table-of-contents__link toc-highlight">Soft Deletes</a></li><li><a href="#hard-deletes" class="table-of-contents__link toc-highlight">Hard Deletes</a></li></ul></li><li><a href="#insert-overwrite" class="table-of-contents__link toc-highlight">Insert Overwrite</a></li><li><a href="#more-spark-sql-commands" class="table-of-contents__link toc-highlight">More Spark SQL Commands</a><ul><li><a href="#altertable" class="table-of-contents__link toc-highlight">AlterTable</a></li><li><a href="#partition-sql-command" class="table-of-contents__link toc-highlight">Partition SQL Command</a></li></ul></li><li><a href="#where-to-go-from-here" class="table-of-contents__link toc-highlight">Where to go from here?</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>. <br>Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.2cab5691.js"></script>
<script src="/assets/js/main.bd020950.js"></script>
</body>
</html>