<!-- blob: b81f30cdb858b2fc259165bd6b6e84c1f9495747 [file] [log] [blame] -->
<!doctype html>
<html class="docs-version-0.5.3" lang="en" dir="ltr">
<head>
<!-- Document metadata for the Hudi 0.5.3 "Deployment Guide" page. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<!-- Feed and search discovery links. -->
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/videos/atom.xml" title="Apache Hudi Atom Feed">
<!-- Third-party font and icon stylesheets. -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css">
<!-- Page-specific title and metadata (tags carrying data-react-helmet). -->
<title data-react-helmet="true">Deployment Guide | Apache Hudi</title>
<meta data-react-helmet="true" name="twitter:card" content="summary_large_image">
<meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/docs/0.5.3/deployment">
<meta data-react-helmet="true" name="docsearch:language" content="en">
<meta data-react-helmet="true" name="docsearch:version" content="0.5.3">
<meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.5.3">
<meta data-react-helmet="true" property="og:title" content="Deployment Guide | Apache Hudi">
<meta data-react-helmet="true" name="description" content="This section provides all the help you need to deploy and operate Hudi tables at scale.">
<meta data-react-helmet="true" property="og:description" content="This section provides all the help you need to deploy and operate Hudi tables at scale.">
<meta data-react-helmet="true" name="keywords" content="hudi,administration,operation,devops,deployment">
<link data-react-helmet="true" rel="icon" href="/assets/images/favicon.ico">
<link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/docs/0.5.3/deployment">
<!--
  Language alternates. hreflang must be a valid language tag: Chinese is "zh"
  ("cn" is a region code, not a language), so the /cn/ URL is labelled "zh".
-->
<link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.5.3/deployment" hreflang="en">
<link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/0.5.3/deployment" hreflang="zh">
<link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.5.3/deployment" hreflang="x-default">
<link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous">
<!-- Site stylesheet and preloads for the two application bundles. -->
<link rel="stylesheet" href="/assets/css/styles.ea681a30.css">
<link rel="preload" href="/assets/js/runtime~main.2cab5691.js" as="script">
<link rel="preload" href="/assets/js/main.bd020950.js" as="script">
</head>
<body>
<script>
  // Runs inline so both attributes are set on <html> as soon as this script
  // is parsed, ahead of the markup that follows it.
  (function () {
    var rootElement = document.documentElement;

    // Stored theme choice; stays null when the key is absent or when reading
    // localStorage throws.
    var savedTheme = null;
    try {
      savedTheme = localStorage.getItem("theme");
    } catch (ignored) {}
    rootElement.setAttribute("data-theme", savedTheme !== null ? savedTheme : "light");

    // Whether the announcement bar was dismissed earlier; false when the flag
    // is not exactly "true" or when reading localStorage throws.
    var barDismissed = false;
    try {
      barDismissed = localStorage.getItem("docusaurus.announcement.dismiss") === "true";
    } catch (ignored) {}
    rootElement.setAttribute("data-announcement-bar-initially-dismissed", barDismissed);
  })();
</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/contribute/developer-setup"><div class="labelWrapperDropdown_Mqbj">Developer 
Setup</div></a></li><li><a class="dropdown__link" href="/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a 
class="navbar__item navbar__link" href="/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/deployment"><div class="labelWrapperDropdown_Mqbj">Current</div></a></li><li><a class="dropdown__link" href="/docs/deployment"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/docs/0.14.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/docs/0.13.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/docs/0.13.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/docs/0.12.3/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/docs/0.12.2/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/docs/0.12.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/docs/0.12.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/docs/0.11.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" 
href="/docs/0.11.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/docs/0.10.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/docs/0.10.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/docs/0.9.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/docs/0.8.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/docs/0.7.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/docs/0.6.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/0.5.3/deployment"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/docs/0.5.2/deployment"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/docs/0.5.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 
2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>English</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 
6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/0.5.3/deployment" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/0.5.3/deployment" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" 
rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link 
menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/next/deployment"><div class="labelWrapperDropdown_Mqbj">Current</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/deployment"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.14.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.13.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.13.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.3/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.2/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.12.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li class="menu__list-item"><a class="menu__link" 
href="/docs/0.12.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.11.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.11.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.10.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.10.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.9.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.8.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.7.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.6.0/deployment"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/docs/0.5.3/deployment"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.2/deployment"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.1/deployment"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li class="menu__list-item"><a class="menu__link" href="/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 
20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" 
rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.5.3/quick-start-guide">Quick-Start Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.5.3/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.5.3/writing_data">Writing Hudi Tables</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.5.3/querying_data">Querying Hudi Tables</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 
menu__list-item"><a class="menu__link" href="/docs/0.5.3/configurations">Configurations</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/0.5.3/performance">Performance</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" href="/docs/0.5.3/deployment">Deployment Guide</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.5.3/cloud">Storage Configurations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/docs/0.5.3/docker_demo">Resources</a></div></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.5.3</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/deployment">latest version</a></b> (<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.5.3</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Deployment Guide</h1></header><p>This section provides all the help you need to 
deploy and operate Hudi tables at scale.
Specifically, we will cover the following aspects.</p><ul><li><a href="#deploying">Deployment Model</a> : How various Hudi components are deployed and managed.</li><li><a href="#upgrading">Upgrading Versions</a> : Picking up new releases of Hudi, guidelines and general best practices.</li><li><a href="#migrating">Migrating to Hudi</a> : How to migrate your existing tables to Apache Hudi.</li><li><a href="#cli">Interacting via CLI</a> : Using the CLI to perform maintenance or deeper introspection.</li><li><a href="#monitoring">Monitoring</a> : Tracking metrics from your Hudi tables using popular tools.</li><li><a href="#troubleshooting">Troubleshooting</a> : Uncovering, triaging and resolving issues in production.</li></ul><h2 class="anchor anchorWithStickyNavbar_y2LR" id="deploying">Deploying<a class="hash-link" href="#deploying" title="Direct link to heading"></a></h2><p>All in all, Hudi deploys with no long-running servers or additional infrastructure cost to your data lake. In fact, Hudi pioneered this model of building a transactional distributed storage layer
using existing infrastructure and it&#x27;s heartening to see other systems adopting similar approaches as well. Hudi writing is done via Spark jobs (DeltaStreamer or custom Spark datasource jobs), deployed per standard Apache Spark <a href="https://spark.apache.org/docs/latest/cluster-overview" target="_blank" rel="noopener noreferrer">recommendations</a>.
Querying Hudi tables happens via libraries installed into Apache Hive, Apache Spark or Presto and hence no additional infrastructure is necessary. </p><p>A typical Hudi data ingestion can be achieved in 2 modes. In single-run mode, Hudi ingestion reads the next batch of data, ingests it into the Hudi table and exits. In continuous mode, Hudi ingestion runs as a long-running service executing ingestion in a loop.</p><p>With Merge_On_Read Table, Hudi ingestion needs to also take care of compacting delta files. Again, compaction can be performed in an asynchronous-mode by letting compaction run concurrently with ingestion or in a serial fashion with one after another.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="deltastreamer">DeltaStreamer<a class="hash-link" href="#deltastreamer" title="Direct link to heading"></a></h3><p><a href="/docs/writing_data#deltastreamer">DeltaStreamer</a> is the standalone utility to incrementally pull upstream changes from varied sources such as DFS, Kafka and DB Changelogs and ingest them to hudi tables. It runs as a spark application in 2 modes.</p><ul><li><strong>Run Once Mode</strong> : In this mode, Deltastreamer performs one ingestion round which includes incrementally pulling events from upstream sources and ingesting them to hudi table. Background operations like cleaning old file versions and archiving hoodie timeline are automatically executed as part of the run. For Merge-On-Read tables, Compaction is also run inline as part of ingestion unless disabled by passing the flag &quot;--disable-compaction&quot;. By default, Compaction is run inline for every ingestion run and this can be changed by setting the property &quot;hoodie.compact.inline.max.delta.commits&quot;. You can either manually run this spark application or use any cron trigger or workflow orchestrator (most common deployment strategy) such as Apache Airflow to spawn this application. 
See command line options in <a href="/docs/writing_data#deltastreamer">this section</a> for running the spark application.</li></ul><p>Here is an example invocation for reading from kafka topic in a single-run mode and writing to Merge On Read table type in a yarn cluster.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">[hoodie]$ spark-submit --packages org.apache.hudi:hudi-utilities-bundle_2.11:0.5.3,org.apache.spark:spark-avro_2.11:2.4.4 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --master yarn \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --deploy-mode cluster \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --num-executors 10 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --executor-memory 3g \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-memory 6g \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.driver.extraJavaOptions=&quot;-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/varadarb_ds_driver.hprof&quot; \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.executor.extraJavaOptions=&quot;-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/varadarb_ds_executor.hprof&quot; \</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain"> --queue hadoop-platform-queue \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.scheduler.mode=FAIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.yarn.executor.memoryOverhead=1072 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.yarn.driver.memoryOverhead=2048 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.task.cpus=1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.executor.cores=1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.task.maxFailures=10 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.memory.fraction=0.4 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.rdd.compress=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.kryoserializer.buffer.max=200m \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.memory.storageFraction=0.1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.shuffle.service.enabled=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.hive.convertMetastoreParquet=false \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.ui.port=5555 \</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain"> --conf spark.driver.maxResultSize=3g \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.executor.heartbeatInterval=120s \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.network.timeout=600s \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.eventLog.overwrite=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.eventLog.enabled=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.eventLog.dir=hdfs:///user/spark/applicationHistory \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.yarn.max.executor.failures=10 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.catalogImplementation=hive \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.shuffle.partitions=100 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-class-path $HADOOP_CONF_DIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table-type MERGE_ON_READ \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-ordering-field ts \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-base-path 
/user/hive/warehouse/stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-table stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --props /var/demo/config/kafka-source.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><ul><li><strong>Continuous Mode</strong> : Here, deltastreamer runs an infinite loop with each round performing one ingestion round as described in <strong>Run Once Mode</strong>. The frequency of data ingestion can be controlled by the configuration &quot;--min-sync-interval-seconds&quot;. For Merge-On-Read tables, Compaction is run in asynchronous fashion concurrently with ingestion unless disabled by passing the flag &quot;--disable-compaction&quot;. Every ingestion run triggers a compaction request asynchronously and this frequency can be changed by setting the property &quot;hoodie.compact.inline.max.delta.commits&quot;. 
As both ingestion and compaction are running in the same spark context, you can use resource allocation configuration in DeltaStreamer CLI such as (&quot;--delta-sync-scheduling-weight&quot;, &quot;--compact-scheduling-weight&quot;, &quot;--delta-sync-scheduling-minshare&quot;, and &quot;--compact-scheduling-minshare&quot;) to control executor allocation between ingestion and compaction.</li></ul><p>Here is an example invocation for reading from kafka topic in a continuous mode and writing to Merge On Read table type in a yarn cluster.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">[hoodie]$ spark-submit --packages org.apache.hudi:hudi-utilities-bundle_2.11:0.5.3,org.apache.spark:spark-avro_2.11:2.4.4 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --master yarn \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --deploy-mode cluster \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --num-executors 10 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --executor-memory 3g \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-memory 6g \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.driver.extraJavaOptions=&quot;-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/varadarb_ds_driver.hprof&quot; \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf 
spark.executor.extraJavaOptions=&quot;-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/varadarb_ds_executor.hprof&quot; \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --queue hadoop-platform-queue \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.scheduler.mode=FAIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.yarn.executor.memoryOverhead=1072 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.yarn.driver.memoryOverhead=2048 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.task.cpus=1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.executor.cores=1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.task.maxFailures=10 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.memory.fraction=0.4 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.rdd.compress=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.kryoserializer.buffer.max=200m \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.memory.storageFraction=0.1 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.shuffle.service.enabled=true \</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.hive.convertMetastoreParquet=false \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.ui.port=5555 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.driver.maxResultSize=3g \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.executor.heartbeatInterval=120s \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.network.timeout=600s \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.eventLog.overwrite=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.eventLog.enabled=true \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.eventLog.dir=hdfs:///user/spark/applicationHistory \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.yarn.max.executor.failures=10 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.catalogImplementation=hive \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.shuffle.partitions=100 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-class-path $HADOOP_CONF_DIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table-type MERGE_ON_READ \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-class 
org.apache.hudi.utilities.sources.JsonKafkaSource \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-ordering-field ts \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-base-path /user/hive/warehouse/stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-table stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --props /var/demo/config/kafka-source.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --continuous</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="spark-datasource-writer-jobs">Spark Datasource Writer Jobs<a class="hash-link" href="#spark-datasource-writer-jobs" title="Direct link to heading"></a></h3><p>As described in <a href="/docs/writing_data#datasource-writer">Writing Data</a>, you can use spark datasource to ingest to hudi table. This mechanism allows you to ingest any spark dataframe in Hudi format. Hudi Spark DataSource also supports spark streaming to ingest a streaming source to Hudi table. For Merge On Read table types, inline compaction is turned on by default which runs after every ingestion run. The compaction frequency can be changed by setting the property &quot;hoodie.compact.inline.max.delta.commits&quot;. 
</p><p>Here is an example invocation using spark datasource</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">inputDF.write()</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .format(&quot;org.apache.hudi&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .options(clientOpts) // any of the Hudi client opts can be passed in as well</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), &quot;_row_key&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), &quot;partition&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), &quot;timestamp&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .option(HoodieWriteConfig.TABLE_NAME, tableName)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .mode(SaveMode.Append)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> .save(basePath);</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="upgrading">Upgrading<a class="hash-link" href="#upgrading" title="Direct link to heading"></a></h2><p>New Hudi releases are listed on the <a href="/releases">releases page</a>, with detailed notes 
which list all the changes, with highlights in each release.
At the end of the day, Hudi is a storage system and with that comes a lot of responsibilities, which we take seriously. </p><p>As general guidelines, </p><ul><li>We strive to keep all changes backwards compatible (i.e. new code can read old data/timeline files) and when we cannot, we will provide upgrade/downgrade tools via the CLI</li><li>We cannot always guarantee forward compatibility (i.e. old code being able to read data/timeline files written by a greater version). This is generally the norm, since no new features can be built otherwise.
However, any such large changes will be turned off by default, for a smooth transition to the newer release. After a few releases and once enough users deem the feature stable in production, we will flip the defaults in a subsequent release.</li><li>Always upgrade the query bundles (mr-bundle, presto-bundle, spark-bundle) first and then upgrade the writers (deltastreamer, spark jobs using datasource). This often provides the best experience and it&#x27;s easy to fix
any issues by rolling forward/back the writer code (which typically you might have more control over)</li><li>With large, feature rich releases we recommend migrating slowly, by first testing in staging environments and running your own tests. Upgrading Hudi is no different than upgrading any database system.</li></ul><p>Note that release notes can override this information with specific instructions, applicable on a case-by-case basis.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="migrating">Migrating<a class="hash-link" href="#migrating" title="Direct link to heading"></a></h2><p>Currently migrating to Hudi can be done using two approaches </p><ul><li><strong>Convert newer partitions to Hudi</strong> : This model is suitable for large event tables (e.g. click streams, ad impressions), which also typically receive writes for the last few days alone. You can convert the last
N partitions to Hudi and proceed writing as if it were a Hudi table to begin with. The Hudi query side code is able to correctly handle both hudi and non-hudi data partitions.</li><li><strong>Full conversion to Hudi</strong> : This model is suitable if you are currently bulk/full loading the table a few times a day (e.g. database ingestion). The full conversion to Hudi is simply a one-time step (akin to 1 run of your existing job),
which moves all of the data into the Hudi format and provides the ability to incrementally update for future writes.</li></ul><p>For more details, refer to the detailed <a href="/docs/migration_guide">migration guide</a>. In the future, we will be supporting seamless zero-copy bootstrap of existing tables with all the upsert/incremental query capabilities fully supported.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="cli">CLI<a class="hash-link" href="#cli" title="Direct link to heading"></a></h2><p>Once hudi has been built, the shell can be fired up via <code>cd hudi-cli &amp;&amp; ./hudi-cli.sh</code>. A hudi table resides on DFS, in a location referred to as the <code>basePath</code> and
we would need this location in order to connect to a Hudi table. Hudi library effectively manages this table internally, using <code>.hoodie</code> subfolder to track all metadata.</p><p>To initialize a hudi table, use the following command.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">===================================================================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* ___ ___ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* /\__\ ___ /\ \ ___ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / / / /\__\ / \ \ /\ \ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / /__/ / / / / /\ \ \ \ \ \ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / \ \ ___ / / / / / \ \__\ / \__\ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / /\ \ /\__\ / /__/ ___ / /__/ \ |__| / /\/__/ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* \/ \ \/ / / \ \ \ /\__\ \ \ \ / / / /\/ / / *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* \ / / \ \ / / / \ \ / / / \ /__/ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / / / \ \/ / / \ \/ / / \ \__\ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / / / \ / / \ / / \/__/ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* \/__/ \/__/ \/__/ 
Apache Hudi CLI *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">===================================================================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi-&gt;create --path /user/hive/warehouse/table1 --tableName hoodie_table_1 --tableType COPY_ON_WRITE</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>To see the description of hudi table, use the command:</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:hoodie_table_1-&gt;desc</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/06 15:57:19 INFO timeline.HoodieActiveTimeline: Loaded instants []</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Property | Value |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |========================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | basePath | ... 
|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | metaPath | ... |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | fileSystem | hdfs |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | hoodie.table.name | hoodie_table_1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | hoodie.table.type | COPY_ON_WRITE |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | hoodie.archivelog.folder| |</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Following is a sample command to connect to a Hudi table contains uber trips.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;connect --path /app/uber/trips</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">16/10/05 23:20:37 INFO model.HoodieTableMetadata: All commits :HoodieCommits{commitList=[20161002045850, 20161002052915, 20161002055918, 20161002065317, 20161002075932, 20161002082904, 20161002085949, 20161002092936, 20161002105903, 20161002112938, 20161002123005, 20161002133002, 20161002155940, 20161002165924, 20161002172907, 20161002175905, 20161002190016, 20161002192954, 20161002195925, 20161002205935, 20161002215928, 20161002222938, 20161002225915, 20161002232906, 20161003003028, 20161003005958, 20161003012936, 20161003022924, 20161003025859, 
20161003032854, 20161003042930, 20161003052911, 20161003055907, 20161003062946, 20161003065927, 20161003075924, 20161003082926, 20161003085925, 20161003092909, 20161003100010, 20161003102913, 20161003105850, 20161003112910, 20161003115851, 20161003122929, 20161003132931, 20161003142952, 20161003145856, 20161003152953, 20161003155912, 20161003162922, 20161003165852, 20161003172923, 20161003175923, 20161003195931, 20161003210118, 20161003212919, 20161003215928, 20161003223000, 20161003225858, 20161004003042, 20161004011345, 20161004015235, 20161004022234, 20161004063001, 20161004072402, 20161004074436, 20161004080224, 20161004082928, 20161004085857, 20161004105922, 20161004122927, 20161004142929, 20161004163026, 20161004175925, 20161004194411, 20161004203202, 20161004211210, 20161004214115, 20161004220437, 20161004223020, 20161004225321, 20161004231431, 20161004233643, 20161005010227, 20161005015927, 20161005022911, 20161005032958, 20161005035939, 20161005052904, 20161005070028, 20161005074429, 20161005081318, 20161005083455, 20161005085921, 20161005092901, 20161005095936, 20161005120158, 20161005123418, 20161005125911, 20161005133107, 20161005155908, 20161005163517, 20161005165855, 20161005180127, 20161005184226, 20161005191051, 20161005193234, 20161005203112, 20161005205920, 20161005212949, 20161005223034, 20161005225920]}</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Metadata for table trips loaded</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Once connected to the table, a lot of other commands become available. The shell has contextual autocomplete help (press TAB) and below is a list of all commands, few of which are reviewed in this section
</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;help</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* ! - Allows execution of operating system (OS) commands</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* // - Inline comment markers (start of line only)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* ; - Inline comment markers (start of line only)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* addpartitionmeta - Add partition metadata to a table, if not present</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* clear - Clears the console</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* cls - Clears the console</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* commit rollback - Rollback a commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* commits compare - Compare commits with another Hoodie table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* commit showfiles - Show file level details of a commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* commit showpartitions - Show partition level details of a commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* commits refresh - Refresh the commits</span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain">* commits show - Show the commits</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* commits sync - Compare commits with another Hoodie table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* connect - Connect to a hoodie table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* date - Displays the local date and time</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* exit - Exits the shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* help - List all commands usage</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* quit - Exits the shell</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* records deduplicate - De-duplicate a partition path contains duplicates &amp; produce repaired files to replace with</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* script - Parses the specified resource file and executes its commands</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* stats filesizes - File Sizes. Display summary stats on sizes of files</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* stats wa - Write Amplification. 
Ratio of how many records were upserted to how many records were actually written</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* sync validate - Validate the sync by counting the number of records</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* system properties - Shows the shell&#x27;s properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* utils loadClass - Load a class</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* version - Displays shell version</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="inspecting-commits">Inspecting Commits<a class="hash-link" href="#inspecting-commits" title="Direct link to heading"></a></h3><p>The task of upserting or inserting a batch of incoming records is known as a <strong>commit</strong> in Hudi. A commit provides basic atomicity guarantees such that only committed data is available for querying.
Each commit has a monotonically increasing string/number called the <strong>commit number</strong>. Typically, this is the time at which we started the commit.</p><p>To view some basic information about the last 10 commits,</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;commits show --sortBy &quot;Total Bytes Written&quot; --desc true --limit 10</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ________________________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | CommitTime | Total Bytes Written| Total Files Added| Total Files Updated| Total Partitions Written| Total Records Written| Total Update Records Written| Total Errors|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |=======================================================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>At the start of each write, Hudi also writes a .inflight commit to the .hoodie folder. 
You can use the timestamp there to estimate how long the commit has been inflight</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ hdfs dfs -ls /app/uber/trips/.hoodie/*.inflight</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-rw-r--r-- 3 vinoth supergroup 321984 2016-10-05 23:18 /app/uber/trips/.hoodie/20161005225920.inflight</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="drilling-down-to-a-specific-commit">Drilling Down to a specific Commit<a class="hash-link" href="#drilling-down-to-a-specific-commit" title="Direct link to heading"></a></h3><p>To understand how the writes spread across specific partiions,</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;commit showpartitions --commit 20161005165855 --sortBy &quot;Total Bytes Written&quot; --desc true --limit 10</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> __________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Partition Path| Total Files Added| Total Files Updated| Total Records Inserted| Total Records 
Updated| Total Bytes Written| Total Errors|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |=========================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>If you need file level granularity , we can do the following</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;commit showfiles --commit 20161005165855 --sortBy &quot;Partition Path&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Partition Path| File ID | Previous Commit| Total Records Updated| Total Records Written| Total Bytes Written| Total Errors|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |=======================================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain"> ....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="filesystem-view">FileSystem View<a class="hash-link" href="#filesystem-view" title="Direct link to heading"></a></h3><p>Hudi views each partition as a collection of file-groups with each file-group containing a list of file-slices in commit order (See concepts).
The below commands allow users to view the file-slices for a data-set.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:stock_ticks_mor-&gt;show fsview all</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _______________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Partition | FileId | Base-Instant | Data-File | Data-File Size| Num Delta Files| Total Delta File Size| Delta Files |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |==============================================================================================================================================================================================================================================================================================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | 2018/08/31| 111415c3-f26d-4639-86c8-f9956f245ac3| 20181002180759| 
hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/111415c3-f26d-4639-86c8-f9956f245ac3_0_20181002180759.parquet| 432.5 KB | 1 | 20.8 KB | [HoodieLogFile {hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/.111415c3-f26d-4639-86c8-f9956f245ac3_20181002180759.log.1}]|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:stock_ticks_mor-&gt;show fsview latest --partitionPath &quot;2018/08/31&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ......</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> __________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Partition | FileId | Base-Instant | Data-File | Data-File Size| Num Delta Files| Total Delta Size| Delta Size - compaction scheduled| Delta Size - compaction unscheduled| Delta To Base Ratio - compaction scheduled| Delta To Base Ratio - compaction unscheduled| Delta Files - compaction 
scheduled | Delta Files - compaction unscheduled|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |=================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | 2018/08/31| 111415c3-f26d-4639-86c8-f9956f245ac3| 20181002180759| hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/111415c3-f26d-4639-86c8-f9956f245ac3_0_20181002180759.parquet| 432.5 KB | 1 | 20.8 KB | 20.8 KB | 0.0 B | 0.0 B | 0.0 B | [HoodieLogFile {hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/.111415c3-f26d-4639-86c8-f9956f245ac3_20181002180759.log.1}]| [] |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="statistics">Statistics<a class="hash-link" href="#statistics" title="Direct link to heading"></a></h3><p>Since Hudi directly manages file sizes for DFS table, it might be good to get an overall picture</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" 
style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;stats filesizes --partitionPath 2016/09/01 --sortBy &quot;95th&quot; --desc true --limit 10</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | CommitTime | Min | 10th | 50th | avg | 95th | Max | NumFiles| StdDev |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |===============================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | &lt;COMMIT_ID&gt; | 93.9 MB | 93.9 MB | 93.9 MB | 93.9 MB | 93.9 MB | 93.9 MB | 2 | 2.3 KB |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>In case of Hudi write taking much longer, it might be good to see the write amplification for any sudden increases</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;stats wa</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 
__________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | CommitTime | Total Upserted| Total Written| Write Amplifiation Factor|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |=========================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="archived-commits">Archived Commits<a class="hash-link" href="#archived-commits" title="Direct link to heading"></a></h3><p>In order to limit the amount of growth of .commit files on DFS, Hudi archives older .commit files (with due respect to the cleaner policy) into a commits.archived file.
This is a sequence file that contains a mapping from commitNumber =&gt; json with raw information about the commit (the same information that is nicely rolled up above).</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="compactions">Compactions<a class="hash-link" href="#compactions" title="Direct link to heading"></a></h3><p>To get an idea of the lag between compaction and writer applications, use the command below to list all
pending compactions.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;compactions show all</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ___________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Compaction Instant Time| State | Total FileIds to be Compacted|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |==================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | &lt;INSTANT_1&gt; | REQUESTED| 35 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | &lt;INSTANT_2&gt; | INFLIGHT | 27 |</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>To inspect a specific compaction plan, use</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;compaction show --instant &lt;INSTANT_1&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 
_________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | Partition Path| File Id | Base Instant | Data File Path | Total Delta Files| getMetrics |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |================================================================================================================================================================================================================================================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | 2018/07/17 | &lt;UUID&gt; | &lt;INSTANT_1&gt; | viewfs://ns-default/.../../UUID_&lt;INSTANT&gt;.parquet | 1 | {TOTAL_LOG_FILES=1.0, TOTAL_IO_READ_MB=1230.0, TOTAL_LOG_FILES_SIZE=2.51255751E8, TOTAL_IO_WRITE_MB=991.0, TOTAL_IO_MB=2221.0}|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>To manually schedule or run a compaction, use the below command. This command uses spark launcher to perform compaction
operations. </p><p><strong>NOTE:</strong> Make sure no other application is scheduling compaction for this table concurrently.
</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;help compaction schedule</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Keyword: compaction schedule</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Description: Schedule Compaction</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Keyword: sparkMemory</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Spark executor memory</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: false</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;1G&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* compaction schedule - Schedule Compaction</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">hudi:trips-&gt;help compaction run</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Keyword: compaction run</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Description: Run Compaction for given instant time</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Keyword: tableName</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Table name</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Keyword: parallelism</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Parallelism for hoodie compaction</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Keyword: schemaFilePath</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Path for Avro schema 
file</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Keyword: sparkMemory</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Spark executor memory</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Keyword: retry</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Number of retries</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain"> Keyword: compactionInstant</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Help: Base path for the target hoodie table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Mandatory: true</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if specified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Default if unspecified: &#x27;__NULL__&#x27;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* compaction run - Run Compaction for given instant time</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="validate-compaction">Validate Compaction<a class="hash-link" href="#validate-compaction" title="Direct link to heading"></a></h3><p>Validating a compaction plan : Check if all the files necessary for compactions are present and are valid</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:stock_ticks_mor-&gt;compaction validate --instant 20181005222611</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain"> COMPACTION PLAN VALID</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ___________________________________________________________________________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | File Id | Base Instant Time| Base Data File | Num Delta Files| Valid| Error|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |==========================================================================================================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | 05320e98-9a57-4c38-b809-a6beaaeb36bd| 20181005222445 | hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/05320e98-9a57-4c38-b809-a6beaaeb36bd_0_20181005222445.parquet| 1 | true | |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:stock_ticks_mor-&gt;compaction validate --instant 20181005222601</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> COMPACTION PLAN 
INVALID</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _______________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | File Id | Base Instant Time| Base Data File | Num Delta Files| Valid| Error |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> |=====================================================================================================================================================================================================================================================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> | 05320e98-9a57-4c38-b809-a6beaaeb36bd| 20181005222445 | hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/05320e98-9a57-4c38-b809-a6beaaeb36bd_0_20181005222445.parquet| 1 | false| All log files specified in compaction operation is not present. Missing .... |</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p><strong>NOTE:</strong> The following commands must be executed without any other writer/ingestion application running.
</p><p>Sometimes, it becomes necessary to remove a fileId from a compaction plan in order to speed up or unblock the compaction
operation. Any new log-files that happened on this file after the compaction got scheduled will be safely renamed
so that they are preserved. Hudi provides the following CLI command to support this:</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="unscheduling-compaction">Unscheduling Compaction<a class="hash-link" href="#unscheduling-compaction" title="Direct link to heading"></a></h3><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;compaction unscheduleFileId --fileId &lt;FileUUID&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">No File renames needed to unschedule file from pending compaction. Operation successful.</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>In other cases, an entire compaction plan needs to be reverted. This is supported by the following CLI command:</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:trips-&gt;compaction unschedule --instant &lt;compactionInstant&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">No File renames needed to unschedule pending compaction. 
Operation successful.</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="repair-compaction">Repair Compaction<a class="hash-link" href="#repair-compaction" title="Direct link to heading"></a></h3><p>The above compaction unscheduling operations could sometimes fail partially (e.g., DFS temporarily unavailable). With
partial failures, the compaction operation could become inconsistent with the state of file-slices. When you run
<code>compaction validate</code>, you can see invalid compaction operations, if there are any. In these cases, the repair
command comes to the rescue: it will rearrange the file-slices so that there is no loss and the file-slices are
consistent with the compaction plan.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi:stock_ticks_mor-&gt;compaction repair --instant 20181005222611</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">......</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Compaction successfully repaired</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.....</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h2 class="anchor anchorWithStickyNavbar_y2LR" id="metrics"><a href="/docs/configurations#metrics-configs">Metrics</a><a class="hash-link" href="#metrics" title="Direct link to heading"></a></h2><p>Once the Hudi writer is configured with the right table and environment for metrics, it produces the following Graphite metrics, which aid in debugging Hudi tables:</p><ul><li><strong>Commit Duration</strong> - This is the amount of time it took to successfully commit a batch of records</li><li><strong>Rollback Duration</strong> - Similarly, the amount of time taken to undo partial data left over by a failed commit (happens every time automatically after a failing write)</li><li><strong>File Level metrics</strong> - Shows the number of new files added, versions, deleted (cleaned) in each commit</li><li><strong>Record Level Metrics</strong> - Total records inserted/updated etc. per commit</li><li><strong>Partition Level metrics</strong> - Number of partitions upserted (super useful to understand sudden spikes in commit duration)</li></ul><p>These metrics can then be plotted on a standard 
tool like grafana. Below is a sample commit duration chart.</p><figure><img class="docimage" src="/assets/images/hudi_commit_duration.png" alt="hudi_commit_duration.png"></figure><h2 class="anchor anchorWithStickyNavbar_y2LR" id="troubleshooting">Troubleshooting<a class="hash-link" href="#troubleshooting" title="Direct link to heading"></a></h2><p>Section below generally aids in debugging Hudi failures. Off the bat, the following metadata is added to every record to help triage issues easily using standard Hadoop SQL engines (Hive/Presto/Spark)</p><ul><li><p><strong>_hoodie_record_key</strong> - Treated as a primary key within each DFS partition, basis of all updates/inserts</p></li><li><p><strong>_hoodie_commit_time</strong> - Last commit that touched this record</p></li><li><p><strong>_hoodie_file_name</strong> - Actual file name containing the record (super useful to triage duplicates)</p></li><li><p><strong>_hoodie_partition_path</strong> - Path from basePath that identifies the partition containing this record</p><p>For performance related issues, please refer to the <a href="https://cwiki.apache.org/confluence/display/HUDI/Tuning+Guide" target="_blank" rel="noopener noreferrer">tuning guide</a></p></li></ul><h3 class="anchor anchorWithStickyNavbar_y2LR" id="missing-records">Missing records<a class="hash-link" href="#missing-records" title="Direct link to heading"></a></h3><p>Please check if there were any write errors using the admin commands above, during the window at which the record could have been written.
If you do find errors, then the record was not actually written by Hudi, but handed back to the application to decide what to do with it.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="duplicates">Duplicates<a class="hash-link" href="#duplicates" title="Direct link to heading"></a></h3><p>First of all, please confirm if you do indeed have duplicates <strong>AFTER</strong> ensuring the query is accessing the Hudi table <a href="/docs/querying_data">properly</a>.</p><ul><li>If confirmed, please use the metadata fields above to identify the physical files &amp; partition files containing the records.</li><li>If duplicates span files across partitionpath, then this means your application is generating different partitionPaths for the same recordKey. Please fix your app.</li><li>If duplicates span multiple files within the same partitionpath, please engage with the mailing list. This should not happen. You can use the <code>records deduplicate</code> command to fix your data.</li></ul><h3 class="anchor anchorWithStickyNavbar_y2LR" id="spark-ui">Spark failures<a class="hash-link" href="#spark-ui" title="Direct link to heading"></a></h3><p>A typical upsert() DAG looks like the one below. Note that the Hudi client also caches intermediate RDDs to intelligently profile the workload and size files and Spark parallelism.
Also, the Spark UI shows sortByKey twice due to the probe job also being shown; nonetheless, it&#x27;s just a single sort.</p><figure><img class="docimage" src="/assets/images/hudi_upsert_dag.png" alt="hudi_upsert_dag.png"></figure><p>At a high level, there are two steps:</p><p><strong>Index Lookup to identify files to be changed</strong></p><ul><li>Job 1 : Triggers the input data read, converts to HoodieRecord object and then stops at obtaining a spread of input records to target partition paths</li><li>Job 2 : Load the set of file names which we need to check against</li><li>Job 3 &amp; 4 : Actual lookup after smart sizing of Spark join parallelism, by joining RDDs in 1 &amp; 2 above</li><li>Job 5 : Have a tagged RDD of recordKeys with locations</li></ul><p><strong>Performing the actual writing of data</strong></p><ul><li>Job 6 : Lazy join of incoming records against recordKey, location to provide a final set of HoodieRecords which now contain the information about which file/partitionpath they are found at (or null if insert). Then also profile the workload again to determine sizing of files</li><li>Job 7 : Actual writing of data (update + insert + insert turned to updates to maintain file size)</li></ul><p>Depending on the exception source (Hudi/Spark), the above knowledge of the DAG can be used to pinpoint the actual issue. The most frequently encountered failures result from YARN/DFS temporary failures.
In the future, a more sophisticated debug/management UI would be added to the project, that can help automate some of this debugging.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.5.3/deployment.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/0.5.3/performance"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Performance</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/0.5.3/cloud"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Cloud Storage</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#deploying" class="table-of-contents__link toc-highlight">Deploying</a><ul><li><a href="#deltastreamer" class="table-of-contents__link toc-highlight">DeltaStreamer</a></li><li><a href="#spark-datasource-writer-jobs" class="table-of-contents__link toc-highlight">Spark Datasource Writer Jobs</a></li></ul></li><li><a href="#upgrading" class="table-of-contents__link toc-highlight">Upgrading</a></li><li><a href="#migrating" class="table-of-contents__link 
toc-highlight">Migrating</a></li><li><a href="#cli" class="table-of-contents__link toc-highlight">CLI</a><ul><li><a href="#inspecting-commits" class="table-of-contents__link toc-highlight">Inspecting Commits</a></li><li><a href="#drilling-down-to-a-specific-commit" class="table-of-contents__link toc-highlight">Drilling Down to a specific Commit</a></li><li><a href="#filesystem-view" class="table-of-contents__link toc-highlight">FileSystem View</a></li><li><a href="#statistics" class="table-of-contents__link toc-highlight">Statistics</a></li><li><a href="#archived-commits" class="table-of-contents__link toc-highlight">Archived Commits</a></li><li><a href="#compactions" class="table-of-contents__link toc-highlight">Compactions</a></li><li><a href="#validate-compaction" class="table-of-contents__link toc-highlight">Validate Compaction</a></li><li><a href="#unscheduling-compaction" class="table-of-contents__link toc-highlight">Unscheduling Compaction</a></li><li><a href="#repair-compaction" class="table-of-contents__link toc-highlight">Repair Compaction</a></li></ul></li><li><a href="#metrics" class="table-of-contents__link toc-highlight">Metrics</a></li><li><a href="#troubleshooting" class="table-of-contents__link toc-highlight">Troubleshooting</a><ul><li><a href="#missing-records" class="table-of-contents__link toc-highlight">Missing records</a></li><li><a href="#duplicates" class="table-of-contents__link toc-highlight">Duplicates</a></li><li><a href="#spark-ui" class="table-of-contents__link toc-highlight">Spark failures</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" 
href="/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" 
href="/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" 
viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div 
class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>. <br>Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.2cab5691.js"></script>
<script src="/assets/js/main.bd020950.js"></script>
</body>
</html>