blob: 2f742e1b49f209b5fba1b125b48daba7f52ebd9c [file] [log] [blame]
<!doctype html>
<html class="docs-version-0.13.1" lang="cn" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Docker Demo | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/docs/0.13.1/docker_demo"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:version" content="0.13.1"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-default-0.13.1"><meta data-react-helmet="true" property="og:title" content="Docker Demo | Apache Hudi"><meta data-react-helmet="true" name="description" content="A Demo using Docker containers"><meta data-react-helmet="true" property="og:description" content="A Demo using Docker containers"><meta data-react-helmet="true" name="keywords" content="hudi,docker,demo"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/docs/0.13.1/docker_demo"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.13.1/docker_demo" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/docs/0.13.1/docker_demo" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/docs/0.13.1/docker_demo" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css">
<link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script">
<link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script">
</head>
<body>
<script>
// Pre-hydration bootstrap: stamp the persisted UI state onto <html> before the
// app bundle runs.
(function () {
  // Theme: use the value stored under "theme", or "light" when nothing is
  // stored. Any exception raised while reading storage is swallowed.
  var storedTheme = null;
  try {
    storedTheme = localStorage.getItem("theme");
  } catch (err) {}
  var themeToApply = storedTheme !== null ? storedTheme : "light";
  document.documentElement.setAttribute("data-theme", themeToApply);
})();
(function () {
  // Announcement bar: record whether the stored dismiss flag is exactly the
  // string "true"; a storage failure is treated as "not dismissed".
  var wasDismissed = false;
  try {
    wasDismissed = localStorage.getItem("docusaurus.announcement.dismiss") === "true";
  } catch (err) {}
  document.documentElement.setAttribute("data-announcement-bar-initially-dismissed", wasDismissed);
})();
</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div 
class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a class="navbar__item navbar__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/cn/powered-by"><div 
class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/0.13.1/overview"><div class="labelWrapperDropdown_Mqbj">0.13.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/docker_demo"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/cn/docs/0.13.1/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.1/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" 
href="/cn/docs/0.11.1/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/docker_demo"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 
3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 
1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/docs/0.13.1/docker_demo" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/docs/0.13.1/docker_demo" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" 
class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item 
menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 
3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← 
Back to main menu</button></div></div></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_i9tI" type="button"></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_a3j0"><nav class="menu thin-scrollbar menu_cyFh menuWithAnnouncementBar_+O1J"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.1/overview">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active hasHref_TwRn" href="/cn/docs/0.13.1/quick-start-guide">Quick Start</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.13.1/quick-start-guide">Spark Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/cn/docs/0.13.1/flink-quick-start-guide">Flink Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/cn/docs/0.13.1/docker_demo">Docker Demo</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.1/timeline">Concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.1/table_management">How To</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.1/migration_guide">Services</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.1/basic_configurations">Configurations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist hasHref_TwRn" href="/cn/docs/0.13.1/performance">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.1/use_cases">Use Cases</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.1/faq">FAQs</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/cn/docs/0.13.1/privacy">Privacy Policy</a></li></ul></nav></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Hudi<!-- --> <b>0.13.1</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/cn/docs/docker_demo">latest version</a></b> 
(<!-- -->0.14.1<!-- -->).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: <!-- -->0.13.1</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Docker Demo</h1></header><h2 class="anchor anchorWithStickyNavbar_y2LR" id="a-demo-using-docker-containers">A Demo using Docker containers<a class="hash-link" href="#a-demo-using-docker-containers" title="Direct link to heading"></a></h2><p>Let&#x27;s use a real-world example to see how Hudi works end to end. For this purpose, a self-contained
data infrastructure is brought up in a local Docker cluster on your computer. It requires the
Hudi repo to have been cloned locally. </p><p>The steps have been tested on a Mac laptop.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="prerequisites">Prerequisites<a class="hash-link" href="#prerequisites" title="Direct link to heading"></a></h3><ul><li><p>Clone the <a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer">Hudi repository</a> to your local machine.</p></li><li><p>Docker Setup : For Mac, please follow the steps as defined in <a href="https://docs.docker.com/desktop/install/mac-install/" target="_blank" rel="noopener noreferrer">Install Docker Desktop on Mac</a>. For running Spark-SQL queries, please ensure at least 6 GB of memory and 4 CPUs are allocated to Docker (see Docker -&gt; Preferences -&gt; Advanced). Otherwise, Spark-SQL queries could be killed because of memory issues.</p></li><li><p>kcat : A command-line utility to publish to and consume from Kafka topics. Use <code>brew install kcat</code> to install kcat.</p></li><li><p>/etc/hosts : The demo references many services running in containers by their hostnames. 
Add the following settings to /etc/hosts</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 adhoc-1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 adhoc-2</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 namenode</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 datanode1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 hiveserver</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 hivemetastore</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 kafkabroker</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 sparkmaster</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">127.0.0.1 zookeeper</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></li><li><p>Java : Java SE Development Kit 8.</p></li><li><p>Maven : A build automation tool for Java projects.</p></li><li><p>jq : A lightweight and flexible command-line JSON processor. 
Use <code>brew install jq</code> to install jq.</p></li></ul><p>Also, this has not been tested on some environments like Docker on Windows.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="setting-up-docker-cluster">Setting up Docker Cluster<a class="hash-link" href="#setting-up-docker-cluster" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_y2LR" id="build-hudi">Build Hudi<a class="hash-link" href="#build-hudi" title="Direct link to heading"></a></h3><p>The first step is to build Hudi. <strong>Note</strong>: This step builds Hudi on the default supported Scala version, 2.11.</p><p>NOTE: Make sure you&#x27;ve cloned the <a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer">Hudi repository</a> first. </p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">cd &lt;HUDI_WORKSPACE&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">mvn clean package -Pintegration-tests -DskipTests</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="bringing-up-demo-cluster">Bringing up Demo Cluster<a class="hash-link" href="#bringing-up-demo-cluster" title="Direct link to heading"></a></h3><p>The next step is to run the Docker Compose script and set up the configs for bringing up the cluster. These files are in the <a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer">Hudi repository</a>, which you should already have locally on your machine from the previous steps. 
</p><p>This should pull the Docker images from Docker hub and setup the Docker cluster.</p><div class="tabs-container"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_vU9c tabs__item--active">Default</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_vU9c">Mac AArch64</li></ul><div class="margin-vert--md"><div role="tabpanel"><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">cd docker</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">./setup_demo.sh</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[+] Running 10/13</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container zookeeper Removed 8.6s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container datanode1 Removed 18.3s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container trino-worker-1 Removed 50.7s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container spark-worker-1 Removed 16.7s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container adhoc-2 Removed 16.9s</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain">⠿ Container graphite Removed 16.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container kafkabroker Removed 14.1s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container adhoc-1 Removed 14.1s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container presto-worker-1 Removed 11.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container presto-coordinator-1 Removed 34.6s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.......</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">......</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[+] Running 17/17</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ adhoc-1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ graphite Pulled 2.8s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ spark-worker-1 Pulled 3.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ kafka Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ datanode1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ hivemetastore Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ hiveserver Pulled 3.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ hive-metastore-postgresql Pulled 2.8s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ presto-coordinator-1 Pulled 2.9s</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ namenode Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ trino-worker-1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ sparkmaster Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ presto-worker-1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ zookeeper Pulled 2.8s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ adhoc-2 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ historyserver Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ trino-coordinator-1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[+] Running 17/17</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container zookeeper Started 41.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container kafkabroker Started 41.7s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container graphite Started 41.5s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container hive-metastore-postgresql Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container namenode Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container hivemetastore Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container trino-coordinator-1 Runni... 
0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container presto-coordinator-1 Star... 42.1s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container historyserver Started 41.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container datanode1 Started 49.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container hiveserver Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container trino-worker-1 Started 42.1s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container sparkmaster Started 41.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container spark-worker-1 Started 50.2s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container adhoc-2 Started 38.5s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container adhoc-1 Started 38.5s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container presto-worker-1 Started 38.4s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Copying spark default config and setting up configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Copying spark default config and setting up configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ docker ps</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><div role="tabpanel" hidden=""><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg 
xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>Please note the following for Mac AArch64 users</h5></div><div class="admonition-content"><ul><li> The demo must be built and run using the master branch. We currently plan to include support starting with the 0.13.0 release. </li><li> Presto and Trino are not currently supported in the demo. </li></ul></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">cd docker</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">./setup_demo.sh --mac-aarch64</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.......</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">......</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[+] Running 12/12</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ adhoc-1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ spark-worker-1 Pulled 3.0s</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ kafka Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ datanode1 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ hivemetastore Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ hiveserver Pulled 3.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ hive-metastore-postgresql Pulled 2.8s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ namenode Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ sparkmaster Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ zookeeper Pulled 2.8s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ adhoc-2 Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ historyserver Pulled 2.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[+] Running 12/12</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container zookeeper Started 41.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container kafkabroker Started 41.7s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container hive-metastore-postgresql Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container namenode Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container hivemetastore Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container 
historyserver Started 41.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container datanode1 Started 49.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container hiveserver Running 0.0s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container sparkmaster Started 41.9s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container spark-worker-1 Started 50.2s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container adhoc-2 Started 38.5s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">⠿ Container adhoc-1 Started 38.5s</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Copying spark default config and setting up configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Copying spark default config and setting up configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ docker ps</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div></div></div><p>At this point, the Docker cluster will be up and running. 
The demo cluster brings up the following services</p><ul><li>HDFS Services (NameNode, DataNode)</li><li>Spark Master and Worker</li><li>Hive Services (Metastore, HiveServer2 along with PostgresDB)</li><li>Kafka Broker and a Zookeeper Node (Kafka will be used as upstream source for the demo)</li><li>Containers for Presto setup (Presto coordinator and worker)</li><li>Containers for Trino setup (Trino coordinator and worker)</li><li>Adhoc containers to run Hudi/Hive CLI commands</li></ul><h2 class="anchor anchorWithStickyNavbar_y2LR" id="demo">Demo<a class="hash-link" href="#demo" title="Direct link to heading"></a></h2><p>Stock Tracker data will be used to showcase different Hudi query types and the effects of Compaction.</p><p>Take a look at the directory <code>docker/demo/data</code>. There are 2 batches of stock data - each at 1 minute granularity.
The first batch contains stock tracker data for some stock symbols during the first hour of the trading window
(9:30 a.m to 10:30 a.m). The second batch contains tracker data for the next 30 mins (10:30 - 11 a.m). Hudi will
be used to ingest these batches to a table which will contain the latest stock tracker data at hour level granularity.
The batches are windowed intentionally so that the second batch contains updates to some of the rows in the first batch.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-1--publish-the-first-batch-to-kafka">Step 1 : Publish the first batch to Kafka<a class="hash-link" href="#step-1--publish-the-first-batch-to-kafka" title="Direct link to heading"></a></h3><p>Upload the first batch to Kafka topic &#x27;stock_ticks&#x27; </p><p><code>cat docker/demo/data/batch_1.json | kcat -b kafkabroker -t stock_ticks -P</code></p><p>To check if the new topic shows up, use</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">kcat -b kafkabroker -L -J | jq .</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">{</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;originating_broker&quot;: {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;id&quot;: 1001,</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;name&quot;: &quot;kafkabroker:9092/1001&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;query&quot;: {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;topic&quot;: &quot;*&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;brokers&quot;: [</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;id&quot;: 1001,</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;name&quot;: &quot;kafkabroker:9092&quot;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ],</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;topics&quot;: [</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;topic&quot;: &quot;stock_ticks&quot;,</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;partitions&quot;: [</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;partition&quot;: 0,</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;leader&quot;: 1001,</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;replicas&quot;: [</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;id&quot;: 1001</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ],</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;isrs&quot;: [</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain"> {</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> &quot;id&quot;: 1001</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-2-incrementally-ingest-data-from-kafka-topic">Step 2: Incrementally ingest data from Kafka topic<a class="hash-link" href="#step-2-incrementally-ingest-data-from-kafka-topic" title="Direct link to heading"></a></h3><p>Hudi comes with a tool named DeltaStreamer. This tool can connect to a variety of data sources (including Kafka) to
pull changes and apply to Hudi table using upsert/insert primitives. Here, we will use the tool to download
json data from kafka topic and ingest to both COW and MOR tables we initialized in the previous step. This tool
automatically initializes the tables in the file-system if they do not exist yet.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_cow table in HDFS</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table-type COPY_ON_WRITE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-ordering-field ts \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-base-path /user/hive/warehouse/stock_ticks_cow \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-table stock_ticks_cow --props /var/demo/config/kafka-source.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_mor table in HDFS</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table-type MERGE_ON_READ \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-ordering-field ts \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-base-path /user/hive/warehouse/stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-table stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --props /var/demo/config/kafka-source.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --disable-compaction</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># As part of the setup (Look at setup_demo.sh), the configs needed for DeltaStreamer is uploaded to 
HDFS. The configs</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># contain mostly Kafka connectivity settings, the avro-schema to be used for ingesting along with key and partitioning fields.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>You can use HDFS web-browser to look at the tables
<code>http://namenode:50070/explorer.html#/user/hive/warehouse/stock_ticks_cow</code>.</p><p>You can explore the new partition folder created in the table along with a &quot;commit&quot; / &quot;deltacommit&quot;
file under .hoodie which signals a successful commit.</p><p>There will be a similar setup when you browse the MOR table
<code>http://namenode:50070/explorer.html#/user/hive/warehouse/stock_ticks_mor</code></p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-3-sync-with-hive">Step 3: Sync with Hive<a class="hash-link" href="#step-3-sync-with-hive" title="Direct link to heading"></a></h3><p>At this step, the tables are available in HDFS. We need to sync with Hive to create new Hive tables and add partitions
in order to run Hive queries against those tables.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># This command takes in HiveServer URL and COW Hudi table location in HDFS and sync the HDFS state to Hive</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/var/hoodie/ws/hudi-sync/hudi-hive-sync/run_sync_tool.sh \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --jdbc-url jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --user hive \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --pass hive \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --partitioned-by dt \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --base-path /user/hive/warehouse/stock_ticks_cow \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --database default \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table stock_ticks_cow \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">.....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">2020-01-25 19:51:28,953 INFO [main] hive.HiveSyncTool (HiveSyncTool.java:syncHoodieTable(129)) - Sync complete for stock_ticks_cow</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Now run hive-sync for the second data-set in HDFS using Merge-On-Read (MOR table type)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">/var/hoodie/ws/hudi-sync/hudi-hive-sync/run_sync_tool.sh \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --jdbc-url jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --user hive \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --pass hive \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --partitioned-by dt \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --base-path /user/hive/warehouse/stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --database default \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">2020-01-25 19:51:51,066 INFO 
[main] hive.HiveSyncTool (HiveSyncTool.java:syncHoodieTable(129)) - Sync complete for stock_ticks_mor_ro</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">2020-01-25 19:51:51,569 INFO [main] hive.HiveSyncTool (HiveSyncTool.java:syncHoodieTable(129)) - Sync complete for stock_ticks_mor_rt</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>After executing the above command, you will notice</p><ol><li>A hive table named <code>stock_ticks_cow</code> created which supports Snapshot and Incremental queries on Copy On Write table.</li><li>Two new tables <code>stock_ticks_mor_rt</code> and <code>stock_ticks_mor_ro</code> created for the Merge On Read table. The former
supports Snapshot and Incremental queries (providing near-real time data) while the latter supports ReadOptimized queries.</li></ol><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-4-a-run-hive-queries">Step 4 (a): Run Hive Queries<a class="hash-link" href="#step-4-a-run-hive-queries" title="Direct link to heading"></a></h3><p>Run a hive query to find the latest timestamp ingested for stock symbol &#x27;GOOG&#x27;. You will notice that both snapshot
(for both COW and MOR _rt table) and read-optimized queries (for MOR _ro table) give the same value &quot;10:29 a.m&quot; as Hudi creates a
parquet file for the first batch of data.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beeline -u jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.stats.autogather=false</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># List Tables</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; show tables;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| tab_name |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| stock_ticks_cow |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| stock_ticks_mor_ro |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| stock_ticks_mor_rt |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+---------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">3 rows selected (1.199 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Look at partitions that were added</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; show partitions stock_ticks_mor_rt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| partition |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| dt=2018-08-31 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (0.24 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># COPY-ON-WRITE Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">=========================</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:29:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Now, run a projection query:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924221953 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924221953 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge-On-Read Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">==========================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Lets run similar queries against M-O-R table. Lets look at both </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">ReadOptimized and Snapshot(realtime data) queries supported by M-O-R table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run ReadOptimized Query. 
Notice that the latest timestamp is 10:29</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:29:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (6.326 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run Snapshot Query. 
Notice that the latest timestamp is again 10:29</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:29:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (1.606 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run Read Optimized and Snapshot project queries</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: 
jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-4-b-run-spark-sql-queries">Step 4 (b): Run Spark-SQL Queries<a class="hash-link" href="#step-4-b-run-spark-sql-queries" title="Direct link to heading"></a></h3><p>Hudi supports Spark as a query processor, just like Hive. Here are the same Hive queries
running in spark-sql</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-1 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$SPARK_INSTALL/bin/spark-shell \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --jars $HUDI_SPARK_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --master local[2] \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-class-path $HADOOP_CONF_DIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.hive.convertMetastoreParquet=false \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --deploy-mode client \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-memory 1G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --executor-memory 3G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --num-executors 1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Welcome to</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ____ __</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> / __/__ ___ 
_____/ /__</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _\ \/ _ \/ _ `/ __/ &#x27;_/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> /___/ .__/\_,_/_/ /_/\_\ version 2.4.4</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> /_/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Using Scala version 2.11.12 (OpenJDK 64-Bit Server VM, Java 1.8.0_212)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Type in expressions to have them evaluated.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Type :help for more information.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;show tables&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+--------+------------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|database|tableName |isTemporary|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+--------+------------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|default |stock_ticks_cow |false |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|default |stock_ticks_mor_ro|false |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|default |stock_ticks_mor_rt|false |</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain">+--------+------------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Copy-On-Write Table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">## Run max timestamp query against COW table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[Stage 0:&gt; (0 + 1) / 1]SLF4J: Failed to load class &quot;org.slf4j.impl.StaticLoggerBinder&quot;.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">SLF4J: Defaulting to no-operation (NOP) logger implementation</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">SLF4J: See http://www.slf4j.org/codes#StaticLoggerBinder for further details.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|symbol|max(ts) |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|GOOG |2018-08-31 10:29:00|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">## Projection Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|_hoodie_commit_time|symbol|ts |volume|open |close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|20180924221953 |GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|20180924221953 |GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge-On-Read Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">==========================</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Lets run similar queries against M-O-R table. Lets look at both</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">ReadOptimized and Snapshot queries supported by M-O-R table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run ReadOptimized Query. Notice that the latest timestamp is 10:29</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|symbol|max(ts) |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|GOOG |2018-08-31 10:29:00|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run Snapshot Query. 
Notice that the latest timestamp is again 10:29</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|symbol|max(ts) |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|GOOG |2018-08-31 10:29:00|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run Read Optimized and Snapshot project queries</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|_hoodie_commit_time|symbol|ts |volume|open |close |</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|20180924222155 |GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|20180924222155 |GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|_hoodie_commit_time|symbol|ts |volume|open |close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|20180924222155 |GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|20180924222155 |GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+-------------------+------+-------------------+------+---------+--------+</span><br></span></code></pre><button type="button" 
aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-4-c-run-presto-queries">Step 4 (c): Run Presto Queries<a class="hash-link" href="#step-4-c-run-presto-queries" title="Direct link to heading"></a></h3><p>Here are the Presto queries for similar Hive and Spark queries. </p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><ul><li> Currently, Presto does not support snapshot or incremental queries on Hudi tables. </li><li> This section of the demo is not supported for Mac AArch64 users at this time. 
</li></ul></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it presto-worker-1 presto --server presto-coordinator-1:8090</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto&gt; show catalogs;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Catalog</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">-----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> hive</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> jmx</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> localfile</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> system</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(4 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190817_134851_00000_j8rcz, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 19 total, 19 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:04 [0 rows, 0B] [0 rows/s, 0B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto&gt; use 
hive.default;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">USE</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; show tables;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Table</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> stock_ticks_cow</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> stock_ticks_mor_ro</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> stock_ticks_mor_rt</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(3 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181000_00001_segyw, FINISHED, 2 nodes</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 19 total, 19 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:05 [3 rows, 99B] [0 rows/s, 18B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># COPY-ON-WRITE Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">=========================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:29:00</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181011_00002_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:12 [197 rows, 613B] [16 rows/s, 50B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain"> 20190822180221 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180221 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181141_00003_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:02 [197 rows, 613B] [109 rows/s, 341B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge-On-Read Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">==========================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Lets run similar queries against M-O-R table. </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run ReadOptimized Query. 
Notice that the latest timestamp is 10:29</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> presto:default&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:29:00</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181158_00004_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:02 [197 rows, 613B] [110 rows/s, 343B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180250 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180250 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181256_00006_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:02 [197 rows, 613B] [92 rows/s, 286B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-4-d-run-trino-queries">Step 4 (d): Run Trino Queries<a class="hash-link" href="#step-4-d-run-trino-queries" title="Direct link to heading"></a></h3><p>Here are the similar queries with Trino.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 
.52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><ul><li> Currently, Trino does not support snapshot or incremental queries on Hudi tables. </li><li> This section of the demo is not supported for Mac AArch64 users at this time. </li></ul></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 trino --server trino-coordinator-1:8091</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino&gt; show catalogs;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Catalog </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> hive </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> system </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055038_00000_sac73, FINISHED, 1 
node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 19 total, 19 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">3.74 [0 rows, 0B] [0 rows/s, 0B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino&gt; use hive.default;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">USE</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; show tables;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> Table </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> stock_ticks_cow </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> stock_ticks_mor_ro </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> stock_ticks_mor_rt </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(3 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055050_00003_sac73, FINISHED, 2 nodes</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 19 total, 19 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1.84 [3 rows, 102B] [1 rows/s, 55B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># COPY-ON-WRITE Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">=========================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:29:00 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055101_00005_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">4.08 [197 rows, 442KB] [48 rows/s, 108KB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | 
ts | volume | open | close </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054822108 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054822108 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055113_00006_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.40 [197 rows, 450KB] [487 rows/s, 1.09MB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge-On-Read Queries:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">==========================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Lets run similar queries against MOR table.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run ReadOptimized 
Query. Notice that the latest timestamp is 10:29</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:29:00 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055125_00007_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.50 [197 rows, 442KB] [395 rows/s, 888KB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054844841 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054844841 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055136_00008_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.49 [197 rows, 450KB] [404 rows/s, 924KB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-5-upload-second-batch-to-kafka-and-run-deltastreamer-to-ingest">Step 5: Upload second batch to Kafka and run DeltaStreamer to ingest<a class="hash-link" href="#step-5-upload-second-batch-to-kafka-and-run-deltastreamer-to-ingest" title="Direct link to heading"></a></h3><p>Upload the second batch of data and ingest this batch using delta-streamer. As this batch does not bring in any new
partitions, there is no need to run hive-sync</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">cat docker/demo/data/batch_2.json | kcat -b kafkabroker -t stock_ticks -P</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Within Docker container, run the ingestion command</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_cow table in HDFS</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table-type COPY_ON_WRITE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-ordering-field ts \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-base-path 
/user/hive/warehouse/stock_ticks_cow \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-table stock_ticks_cow \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --props /var/demo/config/kafka-source.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_mor table in HDFS</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">spark-submit \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --table-type MERGE_ON_READ \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --source-ordering-field ts \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-base-path /user/hive/warehouse/stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --target-table stock_ticks_mor \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --props /var/demo/config/kafka-source.properties \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 
--schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --disable-compaction</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>With Copy-On-Write table, the second ingestion by DeltaStreamer resulted in a new version of Parquet file getting created.
See <code>http://namenode:50070/explorer.html#/user/hive/warehouse/stock_ticks_cow/2018/08/31</code>.</p><p>With the Merge-On-Read table, the second ingestion merely appended the batch to an unmerged delta (log) file.
Take a look at the HDFS filesystem to get an idea: <code>http://namenode:50070/explorer.html#/user/hive/warehouse/stock_ticks_mor/2018/08/31</code></p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-6-a-run-hive-queries">Step 6 (a): Run Hive Queries<a class="hash-link" href="#step-6-a-run-hive-queries" title="Direct link to heading"></a></h3><p>With the Copy-On-Write table, the Snapshot query sees the changes from the second batch as soon as the batch
is committed, because each ingestion creates newer versions of the Parquet files.</p><p>With the Merge-On-Read table, the second ingestion merely appended the batch to an unmerged delta (log) file.
This is the point at which ReadOptimized and Snapshot queries provide different results. The ReadOptimized query will still
return &quot;10:29 am&quot; as it only reads from the Parquet file. The Snapshot query will do an on-the-fly merge and return the
latest committed data, which is &quot;10:59 am&quot;.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beeline -u jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.stats.autogather=false</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Copy On Write Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. 
spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (1.932 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924221953 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924224524 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 
|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">As you can notice, the above queries now reflect the changes that came as part of ingesting second batch.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge On Read Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. 
spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:29:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (1.6 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 
|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Snapshot Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924224537 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-6-b-run-spark-sql-queries">Step 6 (b): Run Spark SQL Queries<a class="hash-link" href="#step-6-b-run-spark-sql-queries" title="Direct link to heading"></a></h3><p>Running the same queries in Spark-SQL:</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-1 /bin/bash</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain">$SPARK_INSTALL/bin/spark-shell \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --jars $HUDI_SPARK_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-class-path $HADOOP_CONF_DIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.hive.convertMetastoreParquet=false \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --deploy-mode client \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-memory 1G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --master local[2] \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --executor-memory 3G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --num-executors 1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Copy On Write Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|symbol|max(ts) |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|GOOG |2018-08-31 10:59:00|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+------+-------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924221953 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924224524 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain">As you can notice, the above queries now reflect the changes that came as part of ingesting second batch.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge On Read Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:29:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (1.6 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where 
symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Snapshot Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG 
| 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924222155 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924224537 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" 
id="step-6-c-run-presto-queries">Step 6 (c): Run Presto Queries<a class="hash-link" href="#step-6-c-run-presto-queries" title="Direct link to heading"></a></h3><p>Running the same queries on Presto for ReadOptimized queries. </p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>This section of the demo is not supported for Mac AArch64 users at this time.</p></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it presto-worker-1 presto --server presto-coordinator-1:8090</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto&gt; use hive.default;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">USE</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Copy On 
Write Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt;select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:59:00</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181530_00007_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:02 [197 rows, 613B] [125 rows/s, 389B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt;select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180221 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822181433 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181545_00008_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:02 [197 rows, 613B] [106 rows/s, 332B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">As you can notice, the above queries now reflect the changes that came as part of ingesting second batch.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge On Read Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol 
HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:29:00</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181602_00009_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:01 [197 rows, 613B] [139 rows/s, 435B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt;select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180250 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180250 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 
1230.085</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_181615_00010_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:01 [197 rows, 613B] [154 rows/s, 480B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-6-d-run-trino-queries">Step 6 (d): Run Trino Queries<a class="hash-link" href="#step-6-d-run-trino-queries" title="Direct link to heading"></a></h3><p>Running the same queries on Trino for Read-Optimized queries.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 
7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>This section of the demo is not supported for Mac AArch64 users at this time.</p></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 trino --server trino-coordinator-1:8091</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino&gt; use hive.default;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">USE</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Copy On Write Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:59:00 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">Query 20220112_055443_00012_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.63 [197 rows, 442KB] [310 rows/s, 697KB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054822108 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112055352654 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055450_00013_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.65 [197 rows, 450KB] [303 rows/s, 692KB/s]</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">As you can notice, the above queries now reflect the changes that came as part of ingesting second batch.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Merge On Read Table:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:29:00 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055500_00014_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.59 [197 rows, 442KB] [336 rows/s, 756KB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054844841 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20220112054844841 | GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20220112_055506_00015_sac73, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0.35 [197 rows, 450KB] [556 rows/s, 1.24MB/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">trino:default&gt; exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" 
id="step-7-a-incremental-query-for-copy-on-write-table">Step 7 (a): Incremental Query for COPY-ON-WRITE Table<a class="hash-link" href="#step-7-a-incremental-query-for-copy-on-write-table" title="Direct link to heading"></a></h3><p>With 2 batches of data ingested, lets showcase the support for incremental queries in Hudi Copy-On-Write tables</p><p>Lets take the same projection query example</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beeline -u jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.stats.autogather=false</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924064621 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924065039 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>As you notice from the above queries, there are 2 commits - 20180924064621 and 20180924065039 in timeline order.
When you follow the steps, you will get different timestamps for your commits. Substitute them
in place of the above timestamps.</p><p>To show the effects of incremental-query, let us assume that a reader has already seen the changes as part of
ingesting the first batch. Now, for the reader to see the effect of the second batch, they have to set the start timestamp to
the commit time of the first batch (20180924064621) and run an incremental query.</p><p>Hudi incremental mode provides efficient scanning for incremental queries by filtering out files that do not have any
candidate rows using hudi-managed metadata.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beeline -u jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.stats.autogather=false</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; set hoodie.stock_ticks_cow.consume.mode=INCREMENTAL;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">No rows affected (0.009 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; set hoodie.stock_ticks_cow.consume.max.commits=3;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">No rows affected (0.009 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; set hoodie.stock_ticks_cow.consume.start.timestamp=20180924064621;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>With the above setting, file-ids that do not have any updates from the commit 
20180924065039 are filtered out without scanning.
Here is the incremental query :</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = &#x27;GOOG&#x27; and `_hoodie_commit_time` &gt; &#x27;20180924064621&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924065039 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (0.83 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 
class="anchor anchorWithStickyNavbar_y2LR" id="step-7-b-incremental-query-with-spark-sql">Step 7 (b): Incremental Query with Spark SQL:<a class="hash-link" href="#step-7-b-incremental-query-with-spark-sql" title="Direct link to heading"></a></h3><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-1 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$SPARK_INSTALL/bin/spark-shell \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --jars $HUDI_SPARK_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-class-path $HADOOP_CONF_DIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.hive.convertMetastoreParquet=false \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --deploy-mode client \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-memory 1G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --master local[2] \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --executor-memory 3G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --num-executors 1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Welcome to</span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain"> ____ __</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> / __/__ ___ _____/ /__</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _\ \/ _ \/ _ `/ __/ &#x27;_/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> /___/ .__/\_,_/_/ /_/\_\ version 2.4.4</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> /_/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Using Scala version 2.11.12 (OpenJDK 64-Bit Server VM, Java 1.8.0_212)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Type in expressions to have them evaluated.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Type :help for more information.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; import org.apache.hudi.DataSourceReadOptions</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">import org.apache.hudi.DataSourceReadOptions</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># In the below query, 20180925045257 is the first commit&#x27;s timestamp</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; val hoodieIncViewDF = spark.read.format(&quot;org.apache.hudi&quot;).option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY, 
DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, &quot;20180924064621&quot;).load(&quot;/user/hive/warehouse/stock_ticks_cow&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">SLF4J: Failed to load class &quot;org.slf4j.impl.StaticLoggerBinder&quot;.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">SLF4J: Defaulting to no-operation (NOP) logger implementation</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">SLF4J: See http://www.slf4j.org/codes#StaticLoggerBinder for further details.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodieIncViewDF: org.apache.spark.sql.DataFrame = [_hoodie_commit_time: string, _hoodie_commit_seqno: string ... 15 more fields]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; hoodieIncViewDF.registerTempTable(&quot;stock_ticks_cow_incr_tmp1&quot;)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">warning: there was one deprecation warning; re-run with -deprecation for details</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_incr_tmp1 where symbol = &#x27;GOOG&#x27;&quot;).show(100, false);</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924065039 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-8-schedule-and-run-compaction-for-merge-on-read-table">Step 8: Schedule and Run Compaction for Merge-On-Read table<a class="hash-link" href="#step-8-schedule-and-run-compaction-for-merge-on-read-table" title="Direct link to heading"></a></h3><p>Lets schedule and run a compaction to create a new version of columnar file so that read-optimized readers will see fresher data.
Again, You can use Hudi CLI to manually schedule and run compaction</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-1 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">root@adhoc-1:/opt# /var/hoodie/ws/hudi-cli/hudi-cli.sh</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">...</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Table command getting loaded</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">HoodieSplashScreen loaded</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">===================================================================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* ___ ___ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* /\__\ ___ /\ \ ___ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / / / /\__\ / \ \ /\ \ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / /__/ / / / / /\ \ \ \ \ \ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / \ \ ___ / / / / / \ \__\ / \__\ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / /\ \ /\__\ / /__/ ___ / /__/ \ |__| / /\/__/ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* \/ \ \/ / / \ \ \ /\__\ \ \ \ / / / /\/ / / *</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain">* \ / / \ \ / / / \ \ / / / \ /__/ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / / / \ \/ / / \ \/ / / \ \__\ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* / / / \ / / \ / / \/__/ *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* \/__/ \/__/ \/__/ Apache Hudi CLI *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">* *</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">===================================================================</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Welcome to Apache Hudi CLI. Please type help if you are looking for help.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hudi-&gt;connect --path /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 06:59:34 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 06:59:35 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 06:59:35 INFO util.FSUtils: Hadoop Configuration: fs.defaultFS: [hdfs://namenode:8020], Config:[Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml], FileSystem: [DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_-1261652683_11, ugi=root (auth:SIMPLE)]]]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 06:59:35 INFO table.HoodieTableConfig: Loading table properties from /user/hive/warehouse/stock_ticks_mor/.hoodie/hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 06:59:36 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1) from /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Metadata for table stock_ticks_mor loaded</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;compactions show all</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">20/02/10 03:41:32 INFO timeline.HoodieActiveTimeline: Loaded instants [[20200210015059__clean__COMPLETED], [20200210015059__deltacommit__COMPLETED], [20200210022758__clean__COMPLETED], [20200210022758__deltacommit__COMPLETED], [==&gt;20200210023843__compaction__REQUESTED]]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">___________________________________________________________________</span><br></span><span 
class="token-line" style="color:#F8F8F2"><span class="token plain">| Compaction Instant Time| State | Total FileIds to be Compacted|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|==================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Schedule a compaction. This will use Spark Launcher to schedule compaction</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;compaction schedule --hoodieConfigs hoodie.compact.inline.max.delta.commits=1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Compaction successfully completed for 20180924070031</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Now refresh and check again. 
You will see that there is a new compaction requested</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;refresh</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 07:01:16 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 07:01:16 INFO table.HoodieTableConfig: Loading table properties from /user/hive/warehouse/stock_ticks_mor/.hoodie/hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 07:01:16 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1) from /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Metadata for table stock_ticks_mor loaded</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;compactions show all</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 06:34:12 INFO timeline.HoodieActiveTimeline: Loaded instants [[20180924041125__clean__COMPLETED], [20180924041125__deltacommit__COMPLETED], [20180924042735__clean__COMPLETED], [20180924042735__deltacommit__COMPLETED], [==&gt;20180924063245__compaction__REQUESTED]]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">___________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| Compaction 
Instant Time| State | Total FileIds to be Compacted|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|==================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | REQUESTED| 1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Execute the compaction. The compaction instant value passed below must be the one displayed in the above &quot;compactions show all&quot; query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;compaction run --compactionInstant 20180924070031 --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1 </span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Compaction successfully completed for 20180924070031</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">## Now check if compaction is completed</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;refresh</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 07:03:00 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 
07:03:00 INFO table.HoodieTableConfig: Loading table properties from /user/hive/warehouse/stock_ticks_mor/.hoodie/hoodie.properties</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 07:03:00 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1) from /user/hive/warehouse/stock_ticks_mor</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Metadata for table stock_ticks_mor loaded</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">hoodie:stock_ticks_mor-&gt;compactions show all</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">18/09/24 07:03:15 INFO timeline.HoodieActiveTimeline: Loaded instants [[20180924064636__clean__COMPLETED], [20180924064636__deltacommit__COMPLETED], [20180924065057__clean__COMPLETED], [20180924065057__deltacommit__COMPLETED], [20180924070031__commit__COMPLETED]]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">___________________________________________________________________</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| Compaction Instant Time| State | Total FileIds to be Compacted|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">|==================================================================|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | COMPLETED| 1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor 
anchorWithStickyNavbar_y2LR" id="step-9-run-hive-queries-including-incremental-queries">Step 9: Run Hive Queries including incremental queries<a class="hash-link" href="#step-9-run-hive-queries-including-incremental-queries" title="Direct link to heading"></a></h3><p>You will see that both ReadOptimized and Snapshot queries will show the latest committed data.
Let's also run the incremental query for the MOR table.
From looking at the below query output, it will be clear that the first commit time for the MOR table is 20180924064636
and the second commit time is 20180924070031</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-2 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">beeline -u jdbc:hive2://hiveserver:10000 \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --hiveconf hive.stats.autogather=false</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. 
spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (1.6 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924064636 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 
|</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Snapshot Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | _c1 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924064636 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Incremental Query:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; set hoodie.stock_ticks_mor.consume.mode=INCREMENTAL;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">No rows affected (0.008 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Max-Commits covers both second batch and compaction commit</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; set hoodie.stock_ticks_mor.consume.max.commits=3;</span><br></span><span class="token-line" 
style="color:#F8F8F2"><span class="token plain">No rows affected (0.007 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; set hoodie.stock_ticks_mor.consume.start.timestamp=20180924064636;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">No rows affected (0.013 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Query:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0: jdbc:hive2://hiveserver:10000&gt; select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27; and `_hoodie_commit_time` &gt; &#x27;20180924064636&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+--+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">exit</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" 
class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-10-read-optimized-and-snapshot-queries-for-mor-with-spark-sql-after-compaction">Step 10: Read Optimized and Snapshot queries for MOR with Spark-SQL after compaction<a class="hash-link" href="#step-10-read-optimized-and-snapshot-queries-for-mor-with-spark-sql-after-compaction" title="Direct link to heading"></a></h3><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it adhoc-1 /bin/bash</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$SPARK_INSTALL/bin/spark-shell \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --jars $HUDI_SPARK_BUNDLE \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-class-path $HADOOP_CONF_DIR \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --conf spark.sql.hive.convertMetastoreParquet=false \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --deploy-mode client \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --driver-memory 1G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --master local[2] \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --executor-memory 3G \</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> --num-executors 1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | max(ts) |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">1 row selected (1.6 seconds)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token 
plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924064636 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Snapshot Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| symbol | max(ts) |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| GOOG | 2018-08-31 10:59:00 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+---------+----------------------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">scala&gt; spark.sql(&quot;select `_hoodie_commit_time`, symbol, ts, volume, open, close from 
stock_ticks_mor_rt where symbol = &#x27;GOOG&#x27;&quot;).show(100, false)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| _hoodie_commit_time | symbol | ts | volume | open | close |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924064636 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">| 20180924070031 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">+----------------------+---------+----------------------+---------+------------+-----------+</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="step-11--presto-read-optimized-queries-on-mor-table-after-compaction">Step 11: Presto Read Optimized queries on MOR table after compaction<a class="hash-link" href="#step-11--presto-read-optimized-queries-on-mor-table-after-compaction" title="Direct link to heading"></a></h3><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 
7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>This section of the demo is not supported for Mac AArch64 users at this time.</p></div></div><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">docker exec -it presto-worker-1 presto --server presto-coordinator-1:8090</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto&gt; use hive.default;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">USE</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"># Read Optimized Query</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> symbol | _col1</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">--------+---------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> GOOG | 2018-08-31 10:59:00</span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain">(1 row)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_182319_00011_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Splits: 49 total, 49 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:01 [197 rows, 613B] [133 rows/s, 414B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt; select &quot;_hoodie_commit_time&quot;, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = &#x27;GOOG&#x27;;</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> _hoodie_commit_time | symbol | ts | volume | open | close</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">---------------------+--------+---------------------+--------+-----------+----------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822180250 | GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> 20190822181944 | GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">(2 rows)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">Query 20190822_182333_00012_segyw, FINISHED, 1 node</span><br></span><span class="token-line" style="color:#F8F8F2"><span 
class="token plain">Splits: 17 total, 17 done (100.00%)</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">0:02 [197 rows, 613B] [98 rows/s, 307B/s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">presto:default&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>This brings the demo to an end.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="testing-hudi-in-local-docker-environment">Testing Hudi in Local Docker environment<a class="hash-link" href="#testing-hudi-in-local-docker-environment" title="Direct link to heading"></a></h2><p>You can bring up a Hadoop Docker environment containing Hadoop, Hive and Spark services with support for Hudi.</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ mvn pre-integration-test -DskipTests</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>The above command builds Docker images for all the services with
the current Hudi source installed at /var/hoodie/ws and also brings up the services using a compose file. We
currently use Hadoop (v2.8.4), Hive (v2.3.3) and Spark (v2.4.4) in Docker images.</p><p>To bring down the containers</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ cd hudi-integ-test</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ mvn docker-compose:down</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>If you want to bring up the Docker containers, use</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ cd hudi-integ-test</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">$ mvn docker-compose:up -DdetachedMode=true</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Hudi is a library that is operated in a broader data analytics/ingestion environment
involving Hadoop, Hive and Spark. Interoperability with all these systems is a key objective for us. We are
actively adding integration-tests under <strong>hudi-integ-test/src/test/java</strong> that make use of this
docker environment (see <strong>hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java</strong>).</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="building-local-docker-containers">Building Local Docker Containers:<a class="hash-link" href="#building-local-docker-containers" title="Direct link to heading"></a></h3><p>The Docker images required for the demo and for running integration tests are already available on Docker Hub. The Docker images
and compose scripts are carefully implemented so that they serve a dual purpose:</p><ol><li>The Docker images have inbuilt Hudi jar files with environment variables pointing to those jars (HUDI_HADOOP_BUNDLE, ...)</li><li>For running integration-tests, we need the jars generated locally to be used for running services within docker. The
docker-compose scripts (see <code>docker/compose/docker-compose_hadoop284_hive233_spark244.yml</code>) ensure local jars override
inbuilt jars by mounting the local Hudi workspace over the Docker location</li><li>As these Docker containers have the local Hudi workspace mounted, any changes that happen in the workspace would automatically
be reflected in the containers. This is a convenient way to develop and verify Hudi for
developers who do not own a distributed environment. Note that this is how integration tests are run.</li></ol><p>This helps avoid maintaining separate Docker images and avoids the costly step of building Hudi Docker images locally.
But if users want to test Hudi from locations with lower network bandwidth, they can still build the images locally:
run the script
<code>docker/build_local_docker_images.sh</code> to build local Docker images before running <code>docker/setup_demo.sh</code></p><p>Here are the commands:</p><div class="codeBlockContainer_J+bg language-java theme-code-block"><div class="codeBlockContent_csEI java"><pre tabindex="0" class="prism-code language-java codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">cd docker</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">./build_local_docker_images.sh</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">.....</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] Reactor Summary:</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] Hudi ............................................... SUCCESS [ 2.507 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-common ........................................ SUCCESS [ 15.181 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-aws ........................................... SUCCESS [ 2.621 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-timeline-service .............................. SUCCESS [ 1.811 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-client ........................................ 
SUCCESS [ 0.065 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-client-common ................................. SUCCESS [ 8.308 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-mr ..................................... SUCCESS [ 3.733 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark-client .................................. SUCCESS [ 18.567 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-sync-common ................................... SUCCESS [ 0.794 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hive-sync ..................................... SUCCESS [ 3.691 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark-datasource .............................. SUCCESS [ 0.121 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark-common_2.11 ............................. SUCCESS [ 12.979 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark2_2.11 ................................... SUCCESS [ 12.516 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark_2.11 .................................... SUCCESS [ 35.649 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-utilities_2.11 ................................ SUCCESS [ 5.881 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-utilities-bundle_2.11 ......................... 
SUCCESS [ 12.661 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-cli ........................................... SUCCESS [ 19.858 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-java-client ................................... SUCCESS [ 3.221 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-flink-client .................................. SUCCESS [ 5.731 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark3_2.12 ................................... SUCCESS [ 8.627 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-dla-sync ...................................... SUCCESS [ 1.459 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-sync .......................................... SUCCESS [ 0.053 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-mr-bundle .............................. SUCCESS [ 5.652 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hive-sync-bundle .............................. SUCCESS [ 1.623 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-spark-bundle_2.11 ............................. SUCCESS [ 10.930 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-presto-bundle ................................. SUCCESS [ 3.652 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-timeline-server-bundle ........................ 
SUCCESS [ 4.804 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-trino-bundle .................................. SUCCESS [ 5.991 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-docker ................................. SUCCESS [ 2.061 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-base-docker ............................ SUCCESS [ 53.372 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-base-java11-docker ..................... SUCCESS [ 48.545 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-namenode-docker ........................ SUCCESS [ 6.098 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-datanode-docker ........................ SUCCESS [ 4.825 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-history-docker ......................... SUCCESS [ 3.829 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-hive-docker ............................ SUCCESS [ 52.660 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-sparkbase-docker ....................... SUCCESS [01:02 min]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-sparkmaster-docker ..................... SUCCESS [ 12.661 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-sparkworker-docker ..................... 
SUCCESS [ 4.350 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-sparkadhoc-docker ...................... SUCCESS [ 59.083 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-presto-docker .......................... SUCCESS [01:31 min]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-trinobase-docker ....................... SUCCESS [02:40 min]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-trinocoordinator-docker ................ SUCCESS [ 14.003 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-hadoop-trinoworker-docker ..................... SUCCESS [ 12.100 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-integ-test .................................... SUCCESS [ 13.581 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-integ-test-bundle ............................. SUCCESS [ 27.212 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-examples ...................................... SUCCESS [ 8.090 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-flink_2.11 .................................... SUCCESS [ 4.217 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-kafka-connect ................................. SUCCESS [ 2.966 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-flink-bundle_2.11 ............................. 
SUCCESS [ 11.155 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] hudi-kafka-connect-bundle .......................... SUCCESS [ 12.369 s]</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] ------------------------------------------------------------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] BUILD SUCCESS</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] ------------------------------------------------------------------------</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] Total time: 14:35 min</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] Finished at: 2022-01-12T18:41:27-08:00</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">[INFO] ------------------------------------------------------------------------</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/hudi/tree/asf-site/website/versioned_docs/version-0.13.1/docker_demo.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a 
class="pagination-nav__link" href="/cn/docs/0.13.1/flink-quick-start-guide"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Flink Guide</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/docs/0.13.1/timeline"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Timeline</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#a-demo-using-docker-containers" class="table-of-contents__link toc-highlight">A Demo using Docker containers</a><ul><li><a href="#prerequisites" class="table-of-contents__link toc-highlight">Prerequisites</a></li></ul></li><li><a href="#setting-up-docker-cluster" class="table-of-contents__link toc-highlight">Setting up Docker Cluster</a><ul><li><a href="#build-hudi" class="table-of-contents__link toc-highlight">Build Hudi</a></li><li><a href="#bringing-up-demo-cluster" class="table-of-contents__link toc-highlight">Bringing up Demo Cluster</a></li></ul></li><li><a href="#demo" class="table-of-contents__link toc-highlight">Demo</a><ul><li><a href="#step-1--publish-the-first-batch-to-kafka" class="table-of-contents__link toc-highlight">Step 1 : Publish the first batch to Kafka</a></li><li><a href="#step-2-incrementally-ingest-data-from-kafka-topic" class="table-of-contents__link toc-highlight">Step 2: Incrementally ingest data from Kafka topic</a></li><li><a href="#step-3-sync-with-hive" class="table-of-contents__link toc-highlight">Step 3: Sync with Hive</a></li><li><a href="#step-4-a-run-hive-queries" class="table-of-contents__link toc-highlight">Step 4 (a): Run Hive Queries</a></li><li><a href="#step-4-b-run-spark-sql-queries" class="table-of-contents__link toc-highlight">Step 4 (b): Run Spark-SQL Queries</a></li><li><a href="#step-4-c-run-presto-queries" 
class="table-of-contents__link toc-highlight">Step 4 (c): Run Presto Queries</a></li><li><a href="#step-4-d-run-trino-queries" class="table-of-contents__link toc-highlight">Step 4 (d): Run Trino Queries</a></li><li><a href="#step-5-upload-second-batch-to-kafka-and-run-deltastreamer-to-ingest" class="table-of-contents__link toc-highlight">Step 5: Upload second batch to Kafka and run DeltaStreamer to ingest</a></li><li><a href="#step-6-a-run-hive-queries" class="table-of-contents__link toc-highlight">Step 6 (a): Run Hive Queries</a></li><li><a href="#step-6-b-run-spark-sql-queries" class="table-of-contents__link toc-highlight">Step 6 (b): Run Spark SQL Queries</a></li><li><a href="#step-6-c-run-presto-queries" class="table-of-contents__link toc-highlight">Step 6 (c): Run Presto Queries</a></li><li><a href="#step-6-d-run-trino-queries" class="table-of-contents__link toc-highlight">Step 6 (d): Run Trino Queries</a></li><li><a href="#step-7-a-incremental-query-for-copy-on-write-table" class="table-of-contents__link toc-highlight">Step 7 (a): Incremental Query for COPY-ON-WRITE Table</a></li><li><a href="#step-7-b-incremental-query-with-spark-sql" class="table-of-contents__link toc-highlight">Step 7 (b): Incremental Query with Spark SQL:</a></li><li><a href="#step-8-schedule-and-run-compaction-for-merge-on-read-table" class="table-of-contents__link toc-highlight">Step 8: Schedule and Run Compaction for Merge-On-Read table</a></li><li><a href="#step-9-run-hive-queries-including-incremental-queries" class="table-of-contents__link toc-highlight">Step 9: Run Hive Queries including incremental queries</a></li><li><a href="#step-10-read-optimized-and-snapshot-queries-for-mor-with-spark-sql-after-compaction" class="table-of-contents__link toc-highlight">Step 10: Read Optimized and Snapshot queries for MOR with Spark-SQL after compaction</a></li><li><a href="#step-11--presto-read-optimized-queries-on-mor-table-after-compaction" class="table-of-contents__link toc-highlight">Step 
11: Presto Read Optimized queries on MOR table after compaction</a></li></ul></li><li><a href="#testing-hudi-in-local-docker-environment" class="table-of-contents__link toc-highlight">Testing Hudi in Local Docker environment</a><ul><li><a href="#building-local-docker-containers" class="table-of-contents__link toc-highlight">Building Local Docker Containers:</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" 
rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" 
class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>LinkedIn<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a 
href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/cn/docs/privacy">Privacy Policy</a></div></div></div></footer></div>
<script src="/cn/assets/js/runtime~main.0acdb754.js"></script>
<script src="/cn/assets/js/main.6d6aa24f.js"></script>
</body>
</html>