blob: 9f2997fa026e18b190d621fc570576e43be08616 [file] [log] [blame]
<!doctype html>
<html lang="cn" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.14">
<link rel="alternate" type="application/rss+xml" href="/cn/blog/rss.xml" title="Apache Hudi: User-Facing Analytics RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/blog/atom.xml" title="Apache Hudi: User-Facing Analytics Atom Feed">
<link rel="alternate" type="application/json" href="/cn/blog/feed.json" title="Apache Hudi: User-Facing Analytics JSON Feed">
<link rel="search" type="application/opensearchdescription+xml" title="Apache Hudi" href="/cn/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/cn/videos/rss.xml" title="Apache Hudi RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/cn/videos/atom.xml" title="Apache Hudi Atom Feed">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Comfortaa|Ubuntu|Roboto|Source+Code+Pro">
<link rel="stylesheet" href="https://at-ui.github.io/feather-font/css/iconfont.css"><title data-react-helmet="true">Build Open Lakehouse using Apache Hudi &amp; dbt | Apache Hudi</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" property="og:url" content="https://hudi.apache.org/cn/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt"><meta data-react-helmet="true" name="docsearch:language" content="cn"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="default"><meta data-react-helmet="true" name="keywords" content="apache hudi, data lake, lakehouse, big data, apache spark, apache flink, presto, trino, analytics, data engineering"><meta data-react-helmet="true" property="og:title" content="Build Open Lakehouse using Apache Hudi &amp; dbt | Apache Hudi"><meta data-react-helmet="true" name="description" content="The focus of this blog is to show you how to build an open lakehouse leveraging incremental data processing and performing field-level updates. We are excited to announce that you can now use Apache Hudi + dbt for building open data lakehouses."><meta data-react-helmet="true" property="og:description" content="The focus of this blog is to show you how to build an open lakehouse leveraging incremental data processing and performing field-level updates. We are excited to announce that you can now use Apache Hudi + dbt for building open data lakehouses."><meta data-react-helmet="true" property="og:image" content="https://hudi.apache.org/cn/assets/images/blog/hudi_dbt_lakehouse.png"><meta data-react-helmet="true" name="twitter:image" content="https://hudi.apache.org/cn/assets/images/blog/hudi_dbt_lakehouse.png"><meta data-react-helmet="true" property="og:type" content="article"><meta data-react-helmet="true" property="article:published_time" content="2022-07-11T00:00:00.000Z"><meta data-react-helmet="true" property="article:tag" content="how-to,deltastreamer,incremental processing,apache hudi"><link data-react-helmet="true" rel="icon" href="/cn/assets/images/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://hudi.apache.org/cn/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/cn/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt" hreflang="cn"><link data-react-helmet="true" rel="alternate" href="https://hudi.apache.org/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://BH4D9OD16A-dsn.algolia.net" crossorigin="anonymous"><link rel="stylesheet" href="/cn/assets/css/styles.ea681a30.css">
<link rel="preload" href="/cn/assets/js/runtime~main.0acdb754.js" as="script">
<link rel="preload" href="/cn/assets/js/main.6d6aa24f.js" as="script">
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">⭐️ If you like Apache Hudi, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/apache/hudi">GitHub</a>! ⭐</div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav class="navbar navbar--fixed-top navbarWrapper_UIa0"><div class="navbar__inner"><img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8f594acf-9b77-44fb-9475-3e82ead1910c" width="0" height="0" alt=""><img referrerpolicy="no-referrer-when-downgrade" src="https://analytics.apache.org/matomo.php?idsite=47&amp;rec=1" width="0" height="0" alt=""><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/cn/"><div class="navbar__logo navbarLogo_Bz6n"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><a class="navbar__item navbar__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Learn<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/talks"><div class="labelWrapperDropdown_Mqbj">Talks</div></a></li><li><a class="dropdown__link" href="/cn/videos"><div class="labelWrapperDropdown_Mqbj">Video Guides</div></a></li><li><a class="dropdown__link" href="/cn/docs/faq"><div class="labelWrapperDropdown_Mqbj">FAQ</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs"><div class="labelWrapperDropdown_Mqbj">Tech Specs</div></a></li><li><a class="dropdown__link" href="/cn/tech-specs-1point0"><div class="labelWrapperDropdown_Mqbj">Tech Specs 1.0</div></a></li><li><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Technical Wiki<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Contribute<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/contribute/how-to-contribute"><div class="labelWrapperDropdown_Mqbj">How to Contribute</div></a></li><li><a class="dropdown__link" href="/cn/contribute/developer-setup"><div class="labelWrapperDropdown_Mqbj">Developer Setup</div></a></li><li><a class="dropdown__link" href="/cn/contribute/rfc-process"><div class="labelWrapperDropdown_Mqbj">RFC Process</div></a></li><li><a class="dropdown__link" href="/cn/contribute/report-security-issues"><div class="labelWrapperDropdown_Mqbj">Report Security Issues</div></a></li><li><a href="https://issues.apache.org/jira/projects/HUDI/summary" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span class="externalLink_AE3f">Report Issues<svg width="20" height="20" viewBox="0 0 26 26" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M16.965 8.745 9.01 16.7M10.561 8.758l6.403-.013-.013 6.403" stroke="#0DB1F9" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path><rect x="4.5" y="4.5" width="17" height="17" rx="2.5" stroke="#0DB1F9"></rect></svg></span></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj">Community<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/community/get-involved"><div class="labelWrapperDropdown_Mqbj">Get Involved</div></a></li><li><a class="dropdown__link" href="/cn/community/syncs"><div class="labelWrapperDropdown_Mqbj">Community Syncs</div></a></li><li><a class="dropdown__link" href="/cn/community/office_hours"><div class="labelWrapperDropdown_Mqbj">Office Hours</div></a></li><li><a class="dropdown__link" href="/cn/community/team"><div class="labelWrapperDropdown_Mqbj">Team</div></a></li></ul></div><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a><a class="navbar__item navbar__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a><a class="navbar__item navbar__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a><a class="navbar__item navbar__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link downloadLinkDropdownHide_aDP3" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1<svg width="10" height="6" viewBox="0 0 10 6" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M8.5 1.25 5 4.75l-3.5-3.5" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/cn/docs/next/overview"><div class="labelWrapperDropdown_Mqbj">Next</div></a></li><li><a class="dropdown__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">0.14.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.14.0/overview"><div class="labelWrapperDropdown_Mqbj">0.14.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.1/overview"><div class="labelWrapperDropdown_Mqbj">0.13.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.13.0/overview"><div class="labelWrapperDropdown_Mqbj">0.13.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.3/overview"><div class="labelWrapperDropdown_Mqbj">0.12.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.2/overview"><div class="labelWrapperDropdown_Mqbj">0.12.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.1/overview"><div class="labelWrapperDropdown_Mqbj">0.12.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.12.0/overview"><div class="labelWrapperDropdown_Mqbj">0.12.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.1/overview"><div class="labelWrapperDropdown_Mqbj">0.11.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.11.0/overview"><div class="labelWrapperDropdown_Mqbj">0.11.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.1/overview"><div class="labelWrapperDropdown_Mqbj">0.10.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.10.0/overview"><div class="labelWrapperDropdown_Mqbj">0.10.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.9.0/overview"><div class="labelWrapperDropdown_Mqbj">0.9.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.8.0/overview"><div class="labelWrapperDropdown_Mqbj">0.8.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.7.0/overview"><div class="labelWrapperDropdown_Mqbj">0.7.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.6.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.6.0</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.3/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.3</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.2/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.2</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.1/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.1</div></a></li><li><a class="dropdown__link" href="/cn/docs/0.5.0/quick-start-guide"><div class="labelWrapperDropdown_Mqbj">0.5.0</div></a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link downloadLinkDropdownHide_aDP3"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Chinese</span></span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><g clip-path="url(#a)"><path d="M14 6.457a6.842 6.842 0 0 0-7-6.02 6.843 6.843 0 0 0-7 6.02v1.085a6.843 6.843 0 0 0 7 6.02 6.843 6.843 0 0 0 7-6.02V6.457Zm-1.094 0h-2.625a9.92 9.92 0 0 0-.376-2.222 6.65 6.65 0 0 0 1.531-.875 5.25 5.25 0 0 1 1.444 3.097h.026Zm-8.032 0a8.479 8.479 0 0 1 .324-1.872 7.376 7.376 0 0 0 3.63 0c.175.61.284 1.239.325 1.872h-4.28Zm4.305 1.085a8.391 8.391 0 0 1-.324 1.873 7.464 7.464 0 0 0-3.658 0 8.479 8.479 0 0 1-.323-1.873h4.305Zm.35-4.375A10.342 10.342 0 0 0 8.75 1.75c.627.194 1.218.49 1.75.875a5.748 5.748 0 0 1-.998.577l.027-.035ZM7.254 1.54A8.75 8.75 0 0 1 8.46 3.552c-.48.11-.97.165-1.461.167-.492-.001-.982-.057-1.461-.167.308-.722.715-1.4 1.207-2.012h.508ZM4.498 3.202a5.748 5.748 0 0 1-.998-.577 6.029 6.029 0 0 1 1.75-.875c-.294.46-.546.947-.753 1.452Zm-1.873.15c.47.358.984.652 1.531.874A9.625 9.625 0 0 0 3.78 6.45H1.155a5.25 5.25 0 0 1 1.47-3.098ZM1.12 7.541h2.625c.038.753.164 1.5.376 2.223a6.649 6.649 0 0 0-1.531.875 5.25 5.25 0 0 1-1.47-3.098Zm3.377 3.255c.207.506.459.992.753 1.453a6.03 6.03 0 0 1-1.75-.875c.312-.226.646-.419.997-.578Zm2.25 1.663a8.594 8.594 0 0 1-1.208-2.013 6.501 6.501 0 0 1 2.922 0 8.54 8.54 0 0 1-1.207 2.013h-.508Zm2.755-1.663c.367.156.716.35 1.042.578a6.338 6.338 0 0 1-1.75.875c.275-.464.512-.95.708-1.453Zm1.873-.148a6.647 6.647 0 0 0-1.531-.875 9.45 9.45 0 0 0 .376-2.223h2.625a5.25 5.25 0 0 1-1.47 3.098Z" fill="#1C1E21"></path></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h14v14H0z"></path></clipPath></defs></svg></div></a><ul class="dropdown__menu"><li><a href="/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt" target="_self" rel="noopener noreferrer" class="dropdown__link"><div class="labelWrapperDropdown_Mqbj">English</div></a></li><li><a href="/cn/blog/2022/07/11/build-open-lakehouse-using-apache-hudi-and-dbt" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active"><div class="labelWrapperDropdown_Mqbj">Chinese</div></a></li></ul></div><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a><div class="searchBox_fBfG"><div role="button" class="searchButton_g9-U" aria-label="Search"><span class="searchText_RI6l">Search</span><svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg"><circle cx="6.864" cy="6.864" r="5.243" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></circle><path d="m10.51 10.783 2.056 2.05" stroke="#1C1E21" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/cn/"><div class="navbar__logo"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/cn/assets/images/hudi.png" alt="Apache Hudi" class="themedImage_TMUO themedImage--dark_uzRr"></div></a><button type="button" class="clean-btn navbar-sidebar__close"><svg viewBox="0 0 15 15" width="21" height="21"><g stroke="var(--ifm-color-emphasis-600)" stroke-width="1.2"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><div class="navbar-sidebar__items"><div class="navbar-sidebar__item menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/cn/docs/overview"><div class="labelWrapperDropdown_Mqbj">Docs</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Learn</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Contribute</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Community</div></a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/cn/blog"><div class="labelWrapperDropdown_Mqbj">Blog</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/powered-by"><div class="labelWrapperDropdown_Mqbj">Who&#x27;s Using</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/roadmap"><div class="labelWrapperDropdown_Mqbj">Roadmap</div></a></li><li class="menu__list-item"><a class="menu__link" href="/cn/releases/download"><div class="labelWrapperDropdown_Mqbj">Download</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj">Versions</div></a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist"><div class="labelWrapperDropdown_Mqbj"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>Languages</span></span></div></a></li><li class="menu__list-item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="menu__link header-github-link" aria-label="GitHub repository"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="menu__link header-twitter-link" aria-label="Hudi Twitter Handle"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="menu__link header-slack-link" aria-label="Hudi Slack Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="menu__link header-youtube-link" aria-label="Hudi YouTube Channel"><div class="labelWrapperDropdown_Mqbj"></div></a></li><li class="menu__list-item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="menu__link header-linkedin-link" aria-label="Hudi Linkedin Page"><div class="labelWrapperDropdown_Mqbj"></div></a></li></ul></div><div class="navbar-sidebar__item menu"><button type="button" class="clean-btn navbar-sidebar__back">← Back to main menu</button></div></div></div></nav><div class="main-wrapper blog-wrapper blog-post-page"><div class="container margin-vert--lg"><div class="row"><main class="col col--9 col--offset-2" itemscope="" itemtype="http://schema.org/Blog"><article itemprop="blogPost" itemscope="" itemtype="http://schema.org/BlogPosting"><header class="postHeader_Ipb1"><div><h1 class="blogPostTitle_RC3s" itemprop="headline"><h1 class="blogPostPageTitle_bKZt" itemprop="headline">Build Open Lakehouse using Apache Hudi &amp; dbt</h1></h1><div class="blogInfo_1FPd margin-top--sm margin-bottom--sm"><div class="blogPostText_jBA8 row"><time datetime="2022-07-11T00:00:00.000Z" itemprop="datePublished">July 11, 2022</time><div><div><div><a itemprop="url"><span class="blogPostAuthorsList_dlEG" itemprop="name">Vinoth Govindarajan</span></a></div></div></div></div><div class="blogPostData_A2Le">7 min read</div></div></div><ul class="authorTimeTags_oN88 padding--none margin-left--sm tagsWrapperPostPage_VdId"><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/how-to">how-to</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/deltastreamer">deltastreamer</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/incremental-processing">incremental processing</a></li><li class="tag_MgfY tagPostPage_gnvv"><a class="tag_WK-t tagRegular_LXbV" href="/cn/blog/tags/apache-hudi">apache hudi</a></li></ul></header><div class="markdown" itemprop="articleBody"><p>The focus of this blog is to show you how to build an open lakehouse leveraging incremental data processing and performing field-level updates. We are excited to announce that you can now use Apache Hudi + dbt for building open data lakehouses.</p><p><img alt="/assets/images/blog/hudi_dbt_lakehouse.png" src="/cn/assets/images/hudi_dbt_lakehouse-14b1cb2c180ecb95dc78e3b4c44a6301.png"></p><p>Let&#x27;s first clarify a few terminologies used in this blog before we dive into the details.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="what-is-apache-hudi">What is Apache Hudi?<a class="hash-link" href="#what-is-apache-hudi" title="Direct link to heading"></a></h2><p>Apache Hudi brings ACID transactions, record-level updates/deletes, and change streams to data lakehouses.</p><p>Apache Hudi is an open-source data management framework used to simplify incremental data processing and data pipeline development. This framework more efficiently manages business requirements like data lifecycle and improves data quality.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="what-is-dbt">What is dbt?<a class="hash-link" href="#what-is-dbt" title="Direct link to heading"></a></h2><p>dbt (data build tool) is a data transformation tool that enables data analysts and engineers to transform, test, and document data in the cloud data warehouses.</p><p>dbt enables analytics engineers to transform data in their warehouses by simply writing select statements. dbt handles turning these select statements into tables and views.</p><p>dbt does the T in ELT (Extract, Load, Transform) processes – it doesn’t extract or load data, but it’s extremely good at transforming data that’s already loaded into your warehouse.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="what-is-a-lakehouse">What is a Lakehouse?<a class="hash-link" href="#what-is-a-lakehouse" title="Direct link to heading"></a></h2><p>A lakehouse is a new, open architecture that combines the best elements of data lakes and data warehouses. Lakehouses are enabled by a new system design: implementing transaction management and data management features similar to those in a data warehouse directly on top of low-cost cloud storage in open formats. They are what you would get if you had to redesign data warehouses in the modern world, now that cheap and highly reliable storage (in the form of object stores) are available.</p><p>In other words, while data lakes historically have been viewed as a bunch of files added to cloud storage folders, lakehouse tables support transactions, updates, deletes, and in the case of Apache Hudi, even database-like functionality like indexing or change capture.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="how-to-build-an-open-lakehouse">How to build an open lakehouse?<a class="hash-link" href="#how-to-build-an-open-lakehouse" title="Direct link to heading"></a></h2><p>Now, we know what is a lakehouse, so let&#x27;s build one, In order to build an open lakehouse, you need a few components:</p><ul><li>Open table format which supports ACID transactions<ul><li>Apache Hudi (integrated with dbt)</li><li>Delta Lake (proprietary features locked to Databricks runtime)</li><li>Apache Iceberg (currently not integrated with dbt)</li></ul></li><li>Data transformation tool<ul><li>Open source dbt is the de-facto popular choice for transformation layer</li></ul></li><li>Distributed data processing engine<ul><li>Apache Spark is the de-facto popular choice for compute engine</li></ul></li><li>Cloud Storage<ul><li>You can choose any of the cost-effective cloud stores or HDFS</li></ul></li><li>Bring your favorite query engine</li></ul><p>To build the lakehouse you need a way to extract and load the data into Hudi table format and then transform in-place using dbt.</p><p>DBT supports Hudi out of the box with the <a href="https://github.com/dbt-labs/dbt-spark" target="_blank" rel="noopener noreferrer">dbt-spark</a> adapter package. When creating modeled datasets using dbt you can choose Hudi as the format for your tables.</p><p>You can follow the instructions on this <a href="https://github.com/apache/hudi/blob/master/hudi-examples/hudi-examples-dbt/README.md" target="_blank" rel="noopener noreferrer">page</a> to learn how to install and configure dbt+hudi.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="step-1-how-to-extract--load-the-raw-data-datasets">Step 1: How to extract &amp; load the raw data datasets?<a class="hash-link" href="#step-1-how-to-extract--load-the-raw-data-datasets" title="Direct link to heading"></a></h2><p>This is the first step in building your data lake and there are many choices here to load the data into our open lakehouse. I’m going to go with one of the Hudi’s native tools called Delta Streamer since all the ingestion features are pre-built and battle-tested in production at scale.</p><p>Hudi’s <a href="https://hudi.apache.org/docs/hoodie_streaming_ingestion" target="_blank" rel="noopener noreferrer">DeltaStreamer</a> does the EL in ELT (Extract, Load, Transform) processes – it’s extremely good at extracting, loading, and optionally <a href="https://hudi.apache.org/docs/hoodie_streaming_ingestion#transformers" target="_blank" rel="noopener noreferrer">transforming data</a> that’s already loaded into your lakehouse.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="step-2-how-to-configure-hudi-with-the-dbt-project">Step 2: How to configure hudi with the dbt project?<a class="hash-link" href="#step-2-how-to-configure-hudi-with-the-dbt-project" title="Direct link to heading"></a></h2><p>To use the Hudi with your dbt project, all you need to do is choose the file format as Hudi. The file format config can either be specified in specific models, or for all the models in your dbt_project.yml file:</p><div class="codeBlockContainer_J+bg language-yml theme-code-block"><div style="color:#F8F8F2;background-color:#282A36" class="codeBlockTitle_oQzk">dbt_project.yml</div><div class="codeBlockContent_csEI yml"><pre tabindex="0" class="prism-code language-yml codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token key atrule">models</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token key atrule">+file_format</span><span class="token punctuation" style="color:rgb(248, 248, 242)">:</span><span class="token plain"> hudi</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>or:</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div style="color:#F8F8F2;background-color:#282A36" class="codeBlockTitle_oQzk">model/my_model.sql</div><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">{{ config</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> materialized </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;incremental&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> incremental_strategy </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;merge&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> file_format </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hudi&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> unique_key </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> }}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>After choosing hudi as the file_format you can create materialized datasets using dbt, which offers additional benefits that are unique to the Hudi table format such as field-level upserts/deletes.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="step-3-how-to-read-the-raw-data-incrementally">Step 3: How to read the raw data incrementally?<a class="hash-link" href="#step-3-how-to-read-the-raw-data-incrementally" title="Direct link to heading"></a></h2><p>Before we learn how to build incremental materialization, let’s quickly learn, What are materializations in dbt? Materializations are strategies for persisting dbt models in a lakehouse. There are four types of materializations built into dbt. They are:</p><ul><li>table</li><li>view</li><li>incremental</li><li>ephemeral</li></ul><p>Among all the materialization types, only incremental models allow dbt to insert or update records into a table since the last time that dbt was run, which unlocks the powers of Hudi, we will dive into the details.</p><p>To use incremental models, you need to perform these two activities:</p><ol><li>Tell dbt how to filter the rows on the incremental executions</li><li>Define the uniqueness constraint of the model (required when using &gt;= Hudi 0.10.1 version)</li></ol><h3 class="anchor anchorWithStickyNavbar_y2LR" id="how-to-apply-filters-on-an-incremental-run">How to apply filters on an incremental run?<a class="hash-link" href="#how-to-apply-filters-on-an-incremental-run" title="Direct link to heading"></a></h3><p>dbt provides you a macro <code>is_incremental()</code> which is very useful to define the filters exclusively for incremental materializations.</p><p>Often, you&#x27;ll want to filter for &quot;new&quot; rows, as in, rows that have been created since the last time dbt ran this model. The best way to find the timestamp of the most recent run of this model is by checking the most recent timestamp in your target table. dbt makes it easy to query your target table by using the &quot;<a href="https://docs.getdbt.com/reference/dbt-jinja-functions/this" target="_blank" rel="noopener noreferrer">{{ this }}</a>&quot; variable.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div style="color:#F8F8F2;background-color:#282A36" class="codeBlockTitle_oQzk">models/my_model.sql</div><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">{{</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> config</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> materialized</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;incremental&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> file_format</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hudi&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">}}</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token operator">*</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> raw_app_data</span><span class="token punctuation" style="color:rgb(248, 248, 242)">.</span><span class="token plain">events</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">{</span><span class="token operator">%</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">if</span><span class="token plain"> is_incremental</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token operator">%</span><span class="token plain">}</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token comment" style="color:rgb(98, 114, 164)">-- this filter will only be applied on an incremental run</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">where</span><span class="token plain"> event_time </span><span class="token operator">&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">select</span><span class="token plain"> </span><span class="token function" style="color:rgb(80, 250, 123)">max</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain">event_time</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(189, 147, 249);font-style:italic">from</span><span class="token plain"> {{ this }}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">{</span><span class="token operator">%</span><span class="token plain"> endif </span><span class="token operator">%</span><span class="token plain">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="how-to-define-the-uniqueness-constraint">How to define the uniqueness constraint?<a class="hash-link" href="#how-to-define-the-uniqueness-constraint" title="Direct link to heading"></a></h3><p>A unique_key is the primary key of the dataset, which determines whether a record has new values and should be updated/deleted, or inserted.</p><p>You can define the unique_key in the configuration block at the top of your model. This unique_key will act as the primaryKey (hoodie.datasource.write.recordkey.field) on the hudi table.</p><h2 class="anchor anchorWithStickyNavbar_y2LR" id="step-4-how-to-use-the-upsert-feature-while-writing-datasets">Step 4: How to use the upsert feature while writing datasets?<a class="hash-link" href="#step-4-how-to-use-the-upsert-feature-while-writing-datasets" title="Direct link to heading"></a></h2><p>dbt offers multiple load strategies when loading the transformed datasets, such as:</p><ul><li>append (default)</li><li>insert_overwrite (optional)</li><li>merge (optional, Only available for Hudi and Delta formats)</li></ul><p>By default dbt uses the append strategy, which may cause duplicate rows when you execute dbt run command multiple times on the same payload.</p><p>When you choose the insert_overwrite strategy, dbt will overwrite the entire partition or full table load for every dbt run, which causes unnecessary overheads and is very expensive.</p><p>In addition to all the existing strategies to load the data, with hudi you can use the exclusive merge strategy when using incremental materialization. Using the merge strategy you can perform field-level updates/deletes on your data lakehouse which is performant and cost-efficient. As a result, you will get access to fresher data and accelerated insights.</p><h3 class="anchor anchorWithStickyNavbar_y2LR" id="how-to-perform-field-level-updates">How to perform field-level updates?<a class="hash-link" href="#how-to-perform-field-level-updates" title="Direct link to heading"></a></h3><p>If you are using the merge strategy and have specified a unique_key, by default, dbt will entirely overwrite matched rows with new values.</p><p>Since Apache Spark adapter supports the merge strategy, you may optionally pass a list of column names to a <code>merge_update_columns</code> config. In that case, dbt will update only the columns specified by the config, and keep the previous values of other columns.</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div style="color:#F8F8F2;background-color:#282A36" class="codeBlockTitle_oQzk">models/my_model.sql</div><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">{{ config</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> materialized </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;incremental&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> incremental_strategy </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;merge&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> file_format </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hudi&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> unique_key </span><span class="token operator">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> merge_update_columns </span><span class="token operator">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;msg&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;updated_ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"> }}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><h3 class="anchor anchorWithStickyNavbar_y2LR" id="how-to-configure-additional-hoodie-custom-configs">How to configure additional hoodie custom configs?<a class="hash-link" href="#how-to-configure-additional-hoodie-custom-configs" title="Direct link to heading"></a></h3><p>When you want to specify additional hudi configs, you can do that with the options config:</p><div class="codeBlockContainer_J+bg language-sql theme-code-block"><div style="color:#F8F8F2;background-color:#282A36" class="codeBlockTitle_oQzk">models/my_model.sql</div><div class="codeBlockContent_csEI sql"><pre tabindex="0" class="prism-code language-sql codeBlock_rtdJ thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_1zSZ"><span class="token-line" style="color:#F8F8F2"><span class="token plain">{{ config</span><span class="token punctuation" style="color:rgb(248, 248, 242)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> materialized</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;incremental&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> file_format</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;hudi&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> incremental_strategy</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;merge&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> options</span><span class="token operator">=</span><span class="token plain">{</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;type&#x27;</span><span class="token plain">: </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;mor&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;primaryKey&#x27;</span><span class="token plain">: </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;precombineKey&#x27;</span><span class="token plain">: </span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;ts&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> }</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> unique_key</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;id&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> partition_by</span><span class="token operator">=</span><span class="token string" style="color:rgb(255, 121, 198)">&#x27;datestr&#x27;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> pre_hook</span><span class="token operator">=</span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token string" style="color:rgb(255, 121, 198)">&quot;set spark.sql.datetime.java8API.enabled=false;&quot;</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">}}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_M3SB clean-btn">Copy</button></div></div><p>Hope you understood the benefits of using Apache Hudi with dbt to build your next open lakehouse, good luck!</p></div><footer0 class="row docusaurus-mt-lg blogPostDetailsFull_2lop"><div class="col margin-top--sm"><a href="https://github.com/apache/hudi/edit/asf-site/website/blog/blog/2022-07-11-build-open-lakehouse-using-apache-hudi-and-dbt.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></footer0></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Blog post page navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/cn/blog/2022/08/09/How-NerdWallet-uses-AWS-and-Apache-Hudi-to-build-a-serverless-real-time-analytics-platform"><div class="pagination-nav__sublabel">Newer Post</div><div class="pagination-nav__label">« <!-- -->How NerdWallet uses AWS and Apache Hudi to build a serverless, real-time analytics platform</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/cn/blog/2022/06/29/Apache-Hudi-vs-Delta-Lake-transparent-tpc-ds-lakehouse-performance-benchmarks"><div class="pagination-nav__sublabel">Older Post</div><div class="pagination-nav__label">Apache Hudi vs Delta Lake - Transparent TPC-DS Lakehouse Performance Benchmarks<!-- --> »</div></a></div></nav></main><div class="col col--2"><div class="tableOfContents_vrFS thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#what-is-apache-hudi" class="table-of-contents__link toc-highlight">What is Apache Hudi?</a></li><li><a href="#what-is-dbt" class="table-of-contents__link toc-highlight">What is dbt?</a></li><li><a href="#what-is-a-lakehouse" class="table-of-contents__link toc-highlight">What is a Lakehouse?</a></li><li><a href="#how-to-build-an-open-lakehouse" class="table-of-contents__link toc-highlight">How to build an open lakehouse?</a></li><li><a href="#step-1-how-to-extract--load-the-raw-data-datasets" class="table-of-contents__link toc-highlight">Step 1: How to extract &amp; load the raw data datasets?</a></li><li><a href="#step-2-how-to-configure-hudi-with-the-dbt-project" class="table-of-contents__link toc-highlight">Step 2: How to configure hudi with the dbt project?</a></li><li><a href="#step-3-how-to-read-the-raw-data-incrementally" class="table-of-contents__link toc-highlight">Step 3: How to read the raw data incrementally?</a><ul><li><a href="#how-to-apply-filters-on-an-incremental-run" class="table-of-contents__link toc-highlight">How to apply filters on an incremental run?</a></li><li><a href="#how-to-define-the-uniqueness-constraint" class="table-of-contents__link toc-highlight">How to define the uniqueness constraint?</a></li></ul></li><li><a href="#step-4-how-to-use-the-upsert-feature-while-writing-datasets" class="table-of-contents__link toc-highlight">Step 4: How to use the upsert feature while writing datasets?</a><ul><li><a href="#how-to-perform-field-level-updates" class="table-of-contents__link toc-highlight">How to perform field-level updates?</a></li><li><a href="#how-to-configure-additional-hoodie-custom-configs" class="table-of-contents__link toc-highlight">How to configure additional hoodie custom configs?</a></li></ul></li></ul></div></div></div></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/blog/2021/07/21/streaming-data-lake-platform">Our Vision</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/concepts">Concepts</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/community/team">Team</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/release-0.14.1">Releases</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/releases/download">Download</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/powered-by">Who&#x27;s Using</a></li></ul></div><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/quick-start-guide">Quick Start</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/docker_demo">Docker Demo</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/blog">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/talks">Talks</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/videos">Video Guides</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/faq">FAQ</a></li><li class="footer__item"><a href="https://cwiki.apache.org/confluence/display/HUDI" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Technical Wiki<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div><div class="col footer__col"><div class="footer__title">Hudi On Cloud</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/docs/s3_hoodie">AWS</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/gcs_hoodie">Google Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/oss_hoodie">Alibaba Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/azure_hoodie">Microsoft Azure</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/cos_hoodie">Tencent Cloud</a></li><li class="footer__item"><a class="footer__link-item" href="/cn/docs/ibm_cos_hoodie">IBM Cloud</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/cn/community/get-involved">Get Involved</a></li><li class="footer__item"><a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://github.com/apache/hudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCs7AhE0BWaEPZSChrBR-Muw" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apache-hudi/?viewAsMember=true" target="_blank" rel="noopener noreferrer" class="footer__link-item"><span>Linkedin<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="footer__item"><a href="mailto:dev-subscribe@hudi.apache.org?Subject=SubscribeToHudi" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing List</a></li></ul></div><div class="col footer__col"><div class="footer__title">Apache</div><ul class="footer__items"><li class="footer__item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="footer__link-item">Events</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks</a></li><li class="footer__item"><a href="https://www.apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License</a></li><li class="footer__item"><a href="https://www.apache.org/security" target="_blank" rel="noopener noreferrer" class="footer__link-item">Security</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship</a></li><li class="footer__item"><a href="https://www.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://hudi.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_SRtH"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--light_4Vu1 footer__logo"><img src="/cn/assets/images/logo-big.png" alt="Apache Hudi™" class="themedImage_TMUO themedImage--dark_uzRr footer__logo"></a></div><div class="footer__copyright">Copyright © 2021 <a href="https://apache.org">The Apache Software Foundation</a>, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, Version 2.0</a>.
Hudi, Apache and the Apache feather logo are trademarks of The Apache Software Foundation. <a href="/docs/privacy">Privacy Policy</a></div></div></div></footer></div>
<script src="/cn/assets/js/runtime~main.0acdb754.js"></script>
<script src="/cn/assets/js/main.6d6aa24f.js"></script>
</body>
</html>