| <!doctype html> |
| <html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-tutorials/tutorial-batch-hadoop" data-has-hydrated="false"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="generator" content="Docusaurus v3.7.0"> |
| <title data-rh="true">Load batch data using Apache Hadoop | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/latest/tutorials/tutorial-batch-hadoop"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Load batch data using Apache Hadoop | Apache® Druid"><meta data-rh="true" name="description" content="This tutorial shows you how to load data files into Apache Druid using a remote Hadoop cluster."><meta data-rh="true" property="og:description" content="This tutorial shows you how to load data files into Apache Druid using a remote Hadoop cluster."><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/latest/tutorials/tutorial-batch-hadoop"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/tutorials/tutorial-batch-hadoop" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/tutorials/tutorial-batch-hadoop" hreflang="x-default"><link rel="stylesheet" href="/css/all.css"> |
| <script src="/js/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.f3140859.css"> |
| <script src="/assets/js/runtime~main.12e85305.js" defer="defer"></script> |
| <script src="/assets/js/main.2e3800ac.js" defer="defer"></script> |
| </head> |
| <body class="navigation-with-keyboard"> |
| <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const n=new URLSearchParams(window.location.search).entries();for(var[t,e]of n)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/latest/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" 
class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a 
href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="navbarSearchContainer_Bca1"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input id="search_input_react" type="search" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0"><div class="docsWrapper_hBAB"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docRoot_UBD9"><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class="sidebarViewport_aRkj"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/latest/design/">Introduction to Apache Druid</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" href="/docs/latest/tutorials/">Getting started</a><button aria-label="Collapse sidebar category 'Getting started'" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul 
style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/operations/single-server">Single server deployment</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/tutorials/docker">Run with Docker</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/tutorials/cluster">Clustered deployment</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" tabindex="0" href="/docs/latest/tutorials/tutorial-msq-extern">Ingestion tutorials</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" tabindex="0" href="/docs/latest/tutorials/tutorial-retention">Data management tutorials</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" tabindex="0" href="/docs/latest/tutorials/tutorial-query">Querying tutorials</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/tutorials/tutorial-sketches-theta">Theta sketches tutorial</a></li><li 
class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/tutorials/tutorial-jdbc">JDBC connector tutorial</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role="button" aria-expanded="true" tabindex="0" href="/docs/latest/tutorials/tutorial-batch-hadoop">Hadoop tutorials</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/latest/tutorials/tutorial-batch-hadoop">Load from Apache Hadoop</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/tutorials/tutorial-kerberos-hadoop">Kerberized HDFS deep storage</a></li></ul></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/latest/design/architecture">Design</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/latest/ingestion/">Ingestion</a><button aria-label="Expand sidebar category 'Ingestion'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/latest/data-management/">Data management</a><button aria-label="Expand sidebar category 'Data management'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/latest/querying/sql">Querying</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/latest/api-reference/">API reference</a><button aria-label="Expand sidebar category 'API reference'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/latest/configuration/">Configuration</a><button aria-label="Expand sidebar category 'Configuration'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/latest/api-reference/automatic-compaction-api">Operations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" 
href="/docs/latest/development/overview">Development</a><button aria-label="Expand sidebar category 'Development'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/latest/release-info/release-notes">Release info</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/latest/misc/papers-and-talks">Papers</a></li></ul></nav></div></div></aside><main class="docMainContainer_TBSr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item"><a class="breadcrumbs__link" itemprop="item" href="/docs/latest/tutorials/"><span itemprop="name">Getting started</span></a><meta itemprop="position" content="1"></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Hadoop tutorials</span><meta itemprop="position" content="2"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item 
breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Load from Apache Hadoop</span><meta itemprop="position" content="3"></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Load batch data using Apache Hadoop</h1></header><div class="theme-admonition theme-admonition-caution admonition_xJq3 alert alert--warning"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 16 16"><path fill-rule="evenodd" d="M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"></path></svg></span>Deprecated</div><div class="admonitionContent_BuS1"><p>Hadoop-based ingestion is deprecated. We recommend one of Druid's other supported ingestion methods, such as <a href="/docs/latest/multi-stage-query/">SQL-based ingestion</a> or <a href="/docs/latest/development/extensions-core/k8s-jobs">MiddleManager-less ingestion using Kubernetes</a>.</p><p>You must now explicitly opt in to using the deprecated <code>index_hadoop</code> task type. To opt in, set <code>druid.indexer.task.allowHadoopTaskExecution</code> to <code>true</code> in your <code>common.runtime.properties</code> file. For more information, see <a href="https://github.com/apache/druid/pull/18239" target="_blank" rel="noopener noreferrer">#18239</a>.</p></div></div> |
| <p>This tutorial shows you how to load data files into Apache Druid using a remote Hadoop cluster.</p> |
| <p>For this tutorial, we'll assume that you've already completed the previous |
| <a href="/docs/latest/tutorials/tutorial-batch">batch ingestion tutorial</a> using Druid's native batch ingestion system and are using the |
| automatic single-machine configuration as described in the <a href="/docs/latest/operations/single-server">quickstart</a>.</p> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="install-docker">Install Docker<a href="#install-docker" class="hash-link" aria-label="Direct link to Install Docker" title="Direct link to Install Docker"></a></h2> |
| <p>This tutorial requires <a href="https://docs.docker.com/install/" target="_blank" rel="noopener noreferrer">Docker</a> to be installed on the tutorial machine.</p> |
| <p>Once the Docker install is complete, please proceed to the next steps in the tutorial.</p> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="build-the-hadoop-docker-image">Build the Hadoop docker image<a href="#build-the-hadoop-docker-image" class="hash-link" aria-label="Direct link to Build the Hadoop docker image" title="Direct link to Build the Hadoop docker image"></a></h2> |
| <p>For this tutorial, we've provided a Dockerfile for a Hadoop 3.3.6 cluster, which we'll use to run the batch indexing task.</p> |
| <p>This Dockerfile and related files are located at <code>quickstart/tutorial/hadoop/docker</code>.</p> |
| <p>From the <code>apache-druid-34.0.0</code> package root, run the following commands to build a Docker image named "druid-hadoop-demo" with version tag "3.3.6":</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">cd quickstart/tutorial/hadoop/docker</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">docker build -t druid-hadoop-demo:3.3.6 .</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <p>This will start building the Hadoop image. Once the image build is done, you should see the message <code>Successfully tagged druid-hadoop-demo:3.3.6</code> printed to the console.</p> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="setup-the-hadoop-docker-cluster">Setup the Hadoop docker cluster<a href="#setup-the-hadoop-docker-cluster" class="hash-link" aria-label="Direct link to Setup the Hadoop docker cluster" title="Direct link to Setup the Hadoop docker cluster"></a></h2> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="create-temporary-shared-directory">Create temporary shared directory<a href="#create-temporary-shared-directory" class="hash-link" aria-label="Direct link to Create temporary shared directory" title="Direct link to Create temporary shared directory"></a></h3> |
| <p>We'll need a shared folder between the host and the Hadoop container for transferring some files.</p> |
| <p>Let's create a folder under <code>/tmp</code>; we will use it later when starting the Hadoop container:</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">mkdir -p /tmp/shared</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="configure-etchosts">Configure /etc/hosts<a href="#configure-etchosts" class="hash-link" aria-label="Direct link to Configure /etc/hosts" title="Direct link to Configure /etc/hosts"></a></h3> |
| <p>On the host machine, add the following entry to <code>/etc/hosts</code>:</p> |
| <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">127.0.0.1 druid-hadoop-demo</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="start-the-hadoop-container">Start the Hadoop container<a href="#start-the-hadoop-container" class="hash-link" aria-label="Direct link to Start the Hadoop container" title="Direct link to Start the Hadoop container"></a></h3> |
| <p>Once the <code>/tmp/shared</code> folder has been created and the <code>/etc/hosts</code> entry has been added, run the following command to start the Hadoop container.</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">docker run -it -h druid-hadoop-demo --name druid-hadoop-demo -p 2049:2049 -p 2122:2122 -p 8020-8042:8020-8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 9820:9820 -p 9860-9880:9860-9880 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v /tmp/shared:/shared druid-hadoop-demo:3.3.6 /etc/bootstrap.sh -bash</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <p>Once the container is started, your terminal will attach to a bash shell running inside the container:</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting namenodes on [druid-hadoop-demo]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting datanodes</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting secondary namenodes [druid-hadoop-demo]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: YARN_CONF_DIR has been replaced by HADOOP_CONF_DIR. Using value of YARN_CONF_DIR.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: YARN_OPTS has been replaced by HADOOP_OPTS. Using value of YARN_OPTS.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting resourcemanager</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: YARN_OPTS has been replaced by HADOOP_OPTS. Using value of YARN_OPTS.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting nodemanagers</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: YARN_OPTS has been replaced by HADOOP_OPTS. Using value of YARN_OPTS.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">localhost: WARNING: YARN_OPTS has been replaced by HADOOP_OPTS. Using value of YARN_OPTS.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: YARN_CONF_DIR has been replaced by HADOOP_CONF_DIR. 
Using value of YARN_CONF_DIR.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: YARN_OPTS has been replaced by HADOOP_OPTS. Using value of YARN_OPTS.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: Use of this script to start the MR JobHistory daemon is deprecated.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">WARNING: Attempting to execute replacement "mapred --daemon start" instead.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> * initialize hdfs for first run</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">bash-4.1#</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <p>The <code>Unable to load native-hadoop library for your platform... using builtin-java classes where applicable</code> warning messages can be safely ignored.</p> |
| <h4 class="anchor anchorWithStickyNavbar_LWe7" id="accessing-the-hadoop-container-shell">Accessing the Hadoop container shell<a href="#accessing-the-hadoop-container-shell" class="hash-link" aria-label="Direct link to Accessing the Hadoop container shell" title="Direct link to Accessing the Hadoop container shell"></a></h4> |
| <p>To open another shell to the Hadoop container, run the following command:</p> |
| <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">docker exec -it druid-hadoop-demo bash</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="test-data">Test data<a href="#test-data" class="hash-link" aria-label="Direct link to Test data" title="Direct link to Test data"></a></h3> |
| <p>The startup script <code>bootstrap.sh</code>:</p> |
| <ul> |
| <li>creates the necessary directories</li> |
| <li>loads an input file to HDFS</li> |
| <li>places the Hadoop configuration into the shared volume as <code>hadoop-conf.tgz</code></li> |
| </ul> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="configure-druid-to-use-hadoop">Configure Druid to use Hadoop<a href="#configure-druid-to-use-hadoop" class="hash-link" aria-label="Direct link to Configure Druid to use Hadoop" title="Direct link to Configure Druid to use Hadoop"></a></h2> |
| <p>Some additional steps are needed to configure the Druid cluster for Hadoop batch indexing.</p> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="provide-hadoop-configuration-for-druid">Provide Hadoop configuration for Druid<a href="#provide-hadoop-configuration-for-druid" class="hash-link" aria-label="Direct link to Provide Hadoop configuration for Druid" title="Direct link to Provide Hadoop configuration for Druid"></a></h3> |
| <p>From the Hadoop container's shell, run the following command to copy the Hadoop .xml configuration files to the shared folder:</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">cp /usr/local/hadoop/etc/hadoop/*.xml /shared/hadoop_xml</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <p>From the host machine, run the following, where <code>PATH_TO_DRUID</code> is replaced by the path to the Druid package.</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">cd $PATH_TO_DRUID</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">mkdir -p conf/druid/single-server/micro-quickstart/_common/hadoop-xml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xzf /tmp/shared/hadoop-conf.tgz -C conf/druid/single-server/micro-quickstart/_common/hadoop-xml</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="update-druid-segment-and-log-storage">Update Druid segment and log storage<a href="#update-druid-segment-and-log-storage" class="hash-link" aria-label="Direct link to Update Druid segment and log storage" title="Direct link to Update Druid segment and log storage"></a></h3> |
| <p>In your favorite text editor, open <code>conf/druid/single-server/micro-quickstart/_common/common.runtime.properties</code>, and make the following edits:</p> |
| <h4 class="anchor anchorWithStickyNavbar_LWe7" id="disable-local-deep-storage-and-enable-hdfs-deep-storage">Disable local deep storage and enable HDFS deep storage<a href="#disable-local-deep-storage-and-enable-hdfs-deep-storage" class="hash-link" aria-label="Direct link to Disable local deep storage and enable HDFS deep storage" title="Direct link to Disable local deep storage and enable HDFS deep storage"></a></h4> |
| <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Deep storage</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.type=local</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.storageDirectory=var/druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.storageDirectory=/druid/segments</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 
2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h4 class="anchor anchorWithStickyNavbar_LWe7" id="disable-local-log-storage-and-enable-hdfs-log-storage">Disable local log storage and enable HDFS log storage<a href="#disable-local-log-storage-and-enable-hdfs-log-storage" class="hash-link" aria-label="Direct link to Disable local log storage and enable HDFS log storage" title="Direct link to Disable local log storage and enable HDFS log storage"></a></h4> |
| <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Indexing service logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.type=file</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.directory=var/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.directory=/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" 
class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h3 class="anchor anchorWithStickyNavbar_LWe7" id="restart-druid-cluster">Restart Druid cluster<a href="#restart-druid-cluster" class="hash-link" aria-label="Direct link to Restart Druid cluster" title="Direct link to Restart Druid cluster"></a></h3> |
| <p>Once the Hadoop .xml files have been copied to the Druid cluster and the segment/log storage configuration has been updated to use HDFS, the Druid cluster needs to be restarted for the new configurations to take effect.</p> |
| <p>If the cluster is still running, press CTRL-C to terminate it, and then start it again with:</p> |
| <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">bin/start-druid -c conf/druid/single-server/micro-quickstart</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="load-batch-data">Load batch data<a href="#load-batch-data" class="hash-link" aria-label="Direct link to Load batch data" title="Direct link to Load batch data"></a></h2> |
| <p>We've included a sample of Wikipedia edits from September 12, 2015 to get you started.</p> |
| <p>To load this data into Druid, you can submit an <em>ingestion task</em> pointing to the file. We've included |
| a task that loads the <code>wikiticker-2015-09-12-sampled.json.gz</code> file included in the archive.</p> |
| <p>Let's submit the <code>wikipedia-index-hadoop3.json</code> task:</p> |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">bin/post-index-task --file quickstart/tutorial/wikipedia-index-hadoop3.json --url http://localhost:8081</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="querying-your-data">Querying your data<a href="#querying-your-data" class="hash-link" aria-label="Direct link to Querying your data" title="Direct link to Querying your data"></a></h2> |
| <p>After the data load is complete, please follow the <a href="/docs/latest/tutorials/tutorial-query">query tutorial</a> to run some example queries on the newly loaded data.</p> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="cleanup">Cleanup<a href="#cleanup" class="hash-link" aria-label="Direct link to Cleanup" title="Direct link to Cleanup"></a></h2> |
| <p>This tutorial is only meant to be used together with the <a href="/docs/latest/tutorials/tutorial-query">query tutorial</a>.</p> |
| <p>If you wish to go through any of the other tutorials, you will need to:</p> |
| <ul> |
| <li>Shut down the cluster and reset the cluster state by removing the contents of the <code>var</code> directory under the Druid package.</li> |
| <li>Revert the deep storage and task log storage config back to local types in <code>conf/druid/single-server/micro-quickstart/_common/common.runtime.properties</code>.</li> |
| <li>Restart the cluster.</li> |
| </ul> |
| <p>This is necessary because the other ingestion tutorials will write to the same "wikipedia" datasource, and later tutorials expect the cluster to use local deep storage.</p> |
| <p>Example reverted config:</p> |
| <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Deep storage</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.type=local</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.storageDirectory=var/druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.storageDirectory=/druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Indexing service logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.type=file</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.directory=var/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.directory=/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> |
| <h2 class="anchor anchorWithStickyNavbar_LWe7" id="further-reading">Further reading<a href="#further-reading" class="hash-link" aria-label="Direct link to Further reading" title="Direct link to Further reading"></a></h2> |
| <p>For more information on loading batch data with Hadoop, please see <a href="/docs/latest/ingestion/hadoop">the Hadoop batch ingestion documentation</a>.</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/latest/tutorials/tutorial-jdbc"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">JDBC connector tutorial</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/latest/tutorials/tutorial-kerberos-hadoop"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Kerberized HDFS deep storage</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#install-docker" class="table-of-contents__link toc-highlight">Install Docker</a></li><li><a href="#build-the-hadoop-docker-image" class="table-of-contents__link toc-highlight">Build the Hadoop docker image</a></li><li><a href="#setup-the-hadoop-docker-cluster" class="table-of-contents__link toc-highlight">Setup the Hadoop docker cluster</a><ul><li><a href="#create-temporary-shared-directory" class="table-of-contents__link toc-highlight">Create temporary shared directory</a></li><li><a href="#configure-etchosts" class="table-of-contents__link toc-highlight">Configure /etc/hosts</a></li><li><a href="#start-the-hadoop-container" class="table-of-contents__link toc-highlight">Start the Hadoop container</a></li><li><a href="#test-data" class="table-of-contents__link toc-highlight">Test data</a></li></ul></li><li><a href="#configure-druid-to-use-hadoop" class="table-of-contents__link toc-highlight">Configure Druid to use Hadoop</a><ul><li><a href="#provide-hadoop-configuration-for-druid" class="table-of-contents__link toc-highlight">Provide Hadoop configuration for Druid</a></li><li><a 
href="#update-druid-segment-and-log-storage" class="table-of-contents__link toc-highlight">Update Druid segment and log storage</a></li><li><a href="#restart-druid-cluster" class="table-of-contents__link toc-highlight">Restart Druid cluster</a></li></ul></li><li><a href="#load-batch-data" class="table-of-contents__link toc-highlight">Load batch data</a></li><li><a href="#querying-your-data" class="table-of-contents__link toc-highlight">Querying your data</a></li><li><a href="#cleanup" class="table-of-contents__link toc-highlight">Cleanup</a></li><li><a href="#further-reading" class="table-of-contents__link toc-highlight">Further reading</a></li></ul></div></div></div></div></main></div></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="footer__logo themedComponent_mlkZ themedComponent--light_NVdE"><img src="/img/favicon.png" class="footer__logo themedComponent_mlkZ themedComponent--dark_xIcU"></div><div class="footer__copyright">Copyright © 2025 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div> |
| </body> |
| </html> |