blob: 1accc1887841416f99ea17dfa935d758eb949b46 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-tutorials/tutorial-batch-hadoop">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.1">
<title data-rh="true">Load batch data using Apache Hadoop | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/27.0.0/tutorials/tutorial-batch-hadoop"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Load batch data using Apache Hadoop | Apache® Druid"><meta data-rh="true" name="description" content="This tutorial shows you how to load data files into Apache Druid using a remote Hadoop cluster."><meta data-rh="true" property="og:description" content="This tutorial shows you how to load data files into Apache Druid using a remote Hadoop cluster."><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/27.0.0/tutorials/tutorial-batch-hadoop"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/27.0.0/tutorials/tutorial-batch-hadoop" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/27.0.0/tutorials/tutorial-batch-hadoop" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>
// Analytics bootstrap: make sure the shared command queue exists, then
// enqueue the two start-up commands. gtag() only records its `arguments`
// object on the queue; presumably the gtag.js library drains it once loaded.
window.dataLayer = window.dataLayer || [];

function gtag() {
  dataLayer.push(arguments);
}

gtag("js", new Date());
gtag("config", "UA-131010415-1", {});
</script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.f80751b3.css">
<link rel="preload" href="/assets/js/runtime~main.5371e784.js" as="script">
<link rel="preload" href="/assets/js/main.832012d1.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>
// Inline theme bootstrap: stamps the chosen theme on the root element as
// its "data-theme" attribute.
// Lookup order: "docusaurus-theme" query parameter, then the "theme" entry
// in localStorage, then the "light" fallback.
(function () {
  // Theme requested through the URL; null when the lookup throws.
  function themeFromQuery() {
    try {
      return new URLSearchParams(window.location.search).get("docusaurus-theme");
    } catch (err) {
      return null;
    }
  }

  // Theme remembered in localStorage; null when storage access throws.
  function themeFromStorage() {
    try {
      return localStorage.getItem("theme");
    } catch (err) {
      return null;
    }
  }

  var chosen = themeFromQuery() || themeFromStorage();
  if (chosen === null) {
    chosen = "light";
  }
  document.documentElement.setAttribute("data-theme", chosen);
})();
</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/27.0.0/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" 
height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" 
tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/design/">Getting started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/27.0.0/tutorials/tutorial-msq-extern">Tutorials</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-msq-extern">Load files using SQL</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-kafka">Load from Apache Kafka</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link 
menu__link--active" aria-current="page" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-batch-hadoop">Load from Apache Hadoop</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-query">Query data</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-rollup">Aggregate data with rollup</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-sketches-theta">Theta sketches</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-retention">Configure data retention</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-update-data">Update existing data</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-compaction">Compact segments</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-delete-data">Deleting data</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-ingestion-spec">Write an ingestion spec</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-transform-spec">Transform input data</a></li><li 
class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-msq-convert-spec">Convert ingestion spec to SQL</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/docker">Run with Docker</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-kerberos-hadoop">Kerberized HDFS deep storage</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-sql-query-view">Get to know Query view</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-unnest-arrays">Unnesting arrays</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-query-deep-storage">Query from deep storage</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-jupyter-index">Jupyter Notebook tutorials</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-jupyter-docker">Docker for tutorials</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/tutorials/tutorial-jdbc">JDBC connector</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item 
menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/design/architecture">Design</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/ingestion/">Ingestion</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/data-management/">Data management</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/querying/sql">Querying</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/api-reference/">API reference</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/configuration/">Configuration</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a 
class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/operations/web-console">Operations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/development/overview">Development</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/misc/papers-and-talks">Misc</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Tutorials</span><meta itemprop="position" content="1"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Load from Apache Hadoop</span><meta itemprop="position" content="2"></li></ul></nav><div class="tocCollapsible_ETCw 
theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Load batch data using Apache Hadoop</h1></header><p>This tutorial shows you how to load data files into Apache Druid using a remote Hadoop cluster.</p><p>For this tutorial, we&#x27;ll assume that you&#x27;ve already completed the previous
<a href="/docs/27.0.0/tutorials/tutorial-batch">batch ingestion tutorial</a> using Druid&#x27;s native batch ingestion system and are using the
automatic single-machine configuration as described in the <a href="/docs/27.0.0/operations/single-server">quickstart</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="install-docker">Install Docker<a href="#install-docker" class="hash-link" aria-label="Direct link to Install Docker" title="Direct link to Install Docker"></a></h2><p>This tutorial requires <a href="https://docs.docker.com/install/" target="_blank" rel="noopener noreferrer">Docker</a> to be installed on the tutorial machine.</p><p>Once the Docker install is complete, please proceed to the next steps in the tutorial.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="build-the-hadoop-docker-image">Build the Hadoop docker image<a href="#build-the-hadoop-docker-image" class="hash-link" aria-label="Direct link to Build the Hadoop docker image" title="Direct link to Build the Hadoop docker image"></a></h2><p>For this tutorial, we&#x27;ve provided a Dockerfile for a Hadoop 2.8.5 cluster, which we&#x27;ll use to run the batch indexing task.</p><p>This Dockerfile and related files are located at <code>quickstart/tutorial/hadoop/docker</code>.</p><p>From the apache-druid-27.0.0 package root, run the following commands to build a Docker image named &quot;druid-hadoop-demo&quot; with version tag &quot;2.8.5&quot;:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token builtin class-name" style="color:rgb(255, 203, 107)">cd</span><span class="token plain"> quickstart/tutorial/hadoop/docker</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> build -t 
druid-hadoop-demo:2.8.5 </span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">.</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>This will start building the Hadoop image. Once the image build is done, you should see the message <code>Successfully tagged druid-hadoop-demo:2.8.5</code> printed to the console.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="setup-the-hadoop-docker-cluster">Setup the Hadoop docker cluster<a href="#setup-the-hadoop-docker-cluster" class="hash-link" aria-label="Direct link to Setup the Hadoop docker cluster" title="Direct link to Setup the Hadoop docker cluster"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="create-temporary-shared-directory">Create temporary shared directory<a href="#create-temporary-shared-directory" class="hash-link" aria-label="Direct link to Create temporary shared directory" title="Direct link to Create temporary shared directory"></a></h3><p>We&#x27;ll need a shared folder between the host and the Hadoop container for transferring some files.</p><p>Let&#x27;s create some folders under <code>/tmp</code>; we will use these later when starting the Hadoop container:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V 
thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">mkdir</span><span class="token plain"> -p /tmp/shared</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">mkdir</span><span class="token plain"> -p /tmp/shared/hadoop_xml</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="configure-etchosts">Configure /etc/hosts<a href="#configure-etchosts" class="hash-link" aria-label="Direct link to Configure /etc/hosts" title="Direct link to Configure /etc/hosts"></a></h3><p>On the host machine, add the following entry to <code>/etc/hosts</code>:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">127.0.0.1 druid-hadoop-demo</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" 
class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="start-the-hadoop-container">Start the Hadoop container<a href="#start-the-hadoop-container" class="hash-link" aria-label="Direct link to Start the Hadoop container" title="Direct link to Start the Hadoop container"></a></h3><p>Once the <code>/tmp/shared</code> folder has been created and the <code>/etc/hosts</code> entry has been added, run the following command to start the Hadoop container.</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> run -it -h druid-hadoop-demo --name druid-hadoop-demo -p </span><span class="token number" style="color:rgb(247, 140, 108)">2049</span><span class="token plain">:2049 -p </span><span class="token number" style="color:rgb(247, 140, 108)">2122</span><span class="token plain">:2122 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8020</span><span class="token plain">:8020 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8021</span><span class="token plain">:8021 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8030</span><span class="token plain">:8030 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8031</span><span class="token plain">:8031 
-p </span><span class="token number" style="color:rgb(247, 140, 108)">8032</span><span class="token plain">:8032 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8033</span><span class="token plain">:8033 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8040</span><span class="token plain">:8040 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8042</span><span class="token plain">:8042 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8088</span><span class="token plain">:8088 -p </span><span class="token number" style="color:rgb(247, 140, 108)">8443</span><span class="token plain">:8443 -p </span><span class="token number" style="color:rgb(247, 140, 108)">9000</span><span class="token plain">:9000 -p </span><span class="token number" style="color:rgb(247, 140, 108)">10020</span><span class="token plain">:10020 -p </span><span class="token number" style="color:rgb(247, 140, 108)">19888</span><span class="token plain">:19888 -p </span><span class="token number" style="color:rgb(247, 140, 108)">34455</span><span class="token plain">:34455 -p </span><span class="token number" style="color:rgb(247, 140, 108)">49707</span><span class="token plain">:49707 -p </span><span class="token number" style="color:rgb(247, 140, 108)">50010</span><span class="token plain">:50010 -p </span><span class="token number" style="color:rgb(247, 140, 108)">50020</span><span class="token plain">:50020 -p </span><span class="token number" style="color:rgb(247, 140, 108)">50030</span><span class="token plain">:50030 -p </span><span class="token number" style="color:rgb(247, 140, 108)">50060</span><span class="token plain">:50060 -p </span><span class="token number" style="color:rgb(247, 140, 108)">50070</span><span class="token plain">:50070 -p </span><span class="token number" style="color:rgb(247, 140, 108)">50075</span><span class="token plain">:50075 -p </span><span class="token number" style="color:rgb(247, 
140, 108)">50090</span><span class="token plain">:50090 -p </span><span class="token number" style="color:rgb(247, 140, 108)">51111</span><span class="token plain">:51111 -v /tmp/shared:/shared druid-hadoop-demo:2.8.5 /etc/bootstrap.sh -bash</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Once the container is started, your terminal will attach to a bash shell running inside the container:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting sshd: </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> OK </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token number" style="color:rgb(247, 140, 108)">18</span><span class="token plain">/07/26 </span><span class="token number" style="color:rgb(247, 140, 108)">17</span><span class="token plain">:27:15 WARN util.NativeCodeLoader: Unable to load native-hadoop library </span><span class="token keyword" 
style="font-style:italic">for</span><span class="token plain"> your platform</span><span class="token punctuation" style="color:rgb(199, 146, 234)">..</span><span class="token plain">. using builtin-java classes where applicable</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting namenodes on </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain">druid-hadoop-demo</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid-hadoop-demo: starting namenode, logging to /usr/local/hadoop/logs/hadoop-root-namenode-druid-hadoop-demo.out</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">localhost: starting datanode, logging to /usr/local/hadoop/logs/hadoop-root-datanode-druid-hadoop-demo.out</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Starting secondary namenodes </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0.0</span><span class="token plain">.0.0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token number" style="color:rgb(247, 140, 108)">0.0</span><span class="token plain">.0.0: starting secondarynamenode, logging to /usr/local/hadoop/logs/hadoop-root-secondarynamenode-druid-hadoop-demo.out</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token number" style="color:rgb(247, 140, 108)">18</span><span class="token plain">/07/26 </span><span class="token number" style="color:rgb(247, 140, 108)">17</span><span class="token plain">:27:31 
WARN util.NativeCodeLoader: Unable to load native-hadoop library </span><span class="token keyword" style="font-style:italic">for</span><span class="token plain"> your platform</span><span class="token punctuation" style="color:rgb(199, 146, 234)">..</span><span class="token plain">. using builtin-java classes where applicable</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">starting </span><span class="token function" style="color:rgb(130, 170, 255)">yarn</span><span class="token plain"> daemons</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">starting resourcemanager, logging to /usr/local/hadoop/logs/yarn--resourcemanager-druid-hadoop-demo.out</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">localhost: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-druid-hadoop-demo.out</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">starting historyserver, logging to /usr/local/hadoop/logs/mapred--historyserver-druid-hadoop-demo.out</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">bash-4.1</span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic">#</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>The <code>Unable to load native-hadoop library for your 
platform... using builtin-java classes where applicable</code> warning messages can be safely ignored.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="accessing-the-hadoop-container-shell">Accessing the Hadoop container shell<a href="#accessing-the-hadoop-container-shell" class="hash-link" aria-label="Direct link to Accessing the Hadoop container shell" title="Direct link to Accessing the Hadoop container shell"></a></h4><p>To open another shell to the Hadoop container, run the following command:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">docker exec -it druid-hadoop-demo bash</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="copy-input-data-to-the-hadoop-container">Copy input data to the Hadoop container<a href="#copy-input-data-to-the-hadoop-container" class="hash-link" aria-label="Direct link to Copy input data to the Hadoop container" title="Direct link to Copy input data to the Hadoop container"></a></h3><p>From the apache-druid-27.0.0 package root on the host, copy the <code>quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz</code> sample data to 
the shared folder:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">cp</span><span class="token plain"> quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz /tmp/shared/wikiticker-2015-09-12-sampled.json.gz</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="setup-hdfs-directories">Setup HDFS directories<a href="#setup-hdfs-directories" class="hash-link" aria-label="Direct link to Setup HDFS directories" title="Direct link to Setup HDFS directories"></a></h3><p>In the Hadoop container&#x27;s shell, run the following commands to set up the HDFS directories needed by this tutorial and copy the input data to HDFS.</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token builtin class-name" style="color:rgb(255, 203, 107)">cd</span><span 
class="token plain"> /usr/local/hadoop/bin</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -mkdir /druid</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -mkdir /druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -mkdir /quickstart</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -chmod </span><span class="token number" style="color:rgb(247, 140, 108)">777</span><span class="token plain"> /druid</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -chmod </span><span class="token number" style="color:rgb(247, 140, 108)">777</span><span class="token plain"> /druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -chmod </span><span class="token number" style="color:rgb(247, 140, 108)">777</span><span class="token plain"> /quickstart</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -chmod -R </span><span class="token number" style="color:rgb(247, 140, 108)">777</span><span class="token plain"> /tmp</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -chmod -R </span><span class="token number" style="color:rgb(247, 140, 108)">777</span><span class="token plain"> /user</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./hdfs dfs -put /shared/wikiticker-2015-09-12-sampled.json.gz /quickstart/wikiticker-2015-09-12-sampled.json.gz</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" 
d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you encounter namenode errors when running these commands, the Hadoop container may not have finished initializing. When this occurs, wait a couple of minutes and retry the commands.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="configure-druid-to-use-hadoop">Configure Druid to use Hadoop<a href="#configure-druid-to-use-hadoop" class="hash-link" aria-label="Direct link to Configure Druid to use Hadoop" title="Direct link to Configure Druid to use Hadoop"></a></h2><p>Some additional steps are needed to configure the Druid cluster for Hadoop batch indexing.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="copy-hadoop-configuration-to-druid-classpath">Copy Hadoop configuration to Druid classpath<a href="#copy-hadoop-configuration-to-druid-classpath" class="hash-link" aria-label="Direct link to Copy Hadoop configuration to Druid classpath" title="Direct link to Copy Hadoop configuration to Druid classpath"></a></h3><p>From the Hadoop container&#x27;s shell, run the following command to copy the Hadoop .xml configuration files to the shared folder:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">cp</span><span class="token plain"> /usr/local/hadoop/etc/hadoop/*.xml /shared/hadoop_xml</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to 
clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>From the host machine, run the following, where {PATH_TO_DRUID} is replaced by the path to the Druid package.</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">mkdir</span><span class="token plain"> -p </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain">PATH_TO_DRUID</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain">/conf/druid/single-server/micro-quickstart/_common/hadoop-xml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">cp</span><span class="token plain"> /tmp/shared/hadoop_xml/*.xml </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain">PATH_TO_DRUID</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain">/conf/druid/single-server/micro-quickstart/_common/hadoop-xml/</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" 
class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="update-druid-segment-and-log-storage">Update Druid segment and log storage<a href="#update-druid-segment-and-log-storage" class="hash-link" aria-label="Direct link to Update Druid segment and log storage" title="Direct link to Update Druid segment and log storage"></a></h3><p>In your favorite text editor, open <code>conf/druid/auto/_common/common.runtime.properties</code>, and make the following edits:</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="disable-local-deep-storage-and-enable-hdfs-deep-storage">Disable local deep storage and enable HDFS deep storage<a href="#disable-local-deep-storage-and-enable-hdfs-deep-storage" class="hash-link" aria-label="Direct link to Disable local deep storage and enable HDFS deep storage" title="Direct link to Disable local deep storage and enable HDFS deep storage"></a></h4><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Deep storage</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.type=local</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.storageDirectory=var/druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.storageDirectory=/druid/segments</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="disable-local-log-storage-and-enable-hdfs-log-storage">Disable local log storage and enable HDFS log storage<a href="#disable-local-log-storage-and-enable-hdfs-log-storage" class="hash-link" aria-label="Direct link to Disable local log storage and enable HDFS log storage" title="Direct link to Disable local log storage and enable HDFS log storage"></a></h4><div class="codeBlockContainer_Ckt0 
theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Indexing service logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.type=file</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.directory=var/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.directory=/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 
8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="restart-druid-cluster">Restart Druid cluster<a href="#restart-druid-cluster" class="hash-link" aria-label="Direct link to Restart Druid cluster" title="Direct link to Restart Druid cluster"></a></h3><p>Once the Hadoop .xml files have been copied to the Druid cluster and the segment/log storage configuration has been updated to use HDFS, the Druid cluster needs to be restarted for the new configurations to take effect.</p><p>If the cluster is still running, press CTRL-C to terminate the <code>bin/start-druid</code> script, and re-run it to bring the Druid services back up.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="load-batch-data">Load batch data<a href="#load-batch-data" class="hash-link" aria-label="Direct link to Load batch data" title="Direct link to Load batch data"></a></h2><p>We&#x27;ve included a sample of Wikipedia edits from September 12, 2015 to get you started.</p><p>To load this data into Druid, you can submit an <em>ingestion task</em> pointing to the file. We&#x27;ve included
a task that loads the <code>wikiticker-2015-09-12-sampled.json.gz</code> file included in the archive.</p><p>Let&#x27;s submit the <code>wikipedia-index-hadoop.json</code> task:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">bin/post-index-task --file quickstart/tutorial/wikipedia-index-hadoop.json --url http://localhost:8081</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="querying-your-data">Querying your data<a href="#querying-your-data" class="hash-link" aria-label="Direct link to Querying your data" title="Direct link to Querying your data"></a></h2><p>After the data load is complete, please follow the <a href="/docs/27.0.0/tutorials/tutorial-query">query tutorial</a> to run some example queries on the newly loaded data.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="cleanup">Cleanup<a href="#cleanup" class="hash-link" aria-label="Direct link to Cleanup" title="Direct link to Cleanup"></a></h2><p>This tutorial is only meant to be used together with the <a href="/docs/27.0.0/tutorials/tutorial-query">query tutorial</a>.</p><p>If you wish to go 
through any of the other tutorials, you will need to:</p><ul><li>Shut down the cluster and reset the cluster state by removing the contents of the <code>var</code> directory under the Druid package.</li><li>Revert the deep storage and task log storage configuration back to the local types in <code>conf/druid/auto/_common/common.runtime.properties</code>.</li><li>Restart the cluster.</li></ul><p>This is necessary because the other ingestion tutorials will write to the same &quot;wikipedia&quot; datasource, and later tutorials expect the cluster to use local deep storage.</p><p>Example reverted config:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Deep storage</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.type=local</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.storage.storageDirectory=var/druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain">#druid.storage.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.storage.storageDirectory=/druid/segments</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Indexing service logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For local disk (only viable in a cluster if this is a network mount):</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.type=file</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">druid.indexer.logs.directory=var/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># For HDFS:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.type=hdfs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">#druid.indexer.logs.directory=/druid/indexing-logs</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path 
fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="further-reading">Further reading<a href="#further-reading" class="hash-link" aria-label="Direct link to Further reading" title="Direct link to Further reading"></a></h2><p>For more information on loading batch data with Hadoop, please see <a href="/docs/27.0.0/ingestion/hadoop">the Hadoop batch ingestion documentation</a>.</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/27.0.0/tutorials/tutorial-kafka"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Load from Apache Kafka</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/27.0.0/tutorials/tutorial-query"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Query data</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#install-docker" class="table-of-contents__link toc-highlight">Install Docker</a></li><li><a href="#build-the-hadoop-docker-image" class="table-of-contents__link toc-highlight">Build the Hadoop docker image</a></li><li><a href="#setup-the-hadoop-docker-cluster" class="table-of-contents__link toc-highlight">Setup the Hadoop docker cluster</a><ul><li><a href="#create-temporary-shared-directory" class="table-of-contents__link toc-highlight">Create temporary shared directory</a></li><li><a href="#configure-etchosts" class="table-of-contents__link 
toc-highlight">Configure /etc/hosts</a></li><li><a href="#start-the-hadoop-container" class="table-of-contents__link toc-highlight">Start the Hadoop container</a></li><li><a href="#copy-input-data-to-the-hadoop-container" class="table-of-contents__link toc-highlight">Copy input data to the Hadoop container</a></li><li><a href="#setup-hdfs-directories" class="table-of-contents__link toc-highlight">Setup HDFS directories</a></li></ul></li><li><a href="#configure-druid-to-use-hadoop" class="table-of-contents__link toc-highlight">Configure Druid to use Hadoop</a><ul><li><a href="#copy-hadoop-configuration-to-druid-classpath" class="table-of-contents__link toc-highlight">Copy Hadoop configuration to Druid classpath</a></li><li><a href="#update-druid-segment-and-log-storage" class="table-of-contents__link toc-highlight">Update Druid segment and log storage</a></li><li><a href="#restart-druid-cluster" class="table-of-contents__link toc-highlight">Restart Druid cluster</a></li></ul></li><li><a href="#load-batch-data" class="table-of-contents__link toc-highlight">Load batch data</a></li><li><a href="#querying-your-data" class="table-of-contents__link toc-highlight">Querying your data</a></li><li><a href="#cleanup" class="table-of-contents__link toc-highlight">Cleanup</a></li><li><a href="#further-reading" class="table-of-contents__link toc-highlight">Further reading</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. 
Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.5371e784.js"></script>
<script src="/assets/js/main.832012d1.js"></script>
</body>
</html>