blob: ba7b1eba44e40056f19c3bfa3707275063210804 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-ingestion/hadoop">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.1">
<title data-rh="true">Hadoop-based ingestion | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/latest/ingestion/hadoop"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Hadoop-based ingestion | Apache® Druid"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/latest/ingestion/hadoop"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/ingestion/hadoop" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/ingestion/hadoop" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","UA-131010415-1",{})</script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.f80751b3.css">
<link rel="preload" href="/assets/js/runtime~main.dc5f839a.js" as="script">
<link rel="preload" href="/assets/js/main.a03dfc13.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/latest/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/design/">Getting started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/tutorials/tutorial-msq-extern">Tutorials</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/design/architecture">Design</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/latest/ingestion/">Ingestion</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/ingestion/">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/latest/ingestion/data-formats">Ingestion concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/latest/multi-stage-query/">SQL-based batch</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/latest/development/extensions-core/kafka-ingestion">Streaming</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" tabindex="0" href="/docs/latest/ingestion/native-batch">Classic batch</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/ingestion/native-batch">JSON-based batch</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/latest/ingestion/hadoop">Hadoop-based</a></li></ul></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/ingestion/ingestion-spec">Ingestion spec reference</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/ingestion/schema-design">Schema design tips</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/latest/ingestion/faq">Troubleshooting FAQ</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/data-management/">Data management</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/querying/sql">Querying</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/api-reference/">API reference</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/configuration/">Configuration</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/operations/web-console">Operations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/development/overview">Development</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/latest/misc/papers-and-talks">Misc</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Ingestion</span><meta itemprop="position" content="1"></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Classic batch</span><meta itemprop="position" content="2"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Hadoop-based</span><meta itemprop="position" content="3"></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Hadoop-based ingestion</h1></header><p>Apache Hadoop-based batch ingestion in Apache Druid is supported via a Hadoop-ingestion task. These tasks can be posted to a running
instance of a Druid <a href="/docs/latest/design/overlord">Overlord</a>. Please refer to our <a href="/docs/latest/ingestion/#batch">Hadoop-based vs. native batch comparison table</a> for
comparisons between Hadoop-based, native batch (simple), and native batch (parallel) ingestion.</p><p>To run a Hadoop-based ingestion task, write an ingestion spec as specified below. Then POST it to the
<a href="/docs/latest/api-reference/tasks-api"><code>/druid/indexer/v1/task</code></a> endpoint on the Overlord, or use the
<code>bin/post-index-task</code> script included with Druid.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="tutorial">Tutorial<a href="#tutorial" class="hash-link" aria-label="Direct link to Tutorial" title="Direct link to Tutorial"></a></h2><p>This page contains reference documentation for Hadoop-based ingestion.
For a walk-through instead, check out the <a href="/docs/latest/tutorials/tutorial-batch-hadoop">Loading from Apache Hadoop</a> tutorial.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="task-syntax">Task syntax<a href="#task-syntax" class="hash-link" aria-label="Direct link to Task syntax" title="Direct link to Task syntax"></a></h2><p>A sample task is shown below:</p><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;index_hadoop&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;spec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dataSchema&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dataSource&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;wikipedia&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;parser&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hadoopyString&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;parseSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;format&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;json&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;timestampSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;column&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;timestamp&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;format&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;auto&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dimensionsSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dimensions&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;page&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;language&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;user&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;unpatrolled&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;newPage&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;robot&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;anonymous&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;namespace&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;continent&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;country&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;region&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;city&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dimensionExclusions&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;spatialDimensions&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;metricsSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;count&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;count&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;doubleSum&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;added&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fieldName&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;added&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;doubleSum&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;deleted&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fieldName&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;deleted&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;doubleSum&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;name&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;delta&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fieldName&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;delta&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;granularitySpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;uniform&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;segmentGranularity&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;DAY&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;queryGranularity&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;NONE&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;intervals&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2013-08-31/2013-09-01&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;ioConfig&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hadoop&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;inputSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;static&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;paths&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;/MyDirectory/example/wikipedia_data.json&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;tuningConfig&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hadoop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;hadoopDependencyCoordinates&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> &lt;my_hadoop_version&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><table><thead><tr><th>property</th><th>description</th><th>required?</th></tr></thead><tbody><tr><td>type</td><td>The task type, this should always be &quot;index_hadoop&quot;.</td><td>yes</td></tr><tr><td>spec</td><td>A Hadoop Index Spec. See <a href="/docs/latest/ingestion/">Ingestion</a></td><td>yes</td></tr><tr><td>hadoopDependencyCoordinates</td><td>A JSON array of Hadoop dependency coordinates that Druid will use, this property will override the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies from the location specified by <code>druid.extensions.hadoopDependenciesDir</code></td><td>no</td></tr><tr><td>classpathPrefix</td><td>Classpath that will be prepended for the Peon process.</td><td>no</td></tr></tbody></table><p>Also note that Druid automatically computes the classpath for Hadoop job containers that run in the Hadoop cluster. But in case of conflicts between Hadoop and Druid&#x27;s dependencies, you can manually specify the classpath by setting <code>druid.extensions.hadoopContainerDruidClasspath</code> property. See the extensions config in <a href="/docs/latest/configuration/#extensions">base druid configuration</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="dataschema"><code>dataSchema</code><a href="#dataschema" class="hash-link" aria-label="Direct link to dataschema" title="Direct link to dataschema"></a></h2><p>This field is required. See the <a href="/docs/latest/ingestion/ingestion-spec#legacy-dataschema-spec"><code>dataSchema</code></a> section of the main ingestion page for details on
what it should contain.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="ioconfig"><code>ioConfig</code><a href="#ioconfig" class="hash-link" aria-label="Direct link to ioconfig" title="Direct link to ioconfig"></a></h2><p>This field is required.</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>type</td><td>String</td><td>This should always be &#x27;hadoop&#x27;.</td><td>yes</td></tr><tr><td>inputSpec</td><td>Object</td><td>A specification of where to pull the data in from. See below.</td><td>yes</td></tr><tr><td>segmentOutputPath</td><td>String</td><td>The path to dump segments into.</td><td>Only used by the <a href="#cli">Command-line Hadoop indexer</a>. This field must be null otherwise.</td></tr><tr><td>metadataUpdateSpec</td><td>Object</td><td>A specification of how to update the metadata for the druid cluster these segments belong to.</td><td>Only used by the <a href="#cli">Command-line Hadoop indexer</a>. This field must be null otherwise.</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="inputspec"><code>inputSpec</code><a href="#inputspec" class="hash-link" aria-label="Direct link to inputspec" title="Direct link to inputspec"></a></h3><p>There are multiple types of inputSpecs:</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="static"><code>static</code><a href="#static" class="hash-link" aria-label="Direct link to static" title="Direct link to static"></a></h4><p>A type of inputSpec where a static path to the data files is provided.</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>inputFormat</td><td>String</td><td>Specifies the Hadoop InputFormat class to use. e.g. <code>org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat</code></td><td>no</td></tr><tr><td>paths</td><td>String</td><td>Comma-separated input paths to the raw data. Druid ingests data only from the configured paths. It does not search recursively for data in subdirectories.</td><td>yes</td></tr></tbody></table><p>For example, using the static input paths:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">&quot;paths&quot; : &quot;hdfs://path/to/data/is/here/data.gz,hdfs://path/to/data/is/here/moredata.gz,hdfs://path/to/data/is/here/evenmoredata.gz&quot;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>You can also read from cloud storage such as AWS S3 or Google Cloud Storage.
To do so, you need to install the necessary library under Druid&#x27;s classpath in <em>all MiddleManager or Indexer processes</em>.
For S3, you can run the below command to install the <a href="https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/" target="_blank" rel="noopener noreferrer">Hadoop AWS module</a>.</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">java -classpath </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token string" style="color:rgb(195, 232, 141)">lib/*&quot;</span><span class="token plain"> org.apache.druid.cli.Main tools pull-deps -h </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;org.apache.hadoop:hadoop-aws:</span><span class="token string variable" style="color:rgb(191, 199, 213)">${HADOOP_VERSION}</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">cp</span><span class="token plain"> </span><span class="token variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token plain">/hadoop-dependencies/hadoop-aws/</span><span class="token variable" style="color:rgb(191, 199, 213)">${HADOOP_VERSION}</span><span class="token plain">/hadoop-aws-</span><span class="token variable" style="color:rgb(191, 199, 213)">${HADOOP_VERSION}</span><span class="token plain">.jar </span><span class="token variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token plain">/extensions/druid-hdfs-storage/</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Once you install the Hadoop AWS module in all MiddleManager and Indexer processes, you can put
your S3 paths in the inputSpec with the below job properties.
For more configurations, see the <a href="https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/" target="_blank" rel="noopener noreferrer">Hadoop AWS module</a>.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">&quot;paths&quot; : &quot;s3a://billy-bucket/the/data/is/here/data.gz,s3a://billy-bucket/the/data/is/here/moredata.gz,s3a://billy-bucket/the/data/is/here/evenmoredata.gz&quot;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token property">&quot;jobProperties&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fs.s3a.impl&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;org.apache.hadoop.fs.s3a.S3AFileSystem&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fs.AbstractFileSystem.s3a.impl&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;org.apache.hadoop.fs.s3a.S3A&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fs.s3a.access.key&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;YOUR_ACCESS_KEY&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fs.s3a.secret.key&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;YOUR_SECRET_KEY&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>For Google Cloud Storage, you need to install <a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md" target="_blank" rel="noopener noreferrer">GCS connector jar</a>
under <code>${DRUID_HOME}/hadoop-dependencies</code> in <em>all MiddleManager or Indexer processes</em>.
Once you install the GCS Connector jar in all MiddleManager and Indexer processes, you can put
your Google Cloud Storage paths in the inputSpec with the below job properties.
For more configurations, see the <a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md#configure-hadoop" target="_blank" rel="noopener noreferrer">instructions to configure Hadoop</a>,
<a href="https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.0.0/gcs/conf/gcs-core-default.xml" target="_blank" rel="noopener noreferrer">GCS core default</a>
and <a href="https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml" target="_blank" rel="noopener noreferrer">GCS core template</a>.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">&quot;paths&quot; : &quot;gs://billy-bucket/the/data/is/here/data.gz,gs://billy-bucket/the/data/is/here/moredata.gz,gs://billy-bucket/the/data/is/here/evenmoredata.gz&quot;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token property">&quot;jobProperties&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fs.gs.impl&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;fs.AbstractFileSystem.gs.impl&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="granularity"><code>granularity</code><a href="#granularity" class="hash-link" aria-label="Direct link to granularity" title="Direct link to granularity"></a></h4><p>A type of inputSpec that expects data to be organized in directories according to datetime using the path format: <code>y=XXXX/m=XX/d=XX/H=XX/M=XX/S=XX</code> (where date is represented by lowercase and time is represented by uppercase).</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>dataGranularity</td><td>String</td><td>Specifies the granularity to expect the data at, e.g. hour means to expect directories <code>y=XXXX/m=XX/d=XX/H=XX</code>.</td><td>yes</td></tr><tr><td>inputFormat</td><td>String</td><td>Specifies the Hadoop InputFormat class to use. e.g. <code>org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat</code></td><td>no</td></tr><tr><td>inputPath</td><td>String</td><td>Base path to append the datetime path to.</td><td>yes</td></tr><tr><td>filePattern</td><td>String</td><td>Pattern that files should match to be included.</td><td>yes</td></tr><tr><td>pathFormat</td><td>String</td><td>Joda datetime format for each directory. Default value is <code>&quot;&#x27;y&#x27;=yyyy/&#x27;m&#x27;=MM/&#x27;d&#x27;=dd/&#x27;H&#x27;=HH&quot;</code>, or see <a href="http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html" target="_blank" rel="noopener noreferrer">Joda documentation</a></td><td>no</td></tr></tbody></table><p>For example, if the sample config were run with the interval 2012-06-01/2012-06-02, it would expect data at the paths:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=00</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=01</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=23</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="datasource"><code>dataSource</code><a href="#datasource" class="hash-link" aria-label="Direct link to datasource" title="Direct link to datasource"></a></h4><p>This is a type of <code>inputSpec</code> that reads data already stored inside Druid. This is used to allow &quot;re-indexing&quot; data and for &quot;delta-ingestion&quot; described later in <code>multi</code> type inputSpec.</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>type</td><td>String.</td><td>This should always be &#x27;dataSource&#x27;.</td><td>yes</td></tr><tr><td>ingestionSpec</td><td>JSON object.</td><td>Specification of Druid segments to be loaded. See below.</td><td>yes</td></tr><tr><td>maxSplitSize</td><td>Number</td><td>Enables combining multiple segments into single Hadoop InputSplit according to size of segments. With -1, druid calculates max split size based on user specified number of map task(mapred.map.tasks or mapreduce.job.maps). By default, one split is made for one segment. maxSplitSize is specified in bytes.</td><td>no</td></tr><tr><td>useNewAggs</td><td>Boolean</td><td>If &quot;false&quot;, then list of aggregators in &quot;metricsSpec&quot; of hadoop indexing task must be same as that used in original indexing task while ingesting raw data. Default value is &quot;false&quot;. This field can be set to &quot;true&quot; when &quot;inputSpec&quot; type is &quot;dataSource&quot; and not &quot;multi&quot; to enable arbitrary aggregators while reindexing. See below for &quot;multi&quot; type support for delta-ingestion.</td><td>no</td></tr></tbody></table><p>Here is what goes inside <code>ingestionSpec</code>:</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>dataSource</td><td>String</td><td>Druid dataSource name from which you are loading the data.</td><td>yes</td></tr><tr><td>intervals</td><td>List</td><td>A list of strings representing ISO-8601 Intervals.</td><td>yes</td></tr><tr><td>segments</td><td>List</td><td>List of segments from which to read data from, by default it is obtained automatically. You can obtain list of segments to put here by making a POST query to Coordinator at url /druid/coordinator/v1/metadata/datasources/segments?full with list of intervals specified in the request payload, e.g. <!-- -->[&quot;2012-01-01T00:00:00.000/2012-01-03T00:00:00.000&quot;, &quot;2012-01-05T00:00:00.000/2012-01-07T00:00:00.000&quot;]<!-- -->. You may want to provide this list manually in order to ensure that segments read are exactly same as they were at the time of task submission, task would fail if the list provided by the user does not match with state of database when the task actually runs.</td><td>no</td></tr><tr><td>filter</td><td>JSON</td><td>See <a href="/docs/latest/querying/filters">Filters</a></td><td>no</td></tr><tr><td>dimensions</td><td>Array of String</td><td>Name of dimension columns to load. By default, the list will be constructed from parseSpec. If parseSpec does not have an explicit list of dimensions then all the dimension columns present in stored data will be read.</td><td>no</td></tr><tr><td>metrics</td><td>Array of String</td><td>Name of metric columns to load. By default, the list will be constructed from the &quot;name&quot; of all the configured aggregators.</td><td>no</td></tr><tr><td>ignoreWhenNoSegments</td><td>boolean</td><td>Whether to ignore this ingestionSpec if no segments were found. Default behavior is to throw error when no segments were found.</td><td>no</td></tr></tbody></table><p>For example</p><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token property">&quot;ioConfig&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hadoop&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;inputSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;dataSource&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;ingestionSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dataSource&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;wikipedia&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;intervals&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2014-10-20T00:00:00Z/P2W&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="multi"><code>multi</code><a href="#multi" class="hash-link" aria-label="Direct link to multi" title="Direct link to multi"></a></h4><p>This is a composing inputSpec to combine other inputSpecs. This inputSpec is used for delta ingestion. You can also use a <code>multi</code> inputSpec to combine data from multiple dataSources. However, each particular dataSource can only be specified one time.
Note that, &quot;useNewAggs&quot; must be set to default value false to support delta-ingestion.</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>children</td><td>Array of JSON objects</td><td>List of JSON objects containing other inputSpecs.</td><td>yes</td></tr></tbody></table><p>For example:</p><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token property">&quot;ioConfig&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hadoop&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;inputSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;multi&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;children&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;dataSource&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;ingestionSpec&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dataSource&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;wikipedia&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;intervals&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2012-01-01T00:00:00.000/2012-01-03T00:00:00.000&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2012-01-05T00:00:00.000/2012-01-07T00:00:00.000&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;segments&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dataSource&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;test1&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;interval&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;2012-01-01T00:00:00.000/2012-01-03T00:00:00.000&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;version&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;v2&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;loadSpec&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;local&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;path&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;/tmp/index1.zip&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;dimensions&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;host&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;metrics&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;visited_sum,unique_hosts&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;shardSpec&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;none&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;binaryVersion&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">9</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;size&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">2</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;identifier&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;test1_2000-01-01T00:00:00.000Z_3000-01-01T00:00:00.000Z_v2&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;static&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;paths&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;/path/to/more/wikipedia/data/&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>It is STRONGLY RECOMMENDED to provide list of segments in <code>dataSource</code> inputSpec explicitly so that your delta ingestion task is idempotent. You can obtain that list of segments by making following call to the Coordinator.
POST <code>/druid/coordinator/v1/metadata/datasources/{dataSourceName}/segments?full</code>
Request Body: <!-- -->[interval1, interval2,...]<!-- --> for example <!-- -->[&quot;2012-01-01T00:00:00.000/2012-01-03T00:00:00.000&quot;, &quot;2012-01-05T00:00:00.000/2012-01-07T00:00:00.000&quot;]</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="tuningconfig"><code>tuningConfig</code><a href="#tuningconfig" class="hash-link" aria-label="Direct link to tuningconfig" title="Direct link to tuningconfig"></a></h2><p>The tuningConfig is optional and default parameters will be used if no tuningConfig is specified.</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>workingPath</td><td>String</td><td>The working path to use for intermediate results (results between Hadoop jobs).</td><td>Only used by the <a href="#cli">Command-line Hadoop indexer</a>. The default is &#x27;/tmp/druid-indexing&#x27;. This field must be null otherwise.</td></tr><tr><td>version</td><td>String</td><td>The version of created segments. Ignored for HadoopIndexTask unless useExplicitVersion is set to true</td><td>no (default == datetime that indexing starts at)</td></tr><tr><td>partitionsSpec</td><td>Object</td><td>A specification of how to partition each time bucket into segments. Absence of this property means no partitioning will occur. See <a href="#partitionsspec"><code>partitionsSpec</code></a> below.</td><td>no (default == &#x27;hashed&#x27;)</td></tr><tr><td>maxRowsInMemory</td><td>Integer</td><td>The number of rows to aggregate before persisting. Note that this is the number of post-aggregation rows which may not be equal to the number of input events due to roll-up. This is used to manage the required JVM heap size. Normally user does not need to set this, but depending on the nature of data, if rows are short in terms of bytes, user may not want to store a million rows in memory and this value should be set.</td><td>no (default == 1000000)</td></tr><tr><td>maxBytesInMemory</td><td>Long</td><td>The number of bytes to aggregate in heap memory before persisting. Normally this is computed internally and user does not need to set it. This is based on a rough estimate of memory usage and not actual usage. The maximum heap memory usage for indexing is maxBytesInMemory * (2 + maxPendingPersists). Note that <code>maxBytesInMemory</code> also includes heap usage of artifacts created from intermediary persists. This means that after every persist, the amount of <code>maxBytesInMemory</code> until next persist will decreases, and task will fail when the sum of bytes of all intermediary persisted artifacts exceeds <code>maxBytesInMemory</code>.</td><td>no (default == One-sixth of max JVM memory)</td></tr><tr><td>leaveIntermediate</td><td>Boolean</td><td>Leave behind intermediate files (for debugging) in the workingPath when a job completes, whether it passes or fails.</td><td>no (default == false)</td></tr><tr><td>cleanupOnFailure</td><td>Boolean</td><td>Clean up intermediate files when a job fails (unless leaveIntermediate is on).</td><td>no (default == true)</td></tr><tr><td>overwriteFiles</td><td>Boolean</td><td>Override existing files found during indexing.</td><td>no (default == false)</td></tr><tr><td>ignoreInvalidRows</td><td>Boolean</td><td>DEPRECATED. Ignore rows found to have problems. If false, any exception encountered during parsing will be thrown and will halt ingestion; if true, unparseable rows and fields will be skipped. If <code>maxParseExceptions</code> is defined, this property is ignored.</td><td>no (default == false)</td></tr><tr><td>combineText</td><td>Boolean</td><td>Use CombineTextInputFormat to combine multiple files into a file split. This can speed up Hadoop jobs when processing a large number of small files.</td><td>no (default == false)</td></tr><tr><td>useCombiner</td><td>Boolean</td><td>Use Hadoop combiner to merge rows at mapper if possible.</td><td>no (default == false)</td></tr><tr><td>jobProperties</td><td>Object</td><td>A map of properties to add to the Hadoop job configuration, see below for details.</td><td>no (default == null)</td></tr><tr><td>indexSpec</td><td>Object</td><td>Tune how data is indexed. See <a href="/docs/latest/ingestion/ingestion-spec#indexspec"><code>indexSpec</code></a> on the main ingestion page for more information.</td><td>no</td></tr><tr><td>indexSpecForIntermediatePersists</td><td>Object</td><td>defines segment storage format options to be used at indexing time for intermediate persisted temporary segments. this can be used to disable dimension/metric compression on intermediate segments to reduce memory required for final merging. however, disabling compression on intermediate segments might increase page cache use while they are used before getting merged into final segment published, see <a href="/docs/latest/ingestion/ingestion-spec#indexspec"><code>indexSpec</code></a> for possible values.</td><td>no (default = same as indexSpec)</td></tr><tr><td>numBackgroundPersistThreads</td><td>Integer</td><td>The number of new background threads to use for incremental persists. Using this feature causes a notable increase in memory pressure and CPU usage but will make the job finish more quickly. If changing from the default of 0 (use current thread for persists), we recommend setting it to 1.</td><td>no (default == 0)</td></tr><tr><td>forceExtendableShardSpecs</td><td>Boolean</td><td>Forces use of extendable shardSpecs. Hash-based partitioning always uses an extendable shardSpec. For single-dimension partitioning, this option should be set to true to use an extendable shardSpec. For partitioning, please check <a href="#partitionsspec">Partitioning specification</a>. This option can be useful when you need to append more data to existing dataSource.</td><td>no (default = false)</td></tr><tr><td>useExplicitVersion</td><td>Boolean</td><td>Forces HadoopIndexTask to use version.</td><td>no (default = false)</td></tr><tr><td>logParseExceptions</td><td>Boolean</td><td>If true, log an error message when a parsing exception occurs, containing information about the row where the error occurred.</td><td>no(default = false)</td></tr><tr><td>maxParseExceptions</td><td>Integer</td><td>The maximum number of parse exceptions that can occur before the task halts ingestion and fails. Overrides <code>ignoreInvalidRows</code> if <code>maxParseExceptions</code> is defined.</td><td>no(default = unlimited)</td></tr><tr><td>useYarnRMJobStatusFallback</td><td>Boolean</td><td>If the Hadoop jobs created by the indexing task are unable to retrieve their completion status from the JobHistory server, and this parameter is true, the indexing task will try to fetch the application status from <code>http://&lt;yarn-rm-address&gt;/ws/v1/cluster/apps/&lt;application-id&gt;</code>, where <code>&lt;yarn-rm-address&gt;</code> is the value of <code>yarn.resourcemanager.webapp.address</code> in your Hadoop configuration. This flag is intended as a fallback for cases where an indexing task&#x27;s jobs succeed, but the JobHistory server is unavailable, causing the indexing task to fail because it cannot determine the job statuses.</td><td>no (default = true)</td></tr><tr><td>awaitSegmentAvailabilityTimeoutMillis</td><td>Long</td><td>Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If <code>&lt;= 0</code>, no wait will occur. If <code>&gt; 0</code>, the task will wait for the Coordinator to indicate that the new segments are available for querying. If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.</td><td>no (default = 0)</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="jobproperties"><code>jobProperties</code><a href="#jobproperties" class="hash-link" aria-label="Direct link to jobproperties" title="Direct link to jobproperties"></a></h3><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;tuningConfig&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hadoop&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;jobProperties&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;&lt;hadoop-property-a&gt;&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;&lt;value-a&gt;&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;&lt;hadoop-property-b&gt;&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;&lt;value-b&gt;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Hadoop&#x27;s <a href="https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml" target="_blank" rel="noopener noreferrer">MapReduce documentation</a> lists the possible configuration parameters.</p><p>With some Hadoop distributions, it may be necessary to set <code>mapreduce.job.classpath</code> or <code>mapreduce.job.user.classpath.first</code>
to avoid class loading issues. See the <a href="/docs/latest/operations/other-hadoop">working with different Hadoop versions documentation</a>
for more details.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="partitionsspec"><code>partitionsSpec</code><a href="#partitionsspec" class="hash-link" aria-label="Direct link to partitionsspec" title="Direct link to partitionsspec"></a></h2><p>Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in
some other way depending on partition type. Druid supports two types of partitioning strategies: <code>hashed</code> (based on the
hash of all dimensions in each row), and <code>single_dim</code> (based on ranges of a single dimension).</p><p>Hashed partitioning is recommended in most cases, as it will improve indexing performance and create more uniformly
sized data segments relative to single-dimension partitioning.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="hash-based-partitioning">Hash-based partitioning<a href="#hash-based-partitioning" class="hash-link" aria-label="Direct link to Hash-based partitioning" title="Direct link to Hash-based partitioning"></a></h3><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;partitionsSpec&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;hashed&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;targetRowsPerSegment&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">5000000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Hashed partitioning works by first selecting a number of segments, and then partitioning rows across those segments
according to the hash of all dimensions in each row. The number of segments is determined automatically based on the
cardinality of the input set and a target partition size.</p><p>The configuration options are:</p><table><thead><tr><th>Field</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>type</td><td>Type of partitionSpec to be used.</td><td>&quot;hashed&quot;</td></tr><tr><td>targetRowsPerSegment</td><td>Target number of rows to include in a partition, should be a number that targets segments of 500MB<!-- -->~<!-- -->1GB. Defaults to 5000000 if <code>numShards</code> is not set.</td><td>either this or <code>numShards</code></td></tr><tr><td>targetPartitionSize</td><td>Deprecated. Renamed to <code>targetRowsPerSegment</code>. Target number of rows to include in a partition, should be a number that targets segments of 500MB<!-- -->~<!-- -->1GB.</td><td>either this or <code>numShards</code></td></tr><tr><td>maxRowsPerSegment</td><td>Deprecated. Renamed to <code>targetRowsPerSegment</code>. Target number of rows to include in a partition, should be a number that targets segments of 500MB<!-- -->~<!-- -->1GB.</td><td>either this or <code>numShards</code></td></tr><tr><td>numShards</td><td>Specify the number of partitions directly, instead of a target partition size. Ingestion will run faster, since it can skip the step necessary to select a number of partitions automatically.</td><td>either this or <code>targetRowsPerSegment</code></td></tr><tr><td>partitionDimensions</td><td>The dimensions to partition on. Leave blank to select all dimensions. Only used with <code>numShards</code>, will be ignored when <code>targetRowsPerSegment</code> is set.</td><td>no</td></tr><tr><td>partitionFunction</td><td>A function to compute hash of partition dimensions. See <a href="#hash-partition-function">Hash partition function</a></td><td><code>murmur3_32_abs</code></td></tr></tbody></table><h5 class="anchor anchorWithStickyNavbar_LWe7" id="hash-partition-function">Hash partition function<a href="#hash-partition-function" class="hash-link" aria-label="Direct link to Hash partition function" title="Direct link to Hash partition function"></a></h5><p>In hash partitioning, the partition function is used to compute hash of partition dimensions. The partition dimension
values are first serialized into a byte array as a whole, and then the partition function is applied to compute hash of
the byte array.
Druid currently supports only one partition function.</p><table><thead><tr><th>name</th><th>description</th></tr></thead><tbody><tr><td><code>murmur3_32_abs</code></td><td>Applies an absolute value function to the result of <a href="https://guava.dev/releases/16.0/api/docs/com/google/common/hash/Hashing.html#murmur3_32()" target="_blank" rel="noopener noreferrer"><code>murmur3_32</code></a>.</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="single-dimension-range-partitioning">Single-dimension range partitioning<a href="#single-dimension-range-partitioning" class="hash-link" aria-label="Direct link to Single-dimension range partitioning" title="Direct link to Single-dimension range partitioning"></a></h3><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;partitionsSpec&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;single_dim&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;targetRowsPerSegment&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">5000000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Single-dimension range partitioning works by first selecting a dimension to partition on, and then separating that dimension
into contiguous ranges. Each segment will contain all rows with values of that dimension in that range. For example,
your segments may be partitioned on the dimension &quot;host&quot; using the ranges &quot;a.example.com&quot; to &quot;f.example.com&quot; and
&quot;f.example.com&quot; to &quot;z.example.com&quot;. By default, the dimension to use is determined automatically, although you can
override it with a specific dimension.</p><p>The configuration options are:</p><table><thead><tr><th>Field</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>type</td><td>Type of partitionSpec to be used.</td><td>&quot;single_dim&quot;</td></tr><tr><td>targetRowsPerSegment</td><td>Target number of rows to include in a partition, should be a number that targets segments of 500MB<!-- -->~<!-- -->1GB.</td><td>yes</td></tr><tr><td>targetPartitionSize</td><td>Deprecated. Renamed to <code>targetRowsPerSegment</code>. Target number of rows to include in a partition, should be a number that targets segments of 500MB<!-- -->~<!-- -->1GB.</td><td>no</td></tr><tr><td>maxRowsPerSegment</td><td>Maximum number of rows to include in a partition. Defaults to 50% larger than the <code>targetRowsPerSegment</code>.</td><td>no</td></tr><tr><td>maxPartitionSize</td><td>Deprecated. Use <code>maxRowsPerSegment</code> instead. Maximum number of rows to include in a partition. Defaults to 50% larger than the <code>targetPartitionSize</code>.</td><td>no</td></tr><tr><td>partitionDimension</td><td>The dimension to partition on. Leave blank to select a dimension automatically.</td><td>no</td></tr><tr><td>assumeGrouped</td><td>Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.</td><td>no</td></tr></tbody></table><h2 class="anchor anchorWithStickyNavbar_LWe7" id="remote-hadoop-clusters">Remote Hadoop clusters<a href="#remote-hadoop-clusters" class="hash-link" aria-label="Direct link to Remote Hadoop clusters" title="Direct link to Remote Hadoop clusters"></a></h2><p>If you have a remote Hadoop cluster, make sure to include the folder holding your configuration <code>*.xml</code> files in your Druid <code>_common</code> configuration folder.</p><p>If you are having dependency problems with your version of Hadoop and the version compiled with Druid, please see <a href="/docs/latest/operations/other-hadoop">these docs</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="elastic-mapreduce">Elastic MapReduce<a href="#elastic-mapreduce" class="hash-link" aria-label="Direct link to Elastic MapReduce" title="Direct link to Elastic MapReduce"></a></h2><p>If your cluster is running on Amazon Web Services, you can use Elastic MapReduce (EMR) to index data
from S3. To do this:</p><ul><li>Create a persistent, <a href="http://docs.aws.amazon.com/ElasticMapReduce/latest/ManagementGuide/emr-plan-longrunning-transient" target="_blank" rel="noopener noreferrer">long-running cluster</a>.</li><li>When creating your cluster, enter the following configuration. If you&#x27;re using the wizard, this
should be in advanced mode under &quot;Edit software settings&quot;:</li></ul><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">classification=yarn-site,properties=[mapreduce.reduce.memory.mb=6144,mapreduce.reduce.java.opts=-server -Xms2g -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps,mapreduce.map.java.opts=758,mapreduce.map.java.opts=-server -Xms512m -Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps,mapreduce.task.timeout=1800000]</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><ul><li>Follow the instructions under
<a href="/docs/latest/tutorials/cluster#hadoop">Configure for connecting to Hadoop</a> using the XML files from <code>/etc/hadoop/conf</code>
on your EMR master.</li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="kerberized-hadoop-clusters">Kerberized Hadoop clusters<a href="#kerberized-hadoop-clusters" class="hash-link" aria-label="Direct link to Kerberized Hadoop clusters" title="Direct link to Kerberized Hadoop clusters"></a></h2><p>By default druid can use the existing TGT kerberos ticket available in local kerberos key cache.
Although TGT ticket has a limited life cycle,
therefore you need to call <code>kinit</code> command periodically to ensure validity of TGT ticket.
To avoid this extra external cron job script calling <code>kinit</code> periodically,
you can provide the principal name and keytab location and druid will do the authentication transparently at startup and job launching time.</p><table><thead><tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr></thead><tbody><tr><td><code>druid.hadoop.security.kerberos.principal</code></td><td><code>druid@EXAMPLE.COM</code></td><td>Principal user name</td><td>empty</td></tr><tr><td><code>druid.hadoop.security.kerberos.keytab</code></td><td><code>/etc/security/keytabs/druid.headlessUser.keytab</code></td><td>Path to keytab file</td><td>empty</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="loading-from-s3-with-emr">Loading from S3 with EMR<a href="#loading-from-s3-with-emr" class="hash-link" aria-label="Direct link to Loading from S3 with EMR" title="Direct link to Loading from S3 with EMR"></a></h3><ul><li>In the <code>jobProperties</code> field in the <code>tuningConfig</code> section of your Hadoop indexing task, add:</li></ul><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">&quot;jobProperties&quot; : {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fs.s3.awsAccessKeyId&quot; : &quot;YOUR_ACCESS_KEY&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fs.s3.awsSecretAccessKey&quot; : &quot;YOUR_SECRET_KEY&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fs.s3.impl&quot; : &quot;org.apache.hadoop.fs.s3native.NativeS3FileSystem&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fs.s3n.awsAccessKeyId&quot; : &quot;YOUR_ACCESS_KEY&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fs.s3n.awsSecretAccessKey&quot; : &quot;YOUR_SECRET_KEY&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fs.s3n.impl&quot; : &quot;org.apache.hadoop.fs.s3native.NativeS3FileSystem&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;io.compression.codecs&quot; : &quot;org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Note that this method uses Hadoop&#x27;s built-in S3 filesystem rather than Amazon&#x27;s EMRFS, and is not compatible
with Amazon-specific features such as S3 encryption and consistent views. If you need to use these
features, you will need to make the Amazon EMR Hadoop JARs available to Druid through one of the
mechanisms described in the <a href="#using-other-hadoop-distributions">Using other Hadoop distributions</a> section.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="using-other-hadoop-distributions">Using other Hadoop distributions<a href="#using-other-hadoop-distributions" class="hash-link" aria-label="Direct link to Using other Hadoop distributions" title="Direct link to Using other Hadoop distributions"></a></h2><p>Druid works out of the box with many Hadoop distributions.</p><p>If you are having dependency conflicts between Druid and your version of Hadoop, you can try
searching for a solution in the <a href="https://groups.google.com/forum/#!forum/druid-user" target="_blank" rel="noopener noreferrer">Druid user groups</a>, or reading the
Druid <a href="/docs/latest/operations/other-hadoop">Different Hadoop Versions</a> documentation.</p><a name="cli"></a><h2 class="anchor anchorWithStickyNavbar_LWe7" id="command-line-non-task-version">Command line (non-task) version<a href="#command-line-non-task-version" class="hash-link" aria-label="Direct link to Command line (non-task) version" title="Direct link to Command line (non-task) version"></a></h2><p>To run:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath lib/*:&lt;hadoop_config_dir&gt; org.apache.druid.cli.Main index hadoop &lt;spec_file&gt;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="options">Options<a href="#options" class="hash-link" aria-label="Direct link to Options" title="Direct link to Options"></a></h3><ul><li>&quot;--coordinate&quot; - provide a version of Apache Hadoop to use. This property will override the default Hadoop coordinates. Once specified, Apache Druid will look for those Hadoop dependencies from the location specified by <code>druid.extensions.hadoopDependenciesDir</code>.</li><li>&quot;--no-default-hadoop&quot; - don&#x27;t pull down the default hadoop version</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="spec-file">Spec file<a href="#spec-file" class="hash-link" aria-label="Direct link to Spec file" title="Direct link to Spec file"></a></h3><p>The spec file needs to contain a JSON object where the contents are the same as the &quot;spec&quot; field in the Hadoop index task. See <a href="/docs/latest/ingestion/hadoop">Hadoop Batch Ingestion</a> for details on the spec format.</p><p>In addition, a <code>metadataUpdateSpec</code> and <code>segmentOutputPath</code> field needs to be added to the ioConfig:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;ioConfig&quot; : {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;metadataUpdateSpec&quot; : {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;type&quot;:&quot;mysql&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;connectURI&quot; : &quot;jdbc:mysql://localhost:3306/druid&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;password&quot; : &quot;diurd&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;segmentTable&quot; : &quot;druid_segments&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;user&quot; : &quot;druid&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;segmentOutputPath&quot; : &quot;/MyDirectory/data/index/output&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>and a <code>workingPath</code> field needs to be added to the tuningConfig:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;tuningConfig&quot; : {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;workingPath&quot;: &quot;/tmp&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> }</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="metadata-update-job-spec">Metadata Update Job Spec<a href="#metadata-update-job-spec" class="hash-link" aria-label="Direct link to Metadata Update Job Spec" title="Direct link to Metadata Update Job Spec"></a></h4><p>This is a specification of the properties that tell the job how to update metadata such that the Druid cluster will see the output segments and load them.</p><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>type</td><td>String</td><td>&quot;metadata&quot; is the only value available.</td><td>yes</td></tr><tr><td>connectURI</td><td>String</td><td>A valid JDBC url to metadata storage.</td><td>yes</td></tr><tr><td>user</td><td>String</td><td>Username for db.</td><td>yes</td></tr><tr><td>password</td><td>String</td><td>password for db.</td><td>yes</td></tr><tr><td>segmentTable</td><td>String</td><td>Table to use in DB.</td><td>yes</td></tr></tbody></table><p>These properties should parrot what you have configured for your <a href="/docs/latest/design/coordinator">Coordinator</a>.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="segmentoutputpath-config">segmentOutputPath Config<a href="#segmentoutputpath-config" class="hash-link" aria-label="Direct link to segmentOutputPath Config" title="Direct link to segmentOutputPath Config"></a></h4><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>segmentOutputPath</td><td>String</td><td>the path to dump segments into.</td><td>yes</td></tr></tbody></table><h4 class="anchor anchorWithStickyNavbar_LWe7" id="workingpath-config">workingPath Config<a href="#workingpath-config" class="hash-link" aria-label="Direct link to workingPath Config" title="Direct link to workingPath Config"></a></h4><table><thead><tr><th>Field</th><th>Type</th><th>Description</th><th>Required</th></tr></thead><tbody><tr><td>workingPath</td><td>String</td><td>the working path to use for intermediate results (results between Hadoop jobs).</td><td>no (default == &#x27;/tmp/druid-indexing&#x27;)</td></tr></tbody></table><p>Please note that the command line Hadoop indexer doesn&#x27;t have the locking capabilities of the indexing service, so if you choose to use it,
you have to take caution to not override segments created by real-time processing (if you that a real-time pipeline set up).</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/latest/ingestion/native-batch"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">JSON-based batch</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/latest/ingestion/ingestion-spec"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Ingestion spec reference</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#tutorial" class="table-of-contents__link toc-highlight">Tutorial</a></li><li><a href="#task-syntax" class="table-of-contents__link toc-highlight">Task syntax</a></li><li><a href="#dataschema" class="table-of-contents__link toc-highlight"><code>dataSchema</code></a></li><li><a href="#ioconfig" class="table-of-contents__link toc-highlight"><code>ioConfig</code></a><ul><li><a href="#inputspec" class="table-of-contents__link toc-highlight"><code>inputSpec</code></a></li></ul></li><li><a href="#tuningconfig" class="table-of-contents__link toc-highlight"><code>tuningConfig</code></a><ul><li><a href="#jobproperties" class="table-of-contents__link toc-highlight"><code>jobProperties</code></a></li></ul></li><li><a href="#partitionsspec" class="table-of-contents__link toc-highlight"><code>partitionsSpec</code></a><ul><li><a href="#hash-based-partitioning" class="table-of-contents__link toc-highlight">Hash-based partitioning</a></li><li><a href="#single-dimension-range-partitioning" class="table-of-contents__link toc-highlight">Single-dimension range partitioning</a></li></ul></li><li><a href="#remote-hadoop-clusters" class="table-of-contents__link toc-highlight">Remote Hadoop clusters</a></li><li><a href="#elastic-mapreduce" class="table-of-contents__link toc-highlight">Elastic MapReduce</a></li><li><a href="#kerberized-hadoop-clusters" class="table-of-contents__link toc-highlight">Kerberized Hadoop clusters</a><ul><li><a href="#loading-from-s3-with-emr" class="table-of-contents__link toc-highlight">Loading from S3 with EMR</a></li></ul></li><li><a href="#using-other-hadoop-distributions" class="table-of-contents__link toc-highlight">Using other Hadoop distributions</a></li><li><a href="#command-line-non-task-version" class="table-of-contents__link toc-highlight">Command line (non-task) version</a><ul><li><a href="#options" class="table-of-contents__link toc-highlight">Options</a></li><li><a href="#spec-file" class="table-of-contents__link toc-highlight">Spec file</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.dc5f839a.js"></script>
<script src="/assets/js/main.a03dfc13.js"></script>
</body>
</html>