blob: f11bf661ec2a0a50908c213b6782627e155e8416 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-development/extensions-core/hdfs">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.1">
<title data-rh="true">HDFS | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/latest/development/extensions-core/hdfs"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="HDFS | Apache® Druid"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/latest/development/extensions-core/hdfs"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/development/extensions-core/hdfs" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/latest/development/extensions-core/hdfs" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","UA-131010415-1",{})</script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.546f39eb.css">
<link rel="preload" href="/assets/js/runtime~main.26d714fb.js" as="script">
<link rel="preload" href="/assets/js/main.bd54ee66.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/latest/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><main class="docMainContainer_gTbr docMainContainerEnhanced_Uz_u"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>HDFS</h1></header><p>To use this Apache Druid extension, <a href="/docs/latest/configuration/extensions#loading-extensions">include</a> <code>druid-hdfs-storage</code> in the extensions load list and run druid processes with <code>GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account_keyfile</code> in the environment.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="deep-storage">Deep Storage<a href="#deep-storage" class="hash-link" aria-label="Direct link to Deep Storage" title="Direct link to Deep Storage"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="configuration-for-hdfs">Configuration for HDFS<a href="#configuration-for-hdfs" class="hash-link" aria-label="Direct link to Configuration for HDFS" title="Direct link to Configuration for HDFS"></a></h3><table><thead><tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr></thead><tbody><tr><td><code>druid.storage.type</code></td><td>hdfs</td><td></td><td>Must be set.</td></tr><tr><td><code>druid.storage.storageDirectory</code></td><td></td><td>Directory for storing segments.</td><td>Must be set.</td></tr><tr><td><code>druid.hadoop.security.kerberos.principal</code></td><td><code>druid@EXAMPLE.COM</code></td><td>Principal user name</td><td>empty</td></tr><tr><td><code>druid.hadoop.security.kerberos.keytab</code></td><td><code>/etc/security/keytabs/druid.headlessUser.keytab</code></td><td>Path to keytab file</td><td>empty</td></tr></tbody></table><p>Besides the above settings, you also need to include all Hadoop configuration files (such as <code>core-site.xml</code>, <code>hdfs-site.xml</code>)
in the Druid classpath. One way to do this is copying all those files under <code>${DRUID_HOME}/conf/_common</code>.</p><p>If you are using the Hadoop ingestion, set your output directory to be a location on Hadoop and it will work.
If you want to eagerly authenticate against a secured hadoop/hdfs cluster you must set <code>druid.hadoop.security.kerberos.principal</code> and <code>druid.hadoop.security.kerberos.keytab</code>, this is an alternative to the cron job method that runs <code>kinit</code> command periodically.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="configuration-for-cloud-storage">Configuration for Cloud Storage<a href="#configuration-for-cloud-storage" class="hash-link" aria-label="Direct link to Configuration for Cloud Storage" title="Direct link to Configuration for Cloud Storage"></a></h3><p>You can also use the AWS S3 or the Google Cloud Storage as the deep storage via HDFS.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="configuration-for-aws-s3">Configuration for AWS S3<a href="#configuration-for-aws-s3" class="hash-link" aria-label="Direct link to Configuration for AWS S3" title="Direct link to Configuration for AWS S3"></a></h4><p>To use the AWS S3 as the deep storage, you need to configure <code>druid.storage.storageDirectory</code> properly.</p><table><thead><tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr></thead><tbody><tr><td><code>druid.storage.type</code></td><td>hdfs</td><td></td><td>Must be set.</td></tr><tr><td><code>druid.storage.storageDirectory</code></td><td>s3a://bucket/example/directory or s3n://bucket/example/directory</td><td>Path to the deep storage</td><td>Must be set.</td></tr></tbody></table><p>You also need to include the <a href="https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/" target="_blank" rel="noopener noreferrer">Hadoop AWS module</a>, especially the <code>hadoop-aws.jar</code> in the Druid classpath.
Run the below command to install the <code>hadoop-aws.jar</code> file under <code>${DRUID_HOME}/extensions/druid-hdfs-storage</code> in all nodes.</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token plain">/bin/run-java -classpath </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token string" style="color:rgb(195, 232, 141)">/lib/*&quot;</span><span class="token plain"> org.apache.druid.cli.Main tools pull-deps -h </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;org.apache.hadoop:hadoop-aws:</span><span class="token string variable" style="color:rgb(191, 199, 213)">${HADOOP_VERSION}</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">cp</span><span class="token plain"> </span><span class="token variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token plain">/hadoop-dependencies/hadoop-aws/</span><span class="token variable" style="color:rgb(191, 199, 213)">${HADOOP_VERSION}</span><span class="token plain">/hadoop-aws-</span><span class="token variable" style="color:rgb(191, 199, 213)">${HADOOP_VERSION}</span><span class="token plain">.jar </span><span class="token variable" style="color:rgb(191, 199, 213)">${DRUID_HOME}</span><span class="token plain">/extensions/druid-hdfs-storage/</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Finally, you need to add the below properties in the <code>core-site.xml</code>.
For more configurations, see the <a href="https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/" target="_blank" rel="noopener noreferrer">Hadoop AWS module</a>.</p><div class="language-xml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-xml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">fs.s3a.impl</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">org.apache.hadoop.fs.s3a.S3AFileSystem</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">The implementation class of the S3A Filesystem</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">fs.AbstractFileSystem.s3a.impl</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">org.apache.hadoop.fs.s3a.S3A</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">The implementation class of the S3A AbstractFileSystem.</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">fs.s3a.access.key</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">AWS access key ID. Omit for IAM role-based or provider-based authentication.</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">your access key</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">fs.s3a.secret.key</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">AWS secret key. Omit for IAM role-based or provider-based authentication.</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">your secret key</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="configuration-for-google-cloud-storage">Configuration for Google Cloud Storage<a href="#configuration-for-google-cloud-storage" class="hash-link" aria-label="Direct link to Configuration for Google Cloud Storage" title="Direct link to Configuration for Google Cloud Storage"></a></h4><p>To use the Google Cloud Storage as the deep storage, you need to configure <code>druid.storage.storageDirectory</code> properly.</p><table><thead><tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr></thead><tbody><tr><td><code>druid.storage.type</code></td><td>hdfs</td><td></td><td>Must be set.</td></tr><tr><td><code>druid.storage.storageDirectory</code></td><td>gs://bucket/example/directory</td><td>Path to the deep storage</td><td>Must be set.</td></tr></tbody></table><p>All services that need to access GCS need to have the <a href="https://cloud.google.com/dataproc/docs/concepts/connectors/cloud-storage#other_sparkhadoop_clusters" target="_blank" rel="noopener noreferrer">GCS connector jar</a> in their class path.
Please read the <a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md" target="_blank" rel="noopener noreferrer">install instructions</a>
to properly set up the necessary libraries and configurations.
One option is to place this jar in <code>${DRUID_HOME}/lib/</code> and <code>${DRUID_HOME}/extensions/druid-hdfs-storage/</code>.</p><p>Finally, you need to configure the <code>core-site.xml</code> file with the filesystem
and authentication properties needed for GCS. You may want to copy the below
example properties. Please follow the instructions at
<a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md" target="_blank" rel="noopener noreferrer">https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md</a>
for more details.
For more configurations, <a href="https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.0.0/gcs/conf/gcs-core-default.xml" target="_blank" rel="noopener noreferrer">GCS core default</a>
and <a href="https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml" target="_blank" rel="noopener noreferrer">GCS core template</a>.</p><div class="language-xml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-xml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">fs.gs.impl</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">The FileSystem for gs: (GCS) uris.</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">fs.AbstractFileSystem.gs.impl</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">The AbstractFileSystem for gs: uris.</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">google.cloud.auth.service.account.enable</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">true</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> Whether to use a service account for GCS authorization.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> Setting this property to `false` will disable use of service accounts for</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> authentication.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">google.cloud.auth.service.account.json.keyfile</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">name</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain">/path/to/keyfile</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">value</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> The JSON key file of the service account used for GCS</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> access when google.cloud.auth.service.account.enable is true.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">description</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&lt;/</span><span class="token tag" style="color:rgb(255, 85, 114)">property</span><span class="token tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Tested with Druid 0.17.0, Hadoop 2.8.5 and gcs-connector jar 2.0.0-hadoop2.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="reading-data-from-hdfs-or-cloud-storage">Reading data from HDFS or Cloud Storage<a href="#reading-data-from-hdfs-or-cloud-storage" class="hash-link" aria-label="Direct link to Reading data from HDFS or Cloud Storage" title="Direct link to Reading data from HDFS or Cloud Storage"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="native-batch-ingestion">Native batch ingestion<a href="#native-batch-ingestion" class="hash-link" aria-label="Direct link to Native batch ingestion" title="Direct link to Native batch ingestion"></a></h3><p>The <a href="/docs/latest/ingestion/input-sources#hdfs-input-source">HDFS input source</a> is supported by the <a href="/docs/latest/ingestion/native-batch">Parallel task</a>
to read files directly from the HDFS Storage. You may be able to read objects from cloud storage
with the HDFS input source, but we highly recommend to use a proper
<a href="/docs/latest/ingestion/input-sources">Input Source</a> instead if possible because
it is simple to set up. For now, only the <a href="/docs/latest/ingestion/input-sources#s3-input-source">S3 input source</a>
and the <a href="/docs/latest/ingestion/input-sources#google-cloud-storage-input-source">Google Cloud Storage input source</a>
are supported for cloud storage types, and so you may still want to use the HDFS input source
to read from cloud storage other than those two.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="hadoop-based-ingestion">Hadoop-based ingestion<a href="#hadoop-based-ingestion" class="hash-link" aria-label="Direct link to Hadoop-based ingestion" title="Direct link to Hadoop-based ingestion"></a></h3><p>If you use the <a href="/docs/latest/ingestion/hadoop">Hadoop ingestion</a>, you can read data from HDFS
by specifying the paths in your <a href="/docs/latest/ingestion/hadoop#inputspec"><code>inputSpec</code></a>.
See the <a href="/docs/latest/ingestion/hadoop#static">Static</a> inputSpec for details.</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#deep-storage" class="table-of-contents__link toc-highlight">Deep Storage</a><ul><li><a href="#configuration-for-hdfs" class="table-of-contents__link toc-highlight">Configuration for HDFS</a></li><li><a href="#configuration-for-cloud-storage" class="table-of-contents__link toc-highlight">Configuration for Cloud Storage</a></li></ul></li><li><a href="#reading-data-from-hdfs-or-cloud-storage" class="table-of-contents__link toc-highlight">Reading data from HDFS or Cloud Storage</a><ul><li><a href="#native-batch-ingestion" class="table-of-contents__link toc-highlight">Native batch ingestion</a></li><li><a href="#hadoop-based-ingestion" class="table-of-contents__link toc-highlight">Hadoop-based ingestion</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.26d714fb.js"></script>
<script src="/assets/js/main.bd54ee66.js"></script>
</body>
</html>