|  | <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>HDFS · Apache Druid</title><meta name="viewport" content="width=device-width"/><link rel="canonical" href="https://druid.apache.org/docs/0.17.1/development/extensions-core/hdfs.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="To use this Apache Druid extension, make sure to include druid-hdfs-storage as an extension."/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="0.17.1" /><meta property="og:title" content="HDFS · Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="To use this Apache Druid extension, make sure to include druid-hdfs-storage as an extension."/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script> | 
|  | window.dataLayer = window.dataLayer || []; | 
|  | function gtag(){dataLayer.push(arguments); } | 
|  | gtag('js', new Date()); | 
|  | gtag('config', 'UA-131010415-1'); | 
|  | </script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/0.17.1/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>›</i><span>Hidden</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div 
class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/index.html">Quickstart</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-batch.html">Loading files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a 
class="navItem" href="/docs/0.17.1/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/architecture.html">Design</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Data ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" 
href="/docs/0.17.1/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/schema-design.html">Schema design tips</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/data-management.html">Data management</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/kafka-ingestion.html">Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/tranquility.html">Tranquility</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/sql.html">Druid SQL</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" 
href="/docs/0.17.1/querying/querying.html">Making native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/select-query.html">Select</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/multitenancy.html">Multitenancy considerations</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/geo.html">Spatial filters</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 
0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/management-uis.html">Management UIs</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/rule-configuration.html">Retaining or automatically dropping data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/alerts.html">Alerts</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" 
href="/docs/0.17.1/operations/tls-support.html">TLS support</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/pull-deps.html">pull-deps tool</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/druid-console.html">Web console</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/getting-started.html">Getting started with Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/segment-optimization.html">Segment Size Optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li 
class="navListItem"><a class="navItem" href="/docs/0.17.1/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.17.1/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/comparisons/druid-vs-key-value.html">Apache Druid vs. 
Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" 
href="/docs/0.17.1/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/0.17.1/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a 
class="navItem" href="/docs/0.17.1/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/aggregations.html">Aggregations</a></li><li 
class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/dimensionspecs.html">Transforming Dimension Values</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/filters.html">Query Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/granularities.html">Aggregation Granularity</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/having.html">Filter groupBy query results</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/limitspec.html">Sort groupBy query results</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/post-aggregations.html">Post-Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/query-context.html">Query context</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/searchqueryspec.html">Refining search queries</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/sorting-orders.html">Sorting Orders</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/topnmetricspec.html">TopNMetricSpec</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/querying/virtual-columns.html">Virtual Columns</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a 
class="navItem" href="/docs/0.17.1/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/moving-average-query.html">development/extensions-contrib/moving-average-query</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.17.1/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script> | 
|  | var coll = document.getElementsByClassName('collapsible'); | 
|  | var checkActiveCategory = true; | 
|  | for (var i = 0; i < coll.length; i++) { | 
|  | var links = coll[i].nextElementSibling.getElementsByTagName('*'); | 
|  | if (checkActiveCategory){ | 
|  | for (var j = 0; j < links.length; j++) { | 
|  | if (links[j].classList.contains('navListItemActive')){ | 
|  | coll[i].nextElementSibling.classList.toggle('hide'); | 
|  | coll[i].childNodes[1].classList.toggle('rotate'); | 
|  | checkActiveCategory = false; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | coll[i].addEventListener('click', function() { | 
|  | var arrow = this.childNodes[1]; | 
|  | arrow.classList.toggle('rotate'); | 
|  | var content = this.nextElementSibling; | 
|  | content.classList.toggle('hide'); | 
|  | }); | 
|  | } | 
|  |  | 
|  | document.addEventListener('DOMContentLoaded', function() { | 
|  | createToggler('#navToggler', '#docsNav', 'docsSliderActive'); | 
|  | createToggler('#tocToggler', 'body', 'tocActive'); | 
|  |  | 
|  | var headings = document.querySelector('.toc-headings'); | 
|  | headings && headings.addEventListener('click', function(event) { | 
|  | var el = event.target; | 
|  | while(el !== headings){ | 
|  | if (el.tagName === 'A') { | 
|  | document.body.classList.remove('tocActive'); | 
|  | break; | 
|  | } else{ | 
|  | el = el.parentNode; | 
|  | } | 
|  | } | 
|  | }, false); | 
|  |  | 
|  | function createToggler(togglerSelector, targetSelector, className) { | 
|  | var toggler = document.querySelector(togglerSelector); | 
|  | var target = document.querySelector(targetSelector); | 
|  |  | 
|  | if (!toggler) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | toggler.onclick = function(event) { | 
|  | event.preventDefault(); | 
|  |  | 
|  | target.classList.toggle(className); | 
|  | }; | 
|  | } | 
|  | }); | 
|  | </script></nav></div><div class="container mainContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/development/extensions-core/hdfs.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 class="postHeaderTitle">HDFS</h1></header><article><div><span><!-- | 
|  | ~ Licensed to the Apache Software Foundation (ASF) under one | 
|  | ~ or more contributor license agreements.  See the NOTICE file | 
|  | ~ distributed with this work for additional information | 
|  | ~ regarding copyright ownership.  The ASF licenses this file | 
|  | ~ to you under the Apache License, Version 2.0 (the | 
|  | ~ "License"); you may not use this file except in compliance | 
|  | ~ with the License.  You may obtain a copy of the License at | 
|  | ~ | 
|  | ~   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | ~ | 
|  | ~ Unless required by applicable law or agreed to in writing, | 
|  | ~ software distributed under the License is distributed on an | 
|  | ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | ~ KIND, either express or implied.  See the License for the | 
|  | ~ specific language governing permissions and limitations | 
|  | ~ under the License. | 
|  | --> | 
|  | <p>To use this Apache Druid extension, make sure to <a href="/docs/0.17.1/development/extensions.html#loading-extensions">include</a> <code>druid-hdfs-storage</code> as an extension.</p> | 
|  | <h2><a class="anchor" aria-hidden="true" id="deep-storage"></a><a href="#deep-storage" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Deep Storage</h2> | 
|  | <h3><a class="anchor" aria-hidden="true" id="configuration-for-hdfs"></a><a href="#configuration-for-hdfs" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Configuration for HDFS</h3> | 
|  | <table> | 
|  | <thead> | 
|  | <tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr> | 
|  | </thead> | 
|  | <tbody> | 
|  | <tr><td><code>druid.storage.type</code></td><td>hdfs</td><td></td><td>Must be set.</td></tr> | 
|  | <tr><td><code>druid.storage.storageDirectory</code></td><td></td><td>Directory for storing segments.</td><td>Must be set.</td></tr> | 
|  | <tr><td><code>druid.hadoop.security.kerberos.principal</code></td><td><code>druid@EXAMPLE.COM</code></td><td>Principal user name</td><td>empty</td></tr> | 
|  | <tr><td><code>druid.hadoop.security.kerberos.keytab</code></td><td><code>/etc/security/keytabs/druid.headlessUser.keytab</code></td><td>Path to keytab file</td><td>empty</td></tr> | 
|  | </tbody> | 
|  | </table> | 
|  | <p>Besides the above settings, you also need to include all Hadoop configuration files (such as <code>core-site.xml</code>, <code>hdfs-site.xml</code>) | 
|  | in the Druid classpath. One way to do this is copying all those files under <code>${DRUID_HOME}/conf/_common</code>.</p> | 
|  | <p>If you are using the Hadoop ingestion, set your output directory to be a location on Hadoop and it will work. | 
|  | If you want to eagerly authenticate against a secured Hadoop/HDFS cluster, you must set <code>druid.hadoop.security.kerberos.principal</code> and <code>druid.hadoop.security.kerberos.keytab</code>. This is an alternative to the cron job method that runs the <code>kinit</code> command periodically.</p> | 
|  | <h3><a class="anchor" aria-hidden="true" id="configuration-for-cloud-storage"></a><a href="#configuration-for-cloud-storage" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Configuration for Cloud Storage</h3> | 
|  | <p>You can also use the AWS S3 or the Google Cloud Storage as the deep storage via HDFS.</p> | 
|  | <h4><a class="anchor" aria-hidden="true" id="configuration-for-aws-s3"></a><a href="#configuration-for-aws-s3" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Configuration for AWS S3</h4> | 
|  | <p>To use the AWS S3 as the deep storage, you need to configure <code>druid.storage.storageDirectory</code> properly.</p> | 
|  | <table> | 
|  | <thead> | 
|  | <tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr> | 
|  | </thead> | 
|  | <tbody> | 
|  | <tr><td><code>druid.storage.type</code></td><td>hdfs</td><td></td><td>Must be set.</td></tr> | 
|  | <tr><td><code>druid.storage.storageDirectory</code></td><td><a href="s3a://bucket/example/directory">s3a://bucket/example/directory</a> or <a href="s3n://bucket/example/directory">s3n://bucket/example/directory</a></td><td>Path to the deep storage</td><td>Must be set.</td></tr> | 
|  | </tbody> | 
|  | </table> | 
|  | <p>You also need to include the <a href="https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html">Hadoop AWS module</a>, especially the <code>hadoop-aws.jar</code> in the Druid classpath. | 
|  | Run the below command to install the <code>hadoop-aws.jar</code> file under <code>${DRUID_HOME}/extensions/druid-hdfs-storage</code> in all nodes.</p> | 
|  | <pre><code class="hljs css language-bash">java -classpath <span class="hljs-string">"<span class="hljs-variable">${DRUID_HOME}</span>/lib/*"</span> org.apache.druid.cli.Main tools pull-deps -h <span class="hljs-string">"org.apache.hadoop:hadoop-aws:<span class="hljs-variable">${HADOOP_VERSION}</span>"</span>; | 
|  | cp <span class="hljs-variable">${DRUID_HOME}</span>/hadoop-dependencies/hadoop-aws/<span class="hljs-variable">${HADOOP_VERSION}</span>/hadoop-aws-<span class="hljs-variable">${HADOOP_VERSION}</span>.jar <span class="hljs-variable">${DRUID_HOME}</span>/extensions/druid-hdfs-storage/ | 
|  | </code></pre> | 
|  | <p>Finally, you need to add the below properties in the <code>core-site.xml</code>. | 
|  | For more configurations, see the <a href="https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html">Hadoop AWS module</a>.</p> | 
|  | <pre><code class="hljs css language-xml"><span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>fs.s3a.impl<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>org.apache.hadoop.fs.s3a.S3AFileSystem<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span>The implementation class of the S3A Filesystem<span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  |  | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>fs.AbstractFileSystem.s3a.impl<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>org.apache.hadoop.fs.s3a.S3A<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span>The implementation class of the S3A AbstractFileSystem.<span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  |  | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>fs.s3a.access.key<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span>AWS access key ID. Omit for IAM role-based or provider-based authentication.<span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>your access key<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  |  | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>fs.s3a.secret.key<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span>AWS secret key. Omit for IAM role-based or provider-based authentication.<span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>your secret key<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  | </code></pre> | 
|  | <h4><a class="anchor" aria-hidden="true" id="configuration-for-google-cloud-storage"></a><a href="#configuration-for-google-cloud-storage" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Configuration for Google Cloud Storage</h4> | 
|  | <p>To use the Google Cloud Storage as the deep storage, you need to configure <code>druid.storage.storageDirectory</code> properly.</p> | 
|  | <table> | 
|  | <thead> | 
|  | <tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr> | 
|  | </thead> | 
|  | <tbody> | 
|  | <tr><td><code>druid.storage.type</code></td><td>hdfs</td><td></td><td>Must be set.</td></tr> | 
|  | <tr><td><code>druid.storage.storageDirectory</code></td><td><a href="gs://bucket/example/directory">gs://bucket/example/directory</a></td><td>Path to the deep storage</td><td>Must be set.</td></tr> | 
|  | </tbody> | 
|  | </table> | 
|  | <p>All services that need to access GCS need to have the <a href="https://cloud.google.com/dataproc/docs/concepts/connectors/cloud-storage#other_sparkhadoop_clusters">GCS connector jar</a> in their class path. | 
|  | Please read the <a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md">install instructions</a> | 
|  | to properly set up the necessary libraries and configurations. | 
|  | One option is to place this jar in <code>${DRUID_HOME}/lib/</code> and <code>${DRUID_HOME}/extensions/druid-hdfs-storage/</code>.</p> | 
|  | <p>Finally, you need to configure the <code>core-site.xml</code> file with the filesystem | 
|  | and authentication properties needed for GCS. You may want to copy the below | 
|  | example properties. Please follow the instructions at | 
|  | <a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md">https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md</a> | 
|  | for more details. | 
|  | For more configurations, see the <a href="https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/conf/gcs-core-default.xml">GCS core default</a> | 
|  | and <a href="https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml">GCS core template</a>.</p> | 
|  | <pre><code class="hljs css language-xml"><span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>fs.gs.impl<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span>The FileSystem for gs: (GCS) uris.<span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  |  | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>fs.AbstractFileSystem.gs.impl<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span>The AbstractFileSystem for gs: uris.<span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  |  | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>google.cloud.auth.service.account.enable<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>true<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span> | 
|  | Whether to use a service account for GCS authorization. | 
|  | Setting this property to `false` will disable use of service accounts for | 
|  | authentication. | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  |  | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">property</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">name</span>&gt;</span>google.cloud.auth.service.account.json.keyfile<span class="hljs-tag">&lt;/<span class="hljs-name">name</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">value</span>&gt;</span>/path/to/keyfile<span class="hljs-tag">&lt;/<span class="hljs-name">value</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;<span class="hljs-name">description</span>&gt;</span> | 
|  | The JSON key file of the service account used for GCS | 
|  | access when google.cloud.auth.service.account.enable is true. | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">description</span>&gt;</span> | 
|  | <span class="hljs-tag">&lt;/<span class="hljs-name">property</span>&gt;</span> | 
|  | </code></pre> | 
|  | <p>Tested with Druid 0.17.0, Hadoop 2.8.5 and gcs-connector jar 2.0.0-hadoop2.</p> | 
|  | <h2><a class="anchor" aria-hidden="true" id="reading-data-from-hdfs-or-cloud-storage"></a><a href="#reading-data-from-hdfs-or-cloud-storage" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Reading data from HDFS or Cloud Storage</h2> | 
|  | <h3><a class="anchor" aria-hidden="true" id="native-batch-ingestion"></a><a href="#native-batch-ingestion" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Native batch ingestion</h3> | 
|  | <p>The <a href="/docs/0.17.1/ingestion/native-batch.html#hdfs-input-source">HDFS input source</a> is supported by the <a href="/docs/0.17.1/ingestion/native-batch.html#parallel-task">Parallel task</a> | 
|  | to read files directly from the HDFS Storage. You may be able to read objects from cloud storage | 
|  | with the HDFS input source, but we highly recommend using a proper | 
|  | <a href="/docs/0.17.1/ingestion/native-batch.html#input-sources">Input Source</a> instead if possible because | 
|  | it is simple to set up. For now, only the <a href="/docs/0.17.1/ingestion/native-batch.html#s3-input-source">S3 input source</a> | 
|  | and the <a href="/docs/0.17.1/ingestion/native-batch.html#google-cloud-storage-input-source">Google Cloud Storage input source</a> | 
|  | are supported for cloud storage types, and so you may still want to use the HDFS input source | 
|  | to read from cloud storage other than those two.</p> | 
|  | <h3><a class="anchor" aria-hidden="true" id="hadoop-based-ingestion"></a><a href="#hadoop-based-ingestion" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Hadoop-based ingestion</h3> | 
|  | <p>If you use the <a href="/docs/0.17.1/ingestion/hadoop.html">Hadoop ingestion</a>, you can read data from HDFS | 
|  | by specifying the paths in your <a href="/docs/0.17.1/ingestion/hadoop.html#inputspec"><code>inputSpec</code></a>. | 
|  | See the <a href="/docs/0.17.1/ingestion/hadoop.html#static">Static</a> inputSpec for details.</p> | 
|  | </span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/0.17.1/development/extensions-core/google.html"><span class="arrow-prev">← </span><span>Google Cloud Storage</span></a><a class="docs-next button" href="/docs/0.17.1/development/extensions-core/kafka-extraction-namespace.html"><span>Apache Kafka Lookups</span><span class="arrow-next"> →</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#deep-storage">Deep Storage</a><ul class="toc-headings"><li><a href="#configuration-for-hdfs">Configuration for HDFS</a></li><li><a href="#configuration-for-cloud-storage">Configuration for Cloud Storage</a></li></ul></li><li><a href="#reading-data-from-hdfs-or-cloud-storage">Reading data from HDFS or Cloud Storage</a><ul class="toc-headings"><li><a href="#native-batch-ingestion">Native batch ingestion</a></li><li><a href="#hadoop-based-ingestion">Hadoop-based ingestion</a></li></ul></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a> · <a href="/use-cases">Use Cases</a> · <a href="/druid-powered">Powered by Druid</a> · <a href="/docs/0.17.1/latest">Docs</a> · <a href="/community/">Community</a> · <a href="/downloads.html">Download</a> · <a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a> · <a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a> · <a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a> · <a title="GitHub" href="https://github.com/apache/druid" 
target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright © 2019 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.js"></script><script> | 
|  | document.addEventListener('keyup', function(e) { | 
|  | if (e.target !== document.body) { | 
|  | return; | 
|  | } | 
|  | // keyCode for '/' (slash) | 
|  | if (e.keyCode === 191) { | 
|  | const search = document.getElementById('search_input_react'); | 
|  | search && search.focus(); | 
|  | } | 
|  | }); | 
|  | </script><script> | 
|  | // Initialise DocSearch against the header search input (#search_input_react). | 
|  | // Queries go to the 'apache_druid' index, and facetFilters limit results to | 
|  | // entries tagged language:en and version:0.17.1. | 
|  | // NOTE(review): apiKey is presumably a public, search-only Algolia key; confirm before reuse. | 
|  | var search = docsearch({ | 
|  |  | 
|  | apiKey: '2de99082a9f38e49dfaa059bbe4c901d', | 
|  | indexName: 'apache_druid', | 
|  | inputSelector: '#search_input_react', | 
|  | algoliaOptions: {"facetFilters":["language:en","version:0.17.1"]} | 
|  | }); | 
|  | </script></body></html> |