| <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Datasources · Apache Druid</title><meta name="viewport" content="width=device-width"/><link rel="canonical" href="https://druid.apache.org/docs/0.20.1/querying/datasource.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="<!--"/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="0.20.1" /><meta property="og:title" content="Datasources · Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="<!--"/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script> |
| window.dataLayer = window.dataLayer || []; |
| function gtag(){dataLayer.push(arguments); } |
| gtag('js', new Date()); |
| gtag('config', 'UA-131010415-1'); |
| </script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/0.20.1/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>›</i><span>Concepts</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/index.html">Quickstart</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/docker.html">Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-batch.html">Loading files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/architecture.html">Design</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/schema-design.html">Schema design tips</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/data-management.html">Data management</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/kafka-ingestion.html">Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/tranquility.html">Tranquility</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/sql.html">Druid SQL</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/querying.html">Native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/query-execution.html">Query execution</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Concepts</h4><ul><li class="navListItem navListItemActive"><a class="navItem" href="/docs/0.20.1/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/multitenancy.html">Multitenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/query-context.html">Context parameters</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query components</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/filters.html">Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/granularities.html">Granularities</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/dimensionspecs.html">Dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/aggregations.html">Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/post-aggregations.html">Post-aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/having.html">Having filters (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/limitspec.html">Sorting and limiting (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/topnmetricspec.html">Sorting (topN)</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/sorting-orders.html">String comparators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/virtual-columns.html">Virtual columns</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/geo.html">Spatial filters</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/druid-console.html">Web console</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/getting-started.html">Getting started with Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/rule-configuration.html">Retaining or automatically dropping data</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/alerts.html">Alerts</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/tls-support.html">TLS support</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/pull-deps.html">pull-deps tool</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/management-uis.html">Legacy Management UIs</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/segment-optimization.html">Segment Size Optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/0.20.1/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/comparisons/druid-vs-key-value.html">Apache Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/druid-ranger-security.html">Apache Ranger Security</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/druid-pac4j.html">Druid pac4j based Security extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/moving-average-query.html">Moving Average Query</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/gce-extensions.html">GCE Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/development/extensions-contrib/aliyun-oss.html">Aliyun OSS</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/querying/select-query.html">Select</a></li><li class="navListItem"><a class="navItem" href="/docs/0.20.1/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script> |
| var coll = document.getElementsByClassName('collapsible'); |
| var checkActiveCategory = true; |
| for (var i = 0; i < coll.length; i++) { |
| var links = coll[i].nextElementSibling.getElementsByTagName('*'); |
| if (checkActiveCategory){ |
| for (var j = 0; j < links.length; j++) { |
| if (links[j].classList.contains('navListItemActive')){ |
| coll[i].nextElementSibling.classList.toggle('hide'); |
| coll[i].childNodes[1].classList.toggle('rotate'); |
| checkActiveCategory = false; |
| break; |
| } |
| } |
| } |
| |
| coll[i].addEventListener('click', function() { |
| var arrow = this.childNodes[1]; |
| arrow.classList.toggle('rotate'); |
| var content = this.nextElementSibling; |
| content.classList.toggle('hide'); |
| }); |
| } |
| |
| document.addEventListener('DOMContentLoaded', function() { |
| createToggler('#navToggler', '#docsNav', 'docsSliderActive'); |
| createToggler('#tocToggler', 'body', 'tocActive'); |
| |
| var headings = document.querySelector('.toc-headings'); |
| headings && headings.addEventListener('click', function(event) { |
| var el = event.target; |
| while(el !== headings){ |
| if (el.tagName === 'A') { |
| document.body.classList.remove('tocActive'); |
| break; |
| } else{ |
| el = el.parentNode; |
| } |
| } |
| }, false); |
| |
| function createToggler(togglerSelector, targetSelector, className) { |
| var toggler = document.querySelector(togglerSelector); |
| var target = document.querySelector(targetSelector); |
| |
| if (!toggler) { |
| return; |
| } |
| |
| toggler.onclick = function(event) { |
| event.preventDefault(); |
| |
| target.classList.toggle(className); |
| }; |
| } |
| }); |
| </script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/querying/datasource.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Datasources</h1></header><article><div><span><!-- |
| ~ Licensed to the Apache Software Foundation (ASF) under one |
| ~ or more contributor license agreements. See the NOTICE file |
| ~ distributed with this work for additional information |
| ~ regarding copyright ownership. The ASF licenses this file |
| ~ to you under the Apache License, Version 2.0 (the |
| ~ "License"); you may not use this file except in compliance |
| ~ with the License. You may obtain a copy of the License at |
| ~ |
| ~ http://www.apache.org/licenses/LICENSE-2.0 |
| ~ |
| ~ Unless required by applicable law or agreed to in writing, |
| ~ software distributed under the License is distributed on an |
| ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| ~ KIND, either express or implied. See the License for the |
| ~ specific language governing permissions and limitations |
| ~ under the License. |
| --> |
| <p>Datasources in Apache Druid are things that you can query. The most common kind of datasource is a table datasource, |
| and in many contexts the word "datasource" implicitly refers to table datasources. This is especially true |
| <a href="../ingestion/index.html">during data ingestion</a>, where ingestion is always creating or writing into a table |
| datasource. But at query time, there are many other types of datasources available.</p> |
| <p>The word "datasource" is generally spelled <code>dataSource</code> (with a capital S) when it appears in API requests and |
| responses.</p> |
| <h2><a class="anchor" aria-hidden="true" id="datasource-type"></a><a href="#datasource-type" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Datasource type</h2> |
| <h3><a class="anchor" aria-hidden="true" id="table"></a><a href="#table" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>table</code></h3> |
| <div class="tabs"><div class="nav-tabs"><div id="tab-group-1-tab-2" class="nav-link active" data-group="group_1" data-tab="tab-group-1-content-2">SQL</div><div id="tab-group-1-tab-3" class="nav-link" data-group="group_1" data-tab="tab-group-1-content-3">Native</div></div><div class="tab-content"><div id="tab-group-1-content-2" class="tab-pane active" data-group="group_1" tabindex="-1"><div><span><pre><code class="hljs css language-sql"><span class="hljs-keyword">SELECT</span> column1, column2 <span class="hljs-keyword">FROM</span> <span class="hljs-string">"druid"</span>.<span class="hljs-string">"dataSourceName"</span><br /></code></pre> |
| </span></div></div><div id="tab-group-1-content-3" class="tab-pane" data-group="group_1" tabindex="-1"><div><span><pre><code class="hljs css language-json">{<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"scan"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: <span class="hljs-string">"dataSourceName"</span>,<br /> <span class="hljs-attr">"columns"</span>: [<span class="hljs-string">"column1"</span>, <span class="hljs-string">"column2"</span>],<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>]<br />}<br /></code></pre> |
| </span></div></div></div></div> |
| <p>The table datasource is the most common type. This is the kind of datasource you get when you perform |
| <a href="../ingestion/index.html">data ingestion</a>. They are split up into segments, distributed around the cluster, |
| and queried in parallel.</p> |
| <p>In <a href="sql.html#from">Druid SQL</a>, table datasources reside in the the <code>druid</code> schema. This is the default schema, so table |
| datasources can be referenced as either <code>druid.dataSourceName</code> or simply <code>dataSourceName</code>.</p> |
| <p>In native queries, table datasources can be referenced using their names as strings (as in the example above), or by |
| using JSON objects of the form:</p> |
| <pre><code class="hljs css language-json">"dataSource": { |
| "type": "table", |
| "name": "dataSourceName" |
| } |
| </code></pre> |
| <p>To see a list of all table datasources, use the SQL query |
| <code>SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'druid'</code>.</p> |
| <h3><a class="anchor" aria-hidden="true" id="lookup"></a><a href="#lookup" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>lookup</code></h3> |
| <div class="tabs"><div class="nav-tabs"><div id="tab-group-4-tab-5" class="nav-link active" data-group="group_4" data-tab="tab-group-4-content-5">SQL</div><div id="tab-group-4-tab-6" class="nav-link" data-group="group_4" data-tab="tab-group-4-content-6">Native</div></div><div class="tab-content"><div id="tab-group-4-content-5" class="tab-pane active" data-group="group_4" tabindex="-1"><div><span><pre><code class="hljs css language-sql"><span class="hljs-keyword">SELECT</span> k, v <span class="hljs-keyword">FROM</span> lookup.countries<br /></code></pre> |
| </span></div></div><div id="tab-group-4-content-6" class="tab-pane" data-group="group_4" tabindex="-1"><div><span><pre><code class="hljs css language-json">{<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"scan"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: {<br /> <span class="hljs-attr">"type"</span>: <span class="hljs-string">"lookup"</span>,<br /> <span class="hljs-attr">"lookup"</span>: <span class="hljs-string">"countries"</span><br /> },<br /> <span class="hljs-attr">"columns"</span>: [<span class="hljs-string">"k"</span>, <span class="hljs-string">"v"</span>],<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>]<br />}<br /></code></pre> |
| </span></div></div></div></div> |
| <p>Lookup datasources correspond to Druid's key-value <a href="lookups.html">lookup</a> objects. In <a href="sql.html#from">Druid SQL</a>, |
| they reside in the the <code>lookup</code> schema. They are preloaded in memory on all servers, so they can be accessed rapidly. |
| They can be joined onto regular tables using the <a href="#join">join operator</a>.</p> |
| <p>Lookup datasources are key-value oriented and always have exactly two columns: <code>k</code> (the key) and <code>v</code> (the value), and |
| both are always strings.</p> |
| <p>To see a list of all lookup datasources, use the SQL query |
| <code>SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'lookup'</code>.</p> |
| <blockquote> |
| <p>Performance tip: Lookups can be joined with a base table either using an explicit <a href="#join">join</a>, or by using the |
| SQL <a href="sql.html#string-functions"><code>LOOKUP</code> function</a>. |
| However, the join operator must evaluate the condition on each row, whereas the |
| <code>LOOKUP</code> function can defer evaluation until after an aggregation phase. This means that the <code>LOOKUP</code> function is |
| usually faster than joining to a lookup datasource.</p> |
| </blockquote> |
| <p>Refer to the <a href="/docs/0.20.1/querying/query-execution.html#table">Query execution</a> page for more details on how queries are executed when you |
| use table datasources.</p> |
| <h3><a class="anchor" aria-hidden="true" id="union"></a><a href="#union" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>union</code></h3> |
| <div class="tabs"><div class="nav-tabs"><div id="tab-group-7-tab-8" class="nav-link active" data-group="group_7" data-tab="tab-group-7-content-8">Native</div></div><div class="tab-content"><div id="tab-group-7-content-8" class="tab-pane active" data-group="group_7" tabindex="-1"><div><span><pre><code class="hljs css language-json">{<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"scan"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: {<br /> <span class="hljs-attr">"type"</span>: <span class="hljs-string">"union"</span>,<br /> <span class="hljs-attr">"dataSources"</span>: [<span class="hljs-string">"<tableDataSourceName1>"</span>, <span class="hljs-string">"<tableDataSourceName2>"</span>, <span class="hljs-string">"<tableDataSourceName3>"</span>]<br /> },<br /> <span class="hljs-attr">"columns"</span>: [<span class="hljs-string">"column1"</span>, <span class="hljs-string">"column2"</span>],<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>]<br />}<br /></code></pre> |
| </span></div></div></div></div> |
| <p>Union datasources allow you to treat two or more table datasources as a single datasource. The datasources being unioned |
| do not need to have identical schemas. If they do not fully match up, then columns that exist in one table but not |
| another will be treated as if they contained all null values in the tables where they do not exist.</p> |
| <p>The list of "dataSources" must be nonempty. If you want to query an empty dataset, use an <a href="#inline"><code>inline</code> datasource</a> |
| instead.</p> |
| <p>Union datasources are not available in Druid SQL.</p> |
| <p>Refer to the <a href="/docs/0.20.1/querying/query-execution.html#union">Query execution</a> page for more details on how queries are executed when you |
| use union datasources.</p> |
| <h3><a class="anchor" aria-hidden="true" id="inline"></a><a href="#inline" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>inline</code></h3> |
| <div class="tabs"><div class="nav-tabs"><div id="tab-group-9-tab-10" class="nav-link active" data-group="group_9" data-tab="tab-group-9-content-10">Native</div></div><div class="tab-content"><div id="tab-group-9-content-10" class="tab-pane active" data-group="group_9" tabindex="-1"><div><span><pre><code class="hljs css language-json">{<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"scan"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: {<br /> <span class="hljs-attr">"type"</span>: <span class="hljs-string">"inline"</span>,<br /> <span class="hljs-attr">"columnNames"</span>: [<span class="hljs-string">"country"</span>, <span class="hljs-string">"city"</span>],<br /> <span class="hljs-attr">"rows"</span>: [<br /> [<span class="hljs-string">"United States"</span>, <span class="hljs-string">"San Francisco"</span>],<br /> [<span class="hljs-string">"Canada"</span>, <span class="hljs-string">"Calgary"</span>]<br /> ]<br /> },<br /> <span class="hljs-attr">"columns"</span>: [<span class="hljs-string">"country"</span>, <span class="hljs-string">"city"</span>],<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>]<br />}<br /></code></pre> |
| </span></div></div></div></div> |
| <p>Inline datasources allow you to query a small amount of data that is embedded in the query itself. They are useful when |
| you want to write a query on a small amount of data without loading it first. They are also useful as inputs into a |
| <a href="#join">join</a>. Druid also uses them internally to handle subqueries that need to be inlined on the Broker. See the |
| <a href="#query"><code>query</code> datasource</a> documentation for more details.</p> |
| <p>There are two fields in an inline datasource: an array of <code>columnNames</code> and an array of <code>rows</code>. Each row is an array |
| that must be exactly as long as the list of <code>columnNames</code>. The first element in each row corresponds to the first |
| column in <code>columnNames</code>, and so on.</p> |
| <p>Inline datasources are not available in Druid SQL.</p> |
| <p>Refer to the <a href="/docs/0.20.1/querying/query-execution.html#inline">Query execution</a> page for more details on how queries are executed when you |
| use inline datasources.</p> |
| <h3><a class="anchor" aria-hidden="true" id="query"></a><a href="#query" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>query</code></h3> |
| <div class="tabs"><div class="nav-tabs"><div id="tab-group-11-tab-12" class="nav-link active" data-group="group_11" data-tab="tab-group-11-content-12">SQL</div><div id="tab-group-11-tab-13" class="nav-link" data-group="group_11" data-tab="tab-group-11-content-13">Native</div></div><div class="tab-content"><div id="tab-group-11-content-12" class="tab-pane active" data-group="group_11" tabindex="-1"><div><span><pre><code class="hljs css language-sql"><span class="hljs-comment">-- Uses a subquery to count hits per page, then takes the average.</span><br /><span class="hljs-keyword">SELECT</span><br /> <span class="hljs-keyword">AVG</span>(cnt) <span class="hljs-keyword">AS</span> average_hits_per_page<br /><span class="hljs-keyword">FROM</span><br /> (<span class="hljs-keyword">SELECT</span> page, <span class="hljs-keyword">COUNT</span>(*) <span class="hljs-keyword">AS</span> hits <span class="hljs-keyword">FROM</span> site_traffic <span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span> page)<br /></code></pre> |
| </span></div></div><div id="tab-group-11-content-13" class="tab-pane" data-group="group_11" tabindex="-1"><div><span><pre><code class="hljs css language-json">{<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"timeseries"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: {<br /> <span class="hljs-attr">"type"</span>: <span class="hljs-string">"query"</span>,<br /> <span class="hljs-attr">"query"</span>: {<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"groupBy"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: <span class="hljs-string">"site_traffic"</span>,<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>],<br /> <span class="hljs-attr">"granularity"</span>: <span class="hljs-string">"all"</span>,<br /> <span class="hljs-attr">"dimensions"</span>: [<span class="hljs-string">"page"</span>],<br /> <span class="hljs-attr">"aggregations"</span>: [<br /> { <span class="hljs-attr">"type"</span>: <span class="hljs-string">"count"</span>, <span class="hljs-attr">"name"</span>: <span class="hljs-string">"hits"</span> }<br /> ]<br /> }<br /> },<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>],<br /> <span class="hljs-attr">"granularity"</span>: <span class="hljs-string">"all"</span>,<br /> <span class="hljs-attr">"aggregations"</span>: [<br /> { <span class="hljs-attr">"type"</span>: <span class="hljs-string">"longSum"</span>, <span class="hljs-attr">"name"</span>: <span class="hljs-string">"hits"</span>, <span class="hljs-attr">"fieldName"</span>: <span class="hljs-string">"hits"</span> },<br /> { <span class="hljs-attr">"type"</span>: <span class="hljs-string">"count"</span>, <span class="hljs-attr">"name"</span>: <span class="hljs-string">"pages"</span> }<br /> ],<br /> <span class="hljs-attr">"postAggregations"</span>: [<br /> { <span class="hljs-attr">"type"</span>: <span class="hljs-string">"expression"</span>, <span class="hljs-attr">"name"</span>: <span class="hljs-string">"average_hits_per_page"</span>, <span class="hljs-attr">"expression"</span>: <span class="hljs-string">"hits / pages"</span> }<br /> ]<br />}<br /></code></pre> |
| </span></div></div></div></div> |
| <p>Query datasources allow you to issue subqueries. In native queries, they can appear anywhere that accepts a |
| <code>dataSource</code>. In SQL, they can appear in the following places, always surrounded by parentheses:</p> |
| <ul> |
| <li>The FROM clause: <code>FROM (<subquery>)</code>.</li> |
| <li>As inputs to a JOIN: <code><table-or-subquery-1> t1 INNER JOIN <table-or-subquery-2> t2 ON t1.<col1> = t2.<col2></code>.</li> |
| <li>In the WHERE clause: <code>WHERE <column> { IN | NOT IN } (<subquery>)</code>. These are translated to joins by the SQL planner.</li> |
| </ul> |
| <blockquote> |
| <p>Performance tip: In most cases, subquery results are fully buffered in memory on the Broker and then further |
| processing occurs on the Broker itself. This means that subqueries with large result sets can cause performance |
| bottlenecks or run into memory usage limits on the Broker. See the <a href="/docs/0.20.1/querying/query-execution.html#query">Query execution</a> |
| page for more details on how subqueries are executed and what limits will apply.</p> |
| </blockquote> |
| <h3><a class="anchor" aria-hidden="true" id="join"></a><a href="#join" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>join</code></h3> |
| <div class="tabs"><div class="nav-tabs"><div id="tab-group-14-tab-15" class="nav-link active" data-group="group_14" data-tab="tab-group-14-content-15">SQL</div><div id="tab-group-14-tab-16" class="nav-link" data-group="group_14" data-tab="tab-group-14-content-16">Native</div></div><div class="tab-content"><div id="tab-group-14-content-15" class="tab-pane active" data-group="group_14" tabindex="-1"><div><span><pre><code class="hljs css language-sql"><span class="hljs-comment">-- Joins "sales" with "countries" (using "store" as the join key) to get sales by country.</span><br /><span class="hljs-keyword">SELECT</span><br /> store_to_country.v <span class="hljs-keyword">AS</span> country,<br /> <span class="hljs-keyword">SUM</span>(sales.revenue) <span class="hljs-keyword">AS</span> country_revenue<br /><span class="hljs-keyword">FROM</span><br /> sales<br /> <span class="hljs-keyword">INNER</span> <span class="hljs-keyword">JOIN</span> lookup.store_to_country <span class="hljs-keyword">ON</span> sales.store = store_to_country.k<br /><span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span><br /> countries.v<br /></code></pre> |
| </span></div></div><div id="tab-group-14-content-16" class="tab-pane" data-group="group_14" tabindex="-1"><div><span><pre><code class="hljs css language-json">{<br /> <span class="hljs-attr">"queryType"</span>: <span class="hljs-string">"groupBy"</span>,<br /> <span class="hljs-attr">"dataSource"</span>: {<br /> <span class="hljs-attr">"type"</span>: <span class="hljs-string">"join"</span>,<br /> <span class="hljs-attr">"left"</span>: <span class="hljs-string">"sales"</span>,<br /> <span class="hljs-attr">"right"</span>: {<br /> <span class="hljs-attr">"type"</span>: <span class="hljs-string">"lookup"</span>,<br /> <span class="hljs-attr">"lookup"</span>: <span class="hljs-string">"store_to_country"</span><br /> },<br /> <span class="hljs-attr">"rightPrefix"</span>: <span class="hljs-string">"r."</span>,<br /> <span class="hljs-attr">"condition"</span>: <span class="hljs-string">"store == \"r.k\""</span>,<br /> <span class="hljs-attr">"joinType"</span>: <span class="hljs-string">"INNER"</span><br /> },<br /> <span class="hljs-attr">"intervals"</span>: [<span class="hljs-string">"0000/3000"</span>],<br /> <span class="hljs-attr">"granularity"</span>: <span class="hljs-string">"all"</span>,<br /> <span class="hljs-attr">"dimensions"</span>: [<br /> { <span class="hljs-attr">"type"</span>: <span class="hljs-string">"default"</span>, <span class="hljs-attr">"outputName"</span>: <span class="hljs-string">"country"</span>, <span class="hljs-attr">"dimension"</span>: <span class="hljs-string">"r.v"</span> }<br /> ],<br /> <span class="hljs-attr">"aggregations"</span>: [<br /> { <span class="hljs-attr">"type"</span>: <span class="hljs-string">"longSum"</span>, <span class="hljs-attr">"name"</span>: <span class="hljs-string">"country_revenue"</span>, <span class="hljs-attr">"fieldName"</span>: <span class="hljs-string">"revenue"</span> }<br /> ]<br />}<br /></code></pre> |
| </span></div></div></div></div> |
| <p>Join datasources allow you to do a SQL-style join of two datasources. Stacking joins on top of each other allows |
| you to join arbitrarily many datasources.</p> |
| <p>In Druid 0.20.1, joins are implemented with a broadcast hash-join algorithm. This means that all tables |
| other than the leftmost "base" table must fit in memory. It also means that the join condition must be an equality. This |
| feature is intended mainly to allow joining regular Druid tables with <a href="#lookup">lookup</a>, <a href="#inline">inline</a>, and |
| <a href="#query">query</a> datasources.</p> |
| <p>Refer to the <a href="/docs/0.20.1/querying/query-execution.html#join">Query execution</a> page for more details on how queries are executed when you |
| use join datasources.</p> |
| <h4><a class="anchor" aria-hidden="true" id="joins-in-sql"></a><a href="#joins-in-sql" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Joins in SQL</h4> |
| <p>SQL joins take the form:</p> |
| <pre><code class="hljs"><o1> [ <span class="hljs-keyword">INNER</span> | LEFT [<span class="hljs-keyword">OUTER</span>] ] <span class="hljs-keyword">JOIN</span> <o2> <span class="hljs-keyword">ON</span> <condition> |
| </code></pre> |
| <p>The condition must involve only equalities, but functions are okay, and there can be multiple equalities ANDed together. |
| Conditions like <code>t1.x = t2.x</code>, or <code>LOWER(t1.x) = t2.x</code>, or <code>t1.x = t2.x AND t1.y = t2.y</code> can all be handled. Conditions |
| like <code>t1.x <> t2.x</code> cannot currently be handled.</p> |
| <p>Note that Druid SQL is less rigid than what native join datasources can handle. In cases where a SQL query does |
| something that is not allowed as-is with a native join datasource, Druid SQL will generate a subquery. This can have |
| a substantial effect on performance and scalability, so it is something to watch out for. Some examples of when the |
| SQL layer will generate subqueries include:</p> |
| <ul> |
| <li><p>Joining a regular Druid table to itself, or to another regular Druid table. The native join datasource can accept |
| a table on the left-hand side, but not the right, so a subquery is needed.</p></li> |
| <li><p>Join conditions where the expressions on either side are of different types.</p></li> |
| <li><p>Join conditions where the right-hand expression is not a direct column access.</p></li> |
| </ul> |
| <p>For more information about how Druid translates SQL to native queries, refer to the |
| <a href="/docs/0.20.1/querying/sql.html#query-translation">Druid SQL</a> documentation.</p> |
| <h4><a class="anchor" aria-hidden="true" id="joins-in-native-queries"></a><a href="#joins-in-native-queries" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Joins in native queries</h4> |
| <p>Native join datasources have the following properties. All are required.</p> |
| <table> |
| <thead> |
| <tr><th>Field</th><th>Description</th></tr> |
| </thead> |
| <tbody> |
| <tr><td><code>left</code></td><td>Left-hand datasource. Must be of type <code>table</code>, <code>join</code>, <code>lookup</code>, <code>query</code>, or <code>inline</code>. Placing another join as the left datasource allows you to join arbitrarily many datasources.</td></tr> |
| <tr><td><code>right</code></td><td>Right-hand datasource. Must be of type <code>lookup</code>, <code>query</code>, or <code>inline</code>. Note that this is more rigid than what Druid SQL requires.</td></tr> |
| <tr><td><code>rightPrefix</code></td><td>String prefix that will be applied to all columns from the right-hand datasource, to prevent them from colliding with columns from the left-hand datasource. Can be any string, so long as it is nonempty and is not be a prefix of the string <code>__time</code>. Any columns from the left-hand side that start with your <code>rightPrefix</code> will be shadowed. It is up to you to provide a prefix that will not shadow any important columns from the left side.</td></tr> |
| <tr><td><code>condition</code></td><td><a href="/docs/0.20.1/misc/math-expr.html">Expression</a> that must be an equality where one side is an expression of the left-hand side, and the other side is a simple column reference to the right-hand side. Note that this is more rigid than what Druid SQL requires: here, the right-hand reference must be a simple column reference; in SQL it can be an expression.</td></tr> |
| <tr><td><code>joinType</code></td><td><code>INNER</code> or <code>LEFT</code>.</td></tr> |
| </tbody> |
| </table> |
| <h4><a class="anchor" aria-hidden="true" id="join-performance"></a><a href="#join-performance" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Join performance</h4> |
| <p>Joins are a feature that can significantly affect performance of your queries. Some performance tips and notes:</p> |
| <ol> |
| <li>Joins are especially useful with <a href="#lookup">lookup datasources</a>, but in most cases, the |
| <a href="sql.html#string-functions"><code>LOOKUP</code> function</a> performs better than a join. Consider using the <code>LOOKUP</code> function if |
| it is appropriate for your use case.</li> |
| <li>When using joins in Druid SQL, keep in mind that it can generate subqueries that you did not explicitly include in |
| your queries. Refer to the <a href="/docs/0.20.1/querying/sql.html#query-translation">Druid SQL</a> documentation for more details about when this happens |
| and how to detect it.</li> |
| <li>One common reason for implicit subquery generation is if the types of the two halves of an equality do not match. |
| For example, since lookup keys are always strings, the condition <code>druid.d JOIN lookup.l ON d.field = l.field</code> will |
| perform best if <code>d.field</code> is a string.</li> |
| <li>As of Druid 0.20.1, the join operator must evaluate the condition for each row. In the future, we expect |
| to implement both early and deferred condition evaluation, which we expect to improve performance considerably for |
| common use cases.</li> |
| <li>Currently, Druid does not support pushing down predicates (condition and filter) past a Join (i.e. into |
| Join's children). Druid only supports pushing predicates into the join if they originated from |
| above the join. Hence, the location of predicates and filters in your Druid SQL is very important. |
| Also, as a result of this, comma joins should be avoided.</li> |
| </ol> |
| <h4><a class="anchor" aria-hidden="true" id="future-work-for-joins"></a><a href="#future-work-for-joins" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Future work for joins</h4> |
| <p>Joins are an area of active development in Druid. The following features are missing today but may appear in |
| future versions:</p> |
| <ul> |
| <li>Reordering of predicates and filters (pushing up and/or pushing down) to get the most performant plan.</li> |
| <li>Preloaded dimension tables that are wider than lookups (i.e. supporting more than a single key and single value).</li> |
| <li>RIGHT OUTER and FULL OUTER joins. Currently, they are partially implemented. Queries will run but results will not |
| always be correct.</li> |
| <li>Performance-related optimizations as mentioned in the <a href="#join-performance">previous section</a>.</li> |
| <li>Join algorithms other than broadcast hash-joins.</li> |
| <li>Join condition on a column compared to a constant value.</li> |
| <li>Join conditions on a column containing a multi-value dimension.</li> |
| </ul> |
| </span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/0.20.1/querying/query-execution.html"><span class="arrow-prev">← </span><span>Query execution</span></a><a class="docs-next button" href="/docs/0.20.1/querying/joins.html"><span>Joins</span><span class="arrow-next"> →</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#datasource-type">Datasource type</a><ul class="toc-headings"><li><a href="#table"><code>table</code></a></li><li><a href="#lookup"><code>lookup</code></a></li><li><a href="#union"><code>union</code></a></li><li><a href="#inline"><code>inline</code></a></li><li><a href="#query"><code>query</code></a></li><li><a href="#join"><code>join</code></a></li></ul></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a> · <a href="/use-cases">Use Cases</a> · <a href="/druid-powered">Powered by Druid</a> · <a href="/docs/0.20.1/latest">Docs</a> · <a href="/community/">Community</a> · <a href="/downloads.html">Download</a> · <a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a> · <a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a> · <a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a> · <a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright © 2019 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.js"></script><script> |
| document.addEventListener('keyup', function(e) { |
| if (e.target !== document.body) { |
| return; |
| } |
| // keyCode for '/' (slash) |
| if (e.keyCode === 191) { |
| const search = document.getElementById('search_input_react'); |
| search && search.focus(); |
| } |
| }); |
| </script><script> |
| var search = docsearch({ |
| |
| apiKey: '2de99082a9f38e49dfaa059bbe4c901d', |
| indexName: 'apache_druid', |
| inputSelector: '#search_input_react', |
| algoliaOptions: {"facetFilters":["language:en","version:0.20.1"]} |
| }); |
| </script></body></html> |