blob: a6725ea21e9d59c60c2c234df6b63725be1739a7 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="mdx-wrapper mdx-page plugin-pages plugin-id-default">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.1">
<title data-rh="true">Frequently Asked Questions | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/faq"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docusaurus_tag" content="default"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docsearch:docusaurus_tag" content="default"><meta data-rh="true" property="og:title" content="Frequently Asked Questions | Apache® Druid"><meta data-rh="true" name="description" content="Don&#x27;t see your question here? Ask us"><meta data-rh="true" property="og:description" content="Don&#x27;t see your question here? Ask us"><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/faq"><link data-rh="true" rel="alternate" href="https://druid.apache.org/faq" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/faq" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>
// Google Analytics bootstrap for the gtag.js library requested by the async
// script tag above. Commands are queued on the global `dataLayer` array.
window.dataLayer = window.dataLayer || [];

// Pushes the raw `arguments` object (not a copied array) onto the queue.
function gtag() {
  dataLayer.push(arguments);
}

gtag("js", new Date());
gtag("config", "UA-131010415-1", {});
</script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.546f39eb.css">
<link rel="preload" href="/assets/js/runtime~main.9a92b840.js" as="script">
<link rel="preload" href="/assets/js/main.6f6dba15.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>
// Sets the `data-theme` attribute on <html>. This inline script runs
// synchronously at the top of <body>, before the markup below is parsed.
// Lookup order: `docusaurus-theme` query parameter, then the `theme` key in
// localStorage, then the default "light".
(function () {
  // Returns the query-string theme, or null if it is absent or the lookup throws.
  function readThemeFromQuery() {
    try {
      return new URLSearchParams(window.location.search).get("docusaurus-theme");
    } catch (ignored) {
      return null;
    }
  }

  // Returns the stored theme, or null if it is absent or storage access throws.
  function readThemeFromStorage() {
    try {
      return localStorage.getItem("theme");
    } catch (ignored) {
      return null;
    }
  }

  // `||` means storage is only consulted when the query value is falsy
  // (null or an empty string).
  var chosenTheme = readThemeFromQuery() || readThemeFromStorage();

  document.documentElement.setAttribute(
    "data-theme",
    chosenTheme !== null ? chosenTheme : "light"
  );
})();
</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/latest/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" 
height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" 
tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0"><main class="container container--fluid margin-vert--lg"><div class="row mdxPageWrapper_j9I6"><div class="col col--8"><article><div class="theme-admonition theme-admonition-tip alert alert--success admonition_LlT9"><div class="admonitionHeading_tbUL"><span class="admonitionIcon_kALy"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tip</div><div class="admonitionContent_S0QG"><p>Don&#x27;t see your question here? <a href="/community/">Ask us</a></p></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="is-druid-a-data-warehouse-when-should-i-use-druid-over-redshiftbigquerysnowflake">Is Druid a data warehouse? When should I use Druid over Redshift/BigQuery/Snowflake?<a href="#is-druid-a-data-warehouse-when-should-i-use-druid-over-redshiftbigquerysnowflake" class="hash-link" aria-label="Direct link to Is Druid a data warehouse? When should I use Druid over Redshift/BigQuery/Snowflake?" title="Direct link to Is Druid a data warehouse? When should I use Druid over Redshift/BigQuery/Snowflake?"></a></h3><p>Apache Druid is a new type of database to power real-time analytic workloads for
event-driven data, and isn’t a traditional data warehouse. Although Druid
incorporates architecture ideas from data warehouses such as column-oriented
storage, Druid also incorporates designs from search systems and timeseries
databases. Druid&#x27;s architecture is designed to handle many use cases that
traditional data warehouses cannot.</p><p>Druid offers the following advantages over traditional data warehouses:</p><ul><li>Much lower latency for OLAP-style queries</li><li>Much lower latency for data ingest (both streaming and batch)</li><li>Out-of-the-box integration with Apache Kafka, AWS Kinesis, HDFS, AWS S3, and more</li><li>Time-based partitioning, which enables performant time-based queries</li><li>Fast search and filter, for fast slice and dice</li><li>Minimal schema design and native support for semi-structured and nested data</li></ul><p>Consider using Druid to augment your data warehouse if your use case requires:</p><ul><li>Powering a user-facing application</li><li>Low-latency query response with high concurrency</li><li>Instant data visibility</li><li>Fast ad-hoc slice and dice</li><li>Streaming data</li></ul><p>To summarize, Druid shines when the use case involves real-time analytics and
where the end-user (technical or not) wants to apply numerous queries in rapid
succession to explore or better understand data trends. </p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="is-druid-a-log-aggregationlog-search-system-when-should-i-use-druid-over-elasticsplunk">Is Druid a log aggregation/log search system? When should I use Druid over Elastic/Splunk?<a href="#is-druid-a-log-aggregationlog-search-system-when-should-i-use-druid-over-elasticsplunk" class="hash-link" aria-label="Direct link to Is Druid a log aggregation/log search system? When should I use Druid over Elastic/Splunk?" title="Direct link to Is Druid a log aggregation/log search system? When should I use Druid over Elastic/Splunk?"></a></h3><p>Druid uses inverted indexes (in particular, compressed bitmaps) for fast
searching and filtering, but it is not generally considered a search system.
While Druid contains many features commonly found in search systems, such as
the ability to stream in structured and semi-structured data and the ability to
search and filter the data, Druid isn’t commonly used to ingest text logs and
run full text search queries over the text logs. However, Druid is often used
to ingest and analyze semi-structured data such as JSON.</p><p>Druid at its core is an analytics engine and as such, it can support numerical
aggregations, groupBys (including multi-dimensional groupBys), and other
analytic workloads faster and more efficiently than search systems.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="is-druid-a-timeseries-database-when-should-i-use-druid-over-influxdbopentsdbprometheus">Is Druid a timeseries database? When should I use Druid over InfluxDB/OpenTSDB/Prometheus?<a href="#is-druid-a-timeseries-database-when-should-i-use-druid-over-influxdbopentsdbprometheus" class="hash-link" aria-label="Direct link to Is Druid a timeseries database? When should I use Druid over InfluxDB/OpenTSDB/Prometheus?" title="Direct link to Is Druid a timeseries database? When should I use Druid over InfluxDB/OpenTSDB/Prometheus?"></a></h3><p>Druid does share some characteristics with timeseries databases, but also
combines ideas from analytic databases and search systems. Like in timeseries
databases, Druid is optimized for data where a timestamp is present. Druid
partitions data by time, and queries that include a time filter will be
significantly faster than those that do not. Aggregating metrics and filtering
on dimensions (which are roughly equivalent to TSDBs&#x27; tags) are also very fast when a
time filter is present. However, because Druid incorporates many architectural designs
from analytics databases and search systems, it can significantly
outperform TSDBs when grouping, searching, and filtering on tags that are
not time, or when computing complex metrics such as histograms and quantiles.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="does-druid-separate-storage-and-compute">Does Druid separate storage and compute?<a href="#does-druid-separate-storage-and-compute" class="hash-link" aria-label="Direct link to Does Druid separate storage and compute?" title="Direct link to Does Druid separate storage and compute?"></a></h3><p>Druid creates an indexed copy of raw data that is highly optimized for
analytic queries. Druid runs queries over this indexed data, called a <a href="/docs/latest/design/segments">&#x27;segment&#x27;</a>
in Druid, and does not pull raw data from an external storage system as needed
by queries. </p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="how-is-druid-deployed">How is Druid deployed?<a href="#how-is-druid-deployed" class="hash-link" aria-label="Direct link to How is Druid deployed?" title="Direct link to How is Druid deployed?"></a></h3><p>Druid can be deployed on commodity hardware in any *NIX based environment.
A Druid cluster consists of several different services, each designed to do a small set of things very well (ingestion, querying, coordination, etc.).
Many of these services can be co-located and deployed together on the same hardware as described <a href="/docs/latest/tutorials/">here</a>.</p><p>Druid was designed for the cloud, and runs well in AWS, GCP, Azure, and other cloud environments.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="where-does-druid-fit-in-my-big-data-stack">Where does Druid fit in my big data stack?<a href="#where-does-druid-fit-in-my-big-data-stack" class="hash-link" aria-label="Direct link to Where does Druid fit in my big data stack?" title="Direct link to Where does Druid fit in my big data stack?"></a></h3><p>Druid typically connects to a source of raw data, such as a message bus like Apache Kafka or a filesystem like HDFS.
Druid ingests an optimized, column-oriented, indexed copy of your data and serves analytics workloads on top of it.</p><p>A common streaming data oriented setup involving Druid looks like this:
Raw data → Kafka → Stream processor (optional, typically for ETL) → Kafka (optional) → Druid → Application/user</p><p>A common batch/static file oriented setup involving Druid looks like this:
Raw data → Kafka (optional) → HDFS → ETL process (optional) → Druid → Application/user</p><p>The same Druid cluster can serve both the streaming and batch paths.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="is-druid-in-memory">Is Druid in-memory?<a href="#is-druid-in-memory" class="hash-link" aria-label="Direct link to Is Druid in-memory?" title="Direct link to Is Druid in-memory?"></a></h3><p>The earliest iterations of Druid didn’t allow for data to be paged in from
and out to disk, so it was often called an “in-memory” database. As Druid
evolved, this limitation was removed. To provide a balance between hardware
cost and query performance, Druid leverages memory-mapping to page data between
disk and memory and extend the amount of data a single node can load up to the
size of its disks.</p><p>Individual Historicals can be configured with the maximum amount of data
they should be given. Coupled with the Coordinator’s ability to assign data to
different “tiers” based on different query requirements, Druid is essentially a
system that can be configured across a wide spectrum of performance
requirements. All data can be in memory and processed, or data can be heavily
over-committed compared to the amount of memory available. Druid can also
support complex configurations, such as configuring the most recent month of
data in memory, while everything else is over-committed.</p></article></div><div class="col col--2"><div class="tableOfContents_bqdL thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#is-druid-a-data-warehouse-when-should-i-use-druid-over-redshiftbigquerysnowflake" class="table-of-contents__link toc-highlight">Is Druid a data warehouse? When should I use Druid over Redshift/BigQuery/Snowflake?</a></li><li><a href="#is-druid-a-log-aggregationlog-search-system-when-should-i-use-druid-over-elasticsplunk" class="table-of-contents__link toc-highlight">Is Druid a log aggregation/log search system? When should I use Druid over Elastic/Splunk?</a></li><li><a href="#is-druid-a-timeseries-database-when-should-i-use-druid-over-influxdbopentsdbprometheus" class="table-of-contents__link toc-highlight">Is Druid a timeseries database? When should I use Druid over InfluxDB/OpenTSDB/Prometheus?</a></li><li><a href="#does-druid-separate-storage-and-compute" class="table-of-contents__link toc-highlight">Does Druid separate storage and compute?</a></li><li><a href="#how-is-druid-deployed" class="table-of-contents__link toc-highlight">How is Druid deployed?</a></li><li><a href="#where-does-druid-fit-in-my-big-data-stack" class="table-of-contents__link toc-highlight">Where does Druid fit in my big data stack?</a></li><li><a href="#is-druid-in-memory" class="table-of-contents__link toc-highlight">Is Druid in-memory?</a></li></ul></div></div></div></main></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. 
Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.9a92b840.js"></script>
<script src="/assets/js/main.6f6dba15.js"></script>
</body>
</html>