| <!doctype html> |
| <html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-multi-stage-query/concepts"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="generator" content="Docusaurus v2.4.1"> |
| <title data-rh="true">SQL-based ingestion concepts | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/27.0.0/multi-stage-query/concepts"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="SQL-based ingestion concepts | Apache® Druid"><meta data-rh="true" name="description" content="<!--"><meta data-rh="true" property="og:description" content="<!--"><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/27.0.0/multi-stage-query/concepts"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/27.0.0/multi-stage-query/concepts" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/27.0.0/multi-stage-query/concepts" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com"> |
| <link rel="preconnect" href="https://www.googletagmanager.com"> |
| <script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script> |
| <script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","UA-131010415-1",{})</script> |
| |
| |
| |
| |
| <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.f80751b3.css"> |
| <link rel="preload" href="/assets/js/runtime~main.5371e784.js" as="script"> |
| <link rel="preload" href="/assets/js/main.832012d1.js" as="script"> |
| </head> |
| <body class="navigation-with-keyboard"> |
| <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus"> |
| <div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/27.0.0/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/design/">Getting started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/tutorials/tutorial-msq-extern">Tutorials</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/design/architecture">Design</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/27.0.0/ingestion/">Ingestion</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/ingestion/">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/27.0.0/ingestion/data-formats">Ingestion concepts</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" tabindex="0" href="/docs/27.0.0/multi-stage-query/">SQL-based batch</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/multi-stage-query/">SQL-based ingestion</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/27.0.0/multi-stage-query/concepts">Key concepts</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/multi-stage-query/security">Security</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/multi-stage-query/examples">Examples</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/multi-stage-query/reference">Reference</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/multi-stage-query/known-issues">Known issues</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/27.0.0/development/extensions-core/kafka-ingestion">Streaming</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/27.0.0/ingestion/native-batch">Classic batch</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/ingestion/ingestion-spec">Ingestion spec reference</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/ingestion/schema-design">Schema design tips</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/27.0.0/ingestion/faq">Troubleshooting FAQ</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/data-management/">Data management</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/querying/sql">Querying</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/api-reference/">API reference</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/configuration/">Configuration</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/operations/web-console">Operations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/development/overview">Development</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/27.0.0/misc/papers-and-talks">Misc</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Ingestion</span><meta itemprop="position" content="1"></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">SQL-based batch</span><meta itemprop="position" content="2"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Key concepts</span><meta itemprop="position" content="3"></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>SQL-based ingestion concepts</h1></header><div class="theme-admonition theme-admonition-info alert alert--info admonition_LlT9"><div class="admonitionHeading_tbUL"><span class="admonitionIcon_kALy"><svg viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</div><div class="admonitionContent_S0QG"><p> This page describes SQL-based batch ingestion using the <a href="/docs/27.0.0/multi-stage-query/"><code>druid-multi-stage-query</code></a> |
| extension, new in Druid 24.0. Refer to the <a href="/docs/27.0.0/ingestion/#batch">ingestion methods</a> table to determine which |
| ingestion method is right for you.</p></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="multi-stage-query-task-engine">Multi-stage query task engine<a href="#multi-stage-query-task-engine" class="hash-link" aria-label="Direct link to Multi-stage query task engine" title="Direct link to Multi-stage query task engine"></a></h2><p>The <code>druid-multi-stage-query</code> extension adds a multi-stage query (MSQ) task engine that executes SQL statements as batch |
| tasks in the indexing service, which execute on <a href="/docs/27.0.0/design/architecture#druid-services">Middle Managers</a>. |
| <a href="/docs/27.0.0/multi-stage-query/reference#insert">INSERT</a> and <a href="/docs/27.0.0/multi-stage-query/reference#replace">REPLACE</a> tasks publish |
| <a href="/docs/27.0.0/design/architecture#datasources-and-segments">segments</a> just like <a href="/docs/27.0.0/ingestion/#batch">all other forms of batch |
| ingestion</a>. Each query occupies at least two task slots while running: one controller task, |
| and at least one worker task. As an experimental feature, the MSQ task engine also supports running SELECT queries as |
| batch tasks. The behavior and result format of plain SELECT (without INSERT or REPLACE) is subject to change.</p><p>You can execute SQL statements using the MSQ task engine through the <strong>Query</strong> view in the <a href="/docs/27.0.0/operations/web-console">web |
| console</a> or through the <a href="/docs/27.0.0/api-reference/sql-ingestion-api"><code>/druid/v2/sql/task</code> API</a>.</p><p>For more details on how SQL queries are executed using the MSQ task engine, see <a href="#multi-stage-query-tasks">multi-stage query |
| tasks</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="sql-extensions">SQL extensions<a href="#sql-extensions" class="hash-link" aria-label="Direct link to SQL extensions" title="Direct link to SQL extensions"></a></h2><p>To support ingestion, additional SQL functionality is available through the MSQ task engine.</p><a name="extern"></a><h3 class="anchor anchorWithStickyNavbar_LWe7" id="read-external-data-with-extern">Read external data with <code>EXTERN</code><a href="#read-external-data-with-extern" class="hash-link" aria-label="Direct link to read-external-data-with-extern" title="Direct link to read-external-data-with-extern"></a></h3><p>Query tasks can access external data through the <code>EXTERN</code> function, using any native batch <a href="/docs/27.0.0/ingestion/input-sources">input |
| source</a> and <a href="/docs/27.0.0/ingestion/data-formats#input-format">input format</a>.</p><p><code>EXTERN</code> can read multiple files in parallel across different worker tasks. However, <code>EXTERN</code> does not split individual |
| files across multiple worker tasks. If you have a small number of very large input files, you can increase query |
| parallelism by splitting up your input files.</p><p>For more information about the syntax, see <a href="/docs/27.0.0/multi-stage-query/reference#extern-function"><code>EXTERN</code></a>.</p><p>See also the set of SQL-friendly input-source-specific table functions which may be more convenient |
| than <code>EXTERN</code>.</p><a name="insert"></a><h3 class="anchor anchorWithStickyNavbar_LWe7" id="load-data-with-insert">Load data with <code>INSERT</code><a href="#load-data-with-insert" class="hash-link" aria-label="Direct link to load-data-with-insert" title="Direct link to load-data-with-insert"></a></h3><p><code>INSERT</code> statements can create a new datasource or append to an existing datasource. In Druid SQL, unlike standard SQL, |
| there is no syntactical difference between creating a table and appending data to a table. Druid does not include a |
| <code>CREATE TABLE</code> statement.</p><p>Nearly all <code>SELECT</code> capabilities are available for <code>INSERT ... SELECT</code> queries. Certain exceptions are listed on the <a href="/docs/27.0.0/multi-stage-query/known-issues#select-statement">Known |
| issues</a> page.</p><p><code>INSERT</code> statements acquire a shared lock to the target datasource. Multiple <code>INSERT</code> statements can run at the same time, |
| for the same datasource, if your cluster has enough task slots.</p><p>Like all other forms of <a href="/docs/27.0.0/ingestion/#batch">batch ingestion</a>, each <code>INSERT</code> statement generates new segments and |
| publishes them at the end of its run. For this reason, it is best suited to loading data in larger batches. Do not use |
| <code>INSERT</code> statements to load data in a sequence of microbatches; for that, use <a href="/docs/27.0.0/ingestion/#streaming">streaming |
| ingestion</a> instead.</p><p>When deciding whether to use <code>REPLACE</code> or <code>INSERT</code>, keep in mind that segments generated with <code>REPLACE</code> can be pruned |
| with dimension-based pruning but those generated with <code>INSERT</code> cannot. For more information about the requirements |
| for dimension-based pruning, see <a href="#clustering">Clustering</a>.</p><p>For more information about the syntax, see <a href="/docs/27.0.0/multi-stage-query/reference#insert">INSERT</a>.</p><a name="replace"></a><h3 class="anchor anchorWithStickyNavbar_LWe7" id="overwrite-data-with-replace">Overwrite data with REPLACE<a href="#overwrite-data-with-replace" class="hash-link" aria-label="Direct link to Overwrite data with REPLACE" title="Direct link to Overwrite data with REPLACE"></a></h3><p><code>REPLACE</code> statements can create a new datasource or overwrite data in an existing datasource. In Druid SQL, unlike |
| standard SQL, there is no syntactical difference between creating a table and overwriting data in a table. Druid does |
| not include a <code>CREATE TABLE</code> statement.</p><p><code>REPLACE</code> uses an <a href="/docs/27.0.0/multi-stage-query/reference#replace-specific-time-ranges">OVERWRITE clause</a> to determine which data to overwrite. You |
| can overwrite an entire table, or a specific time range of a table. When you overwrite a specific time range, that time |
| range must align with the granularity specified in the <code>PARTITIONED BY</code> clause.</p><p><code>REPLACE</code> statements acquire an exclusive write lock to the target time range of the target datasource. No other ingestion |
| or compaction operations may proceed for that time range while the task is running. However, ingestion and compaction |
| operations may proceed for other time ranges.</p><p>Nearly all <code>SELECT</code> capabilities are available for <code>REPLACE ... SELECT</code> queries. Certain exceptions are listed on the <a href="/docs/27.0.0/multi-stage-query/known-issues#select-statement">Known |
| issues</a> page.</p><p>For more information about the syntax, see <a href="/docs/27.0.0/multi-stage-query/reference#replace">REPLACE</a>.</p><p>When deciding whether to use <code>REPLACE</code> or <code>INSERT</code>, keep in mind that segments generated with <code>REPLACE</code> can be pruned |
| with dimension-based pruning but those generated with <code>INSERT</code> cannot. For more information about the requirements |
| for dimension-based pruning, see <a href="#clustering">Clustering</a>.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="primary-timestamp">Primary timestamp<a href="#primary-timestamp" class="hash-link" aria-label="Direct link to Primary timestamp" title="Direct link to Primary timestamp"></a></h3><p>Druid tables always include a primary timestamp named <code>__time</code>.</p><p>It is common to set a primary timestamp by using <a href="/docs/27.0.0/querying/sql-scalar#date-and-time-functions">date and time |
| functions</a>; for example: <code>TIME_FORMAT("timestamp", 'yyyy-MM-dd |
| HH:mm:ss') AS __time</code>.</p><p>The <code>__time</code> column is used for <a href="#partitioning-by-time">partitioning by time</a>. If you use <code>PARTITIONED BY ALL</code> or |
| <code>PARTITIONED BY ALL TIME</code>, partitioning by time is disabled. In these cases, you do not need to include a <code>__time</code> |
| column in your <code>INSERT</code> statement. However, Druid still creates a <code>__time</code> column in your Druid table and sets all |
| timestamps to 1970-01-01 00:00:00.</p><p>For more information, see <a href="/docs/27.0.0/ingestion/schema-model#primary-timestamp">Primary timestamp</a>.</p><a name="partitioning"></a><h3 class="anchor anchorWithStickyNavbar_LWe7" id="partitioning-by-time">Partitioning by time<a href="#partitioning-by-time" class="hash-link" aria-label="Direct link to Partitioning by time" title="Direct link to Partitioning by time"></a></h3><p><code>INSERT</code> and <code>REPLACE</code> statements require the <code>PARTITIONED BY</code> clause, which determines how time-based partitioning is done. |
| In Druid, data is split into one or more segments per time chunk, defined by the PARTITIONED BY granularity.</p><p>Partitioning by time is important for three reasons:</p><ol><li>Queries that filter by <code>__time</code> (SQL) or <code>intervals</code> (native) are able to use time partitioning to prune the set of |
| segments to consider.</li><li>Certain data management operations, such as overwriting and compacting existing data, acquire exclusive write locks |
| on time partitions. Finer-grained partitioning allows finer-grained exclusive write locks.</li><li>Each segment file is wholly contained within a time partition. Too-fine-grained partitioning may cause a large number |
| of small segments, which leads to poor performance.</li></ol><p><code>PARTITIONED BY HOUR</code> and <code>PARTITIONED BY DAY</code> are the most common choices to balance these considerations. <code>PARTITIONED |
| BY ALL</code> is suitable if your dataset does not have a <a href="#primary-timestamp">primary timestamp</a>.</p><p>For more information about the syntax, see <a href="/docs/27.0.0/multi-stage-query/reference#partitioned-by">PARTITIONED BY</a>.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="clustering">Clustering<a href="#clustering" class="hash-link" aria-label="Direct link to Clustering" title="Direct link to Clustering"></a></h3><p>Within each time chunk defined by <a href="#partitioning-by-time">time partitioning</a>, data can be further split by the optional |
| <a href="/docs/27.0.0/multi-stage-query/reference#clustered-by">CLUSTERED BY</a> clause.</p><p>For example, suppose you ingest 100 million rows per hour using <code>PARTITIONED BY HOUR</code> and <code>CLUSTERED BY hostName</code>. The |
| ingestion task will generate segments of roughly 3 million rows — the default value of |
| <a href="/docs/27.0.0/multi-stage-query/reference#context-parameters"><code>rowsPerSegment</code></a> — with lexicographic ranges of <code>hostName</code>s grouped into segments.</p><p>Clustering is important for two reasons:</p><ol><li>Lower storage footprint due to improved locality, and therefore improved compressibility.</li><li>Better query performance due to dimension-based segment pruning, which removes segments from consideration when they |
| cannot possibly contain data matching a query's filter. This speeds up filters like <code>x = 'foo'</code> and <code>x IN ('foo', |
| 'bar')</code>.</li></ol><p>To activate dimension-based pruning, these requirements must be met:</p><ul><li>Segments were generated by a <code>REPLACE</code> statement, not an <code>INSERT</code> statement.</li><li>All <code>CLUSTERED BY</code> columns are single-valued string columns.</li></ul><p>If these requirements are <em>not</em> met, Druid still clusters data during ingestion but will not be able to perform |
| dimension-based segment pruning at query time. You can tell if dimension-based segment pruning is possible by using the |
| <code>sys.segments</code> table to inspect the <code>shard_spec</code> for the segments generated by an ingestion query. If they are of type |
| <code>range</code> or <code>single</code>, then dimension-based segment pruning is possible. Otherwise, it is not. The shard spec type is also |
| available in the <strong>Segments</strong> view under the <strong>Partitioning</strong> column.</p><p>For more information about syntax, see <a href="/docs/27.0.0/multi-stage-query/reference#clustered-by"><code>CLUSTERED BY</code></a>.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="rollup">Rollup<a href="#rollup" class="hash-link" aria-label="Direct link to Rollup" title="Direct link to Rollup"></a></h3><p><a href="/docs/27.0.0/ingestion/rollup">Rollup</a> is a technique that pre-aggregates data during ingestion to reduce the amount of data |
| stored. Intermediate aggregations are stored in the generated segments, and further aggregation is done at query time. |
| This reduces storage footprint and improves performance, often dramatically.</p><p>To perform ingestion with rollup:</p><ol><li>Use <code>GROUP BY</code>. The columns in the <code>GROUP BY</code> clause become dimensions, and aggregation functions become metrics.</li><li>Set <a href="/docs/27.0.0/multi-stage-query/reference#context-parameters"><code>finalizeAggregations: false</code></a> in your context. This causes aggregation |
| functions to write their internal state to the generated segments, instead of the finalized end result, and enables |
| further aggregation at query time.</li><li>Wrap all multi-value strings in <code>MV_TO_ARRAY(...)</code> and set <a href="/docs/27.0.0/multi-stage-query/reference#context-parameters"><code>groupByEnableMultiValueUnnesting: |
| false</code></a> in your context. This ensures that multi-value strings are left alone and |
| remain lists, instead of being <a href="/docs/27.0.0/querying/sql-data-types#multi-value-strings">automatically unnested</a> by the |
| <code>GROUP BY</code> operator.</li></ol><p>When you do all of these things, Druid understands that you intend to do an ingestion with rollup, and it writes |
| rollup-related metadata into the generated segments. Other applications can then use <a href="/docs/27.0.0/querying/segmentmetadataquery"><code>segmentMetadata</code> |
| queries</a> to retrieve rollup-related information.</p><p>If you see the error "Encountered multi-value dimension <code>x</code> that cannot be processed with |
| groupByEnableMultiValueUnnesting set to false", then wrap that column in <code>MV_TO_ARRAY(x) AS x</code>.</p><p>The following <a href="/docs/27.0.0/querying/sql-aggregations">aggregation functions</a> are supported for rollup at ingestion time: |
| <code>COUNT</code> (but switch to <code>SUM</code> at query time), <code>SUM</code>, <code>MIN</code>, <code>MAX</code>, <code>EARLIEST</code> (<a href="/docs/27.0.0/multi-stage-query/known-issues#select-statement">string only</a>), |
| <code>LATEST</code> (<a href="/docs/27.0.0/multi-stage-query/known-issues#select-statement">string only</a>), <code>APPROX_COUNT_DISTINCT</code>, <code>APPROX_COUNT_DISTINCT_BUILTIN</code>, |
| <code>APPROX_COUNT_DISTINCT_DS_HLL</code>, <code>APPROX_COUNT_DISTINCT_DS_THETA</code>, and <code>DS_QUANTILES_SKETCH</code> (but switch to |
| <code>APPROX_QUANTILE_DS</code> at query time). Do not use <code>AVG</code>; instead, use <code>SUM</code> and <code>COUNT</code> at ingest time and compute the |
| quotient at query time.</p><p>For an example, see <a href="/docs/27.0.0/multi-stage-query/examples#insert-with-rollup">INSERT with rollup example</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="multi-stage-query-tasks">Multi-stage query tasks<a href="#multi-stage-query-tasks" class="hash-link" aria-label="Direct link to Multi-stage query tasks" title="Direct link to Multi-stage query tasks"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="execution-flow">Execution flow<a href="#execution-flow" class="hash-link" aria-label="Direct link to Execution flow" title="Direct link to Execution flow"></a></h3><p>When you execute a SQL statement using the task endpoint <a href="/docs/27.0.0/api-reference/sql-ingestion-api#submit-a-query"><code>/druid/v2/sql/task</code></a>, the following |
| happens:</p><ol><li><p>The Broker plans your SQL query into a native query, as usual.</p></li><li><p>The Broker wraps the native query into a task of type <code>query_controller</code> |
| and submits it to the indexing service.</p></li><li><p>The Broker returns the task ID to you and exits.</p></li><li><p>The controller task launches some number of worker tasks determined by |
| the <code>maxNumTasks</code> and <code>taskAssignment</code> <a href="/docs/27.0.0/multi-stage-query/reference#context-parameters">context parameters</a>. You can set these settings individually for each query.</p></li><li><p>Worker tasks of type <code>query_worker</code> execute the query.</p></li><li><p>If the query is a <code>SELECT</code> query, the worker tasks send the results |
| back to the controller task, which writes them into its task report. |
| If the query is an INSERT or REPLACE query, the worker tasks generate and |
| publish new Druid segments to the provided datasource.</p></li></ol><h3 class="anchor anchorWithStickyNavbar_LWe7" id="parallelism">Parallelism<a href="#parallelism" class="hash-link" aria-label="Direct link to Parallelism" title="Direct link to Parallelism"></a></h3><p>The <a href="/docs/27.0.0/multi-stage-query/reference#context-parameters"><code>maxNumTasks</code></a> query parameter determines the maximum number of tasks your |
| query will use, including the one <code>query_controller</code> task. Generally, queries perform better with more workers. The |
| lowest possible value of <code>maxNumTasks</code> is two (one worker and one controller). Do not set this higher than the number of |
| free slots available in your cluster; doing so will result in a <a href="/docs/27.0.0/multi-stage-query/reference#error_TaskStartTimeout">TaskStartTimeout</a> |
| error.</p><p>When <a href="#extern">reading external data</a>, EXTERN can read multiple files in parallel across |
| different worker tasks. However, EXTERN does not split individual files across multiple worker tasks. If you have a |
| small number of very large input files, you can increase query parallelism by splitting up your input files.</p><p>The <code>druid.worker.capacity</code> server property on each <a href="/docs/27.0.0/design/architecture#druid-services">Middle Manager</a> |
| determines the maximum number of worker tasks that can run on each server at once. Worker tasks run single-threaded, |
| which also determines the maximum number of processors on the server that can contribute towards multi-stage queries.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="memory-usage">Memory usage<a href="#memory-usage" class="hash-link" aria-label="Direct link to Memory usage" title="Direct link to Memory usage"></a></h3><p>Increasing the amount of available memory can improve performance in certain cases:</p><ul><li>Segment generation becomes more efficient when data doesn't spill to disk as often.</li><li>Sorting stage output data becomes more efficient since available memory affects the |
| number of required sorting passes.</li></ul><p>Worker tasks use both JVM heap memory and off-heap ("direct") memory.</p><p>On Peons launched by Middle Managers, the bulk of the JVM heap (75%, less any space used by |
| <a href="/docs/27.0.0/querying/lookups">lookups</a>) is split up into two bundles of equal size: one processor bundle and one worker |
| bundle. Each one comprises 37.5% of the available JVM heap, less any space used by <a href="/docs/27.0.0/querying/lookups">lookups</a>.</p><p>Depending on the type of query, controller and worker tasks may use sketches for determining partition boundaries. |
| The heap footprint of these sketches is capped at 10% of available memory, or 300 MB, whichever is lower.</p><p>The processor memory bundle is used for query processing and segment generation. Each processor bundle must also |
| provides space to buffer I/O between stages. Specifically, each downstream stage requires 1 MB of buffer space for each |
| upstream worker. For example, if you have 100 workers running in stage 0, and stage 1 reads from stage 0, then each |
| worker in stage 1 requires 1M * 100 = 100 MB of memory for frame buffers.</p><p>The worker memory bundle is used for sorting stage output data prior to shuffle. Workers can sort more data than fits in |
| memory; in this case, they will switch to using disk.</p><p>Worker tasks also use off-heap ("direct") memory. Set the amount of direct memory available (<code>-XX:MaxDirectMemorySize</code>) |
| to at least <code>(druid.processing.numThreads + 1) * druid.processing.buffer.sizeBytes</code>. Increasing the amount of direct |
| memory available beyond the minimum does not speed up processing.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="disk-usage">Disk usage<a href="#disk-usage" class="hash-link" aria-label="Direct link to Disk usage" title="Direct link to Disk usage"></a></h3><p>Worker tasks use local disk for four purposes:</p><ul><li>Temporary copies of input data. Each temporary file is deleted before the next one is read. You only need |
| enough temporary disk space to store one input file at a time per task.</li><li>Temporary data related to segment generation. You only need enough temporary disk space to store one segments' worth |
| of data at a time per task. This is generally less than 2 GB per task.</li><li>External sort of data prior to shuffle. Requires enough space to store a compressed copy of the entire output dataset |
| for a task.</li><li>Storing stage output data during a shuffle. Requires enough space to store a compressed copy of the entire output |
| dataset for a task.</li></ul><p>Workers use the task working directory, given by |
| <a href="/docs/27.0.0/configuration/#additional-peon-configuration"><code>druid.indexer.task.baseDir</code></a>, for these items. It is |
| important that this directory has enough space available for these purposes.</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/27.0.0/multi-stage-query/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">SQL-based ingestion</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/27.0.0/multi-stage-query/security"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Security</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#multi-stage-query-task-engine" class="table-of-contents__link toc-highlight">Multi-stage query task engine</a></li><li><a href="#sql-extensions" class="table-of-contents__link toc-highlight">SQL extensions</a><ul><li><a href="#read-external-data-with-extern" class="table-of-contents__link toc-highlight">Read external data with <code>EXTERN</code></a></li><li><a href="#load-data-with-insert" class="table-of-contents__link toc-highlight">Load data with <code>INSERT</code></a></li><li><a href="#overwrite-data-with-replace" class="table-of-contents__link toc-highlight">Overwrite data with REPLACE</a></li><li><a href="#primary-timestamp" class="table-of-contents__link toc-highlight">Primary timestamp</a></li><li><a href="#partitioning-by-time" class="table-of-contents__link toc-highlight">Partitioning by time</a></li><li><a href="#clustering" class="table-of-contents__link toc-highlight">Clustering</a></li><li><a href="#rollup" class="table-of-contents__link toc-highlight">Rollup</a></li></ul></li><li><a href="#multi-stage-query-tasks" class="table-of-contents__link toc-highlight">Multi-stage query tasks</a><ul><li><a href="#execution-flow" class="table-of-contents__link toc-highlight">Execution flow</a></li><li><a href="#parallelism" class="table-of-contents__link toc-highlight">Parallelism</a></li><li><a href="#memory-usage" class="table-of-contents__link toc-highlight">Memory usage</a></li><li><a href="#disk-usage" class="table-of-contents__link toc-highlight">Disk usage</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div> |
| <script src="/assets/js/runtime~main.5371e784.js"></script> |
| <script src="/assets/js/main.832012d1.js"></script> |
| </body> |
| </html> |