| <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>SQL-based ingestion concepts · Apache Druid</title><meta name="viewport" content="width=device-width"/><link rel="canonical" href="https://druid.apache.org/docs/latest/multi-stage-query/concepts.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="<!--"/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="24.0.0" /><meta property="og:title" content="SQL-based ingestion concepts · Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="<!--"/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script> |
| window.dataLayer = window.dataLayer || []; |
| function gtag(){dataLayer.push(arguments); } |
| gtag('js', new Date()); |
| gtag('config', 'UA-131010415-1'); |
| </script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/latest/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>›</i><span>SQL-based ingestion</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/index.html">Quickstart</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/docker.html">Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-batch.html">Loading files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-msq-external-data.html">Loading files with SQL</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-sketches-theta.html">Theta sketches</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-msq-convert-json.html">Convert ingestion spec to SQL</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/design/architecture.html">Design</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-model.html">Data model</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/rollup.html">Data rollup</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/partitioning.html">Partitioning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/ingestion-spec.html">Ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/schema-design.html">Schema design tips</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-management.html">Data management</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/compaction.html">Compaction</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/automatic-compaction.html">Automatic compaction</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-ingestion.html">Apache Kafka ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-supervisor-reference.html">Apache Kafka supervisor</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-supervisor-operations.html">Apache Kafka operations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-simple-task.html">Simple task indexing</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-input-sources.html">Input sources</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-firehose.html">Firehose</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">SQL-based ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/index.html">Overview and syntax</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/latest/multi-stage-query/concepts.html">Key concepts</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/connect-external-data.html">Tutorial - Load files with SQL-based ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/convert-json-spec.html">Tutorial - Convert an ingestion spec for SQL-based ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/examples.html">Examples</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/api.html">API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/security.html">Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/reference.html">Reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/known-issues.html">Known issues</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Druid SQL</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql.html">Overview and syntax</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-data-types.html">SQL data types</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-operators.html">Operators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-scalar.html">Scalar functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-aggregations.html">Aggregation functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-multivalue-string-functions.html">Multi-value string functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-functions.html">All functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-json-functions.html">JSON functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-api.html">Druid SQL API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-jdbc.html">JDBC driver API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-query-context.html">SQL query context</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-metadata-tables.html">SQL metadata tables</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-translation.html">SQL query translation</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/querying/querying.html">Native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/query-execution.html">Query execution</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/troubleshooting.html">Troubleshooting</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Concepts</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/nested-columns.html">Nested columns</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/multitenancy.html">Multitenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/using-caching.html">Using query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/query-context.html">Query context</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query components</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/filters.html">Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/granularities.html">Granularities</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/dimensionspecs.html">Dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/aggregations.html">Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/post-aggregations.html">Post-aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/having.html">Having filters (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/limitspec.html">Sorting and limiting (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/topnmetricspec.html">Sorting (topN)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sorting-orders.html">String comparators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/virtual-columns.html">Virtual columns</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/geo.html">Spatial filters</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/operations/druid-console.html">Druid console</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/getting-started.html">Getting started with Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/java.html">Java runtime</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Security</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/security-overview.html">Security overview</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/security-user-auth.html">User authentication and authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/auth-ldap.html">LDAP auth</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/dynamic-config-provider.html">Dynamic Config Providers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/tls-support.html">TLS support</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Performance tuning</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/segment-optimization.html">Segment size optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/mixed-workloads.html">Mixed workloads</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/clean-metadata-store.html">Automated metadata cleanup</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Monitoring</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/request-logging.html">Request logging</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/alerts.html">Alerts</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/rule-configuration.html">Retaining or automatically dropping data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/pull-deps.html">pull-deps tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-key-value.html">Apache Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-ranger-security.html">Apache Ranger Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-pac4j.html">Druid pac4j based Security extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-aws-rds.html">Druid AWS RDS Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kubernetes.html">Kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/moving-average-query.html">Moving Average Query</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/gce-extensions.html">GCE Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/aliyun-oss.html">Aliyun OSS</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/prometheus.html">Prometheus Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/kubernetes.html">kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/select-query.html">Select</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script> |
| var coll = document.getElementsByClassName('collapsible'); |
| var checkActiveCategory = true; |
| for (var i = 0; i < coll.length; i++) { |
| var links = coll[i].nextElementSibling.getElementsByTagName('*'); |
| if (checkActiveCategory){ |
| for (var j = 0; j < links.length; j++) { |
| if (links[j].classList.contains('navListItemActive')){ |
| coll[i].nextElementSibling.classList.toggle('hide'); |
| coll[i].childNodes[1].classList.toggle('rotate'); |
| checkActiveCategory = false; |
| break; |
| } |
| } |
| } |
| |
| coll[i].addEventListener('click', function() { |
| var arrow = this.childNodes[1]; |
| arrow.classList.toggle('rotate'); |
| var content = this.nextElementSibling; |
| content.classList.toggle('hide'); |
| }); |
| } |
| |
| document.addEventListener('DOMContentLoaded', function() { |
| createToggler('#navToggler', '#docsNav', 'docsSliderActive'); |
| createToggler('#tocToggler', 'body', 'tocActive'); |
| |
| var headings = document.querySelector('.toc-headings'); |
| headings && headings.addEventListener('click', function(event) { |
| var el = event.target; |
| while(el !== headings){ |
| if (el.tagName === 'A') { |
| document.body.classList.remove('tocActive'); |
| break; |
| } else{ |
| el = el.parentNode; |
| } |
| } |
| }, false); |
| |
| function createToggler(togglerSelector, targetSelector, className) { |
| var toggler = document.querySelector(togglerSelector); |
| var target = document.querySelector(targetSelector); |
| |
| if (!toggler) { |
| return; |
| } |
| |
| toggler.onclick = function(event) { |
| event.preventDefault(); |
| |
| target.classList.toggle(className); |
| }; |
| } |
| }); |
| </script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/multi-stage-query/msq-concepts.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">SQL-based ingestion concepts</h1></header><article><div><span><!-- |
| ~ Licensed to the Apache Software Foundation (ASF) under one |
| ~ or more contributor license agreements. See the NOTICE file |
| ~ distributed with this work for additional information |
| ~ regarding copyright ownership. The ASF licenses this file |
| ~ to you under the Apache License, Version 2.0 (the |
| ~ "License"); you may not use this file except in compliance |
| ~ with the License. You may obtain a copy of the License at |
| ~ |
| ~ http://www.apache.org/licenses/LICENSE-2.0 |
| ~ |
| ~ Unless required by applicable law or agreed to in writing, |
| ~ software distributed under the License is distributed on an |
| ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| ~ KIND, either express or implied. See the License for the |
| ~ specific language governing permissions and limitations |
| ~ under the License. |
| --> |
| <blockquote> |
| <p>SQL-based ingestion using the multi-stage query task engine is our recommended solution starting in Druid 24.0. Alternative ingestion solutions, such as native batch and Hadoop-based ingestion systems, will still be supported. We recommend you read all <a href="/docs/latest/multi-stage-query/known-issues.html">known issues</a> and test the feature in a development environment before rolling it out in production. Using the multi-stage query task engine with <code>SELECT</code> statements that do not write to a datasource is experimental.</p> |
| </blockquote> |
| <p>This topic covers the main concepts and terminology of the multi-stage query architecture.</p> |
| <h2><a class="anchor" aria-hidden="true" id="vocabulary"></a><a href="#vocabulary" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Vocabulary</h2> |
| <p>You might see the following terms in the documentation or while you're using the multi-stage query architecture and task engine, such as when you view the report for a query:</p> |
| <ul> |
| <li><p><strong>Controller</strong>: An indexing service task of type <code>query_controller</code> that manages |
| the execution of a query. There is one controller task per query.</p></li> |
| <li><p><strong>Worker</strong>: Indexing service tasks of type <code>query_worker</code> that execute a |
| query. There can be multiple worker tasks per query. Internally, |
| the tasks process items in parallel using their processing pools (up to <code>druid.processing.numThreads</code> of execution parallelism |
| within a worker task).</p></li> |
| <li><p><strong>Stage</strong>: A stage of query execution that is parallelized across |
| worker tasks. Workers exchange data with each other between stages.</p></li> |
| <li><p><strong>Partition</strong>: A slice of data output by worker tasks. In INSERT or REPLACE |
| queries, the partitions of the final stage become Druid segments.</p></li> |
| <li><p><strong>Shuffle</strong>: Workers exchange data between themselves on a per-partition basis in a process called |
| shuffling. During a shuffle, each output partition is sorted by a clustering key.</p></li> |
| </ul> |
| <h2><a class="anchor" aria-hidden="true" id="how-the-msq-task-engine-works"></a><a href="#how-the-msq-task-engine-works" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>How the MSQ task engine works</h2> |
| <p>Query tasks, specifically queries for INSERT, REPLACE, and SELECT, execute using indexing service tasks. Every query occupies at least two task slots while running.</p> |
| <p>When you submit a query task to the MSQ task engine, the following happens:</p> |
| <ol> |
| <li><p>The Broker plans your SQL query into a native query, as usual.</p></li> |
| <li><p>The Broker wraps the native query into a task of type <code>query_controller</code> |
| and submits it to the indexing service.</p></li> |
| <li><p>The Broker returns the task ID to you and exits.</p></li> |
| <li><p>The controller task launches some number of worker tasks determined by |
| the <code>maxNumTasks</code> and <code>taskAssignment</code> <a href="/docs/latest/multi-stage-query/reference.html#context-parameters">context parameters</a>. You can set these settings individually for each query.</p></li> |
| <li><p>The worker tasks execute the query.</p></li> |
| <li><p>If the query is a SELECT query, the worker tasks send the results |
| back to the controller task, which writes them into its task report. |
| If the query is an INSERT or REPLACE query, the worker tasks generate and |
| publish new Druid segments to the provided datasource.</p></li> |
| </ol> |
| <h2><a class="anchor" aria-hidden="true" id="parallelism"></a><a href="#parallelism" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Parallelism</h2> |
| <p>Parallelism affects performance.</p> |
| <p>The <a href="/docs/latest/multi-stage-query/reference.html#context-parameters"><code>maxNumTasks</code></a> query parameter determines the maximum number of tasks (workers and one controller) your query will use. Generally, queries perform better with more workers. The lowest possible value of <code>maxNumTasks</code> is two (one worker and one controller), and the highest possible value is equal to the number of free task slots in your cluster.</p> |
| <p>The <code>druid.worker.capacity</code> server property on each Middle Manager determines the maximum number |
| of worker tasks that can run on each server at once. Worker tasks run single-threaded, which |
| also determines the maximum number of processors on the server that can contribute towards |
| multi-stage queries. Since data servers are shared between Historicals and |
| Middle Managers, the default setting for <code>druid.worker.capacity</code> is lower than the number of |
| processors on the server. Advanced users may consider enhancing parallelism by increasing this |
| value to one less than the number of processors on the server. In most cases, this increase must |
| be accompanied by an adjustment of the memory allotment of the Historical process, |
| Middle-Manager-launched tasks, or both, to avoid memory overcommitment and server instability. If |
| you are not comfortable tuning these memory usage parameters to avoid overcommitment, it is best |
| to stick with the default <code>druid.worker.capacity</code>.</p> |
| <h2><a class="anchor" aria-hidden="true" id="memory-usage"></a><a href="#memory-usage" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Memory usage</h2> |
| <p>Increasing the amount of available memory can improve performance as follows:</p> |
| <ul> |
| <li>Segment generation becomes more efficient when data doesn't spill to disk as often.</li> |
| <li>Sorting stage output data becomes more efficient since available memory affects the |
| number of required sorting passes.</li> |
| </ul> |
| <p>Worker tasks use both JVM heap memory and off-heap ("direct") memory.</p> |
| <p>On Peons launched by Middle Managers, the bulk of the JVM heap (75%) is split up into two bundles of equal size: one processor bundle and one worker bundle. Each one comprises 37.5% of the available JVM heap.</p> |
| <p>The processor memory bundle is used for query processing and segment generation. Each processor bundle must |
| also provides space to buffer I/O between stages. Specifically, each downstream stage requires 1 MB of buffer space for |
| each upstream worker. For example, if you have 100 workers running in stage 0, and stage 1 reads from stage 0, |
| then each worker in stage 1 requires 1M * 100 = 100 MB of memory for frame buffers.</p> |
| <p>The worker memory bundle is used for sorting stage output data prior to shuffle. Workers can sort |
| more data than fits in memory; in this case, they will switch to using disk.</p> |
| <p>Worker tasks also use off-heap ("direct") memory. Set the amount of direct |
| memory available (<code>-XX:MaxDirectMemorySize</code>) to at least |
| <code>(druid.processing.numThreads + 1) * druid.processing.buffer.sizeBytes</code>. Increasing the |
| amount of direct memory available beyond the minimum does not speed up processing.</p> |
| <p>It may be necessary to override one or more memory-related parameters if you run into one of the <a href="/docs/latest/multi-stage-query/known-issues.html">known issues</a> around memory usage.</p> |
| <h2><a class="anchor" aria-hidden="true" id="limits"></a><a href="#limits" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Limits</h2> |
| <p>Knowing the limits for the MSQ task engine can help you troubleshoot any <a href="#error-codes">errors</a> that you encounter. Many of the errors occur as a result of reaching a limit.</p> |
| <p>The following table lists query limits:</p> |
| <table> |
| <thead> |
| <tr><th>Limit</th><th>Value</th><th>Error if exceeded</th></tr> |
| </thead> |
| <tbody> |
| <tr><td>Size of an individual row written to a frame. Row size when written to a frame may differ from the original row size.</td><td>1 MB</td><td><code>RowTooLarge</code></td></tr> |
| <tr><td>Number of segment-granular time chunks encountered during ingestion.</td><td>5,000</td><td><code>TooManyBuckets</code></td></tr> |
| <tr><td>Number of input files/segments per worker.</td><td>10,000</td><td><code>TooManyInputFiles</code></td></tr> |
| <tr><td>Number of output partitions for any one stage. Number of segments generated during ingestion.</td><td>25,000</td><td><code>TooManyPartitions</code></td></tr> |
| <tr><td>Number of output columns for any one stage.</td><td>2,000</td><td><code>TooManyColumns</code></td></tr> |
| <tr><td>Number of workers for any one stage.</td><td>Hard limit is 1,000. Memory-dependent soft limit may be lower.</td><td><code>TooManyWorkers</code></td></tr> |
| <tr><td>Maximum memory occupied by broadcasted tables.</td><td>30% of each <a href="#memory-usage">processor memory bundle</a>.</td><td><code>BroadcastTablesTooLarge</code></td></tr> |
| </tbody> |
| </table> |
| <h2><a class="anchor" aria-hidden="true" id="error-codes"></a><a href="#error-codes" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Error codes</h2> |
| <p>The following table describes error codes you may encounter in the <code>multiStageQuery.payload.status.errorReport.error.errorCode</code> field:</p> |
| <table> |
| <thead> |
| <tr><th>Code</th><th>Meaning</th><th>Additional fields</th></tr> |
| </thead> |
| <tbody> |
| <tr><td>BroadcastTablesTooLarge</td><td>The size of the broadcast tables, used in right hand side of the joins, exceeded the memory reserved for them in a worker task.</td><td><code>maxBroadcastTablesSize</code>: Memory reserved for the broadcast tables, measured in bytes.</td></tr> |
| <tr><td>Canceled</td><td>The query was canceled. Common reasons for cancellation:<br /><br /><ul><li>User-initiated shutdown of the controller task via the <code>/druid/indexer/v1/task/{taskId}/shutdown</code> API.</li><li>Restart or failure of the server process that was running the controller task.</li></ul></td><td></td></tr> |
| <tr><td>CannotParseExternalData</td><td>A worker task could not parse data from an external datasource.</td><td></td></tr> |
| <tr><td>ColumnNameRestricted</td><td>The query uses a restricted column name.</td><td></td></tr> |
| <tr><td>ColumnTypeNotSupported</td><td>Support for writing or reading from a particular column type is not supported.</td><td></td></tr> |
| <tr><td>ColumnTypeNotSupported</td><td>The query attempted to use a column type that is not supported by the frame format. This occurs with ARRAY types, which are not yet implemented for frames.</td><td><code>columnName</code><br /> <br /><code>columnType</code></td></tr> |
| <tr><td>InsertCannotAllocateSegment</td><td>The controller task could not allocate a new segment ID due to conflict with existing segments or pending segments. Common reasons for such conflicts:<br /> <br /><ul><li>Attempting to mix different granularities in the same intervals of the same datasource.</li><li>Prior ingestions that used non-extendable shard specs.</li></ul></td><td><code>dataSource</code><br /> <br /><code>interval</code>: The interval for the attempted new segment allocation.</td></tr> |
| <tr><td>InsertCannotBeEmpty</td><td>An INSERT or REPLACE query did not generate any output rows in a situation where output rows are required for success. This can happen for INSERT or REPLACE queries with <code>PARTITIONED BY</code> set to something other than <code>ALL</code> or <code>ALL TIME</code>.</td><td><code>dataSource</code></td></tr> |
| <tr><td>InsertCannotOrderByDescending</td><td>An INSERT query contained a <code>CLUSTERED BY</code> expression in descending order. Druid's segment generation code only supports ascending order.</td><td><code>columnName</code></td></tr> |
| <tr><td>InsertCannotReplaceExistingSegment</td><td>A REPLACE query cannot proceed because an existing segment partially overlaps those bounds, and the portion within the bounds is not fully overshadowed by query results. <br /> <br />There are two ways to address this without modifying your query:<ul><li>Shrink the OVERLAP filter to match the query results.</li><li>Expand the OVERLAP filter to fully contain the existing segment.</li></ul></td><td><code>segmentId</code>: The existing segment <br /></td></tr> |
| <tr><td>InsertLockPreempted</td><td>An INSERT or REPLACE query was canceled by a higher-priority ingestion job, such as a real-time ingestion task.</td><td></td></tr> |
| <tr><td>InsertTimeNull</td><td>An INSERT or REPLACE query encountered a null timestamp in the <code>__time</code> field.<br /><br />This can happen due to using an expression like <code>TIME_PARSE(timestamp) AS __time</code> with a timestamp that cannot be parsed. (TIME_PARSE returns null when it cannot parse a timestamp.) In this case, try parsing your timestamps using a different function or pattern.<br /><br />If your timestamps may genuinely be null, consider using COALESCE to provide a default value. One option is CURRENT_TIMESTAMP, which represents the start time of the job.</td></tr> |
| <tr><td>InsertTimeOutOfBounds</td><td>A REPLACE query generated a timestamp outside the bounds of the TIMESTAMP parameter for your OVERWRITE WHERE clause.<br /> <br />To avoid this error, verify that the you specified is valid.</td><td><code>interval</code>: time chunk interval corresponding to the out-of-bounds timestamp</td></tr> |
| <tr><td>InvalidNullByte</td><td>A string column included a null byte. Null bytes in strings are not permitted.</td><td><code>column</code>: The column that included the null byte</td></tr> |
| <tr><td>QueryNotSupported</td><td>QueryKit could not translate the provided native query to a multi-stage query.<br /> <br />This can happen if the query uses features that aren't supported, like GROUPING SETS.</td><td></td></tr> |
| <tr><td>RowTooLarge</td><td>The query tried to process a row that was too large to write to a single frame. See the <a href="#limits">Limits</a> table for the specific limit on frame size. Note that the effective maximum row size is smaller than the maximum frame size due to alignment considerations during frame writing.</td><td><code>maxFrameSize</code>: The limit on the frame size.</td></tr> |
| <tr><td>TaskStartTimeout</td><td>Unable to launch all the worker tasks in time. <br /> <br />There might be insufficient available slots to start all the worker tasks simultaneously.<br /> <br /> Try splitting up the query into smaller chunks with lesser <code>maxNumTasks</code> number. Another option is to increase capacity.</td><td></td></tr> |
| <tr><td>TooManyBuckets</td><td>Exceeded the number of partition buckets for a stage. Partition buckets are only used for <code>segmentGranularity</code> during INSERT queries. The most common reason for this error is that your <code>segmentGranularity</code> is too narrow relative to the data. See the <a href="/docs/latest/multi-stage-query/concepts.html#limits">Limits</a> table for the specific limit.</td><td><code>maxBuckets</code>: The limit on buckets.</td></tr> |
| <tr><td>TooManyInputFiles</td><td>Exceeded the number of input files/segments per worker. See the <a href="/docs/latest/multi-stage-query/concepts.html#limits">Limits</a> table for the specific limit.</td><td><code>umInputFiles</code>: The total number of input files/segments for the stage.<br /><br /><code>maxInputFiles</code>: The maximum number of input files/segments per worker per stage.<br /><br /><code>minNumWorker</code>: The minimum number of workers required for a successful run.</td></tr> |
| <tr><td>TooManyPartitions</td><td>Exceeded the number of partitions for a stage. The most common reason for this is that the final stage of an INSERT or REPLACE query generated too many segments. See the <a href="/docs/latest/multi-stage-query/concepts.html#limits">Limits</a> table for the specific limit.</td><td><code>maxPartitions</code>: The limit on partitions which was exceeded</td></tr> |
| <tr><td>TooManyColumns</td><td>Exceeded the number of columns for a stage. See the <a href="#limits">Limits</a> table for the specific limit.</td><td><code>maxColumns</code>: The limit on columns which was exceeded.</td></tr> |
| <tr><td>TooManyWarnings</td><td>Exceeded the allowed number of warnings of a particular type.</td><td><code>rootErrorCode</code>: The error code corresponding to the exception that exceeded the required limit. <br /><br /><code>maxWarnings</code>: Maximum number of warnings that are allowed for the corresponding <code>rootErrorCode</code>.</td></tr> |
| <tr><td>TooManyWorkers</td><td>Exceeded the supported number of workers running simultaneously. See the <a href="#limits">Limits</a> table for the specific limit.</td><td><code>workers</code>: The number of simultaneously running workers that exceeded a hard or soft limit. This may be larger than the number of workers in any one stage if multiple stages are running simultaneously. <br /><br /><code>maxWorkers</code>: The hard or soft limit on workers that was exceeded.</td></tr> |
| <tr><td>NotEnoughMemory</td><td>Insufficient memory to launch a stage.</td><td><code>serverMemory</code>: The amount of memory available to a single process.<br /><br /><code>serverWorkers</code>: The number of workers running in a single process.<br /><br /><code>serverThreads</code>: The number of threads in a single process.</td></tr> |
| <tr><td>WorkerFailed</td><td>A worker task failed unexpectedly.</td><td><code>workerTaskId</code>: The ID of the worker task.</td></tr> |
| <tr><td>WorkerRpcFailed</td><td>A remote procedure call to a worker task failed and could not recover.</td><td><code>workerTaskId</code>: the id of the worker task</td></tr> |
| <tr><td>UnknownError</td><td>All other errors.</td><td></td></tr> |
| </tbody> |
| </table> |
| </span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/latest/multi-stage-query/index.html"><span class="arrow-prev">← </span><span>Overview and syntax</span></a><a class="docs-next button" href="/docs/latest/multi-stage-query/connect-external-data.html"><span>Tutorial - Load files with SQL-based ingestion</span><span class="arrow-next"> →</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#vocabulary">Vocabulary</a></li><li><a href="#how-the-msq-task-engine-works">How the MSQ task engine works</a></li><li><a href="#parallelism">Parallelism</a></li><li><a href="#memory-usage">Memory usage</a></li><li><a href="#limits">Limits</a></li><li><a href="#error-codes">Error codes</a></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a> · <a href="/use-cases">Use Cases</a> · <a href="/druid-powered">Powered by Druid</a> · <a href="/docs/latest/">Docs</a> · <a href="/community/">Community</a> · <a href="/downloads.html">Download</a> · <a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a> · <a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a> · <a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a> · <a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright © 2022 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.js"></script><script> |
| document.addEventListener('keyup', function(e) { |
| if (e.target !== document.body) { |
| return; |
| } |
| // keyCode for '/' (slash) |
| if (e.keyCode === 191) { |
| const search = document.getElementById('search_input_react'); |
| search && search.focus(); |
| } |
| }); |
| </script><script> |
| var search = docsearch({ |
| |
| apiKey: '2de99082a9f38e49dfaa059bbe4c901d', |
| indexName: 'apache_druid', |
| inputSelector: '#search_input_react', |
| algoliaOptions: {"facetFilters":["language:en","version:24.0.0"]} |
| }); |
| </script></body></html> |