blob: 3c37a82c9006a9572cd6a7bdf8ea82fb18e895ee [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Schema design tips ยท Apache Druid</title><meta name="viewport" content="width=device-width, initial-scale=1.0"/><link rel="canonical" href="https://druid.apache.org/docs/26.0.0/ingestion/schema-design.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="&lt;!--"/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="26.0.0" /><meta property="og:title" content="Schema design tips ยท Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="&lt;!--"/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments); }
gtag('js', new Date());
gtag('config', 'UA-131010415-1');
</script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/26.0.0/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>โ€บ</i><span>Ingestion</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/index.html">Quickstart (local)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-batch.html">Load files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-msq-extern.html">Load files using SQL ๐Ÿ†•</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-sketches-theta.html">Theta sketches</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/docker.html">Tutorial: Run with Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-msq-convert-spec.html">Convert ingestion spec to SQL</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-sql-query-view.html">Get to know Query view</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-unnest-arrays.html">Unnesting arrays</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-jupyter-index.html">Jupyter Notebook tutorials</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-jupyter-docker.html">Docker for tutorials</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-jdbc.html">JDBC connector</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/architecture.html">Design</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/data-model.html">Data model</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/rollup.html">Data rollup</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/partitioning.html">Partitioning</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/ingestion-spec.html">Ingestion spec</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/26.0.0/ingestion/schema-design.html">Schema design tips</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-ingestion.html">Apache Kafka ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-supervisor-reference.html">Apache Kafka supervisor</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-supervisor-operations.html">Apache Kafka operations</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch-input-sources.html">Native batch: input sources</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/migrate-from-firehose.html">Migrate from firehose</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">SQL-based ingestion ๐Ÿ†•</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/index.html">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/concepts.html">Key concepts</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/api.html">API</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/security.html">Security</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/examples.html">Examples</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/reference.html">Reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/known-issues.html">Known issues</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Data management<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/index.html">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/update.html">Data updates</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/delete.html">Data deletion</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/schema-changes.html">Schema changes</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/compaction.html">Compaction</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/automatic-compaction.html">Automatic compaction</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Druid SQL</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql.html">Overview and syntax</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-data-types.html">SQL data types</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-operators.html">Operators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-scalar.html">Scalar functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-aggregations.html">Aggregation functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-multivalue-string-functions.html">Multi-value string functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-json-functions.html">JSON functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-functions.html">All functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-api.html">Druid SQL API</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-jdbc.html">JDBC driver API</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-query-context.html">SQL query context</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-metadata-tables.html">SQL metadata tables</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-translation.html">SQL query translation</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/querying.html">Native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/query-execution.html">Query execution</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/troubleshooting.html">Troubleshooting</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Concepts</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/nested-columns.html">Nested columns</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/multitenancy.html">Multitenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/using-caching.html">Using query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/query-context.html">Query context</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query components</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/filters.html">Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/granularities.html">Granularities</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/dimensionspecs.html">Dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/aggregations.html">Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/post-aggregations.html">Post-aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/having.html">Having filters (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/limitspec.html">Sorting and limiting (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/topnmetricspec.html">Sorting (topN)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sorting-orders.html">String comparators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/virtual-columns.html">Virtual columns</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/geo.html">Spatial filters</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/web-console.html">Web console</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/java.html">Java runtime</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Security</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/security-overview.html">Security overview</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/security-user-auth.html">User authentication and authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/auth-ldap.html">LDAP auth</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/dynamic-config-provider.html">Dynamic Config Providers</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/tls-support.html">TLS support</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Performance tuning</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/segment-optimization.html">Segment size optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/mixed-workloads.html">Mixed workloads</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/clean-metadata-store.html">Automated metadata cleanup</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Monitoring</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/request-logging.html">Request logging</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/alerts.html">Alerts</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/rule-configuration.html">Using rules to drop and retain data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/pull-deps.html">pull-deps tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-key-value.html">Apache Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-ranger-security.html">Apache Ranger Security</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-pac4j.html">Druid pac4j based Security extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-aws-rds.html">Druid AWS RDS Module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kubernetes.html">Kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/moving-average-query.html">Moving Average Query</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/gce-extensions.html">GCE Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/aliyun-oss.html">Aliyun OSS</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/prometheus.html">Prometheus Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/kubernetes.html">kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/select-query.html">Select</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch-firehose.html">Firehose (deprecated)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch-simple-task.html">Native batch (simple)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script>
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
var links = coll[i].nextElementSibling.getElementsByTagName('*');
if (checkActiveCategory){
for (var j = 0; j < links.length; j++) {
if (links[j].classList.contains('navListItemActive')){
coll[i].nextElementSibling.classList.toggle('hide');
coll[i].childNodes[1].classList.toggle('rotate');
checkActiveCategory = false;
break;
}
}
}
coll[i].addEventListener('click', function() {
var arrow = this.childNodes[1];
arrow.classList.toggle('rotate');
var content = this.nextElementSibling;
content.classList.toggle('hide');
});
}
document.addEventListener('DOMContentLoaded', function() {
createToggler('#navToggler', '#docsNav', 'docsSliderActive');
createToggler('#tocToggler', 'body', 'tocActive');
var headings = document.querySelector('.toc-headings');
headings && headings.addEventListener('click', function(event) {
var el = event.target;
while(el !== headings){
if (el.tagName === 'A') {
document.body.classList.remove('tocActive');
break;
} else{
el = el.parentNode;
}
}
}, false);
function createToggler(togglerSelector, targetSelector, className) {
var toggler = document.querySelector(togglerSelector);
var target = document.querySelector(targetSelector);
if (!toggler) {
return;
}
toggler.onclick = function(event) {
event.preventDefault();
target.classList.toggle(className);
};
}
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/ingestion/schema-design.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Schema design tips</h1></header><article><div><span><!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<h2><a class="anchor" aria-hidden="true" id="druids-data-model"></a><a href="#druids-data-model" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Druid's data model</h2>
<p>For general information, check out the documentation on <a href="/docs/26.0.0/ingestion/data-model.html">Druid's data model</a> on the main
ingestion overview page. The rest of this page discusses tips for users coming from other kinds of systems, as well as
general tips and common practices.</p>
<ul>
<li>Druid data is stored in <a href="/docs/26.0.0/ingestion/data-model.html">datasources</a>, which are similar to tables in a traditional RDBMS.</li>
<li>Druid datasources can be ingested with or without <a href="/docs/26.0.0/ingestion/rollup.html">rollup</a>. With rollup enabled, Druid partially aggregates your data during ingestion, potentially reducing its row count, decreasing storage footprint, and improving query performance. With rollup disabled, Druid stores one row for each row in your input data, without any pre-aggregation.</li>
<li>Every row in Druid must have a timestamp. Data is always partitioned by time, and every query has a time filter. Query results can also be broken down by time buckets like minutes, hours, days, and so on.</li>
<li>All columns in Druid datasources, other than the timestamp column, are either dimensions or metrics. This follows the <a href="https://en.wikipedia.org/wiki/Online_analytical_processing#Overview_of_OLAP_systems">standard naming convention</a> of OLAP data.</li>
<li>Typical production datasources have tens to hundreds of columns.</li>
<li><a href="/docs/26.0.0/ingestion/data-model.html#dimensions">Dimension columns</a> are stored as-is, so they can be filtered on, grouped by, or aggregated at query time. They are always single Strings, <a href="/docs/26.0.0/querying/multi-value-dimensions.html">arrays of Strings</a>, single Longs, single Doubles or single Floats.</li>
<li><a href="/docs/26.0.0/ingestion/data-model.html#metrics">Metric columns</a> are stored <a href="/docs/26.0.0/querying/aggregations.html">pre-aggregated</a>, so they can only be aggregated at query time (not filtered or grouped by). They are often stored as numbers (integers or floats) but can also be stored as complex objects like <a href="/docs/26.0.0/querying/aggregations.html#approximate-aggregations">HyperLogLog sketches or approximate quantile sketches</a>. Metrics can be configured at ingestion time even when rollup is disabled, but are most useful when rollup is enabled.</li>
</ul>
<h2><a class="anchor" aria-hidden="true" id="if-youre-coming-from-a"></a><a href="#if-youre-coming-from-a" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>If you're coming from a</h2>
<h3><a class="anchor" aria-hidden="true" id="relational-model"></a><a href="#relational-model" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Relational model</h3>
<p>(Like Hive or PostgreSQL.)</p>
<p>Druid datasources are generally equivalent to tables in a relational database. Druid <a href="/docs/26.0.0/querying/lookups.html">lookups</a>
can act similarly to data-warehouse-style dimension tables, but as you'll see below, denormalization is often
recommended if you can get away with it.</p>
<p>Common practice for relational data modeling involves <a href="https://en.wikipedia.org/wiki/Database_normalization">normalization</a>:
the idea of splitting up data into multiple tables such that data redundancy is reduced or eliminated. For example, in a
&quot;sales&quot; table, best-practices relational modeling calls for a &quot;product id&quot; column that is a foreign key into a separate
&quot;products&quot; table, which in turn has &quot;product id&quot;, &quot;product name&quot;, and &quot;product category&quot; columns. This prevents the
product name and category from needing to be repeated on different rows in the &quot;sales&quot; table that refer to the same
product.</p>
<p>In Druid, on the other hand, it is common to use totally flat datasources that do not require joins at query time. In
the example of the &quot;sales&quot; table, in Druid it would be typical to store &quot;product_id&quot;, &quot;product_name&quot;, and
&quot;product_category&quot; as dimensions directly in a Druid &quot;sales&quot; datasource, without using a separate &quot;products&quot; table.
Totally flat schemas substantially increase performance, since the need for joins is eliminated at query time. As an
an added speed boost, this also allows Druid's query layer to operate directly on compressed dictionary-encoded data.
Perhaps counter-intuitively, this does <em>not</em> substantially increase storage footprint relative to normalized schemas,
since Druid uses dictionary encoding to effectively store just a single integer per row for string columns.</p>
<p>If necessary, Druid datasources can be partially normalized through the use of <a href="/docs/26.0.0/querying/lookups.html">lookups</a>,
which are the rough equivalent of dimension tables in a relational database. At query time, you would use Druid's SQL
<code>LOOKUP</code> function, or native lookup extraction functions, instead of using the JOIN keyword like you would in a
relational database. Since lookup tables impose an increase in memory footprint and incur more computational overhead
at query time, it is only recommended to do this if you need the ability to update a lookup table and have the changes
reflected immediately for already-ingested rows in your main table.</p>
<p>Tips for modeling relational data in Druid:</p>
<ul>
<li>Druid datasources do not have primary or unique keys, so skip those.</li>
<li>Denormalize if possible. If you need to be able to update dimension / lookup tables periodically and have those
changes reflected in already-ingested data, consider partial normalization with <a href="/docs/26.0.0/querying/lookups.html">lookups</a>.</li>
<li>If you need to join two large distributed tables with each other, you must do this before loading the data into Druid.
Druid does not support query-time joins of two datasources. Lookups do not help here, since a full copy of each lookup
table is stored on each Druid server, so they are not a good choice for large tables.</li>
<li>Consider whether you want to enable <a href="#rollup">rollup</a> for pre-aggregation, or whether you want to disable
rollup and load your existing data as-is. Rollup in Druid is similar to creating a summary table in a relational model.</li>
</ul>
<h3><a class="anchor" aria-hidden="true" id="time-series-model"></a><a href="#time-series-model" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Time series model</h3>
<p>(Like OpenTSDB or InfluxDB.)</p>
<p>Similar to time series databases, Druid's data model requires a timestamp. Druid is not a timeseries database, but
it is a natural choice for storing timeseries data. Its flexible data model allows it to store both timeseries and
non-timeseries data, even in the same datasource.</p>
<p>To achieve best-case compression and query performance in Druid for timeseries data, it is important to partition and
sort by metric name, like timeseries databases often do. See <a href="/docs/26.0.0/ingestion/partitioning.html">Partitioning and sorting</a> for more details.</p>
<p>Tips for modeling timeseries data in Druid:</p>
<ul>
<li>Druid does not think of data points as being part of a &quot;time series&quot;. Instead, Druid treats each point separately
for ingestion and aggregation.</li>
<li>Create a dimension that indicates the name of the series that a data point belongs to. This dimension is often called
&quot;metric&quot; or &quot;name&quot;. Do not get the dimension named &quot;metric&quot; confused with the concept of Druid metrics. Place this
first in the list of dimensions in your &quot;dimensionsSpec&quot; for best performance (this helps because it improves locality;
see <a href="/docs/26.0.0/ingestion/partitioning.html">partitioning and sorting</a> below for details).</li>
<li>Create other dimensions for attributes attached to your data points. These are often called &quot;tags&quot; in timeseries
database systems.</li>
<li>Create <a href="/docs/26.0.0/querying/aggregations.html">metrics</a> corresponding to the types of aggregations that you want to be able
to query. Typically this includes &quot;sum&quot;, &quot;min&quot;, and &quot;max&quot; (in one of the long, float, or double flavors). If you want the ability
to compute percentiles or quantiles, use Druid's <a href="/docs/26.0.0/querying/aggregations.html#approximate-aggregations">approximate aggregators</a>.</li>
<li>Consider enabling <a href="/docs/26.0.0/ingestion/rollup.html">rollup</a>, which will allow Druid to potentially combine multiple points into one
row in your Druid datasource. This can be useful if you want to store data at a different time granularity than it is
naturally emitted. It is also useful if you want to combine timeseries and non-timeseries data in the same datasource.</li>
<li>If you don't know ahead of time what columns you'll want to ingest, use an empty dimensions list to trigger
<a href="#schema-auto-discovery-for-dimensions">automatic detection of dimension columns</a>.</li>
</ul>
<h3><a class="anchor" aria-hidden="true" id="log-aggregation-model"></a><a href="#log-aggregation-model" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Log aggregation model</h3>
<p>(Like Elasticsearch or Splunk.)</p>
<p>Similar to log aggregation systems, Druid offers inverted indexes for fast searching and filtering. Druid's search
capabilities are generally less developed than these systems, and its analytical capabilities are generally more
developed. The main data modeling differences between Druid and these systems are that when ingesting data into Druid,
you must be more explicit. Druid columns have types specific upfront.</p>
<p>Tips for modeling log data in Druid:</p>
<ul>
<li>If you don't know ahead of time what columns to ingest, you can have Druid perform <a href="#schema-auto-discovery-for-dimensions">schema auto-discovery</a>.</li>
<li>If you have nested data, you can ingest it using the <a href="/docs/26.0.0/querying/nested-columns.html">nested columns</a> feature or flatten it using a <a href="/docs/26.0.0/ingestion/ingestion-spec.html#flattenspec"><code>flattenSpec</code></a>.</li>
<li>Consider enabling <a href="/docs/26.0.0/ingestion/rollup.html">rollup</a> if you have mainly analytical use cases for your log data. This will
mean you lose the ability to retrieve individual events from Druid, but you potentially gain substantial compression and
query performance boosts.</li>
</ul>
<h2><a class="anchor" aria-hidden="true" id="general-tips-and-best-practices"></a><a href="#general-tips-and-best-practices" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>General tips and best practices</h2>
<h3><a class="anchor" aria-hidden="true" id="rollup"></a><a href="#rollup" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Rollup</h3>
<p>Druid can roll up data as it is ingested to minimize the amount of raw data that needs to be stored. This is a form
of summarization or pre-aggregation. For more details, see the <a href="/docs/26.0.0/ingestion/rollup.html">Rollup</a> section of the ingestion
documentation.</p>
<h3><a class="anchor" aria-hidden="true" id="partitioning-and-sorting"></a><a href="#partitioning-and-sorting" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Partitioning and sorting</h3>
<p>Optimally partitioning and sorting your data can have substantial impact on footprint and performance. For more details,
see the <a href="/docs/26.0.0/ingestion/partitioning.html">Partitioning</a> section of the ingestion documentation.</p>
<p><a name="sketches"></a></p>
<h3><a class="anchor" aria-hidden="true" id="sketches-for-high-cardinality-columns"></a><a href="#sketches-for-high-cardinality-columns" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Sketches for high cardinality columns</h3>
<p>When dealing with high cardinality columns like user IDs or other unique IDs, consider using sketches for approximate
analysis rather than operating on the actual values. When you ingest data using a sketch, Druid does not store the
original raw data, but instead stores a &quot;sketch&quot; of it that it can feed into a later computation at query time. Popular
use cases for sketches include count-distinct and quantile computation. Each sketch is designed for just one particular
kind of computation.</p>
<p>In general using sketches serves two main purposes: improving rollup, and reducing memory footprint at
query time.</p>
<p>Sketches improve rollup ratios because they allow you to collapse multiple distinct values into the same sketch. For
example, if you have two rows that are identical except for a user ID (perhaps two users did the same action at the
same time), storing them in a count-distinct sketch instead of as-is means you can store the data in one row instead of
two. You won't be able to retrieve the user IDs or compute exact distinct counts, but you'll still be able to compute
approximate distinct counts, and you'll reduce your storage footprint.</p>
<p>Sketches reduce memory footprint at query time because they limit the amount of data that needs to be shuffled between
servers. For example, in a quantile computation, instead of needing to send all data points to a central location
so they can be sorted and the quantile can be computed, Druid instead only needs to send a sketch of the points. This
can reduce data transfer needs to mere kilobytes.</p>
<p>For details about the sketches available in Druid, see the
<a href="/docs/26.0.0/querying/aggregations.html#approximate-aggregations">approximate aggregators</a> page.</p>
<p>If you prefer videos, take a look at <a href="https://www.youtube.com/watch?v=Hpd3f_MLdXo">Not exactly!</a>, a conference talk
about sketches in Druid.</p>
<h3><a class="anchor" aria-hidden="true" id="string-vs-numeric-dimensions"></a><a href="#string-vs-numeric-dimensions" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>String vs numeric dimensions</h3>
<p>If the user wishes to ingest a column as a numeric-typed dimension (Long, Double or Float), it is necessary to specify the type of the column in the <code>dimensions</code> section of the <code>dimensionsSpec</code>. If the type is omitted, Druid will ingest a column as the default String type.</p>
<p>There are performance tradeoffs between string and numeric columns. Numeric columns are generally faster to group on
than string columns. But unlike string columns, numeric columns don't have indexes, so they can be slower to filter on.
You may want to experiment to find the optimal choice for your use case.</p>
<p>For details about how to configure numeric dimensions, see the <a href="/docs/26.0.0/ingestion/ingestion-spec.html#dimensionsspec"><code>dimensionsSpec</code></a> documentation.</p>
<h3><a class="anchor" aria-hidden="true" id="secondary-timestamps"></a><a href="#secondary-timestamps" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Secondary timestamps</h3>
<p>Druid schemas must always include a primary timestamp. The primary timestamp is used for
<a href="/docs/26.0.0/ingestion/partitioning.html">partitioning and sorting</a> your data, so it should be the timestamp that you will most often filter on.
Druid is able to rapidly identify and retrieve data corresponding to time ranges of the primary timestamp column.</p>
<p>If your data has more than one timestamp, you can ingest the others as secondary timestamps. The best way to do this
is to ingest them as <a href="/docs/26.0.0/ingestion/ingestion-spec.html#dimensionsspec">long-typed dimensions</a> in milliseconds format.
If necessary, you can get them into this format using a <a href="/docs/26.0.0/ingestion/ingestion-spec.html#transformspec"><code>transformSpec</code></a> and
<a href="/docs/26.0.0/misc/math-expr.html">expressions</a> like <code>timestamp_parse</code>, which returns millisecond timestamps.</p>
<p>At query time, you can query secondary timestamps with <a href="/docs/26.0.0/querying/sql-scalar.html#date-and-time-functions">SQL time functions</a>
like <code>MILLIS_TO_TIMESTAMP</code>, <code>TIME_FLOOR</code>, and others. If you're using native Druid queries, you can use
<a href="/docs/26.0.0/misc/math-expr.html">expressions</a>.</p>
<h3><a class="anchor" aria-hidden="true" id="nested-dimensions"></a><a href="#nested-dimensions" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Nested dimensions</h3>
<p>You can ingest and store nested data in a Druid column as a <code>COMPLEX&lt;json&gt;</code> data type. See <a href="/docs/26.0.0/querying/nested-columns.html">Nested columns</a> for more information.</p>
<p>If you want to ingest nested data in a format unsupported by the nested columns feature, you must use the <code>flattenSpec</code> object to flatten it. For example, if you have data of the following form:</p>
<pre><code class="hljs css language-json">{ <span class="hljs-attr">"foo"</span>: { <span class="hljs-attr">"bar"</span>: <span class="hljs-number">3</span> } }
</code></pre>
<p>then before indexing it, you should transform it to:</p>
<pre><code class="hljs css language-json">{ <span class="hljs-attr">"foo_bar"</span>: <span class="hljs-number">3</span> }
</code></pre>
<p>See the <a href="/docs/26.0.0/ingestion/ingestion-spec.html#flattenspec"><code>flattenSpec</code></a> documentation for more details.</p>
<p><a name="counting"></a></p>
<h3><a class="anchor" aria-hidden="true" id="counting-the-number-of-ingested-events"></a><a href="#counting-the-number-of-ingested-events" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Counting the number of ingested events</h3>
<p>When rollup is enabled, count aggregators at query time do not actually tell you the number of rows that have been
ingested. They tell you the number of rows in the Druid datasource, which may be smaller than the number of rows
ingested.</p>
<p>In this case, a count aggregator at <em>ingestion</em> time can be used to count the number of events. However, it is important to note
that when you query for this metric, you should use a <code>longSum</code> aggregator. A <code>count</code> aggregator at query time will return
the number of Druid rows for the time interval, which can be used to determine what the roll-up ratio was.</p>
<p>To clarify with an example, if your ingestion spec contains:</p>
<pre><code class="hljs css language-json">"metricsSpec": [
{ "type": "count", "name": "count" }
]
</code></pre>
<p>You should query for the number of ingested rows with:</p>
<pre><code class="hljs css language-json">"aggregations": [
{ "type": "longSum", "name": "numIngestedEvents", "fieldName": "count" }
]
</code></pre>
<h3><a class="anchor" aria-hidden="true" id="schema-auto-discovery-for-dimensions"></a><a href="#schema-auto-discovery-for-dimensions" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Schema auto-discovery for dimensions</h3>
<p>Druid can infer the schema for your data in one of two ways:</p>
<ul>
<li><a href="#type-aware-schema-discovery">Type-aware schema discovery (experimental)</a> where Druid infers the schema and type for your data. Type-aware schema discovery is an experimental feature currently available for native batch and streaming ingestion.</li>
<li><a href="#string-based-schema-discovery">String-based schema discovery</a> where all the discovered columns are typed as either native string or multi-value string columns.</li>
</ul>
<h4><a class="anchor" aria-hidden="true" id="type-aware-schema-discovery"></a><a href="#type-aware-schema-discovery" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Type-aware schema discovery</h4>
<blockquote>
<p>Note that using type-aware schema discovery can impact downstream BI tools depending on how they handle ARRAY typed columns.</p>
</blockquote>
<p>You can have Druid infer the schema and types for your data partially or fully by setting <code>dimensionsSpec.useSchemaDiscovery</code> to <code>true</code> and defining some or no dimensions in the dimensions list.</p>
<p>When performing type-aware schema discovery, Druid can discover all of the columns of your input data (that aren't in
the exclusion list). Druid automatically chooses the most appropriate native Druid type among <code>STRING</code>, <code>LONG</code>,
<code>DOUBLE</code>, <code>ARRAY&lt;STRING&gt;</code>, <code>ARRAY&lt;LONG&gt;</code>, <code>ARRAY&lt;DOUBLE&gt;</code>, or <code>COMPLEX&lt;json&gt;</code> for nested data. For input formats with
native boolean types, Druid ingests these values as strings if <code>druid.expressions.useStrictBooleans</code> is set to <code>false</code>
(the default), or longs if set to <code>true</code> (for more SQL compatible behavior). Array typed columns can be queried using
the <a href="/docs/26.0.0/querying/sql-array-functions.html">array functions</a> or <a href="/docs/26.0.0/querying/sql-functions.html#unnest">UNNEST</a>. Nested
columns can be queried with the <a href="/docs/26.0.0/querying/sql-json-functions.html">JSON functions</a>.</p>
<p>Mixed type columns are stored in the <em>least</em> restrictive type that can represent all values in the column. For example:</p>
<ul>
<li>Mixed numeric columns are <code>DOUBLE</code></li>
<li>If there are any strings present, then the column is a <code>STRING</code></li>
<li>If there are arrays, then the column becomes an array with the least restrictive element type</li>
<li>Any nested data or arrays of nested data become <code>COMPLEX&lt;json&gt;</code> nested columns.</li>
</ul>
<p>If you're already using string-based schema discovery and want to migrate, see <a href="#migrating-to-type-aware-schema-discovery">Migrating to type-aware schema discovery</a>.</p>
<h4><a class="anchor" aria-hidden="true" id="string-based-schema-discovery"></a><a href="#string-based-schema-discovery" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>String-based schema discovery</h4>
<p>If you do not set <code>dimensionsSpec.useSchemaDiscovery</code> to <code>true</code>, Druid can still use the string-based schema discovery for ingestion if any of the following conditions are met:</p>
<ul>
<li>The dimension list is empty</li>
<li>You set <code>includeAllDimensions</code> to <code>true</code></li>
</ul>
<p>Druid coerces primitives and arrays of primitive types into the native Druid string type. Nested data structures and arrays of nested data structures are ignored and not ingested.</p>
<h4><a class="anchor" aria-hidden="true" id="migrating-to-type-aware-schema-discovery"></a><a href="#migrating-to-type-aware-schema-discovery" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Migrating to type-aware schema discovery</h4>
<p>If you previously used string-based schema discovery and want to migrate to type-aware schema discovery, do the following:</p>
<ul>
<li>Update any queries that use multi-value dimensions (MVDs) to use UNNEST in conjunction with other functions so that no MVD behavior is being relied upon. Type-aware schema discovery generates ARRAY typed columns instead of MVDs, so queries that use any MVD features will fail.</li>
<li>Be aware of mixed typed inputs and test how type-aware schema discovery handles them. Druid attempts to cast them as the least restrictive type.</li>
<li>If you notice issues with numeric types, you may need to explicitly cast them. Generally, Druid handles the coercion for you.</li>
<li>Update your dimension exclusion list and add any nested columns if you want to continue to exclude them. String-based schema discovery automatically ignores nested columns, but type-aware schema discovery will ingest them.</li>
</ul>
<h3><a class="anchor" aria-hidden="true" id="including-the-same-column-as-a-dimension-and-a-metric"></a><a href="#including-the-same-column-as-a-dimension-and-a-metric" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Including the same column as a dimension and a metric</h3>
<p>One workflow with unique IDs is to be able to filter on a particular ID, while still being able to do fast unique counts on the ID column.
If you are not using schema-less dimensions, this use case is supported by setting the <code>name</code> of the metric to something different than the dimension.
If you are using schema-less dimensions, the best practice here is to include the same column twice, once as a dimension, and as a <code>hyperUnique</code> metric. This may involve
some work at ETL time.</p>
<p>As an example, for schema-less dimensions, repeat the same column:</p>
<pre><code class="hljs css language-json">{ <span class="hljs-attr">"device_id_dim"</span>: <span class="hljs-number">123</span>, <span class="hljs-attr">"device_id_met"</span>: <span class="hljs-number">123</span> }
</code></pre>
<p>and in your <code>metricsSpec</code>, include:</p>
<pre><code class="hljs css language-json">{ <span class="hljs-attr">"type"</span>: <span class="hljs-string">"hyperUnique"</span>, <span class="hljs-attr">"name"</span>: <span class="hljs-string">"devices"</span>, <span class="hljs-attr">"fieldName"</span>: <span class="hljs-string">"device_id_met"</span> }
</code></pre>
<p><code>device_id_dim</code> should automatically get picked up as a dimension.</p>
</span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/26.0.0/ingestion/ingestion-spec.html"><span class="arrow-prev">โ† </span><span>Ingestion spec</span></a><a class="docs-next button" href="/docs/26.0.0/development/extensions-core/kafka-ingestion.html"><span>Apache Kafka ingestion</span><span class="arrow-next"> โ†’</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#druids-data-model">Druid's data model</a></li><li><a href="#if-youre-coming-from-a">If you're coming from a</a><ul class="toc-headings"><li><a href="#relational-model">Relational model</a></li><li><a href="#time-series-model">Time series model</a></li><li><a href="#log-aggregation-model">Log aggregation model</a></li></ul></li><li><a href="#general-tips-and-best-practices">General tips and best practices</a><ul class="toc-headings"><li><a href="#rollup">Rollup</a></li><li><a href="#partitioning-and-sorting">Partitioning and sorting</a></li><li><a href="#sketches-for-high-cardinality-columns">Sketches for high cardinality columns</a></li><li><a href="#string-vs-numeric-dimensions">String vs numeric dimensions</a></li><li><a href="#secondary-timestamps">Secondary timestamps</a></li><li><a href="#nested-dimensions">Nested dimensions</a></li><li><a href="#counting-the-number-of-ingested-events">Counting the number of ingested events</a></li><li><a href="#schema-auto-discovery-for-dimensions">Schema auto-discovery for dimensions</a></li><li><a href="#including-the-same-column-as-a-dimension-and-a-metric">Including the same column as a dimension and a metric</a></li></ul></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a>โ€‚ยทโ€‚<a href="/use-cases">Use Cases</a>โ€‚ยทโ€‚<a href="/druid-powered">Powered by Druid</a>โ€‚ยทโ€‚<a href="/docs/26.0.0/">Docs</a>โ€‚ยทโ€‚<a href="/community/">Community</a>โ€‚ยทโ€‚<a href="/downloads.html">Download</a>โ€‚ยทโ€‚<a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a>โ€‚ยทโ€‚<a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a>โ€‚ยทโ€‚<a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a>โ€‚ยทโ€‚<a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright ยฉ 2022 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script><script>
document.addEventListener('keyup', function(e) {
if (e.target !== document.body) {
return;
}
// keyCode for '/' (slash)
if (e.keyCode === 191) {
const search = document.getElementById('search_input_react');
search && search.focus();
}
});
</script><script>
var search = docsearch({
appId: 'CPK9PMSCEY',
apiKey: 'd4ef4ffe3a2f0c7d1e34b062fd98736b',
indexName: 'apache_druid',
inputSelector: '#search_input_react',
algoliaOptions: {"facetFilters":["language:en","version:26.0.0"]}
});
</script></body></html>