blob: 88f3c948dd375e64671c0c50a1d5cfcb218fab11 [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Segments ยท Apache Druid</title><meta name="viewport" content="width=device-width, initial-scale=1.0"/><link rel="canonical" href="https://druid.apache.org/docs/26.0.0/design/segments.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="&lt;!--"/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="26.0.0" /><meta property="og:title" content="Segments ยท Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="&lt;!--"/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments); }
gtag('js', new Date());
gtag('config', 'UA-131010415-1');
</script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/26.0.0/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>โ€บ</i><span>Design</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/index.html">Quickstart (local)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-batch.html">Load files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-msq-extern.html">Load files using SQL ๐Ÿ†•</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-sketches-theta.html">Theta sketches</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/docker.html">Tutorial: Run with Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-msq-convert-spec.html">Convert ingestion spec to SQL</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-sql-query-view.html">Get to know Query view</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-unnest-arrays.html">Unnesting arrays</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-jupyter-index.html">Jupyter Notebook tutorials</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-jupyter-docker.html">Docker for tutorials</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/tutorials/tutorial-jdbc.html">JDBC connector</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/architecture.html">Design</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/26.0.0/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/data-model.html">Data model</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/rollup.html">Data rollup</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/partitioning.html">Partitioning</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/ingestion-spec.html">Ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/schema-design.html">Schema design tips</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-ingestion.html">Apache Kafka ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-supervisor-reference.html">Apache Kafka supervisor</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-supervisor-operations.html">Apache Kafka operations</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch-input-sources.html">Native batch: input sources</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/migrate-from-firehose.html">Migrate from firehose</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">SQL-based ingestion ๐Ÿ†•</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/index.html">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/concepts.html">Key concepts</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/api.html">API</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/security.html">Security</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/examples.html">Examples</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/reference.html">Reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/multi-stage-query/known-issues.html">Known issues</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Data management<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/index.html">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/update.html">Data updates</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/delete.html">Data deletion</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/schema-changes.html">Schema changes</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/compaction.html">Compaction</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/data-management/automatic-compaction.html">Automatic compaction</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Druid SQL</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql.html">Overview and syntax</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-data-types.html">SQL data types</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-operators.html">Operators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-scalar.html">Scalar functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-aggregations.html">Aggregation functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-multivalue-string-functions.html">Multi-value string functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-json-functions.html">JSON functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-functions.html">All functions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-api.html">Druid SQL API</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-jdbc.html">JDBC driver API</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-query-context.html">SQL query context</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-metadata-tables.html">SQL metadata tables</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sql-translation.html">SQL query translation</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/querying.html">Native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/query-execution.html">Query execution</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/troubleshooting.html">Troubleshooting</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Concepts</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/nested-columns.html">Nested columns</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/multitenancy.html">Multitenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/using-caching.html">Using query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/query-context.html">Query context</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query components</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/filters.html">Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/granularities.html">Granularities</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/dimensionspecs.html">Dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/aggregations.html">Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/post-aggregations.html">Post-aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/having.html">Having filters (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/limitspec.html">Sorting and limiting (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/topnmetricspec.html">Sorting (topN)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/sorting-orders.html">String comparators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/virtual-columns.html">Virtual columns</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/geo.html">Spatial filters</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/web-console.html">Web console</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/java.html">Java runtime</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Security</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/security-overview.html">Security overview</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/security-user-auth.html">User authentication and authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/auth-ldap.html">LDAP auth</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/dynamic-config-provider.html">Dynamic Config Providers</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/tls-support.html">TLS support</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Performance tuning</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/segment-optimization.html">Segment size optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/mixed-workloads.html">Mixed workloads</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/clean-metadata-store.html">Automated metadata cleanup</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Monitoring</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/request-logging.html">Request logging</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/alerts.html">Alerts</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/rule-configuration.html">Using rules to drop and retain data</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/pull-deps.html">pull-deps tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-key-value.html">Apache Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-ranger-security.html">Apache Ranger Security</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-pac4j.html">Druid pac4j based Security extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/druid-aws-rds.html">Druid AWS RDS Module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-core/kubernetes.html">Kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/moving-average-query.html">Moving Average Query</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/gce-extensions.html">GCE Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/aliyun-oss.html">Aliyun OSS</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/development/extensions-contrib/prometheus.html">Prometheus Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/operations/kubernetes.html">kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/querying/select-query.html">Select</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch-firehose.html">Firehose (deprecated)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/native-batch-simple-task.html">Native batch (simple)</a></li><li class="navListItem"><a class="navItem" href="/docs/26.0.0/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script>
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
var links = coll[i].nextElementSibling.getElementsByTagName('*');
if (checkActiveCategory){
for (var j = 0; j < links.length; j++) {
if (links[j].classList.contains('navListItemActive')){
coll[i].nextElementSibling.classList.toggle('hide');
coll[i].childNodes[1].classList.toggle('rotate');
checkActiveCategory = false;
break;
}
}
}
coll[i].addEventListener('click', function() {
var arrow = this.childNodes[1];
arrow.classList.toggle('rotate');
var content = this.nextElementSibling;
content.classList.toggle('hide');
});
}
document.addEventListener('DOMContentLoaded', function() {
createToggler('#navToggler', '#docsNav', 'docsSliderActive');
createToggler('#tocToggler', 'body', 'tocActive');
var headings = document.querySelector('.toc-headings');
headings && headings.addEventListener('click', function(event) {
var el = event.target;
while(el !== headings){
if (el.tagName === 'A') {
document.body.classList.remove('tocActive');
break;
} else{
el = el.parentNode;
}
}
}, false);
function createToggler(togglerSelector, targetSelector, className) {
var toggler = document.querySelector(togglerSelector);
var target = document.querySelector(targetSelector);
if (!toggler) {
return;
}
toggler.onclick = function(event) {
event.preventDefault();
target.classList.toggle(className);
};
}
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/design/segments.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Segments</h1></header><article><div><span><!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<p>Apache Druid stores its data and indexes in <em>segment files</em> partitioned by time. Druid creates a segment for each segment interval that contains data. If an interval is emptyโ€”that is, containing no rowsโ€”no segment exists for that time interval. Druid may create multiple segments for the same interval if you ingest data for that period via different ingestion jobs. <a href="/docs/26.0.0/data-management/compaction.html">Compaction</a> is the Druid process that attempts to combine these segments into a single segment per interval for optimal performance.</p>
<p>The time interval is configurable in the <code>segmentGranularity</code> parameter of the <a href="/docs/26.0.0/ingestion/ingestion-spec.html#granularityspec"><code>granularitySpec</code></a>.</p>
<p>For Druid to operate well under heavy query load, it is important for the segment
file size to be within the recommended range of 300-700 MB. If your
segment files are larger than this range, then consider either
changing the granularity of the segment time interval or partitioning your
data and/or adjusting the <code>targetRowsPerSegment</code> in your <code>partitionsSpec</code>.
A good starting point for this parameter is 5 million rows.
See the Sharding section below and the &quot;Partitioning specification&quot; section of
the <a href="/docs/26.0.0/ingestion/hadoop.html#partitionsspec">Batch ingestion</a> documentation
for more guidance.</p>
<h2><a class="anchor" aria-hidden="true" id="segment-file-structure"></a><a href="#segment-file-structure" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Segment file structure</h2>
<p>Segment files are <em>columnar</em>: the data for each column is laid out in
separate data structures. By storing each column separately, Druid decreases query latency by scanning only those columns actually needed for a query. There are three basic column types: timestamp, dimensions, and metrics:</p>
<p><img src="../assets/druid-column-types.png" alt="Druid column types" title="Druid Column Types"></p>
<p>Timestamp and metrics type columns are arrays of integer or floating point values compressed with
<a href="https://github.com/lz4/lz4-java">LZ4</a>. Once a query identifies which rows to select, it decompresses them, pulls out the relevant rows, and applies the
desired aggregation operator. If a query doesnโ€™t require a column, Druid skips over that column's data.</p>
<p>Dimension columns are different because they support filter and
group-by operations, so each dimension requires the following
three data structures:</p>
<ul>
<li><strong>Dictionary</strong>: Maps values (which are always treated as strings) to integer IDs, allowing compact representation of the list and bitmap values.</li>
<li><strong>List</strong>: The columnโ€™s values, encoded using the dictionary. Required for GroupBy and TopN queries. These operators allow queries that solely aggregate metrics based on filters to run without accessing the list of values.</li>
<li><strong>Bitmap</strong>: One bitmap for each distinct value in the column, to indicate which rows contain that value. Bitmaps allow for quick filtering operations because they are convenient for quickly applying AND and OR operators. Also known as inverted indexes.</li>
</ul>
<p>To get a better sense of these data structures, consider the &quot;Page&quot; column from the example data above, represented by the following data structures:</p>
<pre><code class="hljs"><span class="hljs-number">1</span>: Dictionary
{
<span class="hljs-string">"Justin Bieber"</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">"Ke$ha"</span>: <span class="hljs-number">1</span>
}
<span class="hljs-number">2</span>: List of column data
[<span class="hljs-number">0</span>,
<span class="hljs-number">0</span>,
<span class="hljs-number">1</span>,
<span class="hljs-number">1</span>]
<span class="hljs-number">3</span>: Bitmaps
value=<span class="hljs-string">"Justin Bieber"</span>: [<span class="hljs-number">1</span>,<span class="hljs-number">1</span>,<span class="hljs-number">0</span>,<span class="hljs-number">0</span>]
value=<span class="hljs-string">"Ke$ha"</span>: [<span class="hljs-number">0</span>,<span class="hljs-number">0</span>,<span class="hljs-number">1</span>,<span class="hljs-number">1</span>]
</code></pre>
<p>Note that the bitmap is different from the dictionary and list data structures: the dictionary and list grow linearly with the size of the data, but the size of the bitmap section is the product of data size and column cardinality. That is, there is one bitmap per separate column value. Columns with the same value share the same bitmap.</p>
<p>For each row in the list of column data, there is only a single bitmap that has a non-zero entry. This means that high cardinality columns have extremely sparse, and therefore highly compressible, bitmaps. Druid exploits this using compression algorithms that are specially suited for bitmaps, such as <a href="https://github.com/RoaringBitmap/RoaringBitmap">Roaring bitmap compression</a>.</p>
<h2><a class="anchor" aria-hidden="true" id="handling-null-values"></a><a href="#handling-null-values" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Handling null values</h2>
<p>By default, Druid string dimension columns use the values <code>''</code> and <code>null</code> interchangeably. Numeric and metric columns cannot represent <code>null</code> but use nulls to mean <code>0</code>. However, Druid provides a SQL compatible null handling mode, which you can enable at the system level through <code>druid.generic.useDefaultValueForNull</code>. This setting, when set to <code>false</code>, allows Druid to create segments <em>at ingestion time</em> in which the following occurs:</p>
<ul>
<li>String columns can distinguish <code>''</code> from <code>null</code>,</li>
<li>Numeric columns can represent <code>null</code> valued rows instead of <code>0</code>.</li>
</ul>
<p>String dimension columns contain no additional column structures in SQL compatible null handling mode. Instead, they reserve an additional dictionary entry for the <code>null</code> value. Numeric columns are stored in the segment with an additional bitmap in which the set bits indicate <code>null</code>-valued rows.</p>
<p>In addition to slightly increased segment sizes, SQL compatible null handling can incur a performance cost at query time, due to the need to check the null bitmap. This performance cost only occurs for columns that actually contain null values.</p>
<h2><a class="anchor" aria-hidden="true" id="segments-with-different-schemas"></a><a href="#segments-with-different-schemas" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Segments with different schemas</h2>
<p>Druid segments for the same datasource may have different schemas. If a string column (dimension) exists in one segment but not another, queries that involve both segments still work. In default mode, queries for the segment without the dimension behave as if the dimension contains only blank values. In SQL-compatible mode, queries for the segment without the dimension behave as if the dimension contains only null values. Similarly, if one segment has a numeric column (metric) but another does not, queries on the segment without the metric generally operate as expected. Aggregations over the missing metric operate as if the metric doesn't exist.</p>
<h2><a class="anchor" aria-hidden="true" id="column-format"></a><a href="#column-format" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Column format</h2>
<p>Each column is stored as two parts:</p>
<ul>
<li>A Jackson-serialized <code>ColumnDescriptor</code>.</li>
<li>The binary data for the column.</li>
</ul>
<p>A <code>ColumnDescriptor</code> is Jackson-serialized instance of the internal Druid <code>ColumnDescriptor</code> class . It allows the use of Jackson's polymorphic deserialization to add new and interesting methods of serialization with minimal impact to the code. It consists of some metadata about the column (for example: type, whether it's multi-value) and a list of serialization/deserialization logic that can deserialize the rest of the binary.</p>
<h3><a class="anchor" aria-hidden="true" id="multi-value-columns"></a><a href="#multi-value-columns" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Multi-value columns</h3>
<p>A multi-value column allows a single row to contain multiple strings for a column. You can think of it as an array of strings. If a datasource uses multi-value columns, then the data structures within the segment files look a bit different. Let's imagine that in the example above, the second row is tagged with both the <code>Ke$ha</code> <em>and</em> <code>Justin Bieber</code> topics, as follows:</p>
<pre><code class="hljs"><span class="hljs-number">1</span>: Dictionary
{
<span class="hljs-string">"Justin Bieber"</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">"Ke$ha"</span>: <span class="hljs-number">1</span>
}
<span class="hljs-number">2</span>: List of column data
[<span class="hljs-number">0</span>,
[<span class="hljs-number">0</span>,<span class="hljs-number">1</span>], &lt;--Row value <span class="hljs-keyword">in</span> a multi-value column can contain an <span class="hljs-built_in">array</span> of values
<span class="hljs-number">1</span>,
<span class="hljs-number">1</span>]
<span class="hljs-number">3</span>: Bitmaps
value=<span class="hljs-string">"Justin Bieber"</span>: [<span class="hljs-number">1</span>,<span class="hljs-number">1</span>,<span class="hljs-number">0</span>,<span class="hljs-number">0</span>]
value=<span class="hljs-string">"Ke$ha"</span>: [<span class="hljs-number">0</span>,<span class="hljs-number">1</span>,<span class="hljs-number">1</span>,<span class="hljs-number">1</span>]
^
|
|
Multi-value column contains multiple non-zero entries
</code></pre>
<p>Note the changes to the second row in the list of column data and the <code>Ke$ha</code>
bitmap. If a row has more than one value for a column, its entry in
the list is an array of values. Additionally, a row with <em>n</em> values in the list has <em>n</em> non-zero valued entries in bitmaps.</p>
<h2><a class="anchor" aria-hidden="true" id="compression"></a><a href="#compression" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Compression</h2>
<p>Druid uses LZ4 by default to compress blocks of values for string, long, float, and double columns. Druid uses Roaring to compress bitmaps for string columns and numeric null values. We recommend that you use these defaults unless you've experimented with your data and query patterns suggest that non-default options will perform better in your specific case.</p>
<p>Druid also supports Concise bitmap compression. For string column bitmaps, the differences between using Roaring and Concise are most pronounced for high cardinality columns. In this case, Roaring is substantially faster on filters that match many values, but in some cases Concise can have a lower footprint due to the overhead of the Roaring format (but is still slower when many values are matched). You configure compression at the segment level, not for individual columns. See <a href="/docs/26.0.0/ingestion/ingestion-spec.html#indexspec">IndexSpec</a> for more details.</p>
<h2><a class="anchor" aria-hidden="true" id="segment-identification"></a><a href="#segment-identification" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Segment identification</h2>
<p>Segment identifiers typically contain the segment datasource, interval start time (in ISO 8601 format), interval end time (in ISO 8601 format), and version information. If data is additionally sharded beyond a time range, the segment identifier also contains a partition number:</p>
<p><code>datasource_intervalStart_intervalEnd_version_partitionNum</code></p>
<h3><a class="anchor" aria-hidden="true" id="segment-id-examples"></a><a href="#segment-id-examples" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Segment ID examples</h3>
<p>The increasing partition numbers in the following segments indicate that multiple segments exist for the same interval:</p>
<pre><code class="hljs">foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v1_0
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v1_1
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v1_2
</code></pre>
<p>If you reindex the data with a new schema, Druid allocates a new version ID to the newly created segments:</p>
<pre><code class="hljs">foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v2_0
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v2_1
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v2_2
</code></pre>
<h2><a class="anchor" aria-hidden="true" id="sharding"></a><a href="#sharding" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Sharding</h2>
<p>Multiple segments can exist for a single time interval and datasource. These segments form a <code>block</code> for an interval. Depending on the type of <code>shardSpec</code> used to shard the data, Druid queries may only complete if a <code>block</code> is complete. For example, if a block consists of the following three segments:</p>
<pre><code class="hljs">sampleData_2011<span class="hljs-number">-01</span><span class="hljs-number">-01</span>T02:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>Z_2011<span class="hljs-number">-01</span><span class="hljs-number">-01</span>T03:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>Z_v1_0
sampleData_2011<span class="hljs-number">-01</span><span class="hljs-number">-01</span>T02:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>Z_2011<span class="hljs-number">-01</span><span class="hljs-number">-01</span>T03:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>Z_v1_1
sampleData_2011<span class="hljs-number">-01</span><span class="hljs-number">-01</span>T02:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>Z_2011<span class="hljs-number">-01</span><span class="hljs-number">-01</span>T03:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>Z_v1_2
</code></pre>
<p>All three segments must load before a query for the interval <code>2011-01-01T02:00:00:00Z_2011-01-01T03:00:00:00Z</code> can complete.</p>
<p>Linear shard specs are an exception to this rule. Linear shard specs do not enforce &quot;completeness&quot; so queries can complete even if shards are not completely loaded.</p>
<p>For example, if a real-time ingestion creates three segments that were sharded with linear shard spec, and only two of the segments are loaded, queries return results for those two segments.</p>
<h2><a class="anchor" aria-hidden="true" id="segment-components"></a><a href="#segment-components" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Segment components</h2>
<p>A segment contains several files:</p>
<ul>
<li><p><code>version.bin</code></p>
<p>4 bytes representing the current segment version as an integer. For example, for v9 segments the version is 0x0, 0x0, 0x0, 0x9.</p></li>
<li><p><code>meta.smoosh</code></p>
<p>A file containing metadata (filenames and offsets) about the contents of the other <code>smoosh</code> files.</p></li>
<li><p><code>XXXXX.smoosh</code></p>
<p>Smoosh (<code>.smoosh</code>) files contain concatenated binary data. This file consolidation reduces the number of file descriptors that must be open when accessing data. The files are 2 GB or less in size to remain within the limit of a memory-mapped <code>ByteBuffer</code> in Java.
Smoosh files contain the following:</p>
<ul>
<li>Individual files for each column in the data, including one for the <code>__time</code> column that refers to the timestamp of the segment.</li>
<li>An <code>index.drd</code> file that contains additional segment metadata.</li>
</ul></li>
</ul>
<p>In the codebase, segments have an internal format version. The current segment format version is <code>v9</code>.</p>
<h2><a class="anchor" aria-hidden="true" id="implications-of-updating-segments"></a><a href="#implications-of-updating-segments" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Implications of updating segments</h2>
<p>Druid uses versioning to manage updates to create a form of multi-version concurrency control (MVCC). These MVCC versions are distinct from the segment format version discussed above.</p>
<p>Note that updates that span multiple segment intervals are only atomic within each interval. They are not atomic across the entire update. For example, if you have the following segments:</p>
<pre><code class="hljs">foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v1_0
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-02</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-03</span>_v1_1
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-03</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-04</span>_v1_2
</code></pre>
<p><code>v2</code> segments are loaded into the cluster as soon as they are built and replace <code>v1</code> segments for the period of time the segments overlap. Before <code>v2</code> segments are completely loaded, the cluster may contain a mixture of <code>v1</code> and <code>v2</code> segments.</p>
<pre><code class="hljs">foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-01</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-02</span>_v1_0
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-02</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-03</span>_v2_1
foo_2015<span class="hljs-number">-01</span><span class="hljs-number">-03</span>/<span class="hljs-number">2015</span><span class="hljs-number">-01</span><span class="hljs-number">-04</span>_v1_2
</code></pre>
<p>In this case, queries may hit a mixture of <code>v1</code> and <code>v2</code> segments.</p>
</span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/26.0.0/design/architecture.html"><span class="arrow-prev">โ† </span><span>Design</span></a><a class="docs-next button" href="/docs/26.0.0/design/processes.html"><span>Processes and servers</span><span class="arrow-next"> โ†’</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#segment-file-structure">Segment file structure</a></li><li><a href="#handling-null-values">Handling null values</a></li><li><a href="#segments-with-different-schemas">Segments with different schemas</a></li><li><a href="#column-format">Column format</a><ul class="toc-headings"><li><a href="#multi-value-columns">Multi-value columns</a></li></ul></li><li><a href="#compression">Compression</a></li><li><a href="#segment-identification">Segment identification</a><ul class="toc-headings"><li><a href="#segment-id-examples">Segment ID examples</a></li></ul></li><li><a href="#sharding">Sharding</a></li><li><a href="#segment-components">Segment components</a></li><li><a href="#implications-of-updating-segments">Implications of updating segments</a></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a>โ€‚ยทโ€‚<a href="/use-cases">Use Cases</a>โ€‚ยทโ€‚<a href="/druid-powered">Powered by Druid</a>โ€‚ยทโ€‚<a href="/docs/26.0.0/">Docs</a>โ€‚ยทโ€‚<a href="/community/">Community</a>โ€‚ยทโ€‚<a href="/downloads.html">Download</a>โ€‚ยทโ€‚<a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a>โ€‚ยทโ€‚<a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a>โ€‚ยทโ€‚<a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a>โ€‚ยทโ€‚<a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright ยฉ 2022 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script><script>
document.addEventListener('keyup', function(e) {
if (e.target !== document.body) {
return;
}
// keyCode for '/' (slash)
if (e.keyCode === 191) {
const search = document.getElementById('search_input_react');
search && search.focus();
}
});
</script><script>
var search = docsearch({
appId: 'CPK9PMSCEY',
apiKey: 'd4ef4ffe3a2f0c7d1e34b062fd98736b',
indexName: 'apache_druid',
inputSelector: '#search_input_react',
algoliaOptions: {"facetFilters":["language:en","version:26.0.0"]}
});
</script></body></html>