blob: 88a77257d318981f9a9e606a536e5d32098c3731 [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Data management · Apache Druid</title><meta name="viewport" content="width=device-width"/><link rel="canonical" href="https://druid.apache.org/docs/latest/ingestion/data-management.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="&lt;!--"/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="0.23.0" /><meta property="og:title" content="Data management · Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="&lt;!--"/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments); }
gtag('js', new Date());
gtag('config', 'UA-131010415-1');
</script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/latest/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i></i><span>Ingestion</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/index.html">Quickstart</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/docker.html">Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-batch.html">Loading files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/design/architecture.html">Design</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-model.html">Data model</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/rollup.html">Data rollup</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/partitioning.html">Partitioning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/ingestion-spec.html">Ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/schema-design.html">Schema design tips</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/latest/ingestion/data-management.html">Data management</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/compaction.html">Compaction</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-ingestion.html">Apache Kafka ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-supervisor-reference.html">Apache Kafka supervisor</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-supervisor-operations.html">Apache Kafka operations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-simple-task.html">Simple task indexing</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-input-sources.html">Input sources</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-firehose.html">Firehose</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Druid SQL</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql.html">Overview and syntax</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-data-types.html">SQL data types</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-operators.html">Operators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-scalar.html">Scalar functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-aggregations.html">Aggregation functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-multivalue-string-functions.html">Multi-value string functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-api.html">Druid SQL API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-jdbc.html">JDBC driver API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-query-context.html">SQL query context</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-metadata-tables.html">SQL metadata tables</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-translation.html">SQL query translation</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/querying/querying.html">Native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/query-execution.html">Query execution</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/troubleshooting.html">Troubleshooting</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Concepts</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/multitenancy.html">Multitenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/using-caching.html">Using query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/query-context.html">Query context</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query components</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/filters.html">Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/granularities.html">Granularities</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/dimensionspecs.html">Dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/aggregations.html">Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/post-aggregations.html">Post-aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/having.html">Having filters (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/limitspec.html">Sorting and limiting (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/topnmetricspec.html">Sorting (topN)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sorting-orders.html">String comparators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/virtual-columns.html">Virtual columns</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/geo.html">Spatial filters</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/operations/druid-console.html">Web console</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/getting-started.html">Getting started with Apache Druid</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Security</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/security-overview.html">Security overview</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/security-user-auth.html">User authentication and authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/auth-ldap.html">LDAP auth</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/dynamic-config-provider.html">Dynamic Config Providers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/tls-support.html">TLS support</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Performance tuning</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/segment-optimization.html">Segment Size Optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/mixed-workloads.html">Mixed workloads</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/clean-metadata-store.html">Automated metadata cleanup</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Monitoring</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/request-logging.html">Request logging</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/alerts.html">Alerts</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/rule-configuration.html">Retaining or automatically dropping data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/management-uis.html">Legacy Management UIs</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/pull-deps.html">pull-deps tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-key-value.html">Apache Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-ranger-security.html">Apache Ranger Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-pac4j.html">Druid pac4j based Security extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-aws-rds.html">Druid AWS RDS Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kubernetes.html">Kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/moving-average-query.html">Moving Average Query</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/gce-extensions.html">GCE Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/aliyun-oss.html">Aliyun OSS</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/prometheus.html">Prometheus Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/kubernetes.html">kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/select-query.html">Select</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script>
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
var links = coll[i].nextElementSibling.getElementsByTagName('*');
if (checkActiveCategory){
for (var j = 0; j < links.length; j++) {
if (links[j].classList.contains('navListItemActive')){
coll[i].nextElementSibling.classList.toggle('hide');
coll[i].childNodes[1].classList.toggle('rotate');
checkActiveCategory = false;
break;
}
}
}
coll[i].addEventListener('click', function() {
var arrow = this.childNodes[1];
arrow.classList.toggle('rotate');
var content = this.nextElementSibling;
content.classList.toggle('hide');
});
}
document.addEventListener('DOMContentLoaded', function() {
createToggler('#navToggler', '#docsNav', 'docsSliderActive');
createToggler('#tocToggler', 'body', 'tocActive');
var headings = document.querySelector('.toc-headings');
headings && headings.addEventListener('click', function(event) {
var el = event.target;
while(el !== headings){
if (el.tagName === 'A') {
document.body.classList.remove('tocActive');
break;
} else{
el = el.parentNode;
}
}
}, false);
function createToggler(togglerSelector, targetSelector, className) {
var toggler = document.querySelector(togglerSelector);
var target = document.querySelector(targetSelector);
if (!toggler) {
return;
}
toggler.onclick = function(event) {
event.preventDefault();
target.classList.toggle(className);
};
}
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/ingestion/data-management.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Data management</h1></header><article><div><span><!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
Within the context of this topic data management refers to Apache Druid's data maintenance capabilities for existing datasources. There are several options to help you keep your data relevant and to help your Druid cluster remain performant. For example updating, reingesting, adding lookups, reindexing, or deleting data.
<p>In addition to the tasks covered on this page, you can also use segment compaction to improve the layout of your existing data. Refer to <a href="/docs/latest/operations/segment-optimization.html">Segment optimization</a> to see if compaction will help in your environment. For an overview and steps to configure manual compaction tasks, see <a href="/docs/latest/ingestion/compaction.html">Compaction</a>.</p>
<h2><a class="anchor" aria-hidden="true" id="adding-new-data-to-existing-datasources"></a><a href="#adding-new-data-to-existing-datasources" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Adding new data to existing datasources</h2>
<p>Druid can insert new data to an existing datasource by appending new segments to existing segment sets. It can also add new data by merging an existing set of segments with new data and overwriting the original set.</p>
<p>Druid does not support single-record updates by primary key.</p>
<p><a name="update"></a></p>
<h2><a class="anchor" aria-hidden="true" id="updating-existing-data"></a><a href="#updating-existing-data" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Updating existing data</h2>
<p>Once you ingest some data in a dataSource for an interval and create Apache Druid segments, you might want to make changes to
the ingested data. There are several ways this can be done.</p>
<h3><a class="anchor" aria-hidden="true" id="using-lookups"></a><a href="#using-lookups" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Using lookups</h3>
<p>If you have a dimension where values need to be updated frequently, try first using <a href="/docs/latest/querying/lookups.html">lookups</a>. A
classic use case of lookups is when you have an ID dimension stored in a Druid segment, and want to map the ID dimension to a
human-readable String value that may need to be updated periodically.</p>
<h3><a class="anchor" aria-hidden="true" id="reingesting-data"></a><a href="#reingesting-data" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Reingesting data</h3>
<p>If lookup-based techniques are not sufficient, you will need to reingest data into Druid for the time chunks that you
want to update. This can be done using one of the <a href="/docs/latest/ingestion/index.html#batch">batch ingestion methods</a> in overwrite mode (the
default mode). It can also be done using <a href="/docs/latest/ingestion/index.html#streaming">streaming ingestion</a>, provided you drop data for the
relevant time chunks first.</p>
<p>If you do the reingestion in batch mode, Druid's atomic update mechanism means that queries will flip seamlessly from
the old data to the new data.</p>
<p>We recommend keeping a copy of your raw data around in case you ever need to reingest it.</p>
<h3><a class="anchor" aria-hidden="true" id="with-hadoop-based-ingestion"></a><a href="#with-hadoop-based-ingestion" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>With Hadoop-based ingestion</h3>
<p>This section assumes you understand how to do batch ingestion using Hadoop. See
<a href="/docs/latest/ingestion/hadoop.html">Hadoop batch ingestion</a> for more information. Hadoop batch-ingestion can be used for reindexing and delta ingestion.</p>
<p>Druid uses an <code>inputSpec</code> in the <code>ioConfig</code> to know where the data to be ingested is located and how to read it.
For simple Hadoop batch ingestion, <code>static</code> or <code>granularity</code> spec types allow you to read data stored in deep storage.</p>
<p>There are other types of <code>inputSpec</code> to enable reindexing and delta ingestion.</p>
<h3><a class="anchor" aria-hidden="true" id="reindexing-with-native-batch-ingestion"></a><a href="#reindexing-with-native-batch-ingestion" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Reindexing with Native Batch Ingestion</h3>
<p>This section assumes you understand how to do batch ingestion without Hadoop using <a href="/docs/latest/ingestion/native-batch.html">native batch indexing</a>. Native batch indexing uses an <code>inputSource</code> to know where and how to read the input data. You can use the <a href="/docs/latest/ingestion/native-batch-input-sources.html"><code>DruidInputSource</code></a> to read data from segments inside Druid. You can use Parallel task (<code>index_parallel</code>) for all native batch reindexing tasks. Increase the <code>maxNumConcurrentSubTasks</code> to accommodate the amount of data your are reindexing. See <a href="/docs/latest/ingestion/native-batch.html#capacity-planning">Capacity planning</a>.</p>
<p><a name="delete"></a></p>
<h2><a class="anchor" aria-hidden="true" id="deleting-data"></a><a href="#deleting-data" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Deleting data</h2>
<p>Druid supports permanent deletion of segments that are in an &quot;unused&quot; state (see the
<a href="/docs/latest/design/architecture.html#segment-lifecycle">Segment lifecycle</a> section of the Architecture page).</p>
<p>The Kill Task deletes unused segments within a specified interval from metadata storage and deep storage.</p>
<p>For more information, please see <a href="/docs/latest/ingestion/tasks.html#kill">Kill Task</a>.</p>
<p>Permanent deletion of a segment in Apache Druid has two steps:</p>
<ol>
<li>The segment must first be marked as &quot;unused&quot;. This occurs when a segment is dropped by retention rules, and when a user manually disables a segment through the Coordinator API.</li>
<li>After segments have been marked as &quot;unused&quot;, a Kill Task will delete any &quot;unused&quot; segments from Druid's metadata store as well as deep storage.</li>
</ol>
<p>For documentation on retention rules, please see <a href="/docs/latest/operations/rule-configuration.html">Data Retention</a>.</p>
<p>For documentation on disabling segments using the Coordinator API, please see the
<a href="/docs/latest/operations/api-reference.html#coordinator-datasources">Coordinator Datasources API</a> reference.</p>
<p>A data deletion tutorial is available at <a href="/docs/latest/tutorials/tutorial-delete-data.html">Tutorial: Deleting data</a></p>
<h2><a class="anchor" aria-hidden="true" id="kill-task"></a><a href="#kill-task" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Kill Task</h2>
<p>The kill task deletes all information about segments and removes them from deep storage. Segments to kill must be unused (used==0) in the Druid segment table.</p>
<p>The available grammar is:</p>
<pre><code class="hljs css language-json">{
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"kill"</span>,
<span class="hljs-attr">"id"</span>: &lt;task_id&gt;,
<span class="hljs-attr">"dataSource"</span>: &lt;task_datasource&gt;,
<span class="hljs-attr">"interval"</span> : &lt;all_segments_in_this_interval_will_die!&gt;,
<span class="hljs-attr">"markAsUnused"</span>: &lt;<span class="hljs-literal">true</span>|<span class="hljs-literal">false</span>&gt;,
<span class="hljs-attr">"context"</span>: &lt;task context&gt;
}
</code></pre>
<p>If <code>markAsUnused</code> is true (default is false), the kill task will first mark any segments within the specified interval as unused, before deleting the unused segments within the interval.</p>
<p><strong>WARNING!</strong> The kill task permanently removes all information about the affected segments from the metadata store and deep storage. These segments cannot be recovered after the kill task runs, this operation cannot be undone.</p>
<h2><a class="anchor" aria-hidden="true" id="retention"></a><a href="#retention" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Retention</h2>
<p>Druid supports retention rules, which are used to define intervals of time where data should be preserved, and intervals where data should be discarded.</p>
<p>Druid also supports separating Historical processes into tiers, and the retention rules can be configured to assign data for specific intervals to specific tiers.</p>
<p>These features are useful for performance/cost management; a common use case is separating Historical processes into a &quot;hot&quot; tier and a &quot;cold&quot; tier.</p>
<p>For more information, please see <a href="/docs/latest/operations/rule-configuration.html">Load rules</a>.</p>
<h2><a class="anchor" aria-hidden="true" id="learn-more"></a><a href="#learn-more" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Learn more</h2>
<p>See the following topics for more information:</p>
<ul>
<li><a href="/docs/latest/ingestion/compaction.html">Compaction</a> for an overview and steps to configure manual compaction tasks.</li>
<li><a href="/docs/latest/design/segments.html">Segments</a> for information on how Druid handles segment versioning.</li>
</ul>
</span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/latest/ingestion/schema-design.html"><span class="arrow-prev"></span><span>Schema design tips</span></a><a class="docs-next button" href="/docs/latest/ingestion/compaction.html"><span>Compaction</span><span class="arrow-next"></span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#adding-new-data-to-existing-datasources">Adding new data to existing datasources</a></li><li><a href="#updating-existing-data">Updating existing data</a><ul class="toc-headings"><li><a href="#using-lookups">Using lookups</a></li><li><a href="#reingesting-data">Reingesting data</a></li><li><a href="#with-hadoop-based-ingestion">With Hadoop-based ingestion</a></li><li><a href="#reindexing-with-native-batch-ingestion">Reindexing with Native Batch Ingestion</a></li></ul></li><li><a href="#deleting-data">Deleting data</a></li><li><a href="#kill-task">Kill Task</a></li><li><a href="#retention">Retention</a></li><li><a href="#learn-more">Learn more</a></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a> · <a href="/use-cases">Use Cases</a> · <a href="/druid-powered">Powered by Druid</a> · <a href="/docs/latest/latest">Docs</a> · <a href="/community/">Community</a> · <a href="/downloads.html">Download</a> · <a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a> · <a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a> · <a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a> · <a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright © 2019 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/docsearch.js/1/docsearch.min.js"></script><script>
document.addEventListener('keyup', function(e) {
if (e.target !== document.body) {
return;
}
// keyCode for '/' (slash)
if (e.keyCode === 191) {
const search = document.getElementById('search_input_react');
search && search.focus();
}
});
</script><script>
var search = docsearch({
apiKey: '2de99082a9f38e49dfaa059bbe4c901d',
indexName: 'apache_druid',
inputSelector: '#search_input_react',
algoliaOptions: {"facetFilters":["language:en","version:0.23.0"]}
});
</script></body></html>