blob: d49ee022092e1e296ddbb291a8f7fddd3e8d2a0d [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Tutorial: Load streaming data from Apache Kafka Β· Apache Druid</title><meta name="viewport" content="width=device-width, initial-scale=1.0"/><link rel="canonical" href="https://druid.apache.org/docs/latest/tutorials/tutorial-kafka.html"/><meta name="generator" content="Docusaurus"/><meta name="description" content="&lt;!--"/><meta name="docsearch:language" content="en"/><meta name="docsearch:version" content="26.0.0" /><meta property="og:title" content="Tutorial: Load streaming data from Apache Kafka Β· Apache Druid"/><meta property="og:type" content="website"/><meta property="og:url" content="https://druid.apache.org/index.html"/><meta property="og:description" content="&lt;!--"/><meta property="og:image" content="https://druid.apache.org/img/druid_nav.png"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"/><link rel="shortcut icon" href="/img/favicon.png"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script><script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments); }
gtag('js', new Date());
gtag('config', 'UA-131010415-1');
</script><link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css"/><link rel="stylesheet" href="/css/code-block-buttons.css"/><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><script type="text/javascript" src="/js/code-block-buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/druid_nav.png" alt="Apache Druid"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/technology" target="_self">Technology</a></li><li class=""><a href="/use-cases" target="_self">Use Cases</a></li><li class=""><a href="/druid-powered" target="_self">Powered By</a></li><li class="siteNavGroupActive"><a href="/docs/latest/design/index.html" target="_self">Docs</a></li><li class=""><a href="/community/" target="_self">Community</a></li><li class=""><a href="https://www.apache.org" target="_self">Apache</a></li><li class=""><a href="/downloads.html" target="_self">Download</a></li><li class="navSearchWrapper reactNavSearchWrapper"><input type="text" id="search_input_react" placeholder="Search" title="Search"/></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>β€Ί</i><span>Tutorials</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Getting started<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/design/index.html">Introduction to Apache Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/index.html">Quickstart (local)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/single-server.html">Single server deployment</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/cluster.html">Clustered deployment</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Tutorials<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-batch.html">Load files natively</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-msq-extern.html">Load files using SQL πŸ†•</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/latest/tutorials/tutorial-kafka.html">Load from Apache Kafka</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-batch-hadoop.html">Load from Apache Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-query.html">Querying data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-rollup.html">Roll-up</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-sketches-theta.html">Theta sketches</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-retention.html">Configuring data retention</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-update-data.html">Updating existing data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-compaction.html">Compacting segments</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-delete-data.html">Deleting data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-ingestion-spec.html">Writing an ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-transform-spec.html">Transforming input data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/docker.html">Tutorial: Run with Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-kerberos-hadoop.html">Kerberized HDFS deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-msq-convert-spec.html">Convert ingestion spec to SQL</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-sql-query-view.html">Get to know Query view</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-unnest-arrays.html">Unnesting arrays</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-jupyter-index.html">Jupyter Notebook tutorials</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-jupyter-docker.html">Docker for tutorials</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/tutorials/tutorial-jdbc.html">JDBC connector</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Design<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/design/architecture.html">Design</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/segments.html">Segments</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/processes.html">Processes and servers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/deep-storage.html">Deep storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/metadata-storage.html">Metadata storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/dependencies/zookeeper.html">ZooKeeper</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Ingestion<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/index.html">Ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-formats.html">Data formats</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/data-model.html">Data model</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/rollup.html">Data rollup</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/partitioning.html">Partitioning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/ingestion-spec.html">Ingestion spec</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/schema-design.html">Schema design tips</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Stream ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-ingestion.html">Apache Kafka ingestion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-supervisor-reference.html">Apache Kafka supervisor</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-supervisor-operations.html">Apache Kafka operations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kinesis-ingestion.html">Amazon Kinesis</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Batch ingestion</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch.html">Native batch</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-input-sources.html">Native batch: input sources</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/migrate-from-firehose.html">Migrate from firehose</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/hadoop.html">Hadoop-based</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">SQL-based ingestion πŸ†•</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/index.html">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/concepts.html">Key concepts</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/api.html">API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/security.html">Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/examples.html">Examples</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/reference.html">Reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/multi-stage-query/known-issues.html">Known issues</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/tasks.html">Task reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/faq.html">Troubleshooting FAQ</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Data management<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/data-management/index.html">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/data-management/update.html">Data updates</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/data-management/delete.html">Data deletion</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/data-management/schema-changes.html">Schema changes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/data-management/compaction.html">Compaction</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/data-management/automatic-compaction.html">Automatic compaction</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Querying<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Druid SQL</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql.html">Overview and syntax</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-data-types.html">SQL data types</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-operators.html">Operators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-scalar.html">Scalar functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-aggregations.html">Aggregation functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-multivalue-string-functions.html">Multi-value string functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-json-functions.html">JSON functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-functions.html">All functions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-api.html">Druid SQL API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-jdbc.html">JDBC driver API</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-query-context.html">SQL query context</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-metadata-tables.html">SQL metadata tables</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sql-translation.html">SQL query translation</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/querying/querying.html">Native queries</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/query-execution.html">Query execution</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/troubleshooting.html">Troubleshooting</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Concepts</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/datasource.html">Datasources</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/joins.html">Joins</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/lookups.html">Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/multi-value-dimensions.html">Multi-value dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/nested-columns.html">Nested columns</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/multitenancy.html">Multitenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/caching.html">Query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/using-caching.html">Using query caching</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/query-context.html">Query context</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query types</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/timeseriesquery.html">Timeseries</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/topnquery.html">TopN</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/groupbyquery.html">GroupBy</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/scan-query.html">Scan</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/searchquery.html">Search</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/timeboundaryquery.html">TimeBoundary</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/segmentmetadataquery.html">SegmentMetadata</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/datasourcemetadataquery.html">DatasourceMetadata</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Native query components</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/querying/filters.html">Filters</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/granularities.html">Granularities</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/dimensionspecs.html">Dimensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/aggregations.html">Aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/post-aggregations.html">Post-aggregations</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/misc/math-expr.html">Expressions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/having.html">Having filters (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/limitspec.html">Sorting and limiting (groupBy)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/topnmetricspec.html">Sorting (topN)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/sorting-orders.html">String comparators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/virtual-columns.html">Virtual columns</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/geo.html">Spatial filters</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Configuration<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/configuration/index.html">Configuration reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions.html">Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/configuration/logging.html">Logging</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Operations<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/operations/web-console.html">Web console</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/java.html">Java runtime</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Security</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/security-overview.html">Security overview</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/security-user-auth.html">User authentication and authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/auth-ldap.html">LDAP auth</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/password-provider.html">Password providers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/dynamic-config-provider.html">Dynamic Config Providers</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/tls-support.html">TLS support</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Performance tuning</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/basic-cluster-tuning.html">Basic cluster tuning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/segment-optimization.html">Segment size optimization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/mixed-workloads.html">Mixed workloads</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/http-compression.html">HTTP compression</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/clean-metadata-store.html">Automated metadata cleanup</a></li></ul></div><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Monitoring</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/request-logging.html">Request logging</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/metrics.html">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/alerts.html">Alerts</a></li></ul></div><li class="navListItem"><a class="navItem" href="/docs/latest/operations/api-reference.html">API reference</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/high-availability.html">High availability</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/rolling-updates.html">Rolling updates</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/rule-configuration.html">Using rules to drop and retain data</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/other-hadoop.html">Working with different versions of Apache Hadoop</a></li><div class="navGroup subNavGroup"><h4 class="navGroupSubcategoryTitle">Misc</h4><ul><li class="navListItem"><a class="navItem" href="/docs/latest/operations/dump-segment.html">dump-segment tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/reset-cluster.html">reset-cluster tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/insert-segment-to-db.html">insert-segment-to-db tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/pull-deps.html">pull-deps tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/deep-storage-migration.html">Deep storage migration</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/export-metadata.html">Export Metadata Tool</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/metadata-migration.html">Metadata Migration</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/use_sbt_to_build_fat_jar.html">Content for build.sbt</a></li></ul></div></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Development<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/development/overview.html">Developing on Druid</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/modules.html">Creating extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/javascript.html">JavaScript functionality</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/build.html">Build from source</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/versioning.html">Versioning</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/experimental.html">Experimental features</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Misc<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/misc/papers-and-talks.html">Papers</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle collapsible">Hidden<span class="arrow"><svg width="24" height="24" viewBox="0 0 24 24"><path fill="#565656" d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z"></path><path d="M0 0h24v24H0z" fill="none"></path></svg></span></h3><ul class="hide"><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-elasticsearch.html">Apache Druid vs Elasticsearch</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-key-value.html">Apache Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-kudu.html">Apache Druid vs Kudu</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-redshift.html">Apache Druid vs Redshift</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-spark.html">Apache Druid vs Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/comparisons/druid-vs-sql-on-hadoop.html">Apache Druid vs SQL-on-Hadoop</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/auth.html">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/broker.html">Broker</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/coordinator.html">Coordinator Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/historical.html">Historical Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/indexer.html">Indexer Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/indexing-service.html">Indexing Service</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/middlemanager.html">MiddleManager Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/overlord.html">Overlord Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/router.html">Router Process</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/design/peons.html">Peons</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/approximate-histograms.html">Approximate Histogram aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/avro.html">Apache Avro</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/azure.html">Microsoft Azure</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/bloom-filter.html">Bloom Filter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-extension.html">DataSketches extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-hll.html">DataSketches HLL Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-quantiles.html">DataSketches Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-theta.html">DataSketches Theta Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/datasketches-tuple.html">DataSketches Tuple Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-basic-security.html">Basic Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-kerberos.html">Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-lookups.html">Cached Lookup Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-ranger-security.html">Apache Ranger Security</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/google.html">Google Cloud Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/hdfs.html">HDFS</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kafka-extraction-namespace.html">Apache Kafka Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/lookups-cached-global.html">Globally Cached Lookups</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/mysql.html">MySQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/orc.html">ORC Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-pac4j.html">Druid pac4j based Security extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/parquet.html">Apache Parquet Extension</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/postgresql.html">PostgreSQL Metadata Store</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/protobuf.html">Protobuf</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/s3.html">S3-compatible</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/simple-client-sslcontext.html">Simple SSLContext Provider Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/stats.html">Stats aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/test-stats.html">Test Stats Aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/druid-aws-rds.html">Druid AWS RDS Module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-core/kubernetes.html">Kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/ambari-metrics-emitter.html">Ambari Metrics Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/cassandra.html">Apache Cassandra</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/cloudfiles.html">Rackspace Cloud Files</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/distinctcount.html">DistinctCount Aggregator</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/graphite.html">Graphite Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/influx.html">InfluxDB Line Protocol Parser</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/influxdb-emitter.html">InfluxDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/kafka-emitter.html">Kafka Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/materialized-view.html">Materialized View</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/momentsketch-quantiles.html">Moment Sketches for Approximate Quantiles module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/moving-average-query.html">Moving Average Query</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/opentsdb-emitter.html">OpenTSDB Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/redis-cache.html">Druid Redis Cache</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/sqlserver.html">Microsoft SQLServer</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/statsd.html">StatsD Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/tdigestsketch-quantiles.html">T-Digest Quantiles Sketch module</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/thrift.html">Thrift</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/time-min-max.html">Timestamp Min/Max aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/gce-extensions.html">GCE Extensions</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/aliyun-oss.html">Aliyun OSS</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/development/extensions-contrib/prometheus.html">Prometheus Emitter</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/operations/kubernetes.html">kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/hll-old.html">Cardinality/HyperUnique aggregators</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/querying/select-query.html">Select</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-firehose.html">Firehose (deprecated)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/native-batch-simple-task.html">Native batch (simple)</a></li><li class="navListItem"><a class="navItem" href="/docs/latest/ingestion/standalone-realtime.html">Realtime Process</a></li></ul></div></div></section></div><script>
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
var links = coll[i].nextElementSibling.getElementsByTagName('*');
if (checkActiveCategory){
for (var j = 0; j < links.length; j++) {
if (links[j].classList.contains('navListItemActive')){
coll[i].nextElementSibling.classList.toggle('hide');
coll[i].childNodes[1].classList.toggle('rotate');
checkActiveCategory = false;
break;
}
}
}
coll[i].addEventListener('click', function() {
var arrow = this.childNodes[1];
arrow.classList.toggle('rotate');
var content = this.nextElementSibling;
content.classList.toggle('hide');
});
}
document.addEventListener('DOMContentLoaded', function() {
createToggler('#navToggler', '#docsNav', 'docsSliderActive');
createToggler('#tocToggler', 'body', 'tocActive');
var headings = document.querySelector('.toc-headings');
headings && headings.addEventListener('click', function(event) {
var el = event.target;
while(el !== headings){
if (el.tagName === 'A') {
document.body.classList.remove('tocActive');
break;
} else{
el = el.parentNode;
}
}
}, false);
function createToggler(togglerSelector, targetSelector, className) {
var toggler = document.querySelector(togglerSelector);
var target = document.querySelector(targetSelector);
if (!toggler) {
return;
}
toggler.onclick = function(event) {
event.preventDefault();
target.classList.toggle(className);
};
}
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/druid/edit/master/docs/tutorials/tutorial-kafka.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Tutorial: Load streaming data from Apache Kafka</h1></header><article><div><span><!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<p>This tutorial shows you how to load data into Apache Druid from a Kafka stream, using Druid's Kafka indexing service.</p>
<p>The tutorial guides you through the steps to load sample nested clickstream data from the <a href="https://www.koalastothemax.com/">Koalas to the Max</a> game into a Kafka topic, then ingest the data into Druid.</p>
<h2><a class="anchor" aria-hidden="true" id="prerequisites"></a><a href="#prerequisites" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Prerequisites</h2>
<p>Before you follow the steps in this tutorial, download Druid as described in the <a href="/docs/latest/tutorials/index.html">quickstart</a> using the <a href="/docs/latest/operations/single-server.html">automatic single-machine configuration</a> and have it running on your local machine. You don't need to have loaded any data.</p>
<h2><a class="anchor" aria-hidden="true" id="download-and-start-kafka"></a><a href="#download-and-start-kafka" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Download and start Kafka</h2>
<p><a href="http://kafka.apache.org/">Apache Kafka</a> is a high-throughput message bus that works well with Druid. For this tutorial, use Kafka 2.7.0.</p>
<ol>
<li><p>To download Kafka, run the following commands in your terminal:</p>
<pre><code class="hljs css language-bash">curl -O https://archive.apache.org/dist/kafka/2.7.0/kafka_2.13-2.7.0.tgz
tar -xzf kafka_2.13-2.7.0.tgz
<span class="hljs-built_in">cd</span> kafka_2.13-2.7.0
</code></pre></li>
<li><p>If you're already running Kafka on the machine you're using for this tutorial, delete or rename the <code>kafka-logs</code> directory in <code>/tmp</code>.</p>
<blockquote>
<p>Druid and Kafka both rely on <a href="https://zookeeper.apache.org/">Apache ZooKeeper</a> to coordinate and manage services. Because Druid is already running, Kafka attaches to the Druid ZooKeeper instance when it starts up.<br /></p>
<p>In a production environment where you're running Druid and Kafka on different machines, <a href="https://kafka.apache.org/quickstart">start the Kafka ZooKeeper</a> before you start the Kafka broker.</p>
</blockquote></li>
<li><p>In the Kafka root directory, run this command to start a Kafka broker:</p>
<pre><code class="hljs css language-bash">./bin/kafka-server-start.sh config/server.properties
</code></pre></li>
<li><p>In a new terminal window, navigate to the Kafka root directory and run the following command to create a Kafka topic called <code>kttm</code>:</p>
<pre><code class="hljs css language-bash">./bin/kafka-topics.sh --create --topic kttm --bootstrap-server localhost:9092
</code></pre>
<p>Kafka returns a message when it successfully adds the topic: <code>Created topic kttm</code>.</p></li>
</ol>
<h2><a class="anchor" aria-hidden="true" id="load-data-into-kafka"></a><a href="#load-data-into-kafka" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Load data into Kafka</h2>
<p>In this section, you download sample data to the tutorial's directory and send the data to your Kafka topic.</p>
<ol>
<li><p>In your Kafka root directory, create a directory for the sample data:</p>
<pre><code class="hljs css language-bash">mkdir sample-data
</code></pre></li>
<li><p>Download the sample data to your new directory and extract it:</p>
<pre><code class="hljs css language-bash"><span class="hljs-built_in">cd</span> sample-data
curl -O https://static.imply.io/example-data/kttm-nested-v2/kttm-nested-v2-2019-08-25.json.gz
</code></pre></li>
<li><p>In your Kafka root directory, run the following commands to post sample events to the <code>kttm</code> Kafka topic:</p>
<pre><code class="hljs css language-bash"><span class="hljs-built_in">export</span> KAFKA_OPTS=<span class="hljs-string">"-Dfile.encoding=UTF-8"</span>
gzcat ./sample-data/kttm-nested-v2-2019-08-25.json.gz | ./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic kttm
</code></pre></li>
</ol>
<h2><a class="anchor" aria-hidden="true" id="load-data-into-druid"></a><a href="#load-data-into-druid" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Load data into Druid</h2>
<p>Now that you have data in your Kafka topic, you can use Druid's Kafka indexing service to ingest the data into Druid.</p>
<p>To do this, you can use the Druid console data loader or you can submit a supervisor spec. Follow the steps below to try each method.</p>
<h3><a class="anchor" aria-hidden="true" id="load-data-with-the-console-data-loader"></a><a href="#load-data-with-the-console-data-loader" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Load data with the console data loader</h3>
<p>The Druid console data loader presents you with several screens to configure each section of the supervisor spec, then creates an ingestion task to ingest the Kafka data.</p>
<p>To use the console data loader:</p>
<ol>
<li><p>Navigate to <a href="http://localhost:8888">localhost:8888</a> and click <strong>Load data &gt; Streaming</strong>.</p>
<p><img src="../assets/tutorial-kafka-data-loader-01.png" alt="Data loader init" title="Data loader init"></p></li>
<li><p>Click <strong>Apache Kafka</strong> and then <strong>Connect data</strong>.</p></li>
<li><p>Enter <code>localhost:9092</code> as the bootstrap server and <code>kttm</code> as the topic, then click <strong>Apply</strong> and make sure you see data similar to the following:</p>
<p><img src="../assets/tutorial-kafka-data-loader-02.png" alt="Data loader sample" title="Data loader sample"></p></li>
<li><p>Click <strong>Next: Parse data</strong>.</p>
<p><img src="../assets/tutorial-kafka-data-loader-03.png" alt="Data loader parse data" title="Data loader parse data"></p>
<p>The data loader automatically tries to determine the correct parser for the data. For the sample data, it selects input format <code>json</code>. You can play around with the different options to get a preview of how Druid parses your data.</p></li>
<li><p>With the <code>json</code> input format selected, click <strong>Next: Parse time</strong>. You may need to click <strong>Apply</strong> first.</p>
<p><img src="../assets/tutorial-kafka-data-loader-04.png" alt="Data loader parse time" title="Data loader parse time"></p>
<p>Druid's architecture requires that you specify a primary timestamp column. Druid stores the timestamp in the <code>__time</code> column in your Druid datasource.
In a production environment, if you don't have a timestamp in your data, you can select <strong>Parse timestamp from:</strong> <code>None</code> to use a placeholder value.</p>
<p>For the sample data, the data loader selects the <code>timestamp</code> column in the raw data as the primary time column.</p></li>
<li><p>Click <strong>Next: ...</strong> three times to go past the <strong>Transform</strong> and <strong>Filter</strong> steps to <strong>Configure schema</strong>. You don't need to enter anything in these two steps because applying transforms and filters is out of scope for this tutorial.</p>
<p><img src="../assets/tutorial-kafka-data-loader-05.png" alt="Data loader schema" title="Data loader schema"></p></li>
<li><p>In the <strong>Configure schema</strong> step, you can select data types for the columns and configure <a href="/docs/latest/ingestion/data-model.html#dimensions">dimensions</a> and <a href="/docs/latest/ingestion/data-model.html#metrics">metrics</a> to ingest into Druid. The console does most of this for you, but you need to create JSON-type dimensions for the three nested columns in the data.</p>
<p>Click <strong>Add dimension</strong> and enter the following information. You can only add one dimension at a time.</p>
<ul>
<li>Name: <code>event</code>, Type: <code>json</code></li>
<li>Name: <code>agent</code>, Type: <code>json</code></li>
<li>Name: <code>geo_ip</code>, Type: <code>json</code></li>
</ul>
<p>After you create the dimensions, you can scroll to the right in the preview window to see the nested columns:</p>
<p><img src="../assets/tutorial-kafka-data-loader-05b.png" alt="Nested columns schema" title="Nested columns schema"></p></li>
<li><p>Click <strong>Next: Partition</strong> to configure how Druid partitions the data into segments.</p>
<p><img src="../assets/tutorial-kafka-data-loader-06.png" alt="Data loader partition" title="Data loader partition"></p></li>
<li><p>Select <code>day</code> as the <strong>Segment granularity</strong>. Since this is a small dataset, you don't need to make any further adjustments. Click <strong>Next: Tune</strong> to fine tune how Druid ingests data.</p>
<p><img src="../assets/tutorial-kafka-data-loader-07.png" alt="Data loader tune" title="Data loader tune"></p></li>
<li><p>In <strong>Input tuning</strong>, set <strong>Use earliest offset</strong> to <code>True</code>β€”this is very important because you want to consume the data from the start of the stream. There are no other changes to make here, so click <strong>Next: Publish</strong>.</p>
<p><img src="../assets/tutorial-kafka-data-loader-08.png" alt="Data loader publish" title="Data loader publish"></p></li>
<li><p>Name the datasource <code>kttm-kafka</code> and click <strong>Next: Edit spec</strong> to review your spec.</p>
<p><img src="../assets/tutorial-kafka-data-loader-09.png" alt="Data loader spec" title="Data loader spec"></p>
<p>The console presents the spec you've constructed. You can click the buttons above the spec to make changes in previous steps and see how the changes update the spec. You can also edit the spec directly and see it reflected in the previous steps.</p></li>
<li><p>Click <strong>Submit</strong> to create an ingestion task.</p>
<p>Druid displays the task view with the focus on the newly created supervisor.</p>
<p>The task view auto-refreshes, so wait until the supervisor launches a task. The status changes from <strong>Pending</strong> to <strong>Running</strong> as Druid starts to ingest data.</p>
<p><img src="../assets/tutorial-kafka-data-loader-10.png" alt="Tasks view" title="Tasks view"></p></li>
<li><p>Navigate to the <strong>Datasources</strong> view from the header.</p>
<p><img src="../assets/tutorial-kafka-data-loader-11.png" alt="Datasource view" title="Datasource view"></p>
<p>When the <code>kttm-kafka</code> datasource appears here, you can query it. See <a href="#query-your-data">Query your data</a> for details.</p>
<blockquote>
<p>If the datasource doesn't appear after a minute you might not have set the supervisor to read data from the start of the streamβ€”the <code>Use earliest offset</code> setting in the <strong>Tune</strong> step. Go to the <strong>Ingestion</strong> page and terminate the supervisor using the <strong>Actions(...)</strong> menu. <a href="#load-data-with-the-console-data-loader">Load the sample data</a> again and apply the correct setting when you get to the <strong>Tune</strong> step.</p>
</blockquote></li>
</ol>
<h3><a class="anchor" aria-hidden="true" id="submit-a-supervisor-spec"></a><a href="#submit-a-supervisor-spec" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Submit a supervisor spec</h3>
<p>As an alternative to using the data loader, you can submit a supervisor spec to Druid. You can do this in the console or using the Druid API.</p>
<h4><a class="anchor" aria-hidden="true" id="use-the-console"></a><a href="#use-the-console" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Use the console</h4>
<p>To submit a supervisor spec using the Druid console:</p>
<ol>
<li><p>Click <strong>Ingestion</strong> in the console, then click the ellipses next to the refresh button and select <strong>Submit JSON supervisor</strong>.</p></li>
<li><p>Paste this spec into the JSON window and click <strong>Submit</strong>.</p>
<pre><code class="hljs css language-json">{
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"kafka"</span>,
<span class="hljs-attr">"spec"</span>: {
<span class="hljs-attr">"ioConfig"</span>: {
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"kafka"</span>,
<span class="hljs-attr">"consumerProperties"</span>: {
<span class="hljs-attr">"bootstrap.servers"</span>: <span class="hljs-string">"localhost:9092"</span>
},
<span class="hljs-attr">"topic"</span>: <span class="hljs-string">"kttm"</span>,
<span class="hljs-attr">"inputFormat"</span>: {
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"json"</span>
},
<span class="hljs-attr">"useEarliestOffset"</span>: <span class="hljs-literal">true</span>
},
<span class="hljs-attr">"tuningConfig"</span>: {
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"kafka"</span>
},
<span class="hljs-attr">"dataSchema"</span>: {
<span class="hljs-attr">"dataSource"</span>: <span class="hljs-string">"kttm-kafka-supervisor-console"</span>,
<span class="hljs-attr">"timestampSpec"</span>: {
<span class="hljs-attr">"column"</span>: <span class="hljs-string">"timestamp"</span>,
<span class="hljs-attr">"format"</span>: <span class="hljs-string">"iso"</span>
},
<span class="hljs-attr">"dimensionsSpec"</span>: {
<span class="hljs-attr">"dimensions"</span>: [
<span class="hljs-string">"session"</span>,
<span class="hljs-string">"number"</span>,
<span class="hljs-string">"client_ip"</span>,
<span class="hljs-string">"language"</span>,
<span class="hljs-string">"adblock_list"</span>,
<span class="hljs-string">"app_version"</span>,
<span class="hljs-string">"path"</span>,
<span class="hljs-string">"loaded_image"</span>,
<span class="hljs-string">"referrer"</span>,
<span class="hljs-string">"referrer_host"</span>,
<span class="hljs-string">"server_ip"</span>,
<span class="hljs-string">"screen"</span>,
<span class="hljs-string">"window"</span>,
{
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"long"</span>,
<span class="hljs-attr">"name"</span>: <span class="hljs-string">"session_length"</span>
},
<span class="hljs-string">"timezone"</span>,
<span class="hljs-string">"timezone_offset"</span>,
{
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"json"</span>,
<span class="hljs-attr">"name"</span>: <span class="hljs-string">"event"</span>
},
{
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"json"</span>,
<span class="hljs-attr">"name"</span>: <span class="hljs-string">"agent"</span>
},
{
<span class="hljs-attr">"type"</span>: <span class="hljs-string">"json"</span>,
<span class="hljs-attr">"name"</span>: <span class="hljs-string">"geo_ip"</span>
}
]
},
<span class="hljs-attr">"granularitySpec"</span>: {
<span class="hljs-attr">"queryGranularity"</span>: <span class="hljs-string">"none"</span>,
<span class="hljs-attr">"rollup"</span>: <span class="hljs-literal">false</span>,
<span class="hljs-attr">"segmentGranularity"</span>: <span class="hljs-string">"day"</span>
}
}
}
}
</code></pre></li>
</ol>
<p>This starts the supervisorβ€”the supervisor spawns tasks that start listening for incoming data.</p>
<ol start="3">
<li>Click <strong>Tasks</strong> on the console home page to monitor the status of the job. This spec writes the data in the <code>kttm</code> topic to a datasource named <code>kttm-kafka-supervisor-console</code>.</li>
</ol>
<h4><a class="anchor" aria-hidden="true" id="use-the-api"></a><a href="#use-the-api" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Use the API</h4>
<p>You can also use the Druid API to submit a supervisor spec.</p>
<ol>
<li><p>Run the following command to download the sample spec:</p>
<pre><code class="hljs css language-bash">curl -O https://druid.apache.org/docs/latest/assets/files/kttm-kafka-supervisor.json
</code></pre></li>
<li><p>Run the following command to submit the spec in the <code>kttm-kafka-supervisor.json</code> file:</p>
<pre><code class="hljs css language-bash">curl -XPOST -H <span class="hljs-string">'Content-Type: application/json'</span> kttm-kafka-supervisor.json http://localhost:8081/druid/indexer/v1/supervisor
</code></pre>
<p>After Druid successfully creates the supervisor, you get a response containing the supervisor ID: <code>{&quot;id&quot;:&quot;kttm-kafka-supervisor-api&quot;}</code>.</p></li>
<li><p>Click <strong>Tasks</strong> on the console home page to monitor the status of the job. This spec writes the data in the <code>kttm</code> topic to a datasource named <code>kttm-kafka-supervisor-api</code>.</p></li>
</ol>
<h2><a class="anchor" aria-hidden="true" id="query-your-data"></a><a href="#query-your-data" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Query your data</h2>
<p>After Druid sends data to the Kafka stream, it is immediately available for querying. Click <strong>Query</strong> in the Druid console to run SQL queries against the datasource.</p>
<p>Since this tutorial ingests a small dataset, you can run the query <code>SELECT * FROM &quot;kttm-kafka&quot;</code> to return all of the data in the dataset you created.</p>
<p><img src="../assets/tutorial-kafka-data-loader-12.png" alt="Query view" title="Query view"></p>
<p>Check out the <a href="/docs/latest/tutorials/tutorial-query.html">Querying data tutorial</a> to run some example queries on the newly loaded data.</p>
<h2><a class="anchor" aria-hidden="true" id="further-reading"></a><a href="#further-reading" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Further reading</h2>
<p>For more information, see the following topics:</p>
<ul>
<li><a href="/docs/latest/development/extensions-core/kafka-ingestion.html">Apache Kafka ingestion</a> for more information on loading data from Kafka streams.</li>
<li><a href="/docs/latest/development/extensions-core/kafka-supervisor-reference.html">Apache Kafka supervisor reference</a> for Kafka supervisor configuration information.</li>
<li><a href="/docs/latest/development/extensions-core/kafka-supervisor-operations.html">Apache Kafka supervisor operations reference</a> for information on running and maintaining Kafka supervisors for Druid.</li>
</ul>
</span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/latest/tutorials/tutorial-msq-extern.html"><span class="arrow-prev">← </span><span>Load files using SQL πŸ†•</span></a><a class="docs-next button" href="/docs/latest/tutorials/tutorial-batch-hadoop.html"><span>Load from Apache Hadoop</span><span class="arrow-next"> β†’</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#prerequisites">Prerequisites</a></li><li><a href="#download-and-start-kafka">Download and start Kafka</a></li><li><a href="#load-data-into-kafka">Load data into Kafka</a></li><li><a href="#load-data-into-druid">Load data into Druid</a><ul class="toc-headings"><li><a href="#load-data-with-the-console-data-loader">Load data with the console data loader</a></li><li><a href="#submit-a-supervisor-spec">Submit a supervisor spec</a></li></ul></li><li><a href="#query-your-data">Query your data</a></li><li><a href="#further-reading">Further reading</a></li></ul></nav></div><footer class="nav-footer druid-footer" id="footer"><div class="container"><div class="text-center"><p><a href="/technology">Technology</a> · <a href="/use-cases">Use Cases</a> · <a href="/druid-powered">Powered by Druid</a> · <a href="/docs/latest/">Docs</a> · <a href="/community/">Community</a> · <a href="/downloads.html">Download</a> · <a href="/faq">FAQ</a></p></div><div class="text-center"><a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a> · <a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a> · <a title="Download via Apache" href="https://www.apache.org/dyn/closer.cgi?path=/incubator/druid/{{ site.druid_versions[0].versions[0].version }}/apache-druid-{{ site.druid_versions[0].versions[0].version }}-bin.tar.gz" target="_blank"><span class="fas fa-feather"></span></a> · <a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a></div><div class="text-center license">Copyright Β© 2022 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br/>Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br/>Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></footer></div><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script><script>
document.addEventListener('keyup', function(e) {
if (e.target !== document.body) {
return;
}
// keyCode for '/' (slash)
if (e.keyCode === 191) {
const search = document.getElementById('search_input_react');
search && search.focus();
}
});
</script><script>
var search = docsearch({
appId: 'CPK9PMSCEY',
apiKey: 'd4ef4ffe3a2f0c7d1e34b062fd98736b',
indexName: 'apache_druid',
inputSelector: '#search_input_react',
algoliaOptions: {"facetFilters":["language:en","version:26.0.0"]}
});
</script></body></html>