blob: 4fb1eaf11a1889b6ea35368ceef1533e7add38d6 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Apache Druid">
<meta name="keywords" content="druid,kafka,database,analytics,streaming,real-time,real time,apache,open source">
<meta name="author" content="Apache Software Foundation">
<title>Druid | Ingestion Spec</title>
<link rel="alternate" type="application/atom+xml" href="/feed">
<link rel="shortcut icon" href="/img/favicon.png">
<link rel="stylesheet" href="/assets/css/font-awesome-5.css">
<link href='//fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700,300italic|Open+Sans:300italic,400italic,600italic,400,300,600,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="/css/bootstrap-pure.css?v=1.1">
<link rel="stylesheet" href="/css/base.css?v=1.1">
<link rel="stylesheet" href="/css/header.css?v=1.1">
<link rel="stylesheet" href="/css/footer.css?v=1.1">
<link rel="stylesheet" href="/css/syntax.css?v=1.1">
<link rel="stylesheet" href="/css/docs.css?v=1.1">
<script>
// Asynchronously inject the Google Custom Search Engine script so that the
// <gcse:searchbox-only> element further down the page can be rendered.
(function() {
  // Identifier of the custom search engine to load.
  var cx = '000162378814775985090:molvbm0vggm';
  var gcse = document.createElement('script');
  gcse.type = 'text/javascript';
  gcse.async = true;
  // Always fetch over HTTPS instead of mirroring the page's protocol, so the
  // script is never requested over plain HTTP.
  gcse.src = 'https://cse.google.com/cse.js?cx=' + cx;
  // Insert before the first <script> on the page, which is guaranteed to
  // exist because this code is itself running inside one.
  var s = document.getElementsByTagName('script')[0];
  s.parentNode.insertBefore(gcse, s);
})();
</script>
</head>
<body>
<!-- Start page_header include -->
<script src="//ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
<div class="top-navigator">
<div class="container">
<div class="left-cont">
<a class="logo" href="/"><span class="druid-logo"></span></a>
</div>
<div class="right-cont">
<ul class="links">
<li class=""><a href="/technology">Technology</a></li>
<li class=""><a href="/use-cases">Use Cases</a></li>
<li class=""><a href="/druid-powered">Powered By</a></li>
<li class=""><a href="/docs/latest/design/">Docs</a></li>
<li class=""><a href="/community/">Community</a></li>
<li class="header-dropdown">
<a>Apache</a>
<div class="header-dropdown-menu">
<a href="https://www.apache.org/" target="_blank">Foundation</a>
<a href="https://www.apache.org/events/current-event" target="_blank">Events</a>
<a href="https://www.apache.org/licenses/" target="_blank">License</a>
<a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a>
<a href="https://www.apache.org/security/" target="_blank">Security</a>
<a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a>
</div>
</li>
<li class=" button-link"><a href="/downloads.html">Download</a></li>
</ul>
</div>
</div>
<div class="action-button menu-icon">
<span class="fa fa-bars"></span> MENU
</div>
<div class="action-button menu-icon-close">
<span class="fa fa-times"></span> MENU
</div>
</div>
<script type="text/javascript">
// Responsive navigation menu. The two MENU buttons toggle the link list, and
// the resize handler resets the layout when the viewport crosses the 840px
// breakpoint.
var $menu = $('.right-cont');
var $menuIcon = $('.menu-icon');
var $menuIconClose = $('.menu-icon-close');

// Reveal the link list and swap the "open" button for the "close" button.
function showMenu() {
  $menu.fadeIn(100);
  $menuIcon.fadeOut(100);
  $menuIconClose.fadeIn(100);
}
$menuIcon.click(showMenu);

// Hide the link list and swap the "close" button back for the "open" button.
function hideMenu() {
  $menu.fadeOut(100);
  $menuIconClose.fadeOut(100);
  $menuIcon.fadeIn(100);
}
$menuIconClose.click(hideMenu);

// Keep the breakpoint state private so no new globals are introduced.
(function() {
  var WIDE_MIN_WIDTH = 840;

  // True when the viewport is wide enough for the always-visible link list.
  function isWide() {
    return $(window).width() >= WIDE_MIN_WIDTH;
  }

  var wasWide = isWide();

  $(window).resize(function() {
    var wide = isWide();
    // Only react when the viewport actually crosses the breakpoint.
    // Previously every resize event below the breakpoint closed the menu,
    // including one the user had just opened.
    if (wide === wasWide) {
      return;
    }
    wasWide = wide;

    if (wide) {
      // Wide layout: links are always visible and both MENU buttons hidden.
      $menu.fadeIn(100);
      $menuIcon.fadeOut(100);
      $menuIconClose.fadeOut(100);
    }
    else {
      // Narrow layout: start with the menu collapsed.
      hideMenu();
    }
  });
})();
</script>
<!-- Stop page_header include -->
<div class="container doc-container">
<p> Looking for the <a href="/docs/26.0.0/">latest stable documentation</a>?</p>
<div class="row">
<div class="col-md-9 doc-content">
<p>
<a class="btn btn-default btn-xs visible-xs-inline-block visible-sm-inline-block" href="#toc">Table of Contents</a>
</p>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<h1 id="ingestion-spec">Ingestion Spec</h1>
<p>An Apache Druid (incubating) ingestion spec consists of 3 components:</p>
<div class="highlight"><pre><code class="language-json" data-lang="json"><span></span><span class="p">{</span>
<span class="nt">&quot;dataSchema&quot;</span> <span class="p">:</span> <span class="p">{</span><span class="err">...</span><span class="p">},</span>
<span class="nt">&quot;ioConfig&quot;</span> <span class="p">:</span> <span class="p">{</span><span class="err">...</span><span class="p">},</span>
<span class="nt">&quot;tuningConfig&quot;</span> <span class="p">:</span> <span class="p">{</span><span class="err">...</span><span class="p">}</span>
<span class="p">}</span>
</code></pre></div>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>dataSchema</td>
<td>JSON Object</td>
<td>Specifies the schema of the incoming data. All ingestion specs can share the same dataSchema.</td>
<td>yes</td>
</tr>
<tr>
<td>ioConfig</td>
<td>JSON Object</td>
<td>Specifies where the data is coming from and where the data is going. This object will vary with the ingestion method.</td>
<td>yes</td>
</tr>
<tr>
<td>tuningConfig</td>
<td>JSON Object</td>
<td>Specifies how to tune various ingestion parameters. This object will vary with the ingestion method.</td>
<td>no</td>
</tr>
</tbody></table>
<h1 id="dataschema">DataSchema</h1>
<p>An example dataSchema is shown below:</p>
<div class="highlight"><pre><code class="language-json" data-lang="json"><span></span><span class="s2">&quot;dataSchema&quot;</span> <span class="err">:</span> <span class="p">{</span>
<span class="nt">&quot;dataSource&quot;</span> <span class="p">:</span> <span class="s2">&quot;wikipedia&quot;</span><span class="p">,</span>
<span class="nt">&quot;parser&quot;</span> <span class="p">:</span> <span class="p">{</span>
<span class="nt">&quot;type&quot;</span> <span class="p">:</span> <span class="s2">&quot;string&quot;</span><span class="p">,</span>
<span class="nt">&quot;parseSpec&quot;</span> <span class="p">:</span> <span class="p">{</span>
<span class="nt">&quot;format&quot;</span> <span class="p">:</span> <span class="s2">&quot;json&quot;</span><span class="p">,</span>
<span class="nt">&quot;timestampSpec&quot;</span> <span class="p">:</span> <span class="p">{</span>
<span class="nt">&quot;column&quot;</span> <span class="p">:</span> <span class="s2">&quot;timestamp&quot;</span><span class="p">,</span>
<span class="nt">&quot;format&quot;</span> <span class="p">:</span> <span class="s2">&quot;auto&quot;</span>
<span class="p">},</span>
<span class="nt">&quot;dimensionsSpec&quot;</span> <span class="p">:</span> <span class="p">{</span>
<span class="nt">&quot;dimensions&quot;</span><span class="p">:</span> <span class="p">[</span>
<span class="s2">&quot;page&quot;</span><span class="p">,</span>
<span class="s2">&quot;language&quot;</span><span class="p">,</span>
<span class="s2">&quot;user&quot;</span><span class="p">,</span>
<span class="s2">&quot;unpatrolled&quot;</span><span class="p">,</span>
<span class="s2">&quot;newPage&quot;</span><span class="p">,</span>
<span class="s2">&quot;robot&quot;</span><span class="p">,</span>
<span class="s2">&quot;anonymous&quot;</span><span class="p">,</span>
<span class="s2">&quot;namespace&quot;</span><span class="p">,</span>
<span class="s2">&quot;continent&quot;</span><span class="p">,</span>
<span class="s2">&quot;country&quot;</span><span class="p">,</span>
<span class="s2">&quot;region&quot;</span><span class="p">,</span>
<span class="s2">&quot;city&quot;</span><span class="p">,</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;long&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;countryNum&quot;</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;float&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;userLatitude&quot;</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;float&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;userLongitude&quot;</span>
<span class="p">}</span>
<span class="p">],</span>
<span class="nt">&quot;dimensionExclusions&quot;</span> <span class="p">:</span> <span class="p">[],</span>
<span class="nt">&quot;spatialDimensions&quot;</span> <span class="p">:</span> <span class="p">[]</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="p">},</span>
<span class="nt">&quot;metricsSpec&quot;</span> <span class="p">:</span> <span class="p">[{</span>
<span class="nt">&quot;type&quot;</span> <span class="p">:</span> <span class="s2">&quot;count&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;count&quot;</span>
<span class="p">},</span> <span class="p">{</span>
<span class="nt">&quot;type&quot;</span> <span class="p">:</span> <span class="s2">&quot;doubleSum&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;added&quot;</span><span class="p">,</span>
<span class="nt">&quot;fieldName&quot;</span> <span class="p">:</span> <span class="s2">&quot;added&quot;</span>
<span class="p">},</span> <span class="p">{</span>
<span class="nt">&quot;type&quot;</span> <span class="p">:</span> <span class="s2">&quot;doubleSum&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;deleted&quot;</span><span class="p">,</span>
<span class="nt">&quot;fieldName&quot;</span> <span class="p">:</span> <span class="s2">&quot;deleted&quot;</span>
<span class="p">},</span> <span class="p">{</span>
<span class="nt">&quot;type&quot;</span> <span class="p">:</span> <span class="s2">&quot;doubleSum&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;delta&quot;</span><span class="p">,</span>
<span class="nt">&quot;fieldName&quot;</span> <span class="p">:</span> <span class="s2">&quot;delta&quot;</span>
<span class="p">}],</span>
<span class="nt">&quot;granularitySpec&quot;</span> <span class="p">:</span> <span class="p">{</span>
<span class="nt">&quot;segmentGranularity&quot;</span> <span class="p">:</span> <span class="s2">&quot;DAY&quot;</span><span class="p">,</span>
<span class="nt">&quot;queryGranularity&quot;</span> <span class="p">:</span> <span class="s2">&quot;NONE&quot;</span><span class="p">,</span>
<span class="nt">&quot;intervals&quot;</span> <span class="p">:</span> <span class="p">[</span> <span class="s2">&quot;2013-08-31/2013-09-01&quot;</span> <span class="p">]</span>
<span class="p">},</span>
<span class="nt">&quot;transformSpec&quot;</span> <span class="p">:</span> <span class="kc">null</span>
<span class="p">}</span>
</code></pre></div>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>dataSource</td>
<td>String</td>
<td>The name of the ingested datasource. Datasources can be thought of as tables.</td>
<td>yes</td>
</tr>
<tr>
<td>parser</td>
<td>JSON Object</td>
<td>Specifies how ingested data can be parsed.</td>
<td>yes</td>
</tr>
<tr>
<td>metricsSpec</td>
<td>JSON Object array</td>
<td>A list of <a href="../querying/aggregations.html">aggregators</a>.</td>
<td>yes</td>
</tr>
<tr>
<td>granularitySpec</td>
<td>JSON Object</td>
<td>Specifies how to create segments and roll up data.</td>
<td>yes</td>
</tr>
<tr>
<td>transformSpec</td>
<td>JSON Object</td>
<td>Specifies how to filter and transform input data. See <a href="../ingestion/transform-spec.html">transform specs</a>.</td>
<td>no</td>
</tr>
</tbody></table>
<h2 id="parser">Parser</h2>
<p>If <code>type</code> is not included, the parser defaults to <code>string</code>. For additional data formats, please see our <a href="../development/extensions.html">extensions list</a>.</p>
<h3 id="string-parser">String Parser</h3>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>type</td>
<td>String</td>
<td>This should say <code>string</code> in general, or <code>hadoopyString</code> when used in a Hadoop indexing job.</td>
<td>no</td>
</tr>
<tr>
<td>parseSpec</td>
<td>JSON Object</td>
<td>Specifies the format, timestamp, and dimensions of the data.</td>
<td>yes</td>
</tr>
</tbody></table>
<h3 id="parsespec">ParseSpec</h3>
<p>ParseSpecs serve two purposes:</p>
<ul>
<li>The String Parser uses them to determine the format (i.e. JSON, CSV, TSV) of incoming rows.</li>
<li>All Parsers use them to determine the timestamp and dimensions of incoming rows.</li>
</ul>
<p>If <code>format</code> is not included, the parseSpec defaults to <code>tsv</code>.</p>
<h4 id="json-parsespec">JSON ParseSpec</h4>
<p>Use this with the String Parser to load JSON.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>format</td>
<td>String</td>
<td>This should say <code>json</code>.</td>
<td>no</td>
</tr>
<tr>
<td>timestampSpec</td>
<td>JSON Object</td>
<td>Specifies the column and format of the timestamp.</td>
<td>yes</td>
</tr>
<tr>
<td>dimensionsSpec</td>
<td>JSON Object</td>
<td>Specifies the dimensions of the data.</td>
<td>yes</td>
</tr>
<tr>
<td>flattenSpec</td>
<td>JSON Object</td>
<td>Specifies flattening configuration for nested JSON data. See <a href="./flatten-json.html">Flattening JSON</a> for more info.</td>
<td>no</td>
</tr>
</tbody></table>
<h4 id="json-lowercase-parsespec">JSON Lowercase ParseSpec</h4>
<div class="note caution">
The <em>jsonLowercase</em> parser is deprecated and may be removed in a future version of Druid.
</div>
<p>This is a special variation of the JSON ParseSpec that lower cases all the column names in the incoming JSON data. This parseSpec is required if you are updating to Druid 0.7.x from Druid 0.6.x, are directly ingesting JSON with mixed case column names, do not have any ETL in place to lower case those column names, and would like to make queries that include the data you created using 0.6.x and 0.7.x.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>format</td>
<td>String</td>
<td>This should say <code>jsonLowercase</code>.</td>
<td>yes</td>
</tr>
<tr>
<td>timestampSpec</td>
<td>JSON Object</td>
<td>Specifies the column and format of the timestamp.</td>
<td>yes</td>
</tr>
<tr>
<td>dimensionsSpec</td>
<td>JSON Object</td>
<td>Specifies the dimensions of the data.</td>
<td>yes</td>
</tr>
</tbody></table>
<h4 id="csv-parsespec">CSV ParseSpec</h4>
<p>Use this with the String Parser to load CSV. Strings are parsed using the com.opencsv library.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>format</td>
<td>String</td>
<td>This should say <code>csv</code>.</td>
<td>yes</td>
</tr>
<tr>
<td>timestampSpec</td>
<td>JSON Object</td>
<td>Specifies the column and format of the timestamp.</td>
<td>yes</td>
</tr>
<tr>
<td>dimensionsSpec</td>
<td>JSON Object</td>
<td>Specifies the dimensions of the data.</td>
<td>yes</td>
</tr>
<tr>
<td>listDelimiter</td>
<td>String</td>
<td>A custom delimiter for multi-value dimensions.</td>
<td>no (default == ctrl+A)</td>
</tr>
<tr>
<td>columns</td>
<td>JSON array</td>
<td>Specifies the columns of the data.</td>
<td>yes</td>
</tr>
</tbody></table>
<h4 id="tsv-delimited-parsespec">TSV / Delimited ParseSpec</h4>
<p>Use this with the String Parser to load any delimited text that does not require special escaping. By default,
the delimiter is a tab, so this will load TSV.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>format</td>
<td>String</td>
<td>This should say <code>tsv</code>.</td>
<td>yes</td>
</tr>
<tr>
<td>timestampSpec</td>
<td>JSON Object</td>
<td>Specifies the column and format of the timestamp.</td>
<td>yes</td>
</tr>
<tr>
<td>dimensionsSpec</td>
<td>JSON Object</td>
<td>Specifies the dimensions of the data.</td>
<td>yes</td>
</tr>
<tr>
<td>delimiter</td>
<td>String</td>
<td>A custom delimiter for data values.</td>
<td>no (default == \t)</td>
</tr>
<tr>
<td>listDelimiter</td>
<td>String</td>
<td>A custom delimiter for multi-value dimensions.</td>
<td>no (default == ctrl+A)</td>
</tr>
<tr>
<td>columns</td>
<td>JSON String array</td>
<td>Specifies the columns of the data.</td>
<td>yes</td>
</tr>
</tbody></table>
<h4 id="timeanddims-parsespec">TimeAndDims ParseSpec</h4>
<p>Use this with non-String Parsers to provide them with timestamp and dimensions information. Non-String Parsers
handle all formatting decisions on their own, without using the ParseSpec.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>format</td>
<td>String</td>
<td>This should say <code>timeAndDims</code>.</td>
<td>yes</td>
</tr>
<tr>
<td>timestampSpec</td>
<td>JSON Object</td>
<td>Specifies the column and format of the timestamp.</td>
<td>yes</td>
</tr>
<tr>
<td>dimensionsSpec</td>
<td>JSON Object</td>
<td>Specifies the dimensions of the data.</td>
<td>yes</td>
</tr>
</tbody></table>
<h3 id="timestampspec">TimestampSpec</h3>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>column</td>
<td>String</td>
<td>The column of the timestamp.</td>
<td>yes</td>
</tr>
<tr>
<td>format</td>
<td>String</td>
<td>iso, posix, millis, micro, nano, auto or any <a href="http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html">Joda time</a> format.</td>
<td>no (default == &#39;auto&#39;)</td>
</tr>
</tbody></table>
<p><a name="dimensions"></a></p>
<h3 id="dimensionsspec">DimensionsSpec</h3>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>dimensions</td>
<td>JSON array</td>
<td>A list of <a href="#dimension-schema">dimension schema</a> objects or dimension names. Providing a name is equivalent to providing a String-typed dimension schema with the given name. If this is an empty array, Druid will treat all non-timestamp, non-metric columns that do not appear in &quot;dimensionExclusions&quot; as String-typed dimension columns.</td>
<td>yes</td>
</tr>
<tr>
<td>dimensionExclusions</td>
<td>JSON String array</td>
<td>The names of dimensions to exclude from ingestion.</td>
<td>no (default == [])</td>
</tr>
<tr>
<td>spatialDimensions</td>
<td>JSON Object array</td>
<td>An array of <a href="../development/geo.html">spatial dimensions</a></td>
<td>no (default == [])</td>
</tr>
</tbody></table>
<h4 id="dimension-schema">Dimension Schema</h4>
<p>A dimension schema specifies the type and name of a dimension to be ingested.</p>
<p>For string columns, the dimension schema can also be used to enable or disable bitmap indexing by setting the
<code>createBitmapIndex</code> boolean. By default, bitmap indexes are enabled for all string columns. Only string columns can have
bitmap indexes; they are not supported for numeric columns.</p>
<p>For example, the following <code>dimensionsSpec</code> section from a <code>dataSchema</code> ingests one column as Long (<code>countryNum</code>), two
columns as Float (<code>userLatitude</code>, <code>userLongitude</code>), and the other columns as Strings, with bitmap indexes disabled
for the <code>comment</code> column.</p>
<div class="highlight"><pre><code class="language-json" data-lang="json"><span></span><span class="s2">&quot;dimensionsSpec&quot;</span> <span class="err">:</span> <span class="p">{</span>
<span class="nt">&quot;dimensions&quot;</span><span class="p">:</span> <span class="p">[</span>
<span class="s2">&quot;page&quot;</span><span class="p">,</span>
<span class="s2">&quot;language&quot;</span><span class="p">,</span>
<span class="s2">&quot;user&quot;</span><span class="p">,</span>
<span class="s2">&quot;unpatrolled&quot;</span><span class="p">,</span>
<span class="s2">&quot;newPage&quot;</span><span class="p">,</span>
<span class="s2">&quot;robot&quot;</span><span class="p">,</span>
<span class="s2">&quot;anonymous&quot;</span><span class="p">,</span>
<span class="s2">&quot;namespace&quot;</span><span class="p">,</span>
<span class="s2">&quot;continent&quot;</span><span class="p">,</span>
<span class="s2">&quot;country&quot;</span><span class="p">,</span>
<span class="s2">&quot;region&quot;</span><span class="p">,</span>
<span class="s2">&quot;city&quot;</span><span class="p">,</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;string&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;comment&quot;</span><span class="p">,</span>
<span class="nt">&quot;createBitmapIndex&quot;</span><span class="p">:</span> <span class="kc">false</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;long&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;countryNum&quot;</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;float&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;userLatitude&quot;</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="nt">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;float&quot;</span><span class="p">,</span>
<span class="nt">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;userLongitude&quot;</span>
<span class="p">}</span>
<span class="p">],</span>
<span class="nt">&quot;dimensionExclusions&quot;</span> <span class="p">:</span> <span class="p">[],</span>
<span class="nt">&quot;spatialDimensions&quot;</span> <span class="p">:</span> <span class="p">[]</span>
<span class="p">}</span>
</code></pre></div>
<h2 id="metricsspec">metricsSpec</h2>
<p>The <code>metricsSpec</code> is a list of <a href="../querying/aggregations.html">aggregators</a>. If <code>rollup</code> is false in the granularity spec, the metrics spec should be an empty list and all columns should be defined in the <code>dimensionsSpec</code> instead (without rollup, there isn&#39;t a real distinction between dimensions and metrics at ingestion time). This is optional, however.</p>
<h2 id="granularityspec">GranularitySpec</h2>
<p>The GranularitySpec defines how to partition a dataSource into <a href="../design/index.html#datasources-and-segments">time chunks</a>.
The default granularitySpec is <code>uniform</code>, and can be changed by setting the <code>type</code> field.
Currently, <code>uniform</code> and <code>arbitrary</code> types are supported.</p>
<h3 id="uniform-granularity-spec">Uniform Granularity Spec</h3>
<p>This spec is used to generate segments with uniform intervals.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>segmentGranularity</td>
<td>string</td>
<td>The granularity to create time chunks at. Multiple segments can be created per time chunk. For example, with &#39;DAY&#39; <code>segmentGranularity</code>, the events of the same day fall into the same time chunk which can be optionally further partitioned into multiple segments based on other configurations and input size. See <a href="../querying/granularities.html">Granularity</a> for supported granularities.</td>
<td>no (default == &#39;DAY&#39;)</td>
</tr>
<tr>
<td>queryGranularity</td>
<td>string</td>
<td>The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of &quot;minute&quot; will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of &#39;NONE&#39; means millisecond granularity. See <a href="../querying/granularities.html">Granularity</a> for supported granularities.</td>
<td>no (default == &#39;NONE&#39;)</td>
</tr>
<tr>
<td>rollup</td>
<td>boolean</td>
<td>rollup or not</td>
<td>no (default == true)</td>
</tr>
<tr>
<td>intervals</td>
<td>JSON string array</td>
<td>A list of intervals for the raw data being ingested. Ignored for real-time ingestion.</td>
<td>no. If specified, Hadoop and native non-parallel batch ingestion tasks may skip the determining partitions phase, which results in faster ingestion; native parallel ingestion tasks can request all their locks up-front instead of one by one. Batch ingestion will throw away any data not in the specified intervals.</td>
</tr>
</tbody></table>
<h3 id="arbitrary-granularity-spec">Arbitrary Granularity Spec</h3>
<p>This spec is used to generate segments with arbitrary intervals (it tries to create evenly sized segments). This spec is not supported for real-time processing.</p>
<table><thead>
<tr>
<th>Field</th>
<th>Type</th>
<th>Description</th>
<th>Required</th>
</tr>
</thead><tbody>
<tr>
<td>queryGranularity</td>
<td>string</td>
<td>The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of &quot;minute&quot; will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of &#39;NONE&#39; means millisecond granularity. See <a href="../querying/granularities.html">Granularity</a> for supported granularities.</td>
<td>no (default == &#39;NONE&#39;)</td>
</tr>
<tr>
<td>rollup</td>
<td>boolean</td>
<td>rollup or not</td>
<td>no (default == true)</td>
</tr>
<tr>
<td>intervals</td>
<td>JSON string array</td>
<td>A list of intervals for the raw data being ingested. Ignored for real-time ingestion.</td>
<td>no. If specified, Hadoop and native non-parallel batch ingestion tasks may skip the determining partitions phase, which results in faster ingestion; native parallel ingestion tasks can request all their locks up-front instead of one by one. Batch ingestion will throw away any data not in the specified intervals.</td>
</tr>
</tbody></table>
<h1 id="transform-spec">Transform Spec</h1>
<p>Transform specs allow Druid to transform and filter input data during ingestion. See <a href="../ingestion/transform-spec.html">Transform specs</a>.</p>
<h1 id="io-config">IO Config</h1>
<p>The IOConfig spec differs based on the ingestion task type.</p>
<ul>
<li>Native Batch ingestion: See <a href="../ingestion/native_tasks.html#ioconfig">Native Batch IOConfig</a></li>
<li>Hadoop Batch ingestion: See <a href="../ingestion/hadoop.html#ioconfig">Hadoop Batch IOConfig</a></li>
<li>Kafka Indexing Service: See <a href="../development/extensions-core/kafka-ingestion.html#KafkaSupervisorIOConfig">Kafka Supervisor IOConfig</a></li>
<li>Stream Push Ingestion: Stream push ingestion with Tranquility does not require an IO Config.</li>
<li>Stream Pull Ingestion (Deprecated): See <a href="../ingestion/stream-pull.html#ioconfig">Stream pull ingestion</a>.</li>
</ul>
<h1 id="tuning-config">Tuning Config</h1>
<p>The TuningConfig spec differs based on the ingestion task type.</p>
<ul>
<li>Native Batch ingestion: See <a href="../ingestion/native_tasks.html#tuningconfig">Native Batch TuningConfig</a></li>
<li>Hadoop Batch ingestion: See <a href="../ingestion/hadoop.html#tuningconfig">Hadoop Batch TuningConfig</a></li>
<li>Kafka Indexing Service: See <a href="../development/extensions-core/kafka-ingestion.html#KafkaSupervisorTuningConfig">Kafka Supervisor TuningConfig</a></li>
<li>Stream Push Ingestion (Tranquility): See <a href="http://static.druid.io/tranquility/api/latest/#com.metamx.tranquility.druid.DruidTuning">Tranquility TuningConfig</a>.</li>
<li>Stream Pull Ingestion (Deprecated): See <a href="../ingestion/stream-pull.html#tuningconfig">Stream pull ingestion</a>.</li>
</ul>
<h1 id="evaluating-timestamp-dimensions-and-metrics">Evaluating Timestamp, Dimensions and Metrics</h1>
<p>Druid will interpret dimensions, dimension exclusions, and metrics in the following order:</p>
<ul>
<li>Any column listed in the list of dimensions is treated as a dimension.</li>
<li>Any column listed in the list of dimension exclusions is excluded as a dimension.</li>
<li>The timestamp column and columns/fieldNames required by metrics are excluded by default.</li>
<li>If a metric is also listed as a dimension, the metric must have a different name than the dimension name.</li>
</ul>
</div>
<div class="col-md-3">
<div class="searchbox">
<gcse:searchbox-only></gcse:searchbox-only>
</div>
<div id="toc" class="nav toc hidden-print">
</div>
</div>
</div>
</div>
<!-- Start page_footer include -->
<footer class="druid-footer">
<div class="container">
<div class="text-center">
<p>
<a href="/technology">Technology</a>&ensp;·&ensp;
<a href="/use-cases">Use Cases</a>&ensp;·&ensp;
<a href="/druid-powered">Powered by Druid</a>&ensp;·&ensp;
<a href="/docs/latest/">Docs</a>&ensp;·&ensp;
<a href="/community/">Community</a>&ensp;·&ensp;
<a href="/downloads.html">Download</a>&ensp;·&ensp;
<a href="/faq">FAQ</a>
</p>
</div>
<div class="text-center">
<a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a>&ensp;·&ensp;
<a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a>&ensp;·&ensp;
<a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a>
</div>
<div class="text-center license">
Copyright © 2020 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br>
Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br>
Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
</div>
</div>
</footer>
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>
// Google tag bootstrap. Statement order matters: the queue must exist before
// gtag() is called.
// Reuse an existing dataLayer queue if one is already defined; otherwise
// start with an empty array.
window.dataLayer = window.dataLayer || [];
// Queue a command by pushing the raw `arguments` object (not a copy) onto
// dataLayer. NOTE(review): presumably the gtag.js script loaded just above
// consumes these entries in this exact form — confirm before refactoring.
function gtag(){dataLayer.push(arguments);}
// Record the current time as the 'js' command.
gtag('js', new Date());
// Configure the property with ID UA-131010415-1.
gtag('config', 'UA-131010415-1');
</script>
<script>
/**
 * Report a download as an analytics event.
 *
 * The page loads gtag.js, which does not define the legacy analytics.js
 * `ga()` function, so calling `ga` unconditionally throws a ReferenceError.
 * Use `ga` when some other script has defined it, otherwise send the
 * equivalent event through `gtag`. If neither is available (for example,
 * blocked by the browser), do nothing so the download itself is unaffected.
 *
 * @param {string} type - Kind of download; sent as the event action.
 * @param {string} url - Download URL; sent as the event label.
 */
function trackDownload(type, url) {
  if (typeof ga === 'function') {
    ga('send', 'event', 'download', type, url);
  } else if (typeof gtag === 'function') {
    // Same category/action/label triple expressed in gtag.js form.
    gtag('event', type, {
      event_category: 'download',
      event_label: url
    });
  }
}
</script>
<script src="//code.jquery.com/jquery.min.js"></script>
<script src="//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js"></script>
<script src="/assets/js/druid.js"></script>
<!-- stop page_footer include -->
<script>
// On DOM ready: fill the table-of-contents container and give the search
// input a placeholder once it appears.
$(function() {
  var POLL_INTERVAL_MS = 100;
  var MAX_ATTEMPTS = 300;
  var attempts = 0;
  var pollId;

  // The .gsc-input element is injected asynchronously and there is no way to
  // tell when it arrives, so keep looking for it on a timer.
  function applyPlaceholder() {
    attempts += 1;
    if (attempts > MAX_ATTEMPTS) {
      // Give up on future polls; this final attempt still runs below.
      clearInterval(pollId);
    }
    var $input = $('input.gsc-input');
    if ($input.length) {
      $input.attr('placeholder', 'Search');
      clearInterval(pollId);
    }
  }

  $(".toc").load("/docs/0.14.1-incubating/toc.html");
  pollId = setInterval(applyPlaceholder, POLL_INTERVAL_MS);
});
</script>
</body>
</html>