blob: a826135d09f23cb2652b97ce77f9bc0db5bf8206 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Apache Druid">
<meta name="keywords" content="druid,kafka,database,analytics,streaming,real-time,real time,apache,open source">
<meta name="author" content="Apache Software Foundation">
<title>Druid | Realtime Process Configuration</title>
<link rel="alternate" type="application/atom+xml" href="/feed">
<link rel="shortcut icon" href="/img/favicon.png">
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css" integrity="sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr" crossorigin="anonymous">
<link href='//fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700,300italic|Open+Sans:300italic,400italic,600italic,400,300,600,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="/css/bootstrap-pure.css?v=1.1">
<link rel="stylesheet" href="/css/base.css?v=1.1">
<link rel="stylesheet" href="/css/header.css?v=1.1">
<link rel="stylesheet" href="/css/footer.css?v=1.1">
<link rel="stylesheet" href="/css/syntax.css?v=1.1">
<link rel="stylesheet" href="/css/docs.css?v=1.1">
<script>
(function() {
var cx = '000162378814775985090:molvbm0vggm';
var gcse = document.createElement('script');
gcse.type = 'text/javascript';
gcse.async = true;
gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
'//cse.google.com/cse.js?cx=' + cx;
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(gcse, s);
})();
</script>
</head>
<body>
<!-- Start page_header include -->
<script src="//ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
<div class="top-navigator">
<div class="container">
<div class="left-cont">
<a class="logo" href="/"><span class="druid-logo"></span></a>
</div>
<div class="right-cont">
<ul class="links">
<li class=""><a href="/technology">Technology</a></li>
<li class=""><a href="/use-cases">Use Cases</a></li>
<li class=""><a href="/druid-powered">Powered By</a></li>
<li class=""><a href="/docs/latest/design/">Docs</a></li>
<li class=""><a href="/community/">Community</a></li>
<li class="header-dropdown">
<a>Apache</a>
<div class="header-dropdown-menu">
<a href="https://www.apache.org/" target="_blank">Foundation</a>
<a href="https://www.apache.org/events/current-event" target="_blank">Events</a>
<a href="https://www.apache.org/licenses/" target="_blank">License</a>
<a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a>
<a href="https://www.apache.org/security/" target="_blank">Security</a>
<a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a>
</div>
</li>
<li class=" button-link"><a href="/downloads.html">Download</a></li>
</ul>
</div>
</div>
<div class="action-button menu-icon">
<span class="fa fa-bars"></span> MENU
</div>
<div class="action-button menu-icon-close">
<span class="fa fa-times"></span> MENU
</div>
</div>
<script type="text/javascript">
var $menu = $('.right-cont');
var $menuIcon = $('.menu-icon');
var $menuIconClose = $('.menu-icon-close');
function showMenu() {
$menu.fadeIn(100);
$menuIcon.fadeOut(100);
$menuIconClose.fadeIn(100);
}
$menuIcon.click(showMenu);
function hideMenu() {
$menu.fadeOut(100);
$menuIconClose.fadeOut(100);
$menuIcon.fadeIn(100);
}
$menuIconClose.click(hideMenu);
$(window).resize(function() {
if ($(window).width() >= 840) {
$menu.fadeIn(100);
$menuIcon.fadeOut(100);
$menuIconClose.fadeOut(100);
}
else {
$menu.fadeOut(100);
$menuIcon.fadeIn(100);
$menuIconClose.fadeOut(100);
}
});
</script>
<!-- Stop page_header include -->
<div class="container doc-container">
<p> Looking for the <a href="/docs/0.23.0/">latest stable documentation</a>?</p>
<div class="row">
<div class="col-md-9 doc-content">
<p>
<a class="btn btn-default btn-xs visible-xs-inline-block visible-sm-inline-block" href="#toc">Table of Contents</a>
</p>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<h1 id="realtime-process-configuration">Realtime Process Configuration</h1>
<p>For general Apache Druid (incubating) Realtime Process information, see <a href="../design/realtime.html">here</a>.</p>
<h2 id="runtime-configuration">Runtime Configuration</h2>
<p>The realtime process uses several of the global configs in <a href="../configuration/index.html">Configuration</a> and has the following set of configurations as well:</p>
<h3 id="process-config">Process Config</h3>
<table><thead>
<tr>
<th>Property</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead><tbody>
<tr>
<td><code>druid.host</code></td>
<td>The host for the current process. This is used to advertise the current processes location as reachable from another process and should generally be specified such that <code>http://${druid.host}/</code> could actually talk to this process</td>
<td>InetAddress.getLocalHost().getCanonicalHostName()</td>
</tr>
<tr>
<td><code>druid.plaintextPort</code></td>
<td>This is the port to actually listen on; unless port mapping is used, this will be the same port as is on <code>druid.host</code></td>
<td>8084</td>
</tr>
<tr>
<td><code>druid.tlsPort</code></td>
<td>TLS port for HTTPS connector, if <a href="../operations/tls-support.html">druid.enableTlsPort</a> is set then this config will be used. If <code>druid.host</code> contains port then that port will be ignored. This should be a non-negative Integer.</td>
<td>8284</td>
</tr>
<tr>
<td><code>druid.service</code></td>
<td>The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services</td>
<td>druid/realtime</td>
</tr>
</tbody></table>
<h3 id="realtime-operation">Realtime Operation</h3>
<table><thead>
<tr>
<th>Property</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead><tbody>
<tr>
<td><code>druid.publish.type</code></td>
<td>Where to publish segments. Choices are &quot;noop&quot; or &quot;metadata&quot;.</td>
<td>metadata</td>
</tr>
<tr>
<td><code>druid.realtime.specFile</code></td>
<td>File location of realtime specFile.</td>
<td>none</td>
</tr>
</tbody></table>
<h3 id="storing-intermediate-segments">Storing Intermediate Segments</h3>
<table><thead>
<tr>
<th>Property</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead><tbody>
<tr>
<td><code>druid.segmentCache.locations</code></td>
<td>Where intermediate segments are stored. The maxSize should always be zero.</td>
<td>none</td>
</tr>
</tbody></table>
<h3 id="query-configs">Query Configs</h3>
<h4 id="processing">Processing</h4>
<table><thead>
<tr>
<th>Property</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead><tbody>
<tr>
<td><code>druid.processing.buffer.sizeBytes</code></td>
<td>This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.</td>
<td>auto (max 1GB)</td>
</tr>
<tr>
<td><code>druid.processing.formatString</code></td>
<td>Realtime and Historical processes use this format string to name their processing threads.</td>
<td>processing-%s</td>
</tr>
<tr>
<td><code>druid.processing.numMergeBuffers</code></td>
<td>The number of direct memory buffers available for merging query results. The buffers are sized by <code>druid.processing.buffer.sizeBytes</code>. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.</td>
<td><code>max(2, druid.processing.numThreads / 4)</code></td>
</tr>
<tr>
<td><code>druid.processing.numThreads</code></td>
<td>The number of processing threads to have available for parallel processing of segments. Our rule of thumb is <code>num_cores - 1</code>, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value <code>1</code>.</td>
<td>Number of cores - 1 (or 1)</td>
</tr>
<tr>
<td><code>druid.processing.columnCache.sizeBytes</code></td>
<td>Maximum size in bytes for the dimension value lookup cache. Any value greater than <code>0</code> enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.</td>
<td><code>0</code> (disabled)</td>
</tr>
<tr>
<td><code>druid.processing.tmpDir</code></td>
<td>Path where temporary files created while processing a query should be stored. If specified, this configuration takes priority over the default <code>java.io.tmpdir</code> path.</td>
<td>path represented by <code>java.io.tmpdir</code></td>
</tr>
</tbody></table>
<p>The amount of direct memory needed by Druid is at least
<code>druid.processing.buffer.sizeBytes * (druid.processing.numMergeBuffers + druid.processing.numThreads + 1)</code>. You can
ensure at least this amount of direct memory is available by providing <code>-XX:MaxDirectMemorySize=&lt;VALUE&gt;</code> at the command
line.</p>
<h4 id="general-query-configuration">General Query Configuration</h4>
<h5 id="groupby-query-config">GroupBy Query Config</h5>
<p>See <a href="../querying/groupbyquery.html#server-configuration">groupBy server configuration</a>.</p>
<h5 id="search-query-config">Search Query Config</h5>
<table><thead>
<tr>
<th>Property</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead><tbody>
<tr>
<td><code>druid.query.search.maxSearchLimit</code></td>
<td>Maximum number of search results to return.</td>
<td>1000</td>
</tr>
</tbody></table>
<h3 id="caching">Caching</h3>
<p>You can optionally configure caching to be enabled on the realtime process by setting caching configs here.</p>
<table><thead>
<tr>
<th>Property</th>
<th>Possible Values</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead><tbody>
<tr>
<td><code>druid.realtime.cache.useCache</code></td>
<td>true, false</td>
<td>Enable the cache on the realtime.</td>
<td>false</td>
</tr>
<tr>
<td><code>druid.realtime.cache.populateCache</code></td>
<td>true, false</td>
<td>Populate the cache on the realtime.</td>
<td>false</td>
</tr>
<tr>
<td><code>druid.realtime.cache.unCacheable</code></td>
<td>All druid query types</td>
<td>All query types to not cache.</td>
<td><code>[&quot;select&quot;]</code></td>
</tr>
<tr>
<td><code>druid.realtime.cache.maxEntrySize</code></td>
<td>positive integer or -1</td>
<td>Maximum size of an individual cache entry (processed results for one segment), in bytes, or -1 for unlimited.</td>
<td><code>1000000</code> (1MB)</td>
</tr>
</tbody></table>
<p>See <a href="caching.html">cache configuration</a> for how to configure cache settings.</p>
</div>
<div class="col-md-3">
<div class="searchbox">
<gcse:searchbox-only></gcse:searchbox-only>
</div>
<div id="toc" class="nav toc hidden-print">
</div>
</div>
</div>
</div>
<!-- Start page_footer include -->
<footer class="druid-footer">
<div class="container">
<div class="text-center">
<p>
<a href="/technology">Technology</a>&ensp;·&ensp;
<a href="/use-cases">Use Cases</a>&ensp;·&ensp;
<a href="/druid-powered">Powered by Druid</a>&ensp;·&ensp;
<a href="/docs/latest/">Docs</a>&ensp;·&ensp;
<a href="/community/">Community</a>&ensp;·&ensp;
<a href="/downloads.html">Download</a>&ensp;·&ensp;
<a href="/faq">FAQ</a>
</p>
</div>
<div class="text-center">
<a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a>&ensp;·&ensp;
<a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a>&ensp;·&ensp;
<a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a>
</div>
<div class="text-center license">
Copyright © 2020 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br>
Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br>
Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
</div>
</div>
</footer>
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-131010415-1');
</script>
<script>
function trackDownload(type, url) {
ga('send', 'event', 'download', type, url);
}
</script>
<script src="//code.jquery.com/jquery.min.js"></script>
<script src="//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js"></script>
<script src="/assets/js/druid.js"></script>
<!-- stop page_footer include -->
<script>
$(function() {
$(".toc").load("/docs/0.14.1-incubating/toc.html");
// There is no way to tell when .gsc-input will be async loaded into the page so just try to set a placeholder until it works
var tries = 0;
var timer = setInterval(function() {
tries++;
if (tries > 300) clearInterval(timer);
var searchInput = $('input.gsc-input');
if (searchInput.length) {
searchInput.attr('placeholder', 'Search');
clearInterval(timer);
}
}, 100);
});
</script>
</body>
</html>