blob: ff2e8f52adcaed2b40353381735431d20ec872e5 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-development/extensions-contrib/cloudfiles">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.1">
<title data-rh="true">Rackspace Cloud Files | Apache® Druid</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" name="twitter:image" content="https://druid.apache.org/img/druid_nav.png"><meta data-rh="true" property="og:url" content="https://druid.apache.org/docs/28.0.0/development/extensions-contrib/cloudfiles"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Rackspace Cloud Files | Apache® Druid"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/img/favicon.png"><link data-rh="true" rel="canonical" href="https://druid.apache.org/docs/28.0.0/development/extensions-contrib/cloudfiles"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/28.0.0/development/extensions-contrib/cloudfiles" hreflang="en"><link data-rh="true" rel="alternate" href="https://druid.apache.org/docs/28.0.0/development/extensions-contrib/cloudfiles" hreflang="x-default"><link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","UA-131010415-1",{})</script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js"></script><link rel="stylesheet" href="/assets/css/styles.546f39eb.css">
<link rel="preload" href="/assets/js/runtime~main.0dcbfdea.js" as="script">
<link rel="preload" href="/assets/js/main.7f6fdf81.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top navbar--dark"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/druid_nav.png" alt="Apache® Druid" class="themedImage_ToTc themedImage--dark_i4oU"></div></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/technology">Technology</a><a class="navbar__item navbar__link" href="/use-cases">Use Cases</a><a class="navbar__item navbar__link" href="/druid-powered">Powered By</a><a class="navbar__item navbar__link" href="/docs/28.0.0/design/">Docs</a><a class="navbar__item navbar__link" href="/community/">Community</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache®</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://apachecon.com/?ref=druid.apache.org" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/downloads/">Download</a><div class="searchBox_ZlJk"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><main class="docMainContainer_gTbr docMainContainerEnhanced_Uz_u"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Rackspace Cloud Files</h1></header><p>To use this Apache Druid extension, <a href="/docs/28.0.0/configuration/extensions#loading-extensions">include</a> <code>druid-cloudfiles-extensions</code> in the extensions load list.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="deep-storage">Deep Storage<a href="#deep-storage" class="hash-link" aria-label="Direct link to Deep Storage" title="Direct link to Deep Storage"></a></h2><p><a href="http://www.rackspace.com/cloud/files/" target="_blank" rel="noopener noreferrer">Rackspace Cloud Files</a> is another option for deep storage. This requires some additional Druid configuration.</p><table><thead><tr><th>Property</th><th>Possible Values</th><th>Description</th><th>Default</th></tr></thead><tbody><tr><td><code>druid.storage.type</code></td><td>cloudfiles</td><td></td><td>Must be set.</td></tr><tr><td><code>druid.storage.region</code></td><td></td><td>Rackspace Cloud Files region.</td><td>Must be set.</td></tr><tr><td><code>druid.storage.container</code></td><td></td><td>Rackspace Cloud Files container name.</td><td>Must be set.</td></tr><tr><td><code>druid.storage.basePath</code></td><td></td><td>Rackspace Cloud Files base path to use in the container.</td><td>Must be set.</td></tr><tr><td><code>druid.storage.operationMaxRetries</code></td><td></td><td>Number of tries before cancel a Rackspace operation.</td><td>10</td></tr><tr><td><code>druid.cloudfiles.userName</code></td><td></td><td>Rackspace Cloud username</td><td>Must be set.</td></tr><tr><td><code>druid.cloudfiles.apiKey</code></td><td></td><td>Rackspace Cloud API key.</td><td>Must be set.</td></tr><tr><td><code>druid.cloudfiles.provider</code></td><td>rackspace-cloudfiles-us,rackspace-cloudfiles-uk</td><td>Name of the provider depending on the region.</td><td>Must be set.</td></tr><tr><td><code>druid.cloudfiles.useServiceNet</code></td><td>true,false</td><td>Whether to use the internal service net.</td><td>true</td></tr></tbody></table><h2 class="anchor anchorWithStickyNavbar_LWe7" id="firehose">Firehose<a href="#firehose" class="hash-link" aria-label="Direct link to Firehose" title="Direct link to Firehose"></a></h2><a name="firehose"></a><h4 class="anchor anchorWithStickyNavbar_LWe7" id="staticcloudfilesfirehose">StaticCloudFilesFirehose<a href="#staticcloudfilesfirehose" class="hash-link" aria-label="Direct link to StaticCloudFilesFirehose" title="Direct link to StaticCloudFilesFirehose"></a></h4><p>This firehose ingests events, similar to the StaticAzureBlobStoreFirehose, but from Rackspace&#x27;s Cloud Files.</p><p>Data is newline delimited, with one JSON object per line and parsed as per the <code>InputRowParser</code> configuration.</p><p>The storage account is shared with the one used for Rackspace&#x27;s Cloud Files deep storage functionality, but blobs can be in a different region and container.</p><p>As with the Azure blobstore, it is assumed to be gzipped if the extension ends in .gz</p><p>This firehose is <em>splittable</em> and can be used by <a href="/docs/28.0.0/ingestion/native-batch">native parallel index tasks</a>.
Since each split represents an object in this firehose, each worker task of <code>index_parallel</code> will read an object.</p><p>Sample spec:</p><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token property">&quot;firehose&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;type&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;static-cloudfiles&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;blobs&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;region&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;DFW&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;container&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;container&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;path&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;/path/to/your/file.json&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;region&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;ORD&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;container&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;anothercontainer&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">&quot;path&quot;</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;/another/path.json&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>This firehose provides caching and prefetching features. In IndexTask, a firehose can be read twice if intervals or
shardSpecs are not specified, and, in this case, caching can be useful. Prefetching is preferred when direct scan of objects is slow.</p><table><thead><tr><th>property</th><th>description</th><th>default</th><th>required?</th></tr></thead><tbody><tr><td>type</td><td>This should be <code>static-cloudfiles</code>.</td><td>N/A</td><td>yes</td></tr><tr><td>blobs</td><td>JSON array of Cloud Files blobs.</td><td>N/A</td><td>yes</td></tr><tr><td>maxCacheCapacityBytes</td><td>Maximum size of the cache space in bytes. 0 means disabling cache.</td><td>1073741824</td><td>no</td></tr><tr><td>maxCacheCapacityBytes</td><td>Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.</td><td>1073741824</td><td>no</td></tr><tr><td>maxFetchCapacityBytes</td><td>Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.</td><td>1073741824</td><td>no</td></tr><tr><td>fetchTimeout</td><td>Timeout for fetching a Cloud Files object.</td><td>60000</td><td>no</td></tr><tr><td>maxFetchRetry</td><td>Maximum retry for fetching a Cloud Files object.</td><td>3</td><td>no</td></tr></tbody></table><p>Cloud Files Blobs:</p><table><thead><tr><th>property</th><th>description</th><th>default</th><th>required?</th></tr></thead><tbody><tr><td>container</td><td>Name of the Cloud Files container</td><td>N/A</td><td>yes</td></tr><tr><td>path</td><td>The path where data is located.</td><td>N/A</td><td>yes</td></tr></tbody></table></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#deep-storage" class="table-of-contents__link toc-highlight">Deep Storage</a></li><li><a href="#firehose" class="table-of-contents__link toc-highlight">Firehose</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--light_HNdA footer__logo"><img src="/img/favicon.png" class="themedImage_ToTc themedImage--dark_i4oU footer__logo"></div><div class="footer__copyright">Copyright © 2023 Apache Software Foundation. Except where otherwise noted, licensed under CC BY-SA 4.0. Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.0dcbfdea.js"></script>
<script src="/assets/js/main.7f6fdf81.js"></script>
</body>
</html>