blob: 6983aaa2d9fcc468a4350f6f52a5a1bd65818b74 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2021-06-15
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.3.1 &#x2013; HDFS Provided Storage</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20210615" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xleft">
<a href="http://www.apache.org/" class="externalLink">Apache</a>
&gt;
<a href="http://hadoop.apache.org/" class="externalLink">Hadoop</a>
&gt;
<a href="../index.html">Apache Hadoop Project Dist POM</a>
&gt;
<a href="index.html">Apache Hadoop 3.3.1</a>
&gt;
HDFS Provided Storage
</div>
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2021-06-15
&nbsp;| Version: 3.3.1
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-openstack/index.html">OpenStack Swift</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>HDFS Provided Storage</h1>
<p>Provided storage allows data <i>stored outside HDFS</i> to be mapped to and addressed from HDFS. It builds on <a href="./ArchivalStorage.html">heterogeneous storage</a> by introducing a new storage type, <tt>PROVIDED</tt>, to the set of media in a datanode. Clients accessing data in <tt>PROVIDED</tt> storages can cache replicas in local media, enforce HDFS invariants (e.g., security, quotas), and address more data than the cluster could persist in the storage attached to DataNodes. This architecture is particularly useful in scenarios where HDFS clusters are ephemeral (e.g., cloud scenarios), and/or require to read data that lives in other storage systems (e.g., blob stores).</p>
<p>Provided storage is an experimental feature in HDFS.</p>
<ul>
<li><a href="#Introduction">Introduction</a></li>
<li><a href="#Creating_HDFS_Clusters_with_PROVIDED_Storage">Creating HDFS Clusters with PROVIDED Storage</a>
<ul>
<li><a href="#PROVIDED_Configuration">PROVIDED Configuration</a></li>
<li><a href="#fs2img_tool">fs2img tool</a>
<ul>
<li><a href="#Examples">Examples</a></li></ul></li></ul></li>
<li><a href="#Alias_Map_Implementations">Alias Map Implementations</a>
<ul>
<li><a href="#InMemoryAliasMap">InMemoryAliasMap</a>
<ul>
<li><a href="#Configuration">Configuration</a></li></ul></li>
<li><a href="#TextFileRegionAliasMap">TextFileRegionAliasMap</a>
<ul>
<li><a href="#Configuration">Configuration</a></li></ul></li></ul></li></ul>
<div class="section">
<h2><a name="Introduction"></a>Introduction</h2>
<p>As of this writing, support for mounting external storage as <tt>PROVIDED</tt> blocks is limited to creating a <i>read-only image</i> of a remote namespace that implements the <tt>org.apache.hadoop.fs.FileSystem</tt> interface, and starting a NameNode to serve the image. Specifically, reads from a snapshot of a remote namespace are supported. Adding a remote namespace to an existing/running namenode, refreshing the remote snapshot, unmounting, and writes are not available in this release. One can use <a href="./ViewFs.html">ViewFs</a> and <a href="../hadoop-hdfs-rbf/HDFSRouterFederation.html">RBF</a> to integrate namespaces with <tt>PROVIDED</tt> storage into an existing deployment.</p></div>
<div class="section">
<h2><a name="Creating_HDFS_Clusters_with_PROVIDED_Storage"></a>Creating HDFS Clusters with <tt>PROVIDED</tt> Storage</h2>
<p>One can create snapshots of the remote namespace using the <tt>fs2img</tt> tool. Given a path to a remote <tt>FileSystem</tt>, the tool creates an <i>image</i> mirroring the namespace and an <i>alias map</i> that maps blockIDs in the generated image to a <tt>FileRegion</tt> in the remote filesystem. A <tt>FileRegion</tt> contains sufficient information to address a fixed sequence of bytes in the remote <tt>FileSystem</tt> (e.g., file, offset, length) and a nonce to verify that the region is unchanged since the image was generated.</p>
<p>After the NameNode image and alias map are created, the NameNode and DataNodes must be configured to consistently reference this address space. When a DataNode registers with an attached, <tt>PROVIDED</tt> storage, the NameNode considers all the external blocks to be addressable through that DataNode, and may begin to direct clients to it. Symmetrically, the DataNode must be able to map every block in the <tt>PROVIDED</tt> storage to remote data.</p>
<p>Deployment details vary depending on the configured alias map implementation.</p>
<div class="section">
<h3><a name="PROVIDED_Configuration"></a><tt>PROVIDED</tt> Configuration</h3>
<p>Each NameNode supports one alias map. When <tt>PROVIDED</tt> storage is enabled, the storage ID configured on the NameNode and DataNodes must match. All other details are internal to the alias map implementation.</p>
<p>The configuration to enable <tt>PROVIDED</tt> storage is as follows. The configuration options available for the alias map implementations are available below.</p>
<div>
<div>
<pre class="source">&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;dfs.namenode.provided.enabled&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;description&gt;Enabled provided storage on the Namenode&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.datanode.data.dir&lt;/name&gt;
&lt;value&gt;[DISK]/local/path/to/blocks/, [PROVIDED]remoteFS://remoteFS-authority/path/to/data/&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.storage.id&lt;/name&gt;
&lt;value&gt;DS-PROVIDED&lt;/value&gt;
&lt;description&gt;The storage ID used for provided storages in the cluster.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.class&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.TextFileRegionAliasMap&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
</div>
<div class="section">
<h3><a name="fs2img_tool"></a>fs2img tool</h3>
<p>The <tt>fs2img</tt> tool &#x201c;walks&#x201d; over a remote namespace by recursively enumerating children of a remote URI to produce an FSImage. Some attributes can be controlled by plugins, such as owner/group mappings from the remote filesystem to HDFS and the mapping of files to HDFS blocks.</p>
<p>The various options available in running the tool are:</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Option </th>
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <tt>-o</tt>, <tt>--outdir</tt> </td>
<td align="left"> dfs.namenode.name.dir </td>
<td align="left"> <a class="externalLink" href="file://${hadoop.tmp.dir}/dfs/name">file://${hadoop.tmp.dir}/dfs/name</a> </td>
<td align="left"> Output directory </td></tr>
<tr class="a">
<td align="left"> <tt>-b</tt>, <tt>--blockclass</tt> </td>
<td align="left"> dfs.provided.aliasmap.class </td>
<td align="left"> NullBlocksMap </td>
<td align="left"> Block output class </td></tr>
<tr class="b">
<td align="left"> <tt>-u</tt>, <tt>--ugiclass</tt> </td>
<td align="left"> hdfs.image.writer.ugi.class </td>
<td align="left"> SingleUGIResolver </td>
<td align="left"> UGI resolver class </td></tr>
<tr class="a">
<td align="left"> <tt>-i</tt>, <tt>--blockidclass</tt> </td>
<td align="left"> hdfs.image.writer.blockresolver.class </td>
<td align="left"> FixedBlockResolver </td>
<td align="left"> Block resolver class </td></tr>
<tr class="b">
<td align="left"> <tt>-c</tt>, <tt>--cachedirs</tt> </td>
<td align="left"> hdfs.image.writer.cache.entries </td>
<td align="left"> 100 </td>
<td align="left"> Max active dirents </td></tr>
<tr class="a">
<td align="left"> <tt>-cid</tt>, <tt>--clusterID</tt> </td>
<td align="left"> </td>
<td align="left"> </td>
<td align="left"> Cluster ID </td></tr>
<tr class="b">
<td align="left"> <tt>-bpid</tt>, <tt>--blockPoolID</tt></td>
<td align="left"> </td>
<td align="left"> </td>
<td align="left"> Block pool ID </td></tr>
</tbody>
</table>
<div class="section">
<h4><a name="Examples"></a>Examples</h4>
<p>Assign all files to be owned by &#x201c;rmarathe&#x201d;, write to gzip compressed text:</p>
<div>
<div>
<pre class="source">hadoop org.apache.hadoop.hdfs.server.namenode.FileSystemImage \
-Dhdfs.image.writer.ugi.single.user=rmarathe \
-Ddfs.provided.aliasmap.text.codec=gzip \
-Ddfs.provided.aliasmap.text.write.dir=file:///tmp/
-b org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.TextFileRegionAliasMap \
-u org.apache.hadoop.hdfs.server.namenode.SingleUGIResolver \
-o file:///tmp/name \
hdfs://afreast/projects/ydau/onan
</pre></div></div>
<p>Assign ownership based on a custom <tt>UGIResolver</tt>, in LevelDB:</p>
<div>
<div>
<pre class="source">hadoop org.apache.hadoop.hdfs.server.namenode.FileSystemImage \
-Ddfs.provided.aliasmap.leveldb.path=/path/to/leveldb/map/dingos.db \
-b org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.LevelDBFileRegionAliasMap \
-o file:///tmp/name \
-u CustomResolver \
hdfs://enfield/projects/ywqmd/incandenza
</pre></div></div>
</div></div></div>
<div class="section">
<h2><a name="Alias_Map_Implementations"></a>Alias Map Implementations</h2>
<p>The alias map implementation to use is configured using the <tt>dfs.provided.aliasmap.class</tt> parameter. Currently, the following two types of alias maps are supported.</p>
<div class="section">
<h3><a name="InMemoryAliasMap"></a>InMemoryAliasMap</h3>
<p>This is a LevelDB-based alias map that runs as a separate server in Namenode. The alias map itself can be created using the <tt>fs2img</tt> tool using the option <tt>-Ddfs.provided.aliasmap.leveldb.path=file:///path/to/leveldb/map/dingos.db -b org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.LevelDBFileRegionAliasMap</tt> as in the example above.</p>
<p>Datanodes contact this alias map using the <tt>org.apache.hadoop.hdfs.server.aliasmap.InMemoryAliasMapProtocol</tt> protocol.</p>
<div class="section">
<h4><a name="Configuration"></a>Configuration</h4>
<div>
<div>
<pre class="source">&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.inmemory.batch-size&lt;/name&gt;
&lt;value&gt;500&lt;/value&gt;
&lt;description&gt;
The batch size when iterating over the database backing the aliasmap
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.inmemory.dnrpc-address&lt;/name&gt;
&lt;value&gt;namenode:rpc-port&lt;/value&gt;
&lt;description&gt;
The address where the aliasmap server will be running
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.inmemory.leveldb.dir&lt;/name&gt;
&lt;value&gt;/path/to/leveldb/map/dingos.db&lt;/value&gt;
&lt;description&gt;
The directory where the leveldb files will be kept
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.inmemory.enabled&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;description&gt;Enable the inmemory alias map on the NameNode. Defaults to false.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.class&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.InMemoryLevelDBAliasMapClient&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
</div></div>
<div class="section">
<h3><a name="TextFileRegionAliasMap"></a>TextFileRegionAliasMap</h3>
<p>This alias map implementation stores the mapping from <tt>blockID</tt>s to <tt>FileRegion</tt>s in a delimited text file. This format is useful for test environments, particularly single-node.</p>
<div class="section">
<h4><a name="Configuration"></a>Configuration</h4>
<div>
<div>
<pre class="source">&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.text.delimiter&lt;/name&gt;
&lt;value&gt;,&lt;/value&gt;
&lt;description&gt;
The delimiter used when the alias map is specified as
a text file.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.text.read.file&lt;/name&gt;
&lt;value&gt;file:///path/to/aliasmap/blocks_blocPoolID.csv&lt;/value&gt;
&lt;description&gt;
The path specifying the alias map as a text file,
specified as a URI.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.text.codec&lt;/name&gt;
&lt;value&gt;&lt;/value&gt;
&lt;description&gt;
The codec used to de-compress the alias map. Default value is empty.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.provided.aliasmap.text.write.dir&lt;/name&gt;
&lt;value&gt;file:///path/to/aliasmap/&lt;/value&gt;
&lt;description&gt;
The path to which the alias map should be written as a text
file, specified as a URI.
&lt;/description&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div></div></div></div>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2021
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>