blob: ebc7e34918dd47f9dab81ec3e8eb8564c8742a24 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2021-06-15
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.3.1 &#x2013; Offline Image Viewer Guide</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20210615" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xleft">
<a href="http://www.apache.org/" class="externalLink">Apache</a>
&gt;
<a href="http://hadoop.apache.org/" class="externalLink">Hadoop</a>
&gt;
<a href="../index.html">Apache Hadoop Project Dist POM</a>
&gt;
<a href="index.html">Apache Hadoop 3.3.1</a>
&gt;
Offline Image Viewer Guide
</div>
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2021-06-15
&nbsp;| Version: 3.3.1
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-openstack/index.html">OpenStack Swift</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>Offline Image Viewer Guide</h1>
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#Usage">Usage</a>
<ul>
<li><a href="#Web_Processor">Web Processor</a></li>
<li><a href="#XML_Processor">XML Processor</a></li>
<li><a href="#ReverseXML_Processor">ReverseXML Processor</a></li>
<li><a href="#FileDistribution_Processor">FileDistribution Processor</a></li>
<li><a href="#Delimited_Processor">Delimited Processor</a></li>
<li><a href="#DetectCorruption_Processor">DetectCorruption Processor</a></li></ul></li>
<li><a href="#Options">Options</a></li>
<li><a href="#Analyzing_Results">Analyzing Results</a></li>
<li><a href="#oiv_legacy_Command">oiv_legacy Command</a>
<ul>
<li><a href="#Usage">Usage</a></li>
<li><a href="#Options">Options</a></li></ul></li></ul>
<div class="section">
<h2><a name="Overview"></a>Overview</h2>
<p>The Offline Image Viewer is a tool to dump the contents of hdfs fsimage files to a human-readable format and provide a read-only WebHDFS API in order to allow offline analysis and examination of a Hadoop cluster&#x2019;s namespace. The tool is able to process very large image files relatively quickly. The tool handles the layout formats that were included with Hadoop versions 2.4 and up. If you want to handle older layout formats, you can use the Offline Image Viewer of Hadoop 2.3 or the <a href="#oiv_legacy_Command">oiv_legacy Command</a>. If the tool is not able to process an image file, it will exit cleanly. The Offline Image Viewer does not require a Hadoop cluster to be running; it is entirely offline in its operation.</p>
<p>The Offline Image Viewer provides several output processors:</p>
<ol style="list-style-type: decimal">
<li>
<p>Web is the default output processor. It launches an HTTP server that exposes a read-only WebHDFS API. Users can investigate the namespace interactively by using the HTTP REST API. It supports neither secure mode nor HTTPS.</p>
</li>
<li>
<p>XML creates an XML document of the fsimage and includes all of the information within the fsimage. The output of this processor is amenable to automated processing and analysis with XML tools. Due to the verbosity of the XML syntax, this processor will also generate the largest amount of output.</p>
</li>
<li>
<p>FileDistribution is the tool for analyzing file sizes in the namespace image. In order to run the tool one should define a range of integers [0, maxSize] by specifying maxSize and a step. The range of integers is divided into segments of size step: [0, s[1], &#x2026;, s[n-1], maxSize], and the processor calculates how many files in the system fall into each segment [s[i-1], s[i]). Note that files larger than maxSize always fall into the very last segment. By default, the output file is formatted as a tab-separated two-column table: Size and NumFiles, where Size represents the start of the segment and NumFiles is the number of files from the image whose size falls in this segment. By specifying the option -format, the output file will be formatted in a human-readable fashion rather than as the number of bytes shown in the Size column. In addition, the Size column will be changed to the Size Range column.</p>
</li>
<li>
<p>Delimited (experimental): Generate a text file with all of the elements common to both inodes and inodes-under-construction, separated by a delimiter. The default delimiter is \t, though this may be changed via the -delimiter argument.</p>
</li>
<li>
<p>DetectCorruption (experimental): Detect potential corruption of the image by selectively loading parts of it and actively searching for inconsistencies. Outputs a summary of the found corruptions in a delimited format. Note that the check is not exhaustive, and only catches missing nodes during the namespace reconstruction.</p>
</li>
<li>
<p>ReverseXML (experimental): This is the opposite of the XML processor; it reconstructs an fsimage from an XML file. This processor makes it easy to create fsimages for testing, and manually edit fsimages when there is corruption.</p>
</li>
</ol></div>
<div class="section">
<h2><a name="Usage"></a>Usage</h2>
<div class="section">
<h3><a name="Web_Processor"></a>Web Processor</h3>
<p>The Web processor launches an HTTP server which exposes a read-only WebHDFS API. Users can specify the address to listen on with the -addr option (localhost:5978 by default).</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -i fsimage
14/04/07 13:25:14 INFO offlineImageViewer.WebImageViewer: WebImageViewer
started. Listening on /127.0.0.1:5978. Press Ctrl+C to stop the viewer.
</pre></div></div>
<p>Users can access the viewer and get the information of the fsimage by the following shell command:</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs dfs -ls webhdfs://127.0.0.1:5978/
Found 2 items
drwxrwx--* - root supergroup 0 2014-03-26 20:16 webhdfs://127.0.0.1:5978/tmp
drwxr-xr-x - root supergroup 0 2014-03-31 14:08 webhdfs://127.0.0.1:5978/user
</pre></div></div>
<p>To get the information of all the files and directories, you can simply use the following command:</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs dfs -ls -R webhdfs://127.0.0.1:5978/
</pre></div></div>
<p>Users can also get JSON formatted FileStatuses via HTTP REST API.</p>
<div>
<div>
<pre class="source"> bash$ curl -i http://127.0.0.1:5978/webhdfs/v1/?op=liststatus
HTTP/1.1 200 OK
Content-Type: application/json
Content-Length: 252
{&quot;FileStatuses&quot;:{&quot;FileStatus&quot;:[
{&quot;fileId&quot;:16386,&quot;accessTime&quot;:0,&quot;replication&quot;:0,&quot;owner&quot;:&quot;theuser&quot;,&quot;length&quot;:0,&quot;permission&quot;:&quot;755&quot;,&quot;blockSize&quot;:0,&quot;modificationTime&quot;:1392772497282,&quot;type&quot;:&quot;DIRECTORY&quot;,&quot;group&quot;:&quot;supergroup&quot;,&quot;childrenNum&quot;:1,&quot;pathSuffix&quot;:&quot;user&quot;}
]}}
</pre></div></div>
<p>The Web processor now supports the following operations:</p>
<ul>
<li><a href="./WebHDFS.html#List_a_Directory">LISTSTATUS</a></li>
<li><a href="./WebHDFS.html#Status_of_a_FileDirectory">GETFILESTATUS</a></li>
<li><a href="./WebHDFS.html#Get_ACL_Status">GETACLSTATUS</a></li>
<li><a href="./WebHDFS.html#Get_an_XAttr">GETXATTRS</a></li>
<li><a href="./WebHDFS.html#List_all_XAttrs">LISTXATTRS</a></li>
<li><a href="./WebHDFS.html#Get_Content_Summary_of_a_Directory">CONTENTSUMMARY</a></li>
</ul></div>
<div class="section">
<h3><a name="XML_Processor"></a>XML Processor</h3>
<p>The XML processor is used to dump all the contents of the fsimage. Users can specify the input and output files via the -i and -o command-line options.</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p XML -i fsimage -o fsimage.xml
</pre></div></div>
<p>This will create a file named fsimage.xml that contains all the information in the fsimage. For very large image files, this process may take several minutes.</p>
<p>Applying the Offline Image Viewer with XML processor would result in the following output:</p>
<div>
<div>
<pre class="source"> &lt;?xml version=&quot;1.0&quot;?&gt;
&lt;fsimage&gt;
&lt;NameSection&gt;
&lt;genstampV1&gt;1000&lt;/genstampV1&gt;
&lt;genstampV2&gt;1002&lt;/genstampV2&gt;
&lt;genstampV1Limit&gt;0&lt;/genstampV1Limit&gt;
&lt;lastAllocatedBlockId&gt;1073741826&lt;/lastAllocatedBlockId&gt;
&lt;txid&gt;37&lt;/txid&gt;
&lt;/NameSection&gt;
&lt;INodeSection&gt;
&lt;lastInodeId&gt;16400&lt;/lastInodeId&gt;
&lt;inode&gt;
&lt;id&gt;16385&lt;/id&gt;
&lt;type&gt;DIRECTORY&lt;/type&gt;
&lt;name&gt;&lt;/name&gt;
&lt;mtime&gt;1392772497282&lt;/mtime&gt;
&lt;permission&gt;theuser:supergroup:rwxr-xr-x&lt;/permission&gt;
&lt;nsquota&gt;9223372036854775807&lt;/nsquota&gt;
&lt;dsquota&gt;-1&lt;/dsquota&gt;
&lt;/inode&gt;
...remaining output omitted...
</pre></div></div>
</div>
<div class="section">
<h3><a name="ReverseXML_Processor"></a>ReverseXML Processor</h3>
<p>The ReverseXML processor is the opposite of the XML processor. Users can specify the input XML file and the output fsimage file via the -i and -o command-line options.</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p ReverseXML -i fsimage.xml -o fsimage
</pre></div></div>
<p>This will reconstruct an fsimage from an XML file.</p></div>
<div class="section">
<h3><a name="FileDistribution_Processor"></a>FileDistribution Processor</h3>
<p>The FileDistribution processor can analyze file sizes in the namespace image. Users can specify maxSize (128GB by default) and step (2MB by default) in bytes via the -maxSize and -step command-line options.</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p FileDistribution -maxSize maxSize -step size -i fsimage -o output
</pre></div></div>
<p>The processor will calculate how many files in the system fall into each segment. The output file is formatted as a tab-separated two-column table, as shown in the following output:</p>
<div>
<div>
<pre class="source"> Size NumFiles
4 1
12 1
16 1
20 1
totalFiles = 4
totalDirectories = 2
totalBlocks = 4
totalSpace = 48
maxFileSize = 21
</pre></div></div>
<p>To make the output more readable, users can additionally specify the -format option.</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p FileDistribution -maxSize maxSize -step size -format -i fsimage -o output
</pre></div></div>
<p>This would result in the following output:</p>
<div>
<div>
<pre class="source"> Size Range NumFiles
(0 B, 4 B] 1
(8 B, 12 B] 1
(12 B, 16 B] 1
(16 B, 21 B] 1
totalFiles = 4
totalDirectories = 2
totalBlocks = 4
totalSpace = 48
maxFileSize = 21
</pre></div></div>
</div>
<div class="section">
<h3><a name="Delimited_Processor"></a>Delimited Processor</h3>
<p>The Delimited processor generates a text representation of the fsimage, with each element separated by a delimiter string (\t by default). Users can specify a new delimiter string with the -delimiter option.</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p Delimited -delimiter delimiterString -i fsimage -o output
</pre></div></div>
<p>In addition, users can specify a temporary directory to cache intermediate results with the following command:</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p Delimited -delimiter delimiterString -t temporaryDir -i fsimage -o output
</pre></div></div>
<p>If not set, the Delimited processor will construct the namespace in memory before outputting text. The output of this processor should look like the following:</p>
<div>
<div>
<pre class="source"> Path Replication ModificationTime AccessTime PreferredBlockSize BlocksCount FileSize NSQUOTA DSQUOTA Permission UserName GroupName
/ 0 2017-02-13 10:39 1970-01-01 08:00 0 0 0 9223372036854775807 -1 drwxr-xr-x root supergroup
/dir0 0 2017-02-13 10:39 1970-01-01 08:00 0 0 0 -1 -1 drwxr-xr-x root supergroup
/dir0/file0 1 2017-02-13 10:39 2017-02-13 10:39 134217728 1 1 0 0 -rw-r--r-- root supergroup
/dir0/file1 1 2017-02-13 10:39 2017-02-13 10:39 134217728 1 1 0 0 -rw-r--r-- root supergroup
/dir0/file2 1 2017-02-13 10:39 2017-02-13 10:39 134217728 1 1 0 0 -rw-r--r-- root supergroup
</pre></div></div>
</div>
<div class="section">
<h3><a name="DetectCorruption_Processor"></a>DetectCorruption Processor</h3>
<p>The DetectCorruption processor generates a text representation of the errors of the fsimage, if there are any. It reports the following cases:</p>
<ol style="list-style-type: decimal">
<li>
<p>an inode is mentioned in the fsimage but no associated metadata is found (CorruptNode)</p>
</li>
<li>
<p>an inode has at least one corrupt child (MissingChild)</p>
</li>
</ol>
<p>The delimiter string can be provided with the -delimiter option, and the processor can cache intermediate results using the -t option.</p>
<div>
<div>
<pre class="source"> bash$ bin/hdfs oiv -p DetectCorruption -delimiter delimiterString -t temporaryDir -i fsimage -o output
</pre></div></div>
<p>The output of this processor is empty if no corruption is found; otherwise it lists the found entries in the following format:</p>
<div>
<div>
<pre class="source"> CorruptionType Id IsSnapshot ParentPath ParentId Name NodeType CorruptChildren
MissingChild 16385 false / Missing Node 1
MissingChild 16386 false / 16385 dir0 Node 2
CorruptNode 16388 true 16386 Unknown 0
CorruptNode 16389 true 16386 Unknown 0
CorruptNodeWithMissingChild 16391 true 16385 Unknown 1
CorruptNode 16394 true 16391 Unknown 0
</pre></div></div>
<p>The CorruptionType column can be MissingChild, CorruptNode or a combination of the two. IsSnapshot shows whether the node is kept in a snapshot or not. The NodeType column contains Node, Ref or Unknown, depending on whether the node is an inode, a reference, or is corrupted and thus unknown. CorruptChildren contains the number of corrupt children the inode may have.</p></div></div>
<div class="section">
<h2><a name="Options"></a>Options</h2>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> <b>Flag</b> </th>
<th align="left"> <b>Description</b> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <tt>-i</tt>|<tt>--inputFile</tt> <i>input file</i> </td>
<td align="left"> Specify the input fsimage file (or XML file, if ReverseXML processor is used) to process. Required. </td></tr>
<tr class="a">
<td align="left"> <tt>-o</tt>|<tt>--outputFile</tt> <i>output file</i> </td>
<td align="left"> Specify the output filename, if the specified output processor generates one. If the specified file already exists, it is silently overwritten. (output to stdout by default) If the input file is an XML file, it also creates an &lt;outputFile&gt;.md5. </td></tr>
<tr class="b">
<td align="left"> <tt>-p</tt>|<tt>--processor</tt> <i>processor</i> </td>
<td align="left"> Specify the image processor to apply against the image file. Currently valid options are <tt>Web</tt> (default), <tt>XML</tt>, <tt>Delimited</tt>, <tt>DetectCorruption</tt>, <tt>FileDistribution</tt> and <tt>ReverseXML</tt>. </td></tr>
<tr class="a">
<td align="left"> <tt>-addr</tt> <i>address</i> </td>
<td align="left"> Specify the address (host:port) to listen on (localhost:5978 by default). This option is used with Web processor. </td></tr>
<tr class="b">
<td align="left"> <tt>-maxSize</tt> <i>size</i> </td>
<td align="left"> Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. </td></tr>
<tr class="a">
<td align="left"> <tt>-step</tt> <i>size</i> </td>
<td align="left"> Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. </td></tr>
<tr class="b">
<td align="left"> <tt>-format</tt> </td>
<td align="left"> Format the output result in a human-readable fashion rather than a number of bytes. (false by default). This option is used with FileDistribution processor. </td></tr>
<tr class="a">
<td align="left"> <tt>-delimiter</tt> <i>arg</i> </td>
<td align="left"> Delimiting string to use with Delimited or DetectCorruption processor. </td></tr>
<tr class="b">
<td align="left"> <tt>-t</tt>|<tt>--temp</tt> <i>temporary dir</i> </td>
<td align="left"> Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. </td></tr>
<tr class="a">
<td align="left"> <tt>-h</tt>|<tt>--help</tt> </td>
<td align="left"> Display the tool usage and help information and exit. </td></tr>
</tbody>
</table></div>
<div class="section">
<h2><a name="Analyzing_Results"></a>Analyzing Results</h2>
<p>The Offline Image Viewer makes it easy to gather large amounts of data about the HDFS namespace. This information can then be used to explore file system usage patterns or find specific files that match arbitrary criteria, along with other types of namespace analysis.</p></div>
<div class="section">
<h2><a name="oiv_legacy_Command"></a>oiv_legacy Command</h2>
<p>Due to the internal layout changes introduced by the ProtocolBuffer-based fsimage (<a class="externalLink" href="https://issues.apache.org/jira/browse/HDFS-5698">HDFS-5698</a>), OfflineImageViewer consumes an excessive amount of memory and loses some functions such as the Indented processor. If you want to process an fsimage without a large amount of memory or use these processors, you can use the <tt>oiv_legacy</tt> command (same as <tt>oiv</tt> in Hadoop 2.3).</p>
<div class="section">
<h3><a name="Usage"></a>Usage</h3>
<ol style="list-style-type: decimal">
<li>
<p>Set <tt>dfs.namenode.legacy-oiv-image.dir</tt> to an appropriate directory to make standby NameNode or SecondaryNameNode save its namespace in the old fsimage format during checkpointing.</p>
</li>
<li>
<p>Use the <tt>oiv_legacy</tt> command to process the old format fsimage.</p>
<div>
<div>
<pre class="source">bash$ bin/hdfs oiv_legacy -i fsimage_old -o output
</pre></div></div>
</li>
</ol></div>
<div class="section">
<h3><a name="oiv_legacy_Options"></a>Options</h3>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> <b>Flag</b> </th>
<th align="left"> <b>Description</b> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <tt>-i</tt>|<tt>--inputFile</tt> <i>input file</i> </td>
<td align="left"> Specify the input fsimage file to process. Required. </td></tr>
<tr class="a">
<td align="left"> <tt>-o</tt>|<tt>--outputFile</tt> <i>output file</i> </td>
<td align="left"> Specify the output filename, if the specified output processor generates one. If the specified file already exists, it is silently overwritten. Required. </td></tr>
<tr class="b">
<td align="left"> <tt>-p</tt>|<tt>--processor</tt> <i>processor</i> </td>
<td align="left"> Specify the image processor to apply against the image file. Valid options are Ls (default), XML, Delimited, Indented, FileDistribution and NameDistribution. </td></tr>
<tr class="a">
<td align="left"> <tt>-maxSize</tt> <i>size</i> </td>
<td align="left"> Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. </td></tr>
<tr class="b">
<td align="left"> <tt>-step</tt> <i>size</i> </td>
<td align="left"> Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. </td></tr>
<tr class="a">
<td align="left"> <tt>-format</tt> </td>
<td align="left"> Format the output result in a human-readable fashion rather than a number of bytes. (false by default). This option is used with FileDistribution processor. </td></tr>
<tr class="b">
<td align="left"> <tt>-skipBlocks</tt> </td>
<td align="left"> Do not enumerate individual blocks within files. This may save processing time and output file space on namespaces with very large files. The Ls processor reads the blocks to correctly determine file sizes and ignores this option. </td></tr>
<tr class="a">
<td align="left"> <tt>-printToScreen</tt> </td>
<td align="left"> Pipe output of processor to console as well as specified file. On extremely large namespaces, this may increase processing time by an order of magnitude. </td></tr>
<tr class="b">
<td align="left"> <tt>-delimiter</tt> <i>arg</i> </td>
<td align="left"> When used in conjunction with the Delimited processor, replaces the default tab delimiter with the string specified by <i>arg</i>. </td></tr>
<tr class="a">
<td align="left"> <tt>-h</tt>|<tt>--help</tt> </td>
<td align="left"> Display the tool usage and help information and exit. </td></tr>
</tbody>
</table></div></div>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2021
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>