blob: 98a7273c714265754188b0013a8df0b18d59703c [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2021-06-15
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.3.1 &#x2013; class </title>
<style type="text/css" media="all">
@import url("../css/maven-base.css");
@import url("../css/maven-theme.css");
@import url("../css/site.css");
</style>
<link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20210615" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xleft">
<a href="http://www.apache.org/" class="externalLink">Apache</a>
&gt;
<a href="http://hadoop.apache.org/" class="externalLink">Hadoop</a>
&gt;
<a href="../../index.html">Apache Hadoop Project Dist POM</a>
&gt;
<a href="../index.html">Apache Hadoop 3.3.1</a>
&gt;
class
</div>
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2021-06-15
&nbsp;| Version: 3.3.1
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../../hadoop-openstack/index.html">OpenStack Swift</a>
</li>
<li class="none">
<a href="../../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="../images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- ============================================================= -->
<!-- CLASS: FileSystem -->
<!-- ============================================================= -->
<h1>class <tt>org.apache.hadoop.fs.FileSystem</tt></h1>
<ul>
<li><a href="#Invariants">Invariants</a></li>
<li><a href="#Feasible_features">Feasible features</a>
<ul>
<li><a href="#Protected_directories">Protected directories</a></li></ul></li>
<li><a href="#Predicates_and_other_state_access_operations">Predicates and other state access operations</a>
<ul>
<li><a href="#boolean_exists.28Path_p.29">boolean exists(Path p)</a></li>
<li><a href="#boolean_isDirectory.28Path_p.29">boolean isDirectory(Path p)</a></li>
<li><a href="#boolean_isFile.28Path_p.29">boolean isFile(Path p)</a></li>
<li><a href="#FileStatus_getFileStatus.28Path_p.29">FileStatus getFileStatus(Path p)</a></li>
<li><a href="#msync.28.29">msync()</a></li>
<li><a href="#Path_getHomeDirectory.28.29">Path getHomeDirectory()</a></li>
<li><a href="#FileStatus.5B.5D_listStatus.28Path_path.2C_PathFilter_filter.29">FileStatus[] listStatus(Path path, PathFilter filter)</a></li>
<li><a href="#FileStatus.5B.5D_listStatus.28Path_path.29">FileStatus[] listStatus(Path path)</a></li>
<li><a href="#FileStatus.5B.5D_listStatus.28Path.5B.5D_paths.2C_PathFilter_filter.29">FileStatus[] listStatus(Path[] paths, PathFilter filter)</a></li>
<li><a href="#RemoteIterator.3CFileStatus.3E_listStatusIterator.28Path_p.29">RemoteIterator&lt;FileStatus&gt; listStatusIterator(Path p)</a></li>
<li><a href="#FileStatus.5B.5D_listStatus.28Path.5B.5D_paths.29">FileStatus[] listStatus(Path[] paths)</a></li>
<li><a href="#RemoteIterator.5BLocatedFileStatus.5D_listLocatedStatus.28Path_path.2C_PathFilter_filter.29">RemoteIterator[LocatedFileStatus] listLocatedStatus(Path path, PathFilter filter)</a></li>
<li><a href="#RemoteIterator.5BLocatedFileStatus.5D_listLocatedStatus.28Path_path.29">RemoteIterator[LocatedFileStatus] listLocatedStatus(Path path)</a></li>
<li><a href="#RemoteIterator.5BLocatedFileStatus.5D_listFiles.28Path_path.2C_boolean_recursive.29">RemoteIterator[LocatedFileStatus] listFiles(Path path, boolean recursive)</a></li>
<li><a href="#Postconditions">Postconditions</a></li>
<li><a href="#BlockLocation.5B.5D_getFileBlockLocations.28FileStatus_f.2C_int_s.2C_int_l.29">BlockLocation[] getFileBlockLocations(FileStatus f, int s, int l)</a></li>
<li><a href="#BlockLocation.5B.5D_getFileBlockLocations.28Path_P.2C_int_S.2C_int_L.29">BlockLocation[] getFileBlockLocations(Path P, int S, int L)</a></li>
<li><a href="#long_getDefaultBlockSize.28.29">long getDefaultBlockSize()</a></li>
<li><a href="#long_getDefaultBlockSize.28Path_p.29">long getDefaultBlockSize(Path p)</a></li>
<li><a href="#long_getBlockSize.28Path_p.29">long getBlockSize(Path p)</a></li></ul></li>
<li><a href="#State_Changing_Operations"> State Changing Operations</a>
<ul>
<li><a href="#boolean_mkdirs.28Path_p.2C_FsPermission_permission.29">boolean mkdirs(Path p, FsPermission permission)</a></li>
<li><a href="#FSDataOutputStream_create.28Path.2C_....29"> FSDataOutputStream create(Path, ...)</a></li>
<li><a href="#FSDataOutputStreamBuilder_createFile.28Path_p.29">FSDataOutputStreamBuilder createFile(Path p)</a></li>
<li><a href="#FSDataOutputStream_append.28Path_p.2C_int_bufferSize.2C_Progressable_progress.29"> FSDataOutputStream append(Path p, int bufferSize, Progressable progress)</a></li>
<li><a href="#FSDataOutputStreamBuilder_appendFile.28Path_p.29">FSDataOutputStreamBuilder appendFile(Path p)</a></li>
<li><a href="#FSDataInputStream_open.28Path_f.2C_int_bufferSize.29">FSDataInputStream open(Path f, int bufferSize)</a></li>
<li><a href="#FSDataInputStreamBuilder_openFile.28Path_path.29">FSDataInputStreamBuilder openFile(Path path)</a></li>
<li><a href="#Implementors_notes">Implementors notes</a></li>
<li><a href="#FSDataInputStreamBuilder_openFile.28PathHandle.29">FSDataInputStreamBuilder openFile(PathHandle)</a></li>
<li><a href="#PathHandle_getPathHandle.28FileStatus_stat.2C_HandleOpt..._options.29">PathHandle getPathHandle(FileStatus stat, HandleOpt... options)</a></li>
<li><a href="#FSDataInputStream_open.28PathHandle_handle.2C_int_bufferSize.29">FSDataInputStream open(PathHandle handle, int bufferSize)</a></li>
<li><a href="#boolean_delete.28Path_p.2C_boolean_recursive.29">boolean delete(Path p, boolean recursive)</a></li>
<li><a href="#Object_Stores:_root_directory_deletion"> Object Stores: root directory deletion</a></li>
<li><a href="#boolean_rename.28Path_src.2C_Path_d.29">boolean rename(Path src, Path d)</a></li>
<li><a href="#void_concat.28Path_p.2C_Path_sources.5B.5D.29">void concat(Path p, Path sources[])</a></li>
<li><a href="#boolean_truncate.28Path_p.2C_long_newLength.29">boolean truncate(Path p, long newLength)</a></li></ul></li>
<li><a href="#interface_RemoteIterator"> interface RemoteIterator</a>
<ul>
<li><a href="#boolean_hasNext.28.29">boolean hasNext()</a></li>
<li><a href="#E_next.28.29">E next()</a></li>
<li><a href="#Concurrency_and_the_Remote_Iterator">Concurrency and the Remote Iterator</a></li></ul></li>
<li><a href="#interface_StreamCapabilities"> interface StreamCapabilities</a>
<ul>
<li><a href="#boolean_hasCapability.28capability.29">boolean hasCapability(capability)</a></li></ul></li></ul>
<p>The abstract <tt>FileSystem</tt> class is the original class to access Hadoop filesystems; non-abstract subclasses exist for all Hadoop-supported filesystems.</p>
<p>All operations that take a Path to this interface MUST support relative paths. In such a case, they must be resolved relative to the working directory defined by <tt>setWorkingDirectory()</tt>.</p>
<p>For all clients, therefore, we also add the notion of a state component PWD: this represents the present working directory of the client. Changes to this state are not reflected in the filesystem itself: they are unique to the instance of the client.</p>
<p><b>Implementation Note</b>: the static <tt>FileSystem get(URI uri, Configuration conf)</tt> method MAY return a pre-existing instance of a filesystem client class&#x2014;a class that may also be in use in other threads. The implementations of <tt>FileSystem</tt> shipped with Apache Hadoop <i>do not make any attempt to synchronize access to the working directory field</i>.</p>
<div class="section">
<h2><a name="Invariants"></a>Invariants</h2>
<p>All the requirements of a valid FileSystem are considered implicit preconditions and postconditions: all operations on a valid FileSystem MUST result in a new FileSystem that is also valid.</p></div>
<div class="section">
<h2><a name="Feasible_features"></a>Feasible features</h2>
<div class="section">
<h3><a name="Protected_directories"></a><a name="ProtectedDirectories"></a>Protected directories</h3>
<p>HDFS has the notion of <i>Protected Directories</i>, which are declared in the option <tt>fs.protected.directories</tt>. Any attempt to delete or rename such a directory or a parent thereof raises an <tt>AccessControlException</tt>. Accordingly, any attempt to delete the root directory SHALL, if there is a protected directory, result in such an exception being raised.</p></div></div>
<div class="section">
<h2><a name="Predicates_and_other_state_access_operations"></a>Predicates and other state access operations</h2>
<div class="section">
<h3><a name="boolean_exists.28Path_p.29"></a><tt>boolean exists(Path p)</tt></h3>
<div>
<div>
<pre class="source">def exists(FS, p) = p in paths(FS)
</pre></div></div>
</div>
<div class="section">
<h3><a name="boolean_isDirectory.28Path_p.29"></a><tt>boolean isDirectory(Path p)</tt></h3>
<div>
<div>
<pre class="source">def isDirectory(FS, p)= p in directories(FS)
</pre></div></div>
</div>
<div class="section">
<h3><a name="boolean_isFile.28Path_p.29"></a><tt>boolean isFile(Path p)</tt></h3>
<div>
<div>
<pre class="source">def isFile(FS, p) = p in files(FS)
</pre></div></div>
</div>
<div class="section">
<h3><a name="FileStatus_getFileStatus.28Path_p.29"></a><tt>FileStatus getFileStatus(Path p)</tt></h3>
<p>Get the status of a path</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if not exists(FS, p) : raise FileNotFoundException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = stat: FileStatus where:
if isFile(FS, p) :
stat.length = len(FS.Files[p])
stat.isdir = False
stat.blockSize &gt; 0
elif isDir(FS, p) :
stat.length = 0
stat.isdir = True
elif isSymlink(FS, p) :
stat.length = 0
stat.isdir = False
stat.symlink = FS.Symlinks[p]
stat.hasAcl = hasACL(FS, p)
stat.isEncrypted = inEncryptionZone(FS, p)
stat.isErasureCoded = isErasureCoded(FS, p)
</pre></div></div>
<p>The returned <tt>FileStatus</tt> status of the path additionally carries details on ACL, encryption and erasure coding information. <tt>getFileStatus(Path p).hasAcl()</tt> can be queried to find if the path has an ACL. <tt>getFileStatus(Path p).isEncrypted()</tt> can be queried to find if the path is encrypted. <tt>getFileStatus(Path p).isErasureCoded()</tt> will tell if the path is erasure coded or not.</p>
<p>YARN&#x2019;s distributed cache lets applications add paths to be cached across containers and applications via <tt>Job.addCacheFile()</tt> and <tt>Job.addCacheArchive()</tt>. The cache treats world-readable resource paths added as shareable across applications, and downloads them differently, unless they are declared as encrypted.</p>
<p>To avoid failures during container launching, especially when delegation tokens are used, filesystems and object stores which do not implement POSIX access permissions for both files and directories, MUST always return <tt>true</tt> to the <tt>isEncrypted()</tt> predicate. This can be done by setting the <tt>encrypted</tt> flag to true when creating the <tt>FileStatus</tt> instance.</p>
<div class="section">
<h3><a name="msync.28.29"></a><tt>msync()</tt></h3>
<p>Synchronize metadata state of the client with the latest state of the metadata service of the FileSystem.</p>
<p>In highly available FileSystems, a standby service can be used as a read-only metadata replica. This call is essential to guarantee consistency of reads from the standby replica and to avoid stale reads.</p>
<p>It is currently only implemented for HDFS and others will just throw <tt>UnsupportedOperationException</tt>.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<p>This call internally records the state of the metadata service at the time of the call. This guarantees consistency of subsequent reads from any metadata replica. It assures the client will never access the state of the metadata that preceded the recorded state.</p></div>
<div class="section">
<h4><a name="HDFS_implementation_notes"></a>HDFS implementation notes</h4>
<p>HDFS supports <tt>msync()</tt> in HA mode by calling the Active NameNode and requesting its latest journal transaction ID. For more details see HDFS documentation <a class="externalLink" href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Consistent Reads from HDFS Observer NameNode</a></p></div></div>
<div class="section">
<h3><a name="Path_getHomeDirectory.28.29"></a><tt>Path getHomeDirectory()</tt></h3>
<p>The function <tt>getHomeDirectory</tt> returns the home directory for the FileSystem and the current user account.</p>
<p>For some FileSystems, the path is <tt>[&quot;/&quot;, &quot;users&quot;, System.getProperty(&quot;user.name&quot;)]</tt>.</p>
<p>However, for HDFS, the username is derived from the credentials used to authenticate the client with HDFS. This may differ from the local user account name.</p>
<p><b>It is the responsibility of the FileSystem to determine the actual home directory of the caller.</b></p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = p where valid-path(FS, p)
</pre></div></div>
<p>There is no requirement that the path exists at the time the method was called, or, if it exists, that it points to a directory. However, code tends to assume that <tt>not isFile(FS, getHomeDirectory())</tt> holds to the extent that follow-on code may fail.</p></div>
<div class="section">
<h4><a name="Implementation_Notes"></a>Implementation Notes</h4>
<ul>
<li>The <tt>FTPFileSystem</tt> queries this value from the remote filesystem and may fail with a <tt>RuntimeException</tt> or subclass thereof if there is a connectivity problem. The time to execute the operation is not bounded.</li>
</ul></div></div>
<div class="section">
<h3><a name="FileStatus.5B.5D_listStatus.28Path_path.2C_PathFilter_filter.29"></a><tt>FileStatus[] listStatus(Path path, PathFilter filter)</tt></h3>
<p>Lists entries under a path, <tt>path</tt>.</p>
<p>If <tt>path</tt> refers to a file and the filter accepts it, then that file&#x2019;s <tt>FileStatus</tt> entry is returned in a single-element array.</p>
<p>If the path refers to a directory, the call returns a list of all its immediate child paths which are accepted by the filter &#x2014;and does not include the directory itself.</p>
<p>A <tt>PathFilter</tt> <tt>filter</tt> is a class whose <tt>accept(path)</tt> returns true iff the path <tt>path</tt> meets the filter&#x2019;s conditions.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>Path <tt>path</tt> must exist:</p>
<div>
<div>
<pre class="source">if not exists(FS, path) : raise FileNotFoundException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">if isFile(FS, path) and filter.accept(path) :
result = [ getFileStatus(path) ]
elif isFile(FS, path) and not filter.accept(path) :
result = []
elif isDir(FS, path):
result = [
getFileStatus(c) for c in children(FS, path) if filter.accept(c)
]
</pre></div></div>
<p><b>Implicit invariant</b>: the contents of a <tt>FileStatus</tt> of a child retrieved via <tt>listStatus()</tt> are equal to those from a call of <tt>getFileStatus()</tt> to the same path:</p>
<div>
<div>
<pre class="source">forall fs in listStatus(path) :
fs == getFileStatus(fs.path)
</pre></div></div>
<p><b>Ordering of results</b>: there is no guarantee of ordering of the listed entries. While HDFS currently returns an alphanumerically sorted list, neither the Posix <tt>readdir()</tt> nor Java&#x2019;s <tt>File.listFiles()</tt> API calls define any ordering of returned values. Applications which require a uniform sort order on the results must perform the sorting themselves.</p>
<p><b>Null return</b>: Local filesystems prior to 3.0.0 returned null upon access error. It is considered erroneous. Expect IOException upon access error.</p></div>
<div class="section">
<h4><a name="Atomicity_and_Consistency"></a>Atomicity and Consistency</h4>
<p>By the time the <tt>listStatus()</tt> operation returns to the caller, there is no guarantee that the information contained in the response is current. The details MAY be out of date, including the contents of any directory, the attributes of any files, and the existence of the path supplied.</p>
<p>The state of a directory MAY change during the evaluation process.</p>
<ul>
<li>
<p>After an entry at path <tt>P</tt> is created, and before any other changes are made to the filesystem, <tt>listStatus(P)</tt> MUST find the file and return its status.</p>
</li>
<li>
<p>After an entry at path <tt>P</tt> is deleted, and before any other changes are made to the filesystem, <tt>listStatus(P)</tt> MUST raise a <tt>FileNotFoundException</tt>.</p>
</li>
<li>
<p>After an entry at path <tt>P</tt> is created, and before any other changes are made to the filesystem, the result of <tt>listStatus(parent(P))</tt> SHOULD include the value of <tt>getFileStatus(P)</tt>.</p>
</li>
<li>
<p>After an entry at path <tt>P</tt> is deleted, and before any other changes are made to the filesystem, the result of <tt>listStatus(parent(P))</tt> SHOULD NOT include the value of <tt>getFileStatus(P)</tt>.</p>
</li>
</ul>
<p>This is not a theoretical possibility, it is observable in HDFS when a directory contains many thousands of files.</p>
<p>Consider a directory <tt>&quot;/d&quot;</tt> with the contents:</p>
<div>
<div>
<pre class="source">a
part-0000001
part-0000002
...
part-9999999
</pre></div></div>
<p>If the number of files is such that HDFS returns a partial listing in each response, then, if a listing <tt>listStatus(&quot;/d&quot;)</tt> takes place concurrently with the operation <tt>rename(&quot;/d/a&quot;,&quot;/d/z&quot;))</tt>, the result may be one of:</p>
<div>
<div>
<pre class="source">[a, part-0000001, ... , part-9999999]
[part-0000001, ... , part-9999999, z]
[a, part-0000001, ... , part-9999999, z]
[part-0000001, ... , part-9999999]
</pre></div></div>
<p>While this situation is likely to be a rare occurrence, it MAY happen. In HDFS these inconsistent views are only likely when listing a directory with many children.</p>
<p>Other filesystems may have stronger consistency guarantees, or return inconsistent data more readily.</p></div></div>
<div class="section">
<h3><a name="FileStatus.5B.5D_listStatus.28Path_path.29"></a><tt>FileStatus[] listStatus(Path path)</tt></h3>
<p>This is exactly equivalent to <tt>listStatus(Path, DEFAULT_FILTER)</tt> where <tt>DEFAULT_FILTER.accept(path) = True</tt> for all paths.</p>
<p>The atomicity and consistency constraints are as for <tt>listStatus(Path, DEFAULT_FILTER)</tt>.</p></div>
<div class="section">
<h3><a name="FileStatus.5B.5D_listStatus.28Path.5B.5D_paths.2C_PathFilter_filter.29"></a><tt>FileStatus[] listStatus(Path[] paths, PathFilter filter)</tt></h3>
<p>Enumerate all files found in the list of directories passed in, calling <tt>listStatus(path, filter)</tt> on each one.</p>
<p>As with <tt>listStatus(path, filter)</tt>, the results may be inconsistent. That is: the state of the filesystem may have changed during the operation.</p>
<p>There are no guarantees as to whether paths are listed in a specific order, only that they must all be listed, and, at the time of listing, exist.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>All paths must exist. There is no requirement for uniqueness.</p>
<div>
<div>
<pre class="source">forall p in paths :
exists(FS, p) else raise FileNotFoundException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<p>The result is an array whose entries contain every status element found in the path listings, and no others.</p>
<div>
<div>
<pre class="source">result = [listStatus(p, filter) for p in paths]
</pre></div></div>
<p>Implementations MAY merge duplicate entries, and/or optimize the operation by recognizing duplicate paths and only listing the entries once.</p>
<p>The default implementation iterates through the list; it does not perform any optimizations.</p>
<p>The atomicity and consistency constraints are as for <tt>listStatus(Path, PathFilter)</tt>.</p></div></div>
<div class="section">
<h3><a name="RemoteIterator.3CFileStatus.3E_listStatusIterator.28Path_p.29"></a><tt>RemoteIterator&lt;FileStatus&gt; listStatusIterator(Path p)</tt></h3>
<p>Return an iterator enumerating the <tt>FileStatus</tt> entries under a path. This is similar to <tt>listStatus(Path)</tt> except the fact that rather than returning an entire list, an iterator is returned. The result is exactly the same as <tt>listStatus(Path)</tt>, provided no other caller updates the directory during the listing. Having said that, this does not guarantee atomicity if other callers are adding/deleting the files inside the directory while listing is being performed. Different filesystems may provide a more efficient implementation, for example S3A does the listing in pages and fetches the next pages asynchronously while a page is getting processed.</p>
<p>Note that because the initial listing is asynchronous, a bucket/path existence exception may surface later, during a <tt>next()</tt> call.</p>
<p>Callers should prefer using listStatusIterator over listStatus as it is incremental in nature.</p></div>
<div class="section">
<h3><a name="FileStatus.5B.5D_listStatus.28Path.5B.5D_paths.29"></a><tt>FileStatus[] listStatus(Path[] paths)</tt></h3>
<p>Enumerate all files found in the list of directories passed in, calling <tt>listStatus(path, DEFAULT_FILTER)</tt> on each one, where the <tt>DEFAULT_FILTER</tt> accepts all path names.</p></div>
<div class="section">
<h3><a name="RemoteIterator.5BLocatedFileStatus.5D_listLocatedStatus.28Path_path.2C_PathFilter_filter.29"></a><tt>RemoteIterator[LocatedFileStatus] listLocatedStatus(Path path, PathFilter filter)</tt></h3>
<p>Return an iterator enumerating the <tt>LocatedFileStatus</tt> entries under a path. This is similar to <tt>listStatus(Path)</tt> except that the return value is an instance of the <tt>LocatedFileStatus</tt> subclass of a <tt>FileStatus</tt>, and that rather than return an entire list, an iterator is returned.</p>
<p>This is actually a <tt>protected</tt> method, directly invoked by <tt>listLocatedStatus(Path path)</tt>. Calls to it may be delegated through layered filesystems, such as <tt>FilterFileSystem</tt>, so its implementation MUST be considered mandatory, even if <tt>listLocatedStatus(Path path)</tt> has been implemented in a different manner. There are open JIRAs proposing making this method public; it may happen in future.</p>
<p>There is no requirement for the iterator to provide a consistent view of the child entries of a path. The default implementation does use <tt>listStatus(Path)</tt> to list its children, with its consistency constraints already documented. Other implementations may perform the enumeration even more dynamically. For example fetching a windowed subset of child entries, so avoiding building up large data structures and the transmission of large messages. In such situations, changes to the filesystem are more likely to become visible.</p>
<p>Callers MUST assume that the iteration operation MAY fail if changes to the filesystem take place between this call returning and the iteration being completely performed.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>Path <tt>path</tt> must exist:</p>
<div>
<div>
<pre class="source">if not exists(FS, path) : raise FileNotFoundException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<p>The operation generates a set of results, <tt>resultset</tt>, equal to the result of <tt>listStatus(path, filter)</tt>:</p>
<div>
<div>
<pre class="source">if isFile(FS, path) and filter.accept(path) :
resultset = [ getLocatedFileStatus(FS, path) ]
elif isFile(FS, path) and not filter.accept(path) :
resultset = []
elif isDir(FS, path) :
resultset = [
getLocatedFileStatus(FS, c)
for c in children(FS, path) where filter.accept(c)
]
</pre></div></div>
<p>The operation <tt>getLocatedFileStatus(FS, path: Path): LocatedFileStatus</tt> is defined as a generator of a <tt>LocatedFileStatus</tt> instance <tt>ls</tt> where:</p>
<div>
<div>
<pre class="source">fileStatus = getFileStatus(FS, path)
bl = getFileBlockLocations(FS, path, 0, fileStatus.len)
locatedFileStatus = new LocatedFileStatus(fileStatus, bl)
</pre></div></div>
<p>The ordering in which the elements of <tt>resultset</tt> are returned in the iterator is undefined.</p>
<p>The atomicity and consistency constraints are as for <tt>listStatus(Path, PathFilter)</tt>.</p></div></div>
<div class="section">
<h3><a name="RemoteIterator.5BLocatedFileStatus.5D_listLocatedStatus.28Path_path.29"></a><tt>RemoteIterator[LocatedFileStatus] listLocatedStatus(Path path)</tt></h3>
<p>The equivalent to <tt>listLocatedStatus(path, DEFAULT_FILTER)</tt>, where <tt>DEFAULT_FILTER</tt> accepts all path names.</p></div>
<div class="section">
<h3><a name="RemoteIterator.5BLocatedFileStatus.5D_listFiles.28Path_path.2C_boolean_recursive.29"></a><tt>RemoteIterator[LocatedFileStatus] listFiles(Path path, boolean recursive)</tt></h3>
<p>Create an iterator over all files in/under a directory, potentially recursing into child directories.</p>
<p>The goal of this operation is to permit large recursive directory scans to be handled more efficiently by filesystems, by reducing the amount of data which must be collected in a single RPC call.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">exists(FS, path) else raise FileNotFoundException
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<p>The outcome is an iterator, whose output from the sequence of <tt>iterator.next()</tt> calls can be defined as the set <tt>iteratorset</tt>:</p>
<div>
<div>
<pre class="source">if not recursive:
iteratorset == listStatus(path)
else:
iteratorset = [
getLocatedFileStatus(FS, d)
for d in descendants(FS, path)
]
</pre></div></div>
<p>The function <tt>getLocatedFileStatus(FS, d)</tt> is as defined in <tt>listLocatedStatus(Path, PathFilter)</tt>.</p>
<p>The atomicity and consistency constraints are as for <tt>listStatus(Path, PathFilter)</tt>.</p></div>
<div class="section">
<h3><a name="BlockLocation.5B.5D_getFileBlockLocations.28FileStatus_f.2C_int_s.2C_int_l.29"></a><tt>BlockLocation[] getFileBlockLocations(FileStatus f, int s, int l)</tt></h3>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if s &lt; 0 or l &lt; 0 : raise {HadoopIllegalArgumentException, InvalidArgumentException}
</pre></div></div>
<ul>
<li>HDFS throws <tt>HadoopIllegalArgumentException</tt> for an invalid offset or length; this extends <tt>IllegalArgumentException</tt>.</li>
</ul></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<p>If the filesystem is location aware, it must return the list of block locations where the data in the range <tt>[s:s+l]</tt> can be found.</p>
<div>
<div>
<pre class="source">if f == null :
result = null
elif f.getLen() &lt;= s:
result = []
else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
</pre></div></div>
<p>Where</p>
<div>
<div>
<pre class="source"> def locations(FS, b) = a list of all locations of a block in the filesystem
def blocks(FS, p, s, s + l) = a list of the blocks containing data(FS, path)[s:s+l]
</pre></div></div>
<p>Note that as <tt>length(FS, f)</tt> is defined as <tt>0</tt> if <tt>isDir(FS, f)</tt>, the result of <tt>getFileBlockLocations()</tt> on a directory is <tt>[]</tt></p>
<p>If the filesystem is not location aware, it SHOULD return</p>
<div>
<div>
<pre class="source"> [
 BlockLocation([&quot;localhost:9866&quot;],
 [&quot;localhost&quot;],
 [&quot;/default/localhost&quot;],
 0, f.getLen())
 ]
</pre></div></div>
<p>A bug in Hadoop 1.0.3 means that a topology path of the same number of elements as the cluster topology MUST be provided, hence Filesystems SHOULD return that <tt>&quot;/default/localhost&quot;</tt> path. While this is no longer an issue, the convention is generally retained.</p></div></div>
<div class="section">
<h3><a name="BlockLocation.5B.5D_getFileBlockLocations.28Path_P.2C_int_S.2C_int_L.29"></a><tt>BlockLocation[] getFileBlockLocations(Path P, int S, int L)</tt></h3>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if P == null : raise NullPointerException
if not exists(FS, P) : raise FileNotFoundException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = getFileBlockLocations(getFileStatus(FS, P), S, L)
</pre></div></div>
</div></div>
<div class="section">
<h3><a name="long_getDefaultBlockSize.28.29"></a><tt>long getDefaultBlockSize()</tt></h3>
<p>Get the &#x201c;default&#x201d; block size for a filesystem. This is often used during split calculations to divide work optimally across a set of worker processes.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = integer &gt; 0
</pre></div></div>
<p>Although there is no defined minimum value for this result, as it is used to partition work during job submission, a block size that is too small will result in badly partitioned workload, or even the <tt>JobSubmissionClient</tt> and equivalent running out of memory as it calculates the partitions.</p>
<p>Any FileSystem that does not actually break files into blocks SHOULD return a number for this that results in efficient processing. A FileSystem MAY make this user-configurable (the object store connectors usually do this).</p></div></div>
<div class="section">
<h3><a name="long_getDefaultBlockSize.28Path_p.29"></a><tt>long getDefaultBlockSize(Path p)</tt></h3>
<p>Get the &#x201c;default&#x201d; block size for a path &#x2013;that is, the block size to be used when writing objects to a path in the filesystem.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = integer &gt;= 0
</pre></div></div>
<p>The outcome of this operation is usually identical to <tt>getDefaultBlockSize()</tt>, with no checks for the existence of the given path.</p>
<p>Filesystems that support mount points may have different default values for different paths, in which case the specific default value for the destination path SHOULD be returned.</p>
<p>It is not an error if the path does not exist: the default/recommended value for that part of the filesystem MUST be returned.</p></div></div>
<div class="section">
<h3><a name="long_getBlockSize.28Path_p.29"></a><tt>long getBlockSize(Path p)</tt></h3>
<p>This method is exactly equivalent to querying the block size of the <tt>FileStatus</tt> structure returned in <tt>getFileStatus(p)</tt>. It is deprecated in order to encourage users to make a single call to <tt>getFileStatus(p)</tt> and then use the result to examine multiple attributes of the file (e.g. length, type, block size). If more than one attribute is queried, this can become a significant performance optimization &#x2014;and reduce load on the filesystem.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if not exists(FS, p) : raise FileNotFoundException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">if len(FS, P) &gt; 0: getFileStatus(P).getBlockSize() &gt; 0
result == getFileStatus(P).getBlockSize()
</pre></div></div>
<ol style="list-style-type: decimal">
<li>The outcome of this operation MUST be identical to the value of <tt>getFileStatus(P).getBlockSize()</tt>.</li>
<li>By inference, it MUST be &gt; 0 for any file of length &gt; 0.</li>
</ol></div></div></div>
<div class="section">
<h2><a name="State_Changing_Operations"></a><a name="state_changing_operations"></a> State Changing Operations</h2>
<div class="section">
<h3><a name="boolean_mkdirs.28Path_p.2C_FsPermission_permission.29"></a><tt>boolean mkdirs(Path p, FsPermission permission)</tt></h3>
<p>Create a directory and all its parents.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>The path must either be a directory or not exist</p>
<div>
<div>
<pre class="source"> if exists(FS, p) and not isDir(FS, p) :
raise [ParentNotDirectoryException, FileAlreadyExistsException, IOException]
</pre></div></div>
<p>No ancestor may be a file</p>
<div>
<div>
<pre class="source">forall d = ancestors(FS, p) :
if exists(FS, d) and not isDir(FS, d) :
raise [ParentNotDirectoryException, FileAlreadyExistsException, IOException]
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">FS' where FS'.Directories' = FS.Directories + [p] + ancestors(FS, p)
result = True
</pre></div></div>
<p>The condition exclusivity requirement of a FileSystem&#x2019;s directories, files and symbolic links must hold.</p>
<p>The probe for the existence and type of a path and directory creation MUST be atomic. The combined operation, including <tt>mkdirs(parent(F))</tt> MAY be atomic.</p>
<p>The return value is always true&#x2014;even if a new directory is not created (this is defined in HDFS).</p></div></div>
<div class="section">
<h3><a name="FSDataOutputStream_create.28Path.2C_....29"></a><a name="FileSystem.create"></a> <tt>FSDataOutputStream create(Path, ...)</tt></h3>
<div>
<div>
<pre class="source">FSDataOutputStream create(Path p,
FsPermission permission,
boolean overwrite,
int bufferSize,
short replication,
long blockSize,
Progressable progress) throws IOException;
</pre></div></div>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>The file must not exist for a no-overwrite create:</p>
<div>
<div>
<pre class="source">if not overwrite and isFile(FS, p) : raise FileAlreadyExistsException
</pre></div></div>
<p>Writing to or overwriting a directory must fail.</p>
<div>
<div>
<pre class="source">if isDir(FS, p) : raise {FileAlreadyExistsException, FileNotFoundException, IOException}
</pre></div></div>
<p>No ancestor may be a file</p>
<div>
<div>
<pre class="source">forall d = ancestors(FS, p) :
if exists(FS, d) and not isDir(FS, d) :
raise [ParentNotDirectoryException, FileAlreadyExistsException, IOException]
</pre></div></div>
<p>FileSystems may reject the request for other reasons, such as the FS being read-only (HDFS), the block size being below the minimum permitted (HDFS), the replication count being out of range (HDFS), quotas on namespace or filesystem being exceeded, reserved names, etc. All rejections SHOULD be <tt>IOException</tt> or a subclass thereof and MAY be a <tt>RuntimeException</tt> or subclass. For instance, HDFS may raise an <tt>InvalidPathException</tt>.</p></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">FS' where :
FS'.Files'[p] == []
ancestors(p) is-subset-of FS'.Directories'
result = FSDataOutputStream
</pre></div></div>
<p>A zero byte file MUST exist at the end of the specified path, visible to all.</p>
<p>The updated (valid) FileSystem MUST contain all the parent directories of the path, as created by <tt>mkdirs(parent(p))</tt>.</p>
<p>The result is <tt>FSDataOutputStream</tt>, which through its operations may generate new filesystem states with updated values of <tt>FS.Files[p]</tt></p>
<p>The behavior of the returned stream is covered in <a href="outputstream.html">Output</a>.</p></div>
<div class="section">
<h4><a name="Implementation_Notes"></a>Implementation Notes</h4>
<ul>
<li>
<p>Some implementations separate the create operation into a check for the file existing and the actual creation. This means the operation is NOT atomic: it is possible for clients creating files with <tt>overwrite==true</tt> to fail if the file is created by another client between the two tests.</p>
</li>
<li>
<p>S3A, Swift and potentially other Object Stores do not currently change the <tt>FS</tt> state until the output stream <tt>close()</tt> operation is completed. This is a significant difference between the behavior of object stores and that of filesystems, as it allows &gt;1 client to create a file with <tt>overwrite=false</tt>, and potentially confuse file/directory logic. In particular, using <tt>create()</tt> to acquire an exclusive lock on a file (whoever creates the file without an error is considered the holder of the lock) may not be a safe algorithm to use when working with object stores.</p>
</li>
<li>
<p>Object stores may create an empty file as a marker when a file is created. However, object stores with <tt>overwrite=true</tt> semantics may not implement this atomically, so creating files with <tt>overwrite=false</tt> cannot be used as an implicit exclusion mechanism between processes.</p>
</li>
<li>
<p>The Local FileSystem raises a <tt>FileNotFoundException</tt> when trying to create a file over a directory, hence it is listed as an exception that MAY be raised when this precondition fails.</p>
</li>
<li>
<p>Not covered: symlinks. The resolved path of the symlink is used as the final path argument to the <tt>create()</tt> operation</p>
</li>
</ul></div></div>
<div class="section">
<h3><a name="FSDataOutputStreamBuilder_createFile.28Path_p.29"></a><tt>FSDataOutputStreamBuilder createFile(Path p)</tt></h3>
<p>Make a <tt>FSDataOutputStreamBuilder</tt> to specify the parameters to create a file.</p>
<p>The behavior of the returned stream is covered in <a href="outputstream.html">Output</a>.</p>
<div class="section">
<h4><a name="Implementation_Notes"></a>Implementation Notes</h4>
<p><tt>createFile(p)</tt> only returns a <tt>FSDataOutputStreamBuilder</tt> and does not change the filesystem immediately. When <tt>build()</tt> is invoked on the <tt>FSDataOutputStreamBuilder</tt>, the builder parameters are verified and <a href="#FileSystem.create"><tt>create(Path p)</tt></a> is invoked on the underlying filesystem. <tt>build()</tt> has the same preconditions and postconditions as <a href="#FileSystem.create"><tt>create(Path p)</tt></a>.</p>
<ul>
<li>Similar to <a href="#FileSystem.create"><tt>create(Path p)</tt></a>, files are overwritten by default, unless <tt>builder.overwrite(false)</tt> is specified.</li>
<li>Unlike <a href="#FileSystem.create"><tt>create(Path p)</tt></a>, missing parent directories are not created by default, unless <tt>builder.recursive()</tt> is specified.</li>
</ul></div></div>
<div class="section">
<h3><a name="FSDataOutputStream_append.28Path_p.2C_int_bufferSize.2C_Progressable_progress.29"></a><a name="FileSystem.append"></a> <tt>FSDataOutputStream append(Path p, int bufferSize, Progressable progress)</tt></h3>
<p>Implementations without a compliant call SHOULD throw <tt>UnsupportedOperationException</tt>.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if not exists(FS, p) : raise FileNotFoundException
if not isFile(FS, p) : raise [FileAlreadyExistsException, FileNotFoundException, IOException]
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">FS' = FS
result = FSDataOutputStream
</pre></div></div>
<p>Return: <tt>FSDataOutputStream</tt>, which can update the entry <tt>FS.Files[p]</tt> by appending data to the existing list.</p>
<p>The behavior of the returned stream is covered in <a href="outputstream.html">Output</a>.</p></div></div>
<div class="section">
<h3><a name="FSDataOutputStreamBuilder_appendFile.28Path_p.29"></a><tt>FSDataOutputStreamBuilder appendFile(Path p)</tt></h3>
<p>Make a <tt>FSDataOutputStreamBuilder</tt> to specify the parameters to append to an existing file.</p>
<p>The behavior of the returned stream is covered in <a href="outputstream.html">Output</a>.</p>
<div class="section">
<h4><a name="Implementation_Notes"></a>Implementation Notes</h4>
<p><tt>appendFile(p)</tt> only returns a <tt>FSDataOutputStreamBuilder</tt> and does not change the filesystem immediately. When <tt>build()</tt> is invoked on the <tt>FSDataOutputStreamBuilder</tt>, the builder parameters are verified and <a href="#FileSystem.append"><tt>append()</tt></a> is invoked on the underlying filesystem. <tt>build()</tt> has the same preconditions and postconditions as <a href="#FileSystem.append"><tt>append()</tt></a>.</p></div></div>
<div class="section">
<h3><a name="FSDataInputStream_open.28Path_f.2C_int_bufferSize.29"></a><tt>FSDataInputStream open(Path f, int bufferSize)</tt></h3>
<p>Implementations without a compliant call SHOULD throw <tt>UnsupportedOperationException</tt>.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if not isFile(FS, p) : raise [FileNotFoundException, IOException]
</pre></div></div>
<p>This is a critical precondition. Implementations of some FileSystems (e.g. Object stores) could shortcut one round trip by postponing their HTTP GET operation until the first <tt>read()</tt> on the returned <tt>FSDataInputStream</tt>. However, much client code does depend on the existence check being performed at the time of the <tt>open()</tt> operation. Implementations MUST check for the presence of the file at the time of creation. This does not imply that the file and its data are still present at the time of the following <tt>read()</tt> or any successors.</p></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = FSDataInputStream(0, FS.Files[p])
</pre></div></div>
<p>The result provides access to the byte array defined by <tt>FS.Files[p]</tt>; whether that access is to the contents at the time the <tt>open()</tt> operation was invoked, or whether and how it may pick up changes to that data in later states of FS is an implementation detail.</p>
<p>The result MUST be the same for local and remote callers of the operation.</p></div>
<div class="section">
<h4><a name="HDFS_implementation_notes"></a>HDFS implementation notes</h4>
<ol style="list-style-type: decimal">
<li>
<p>HDFS MAY throw <tt>UnresolvedPathException</tt> when attempting to traverse symbolic links</p>
</li>
<li>
<p>HDFS throws <tt>IOException(&quot;Cannot open filename &quot; + src)</tt> if the path exists in the metadata, but no copies of any of its blocks can be located; <tt>FileNotFoundException</tt> would seem more accurate and useful.</p>
</li>
</ol></div></div>
<div class="section">
<h3><a name="FSDataInputStreamBuilder_openFile.28Path_path.29"></a><tt>FSDataInputStreamBuilder openFile(Path path)</tt></h3>
<p>Creates a <a href="fsdatainputstreambuilder.html"><tt>FSDataInputStreamBuilder</tt></a> to construct an operation to open the file at <tt>path</tt> for reading.</p>
<p>When <tt>build()</tt> is invoked on the returned <tt>FSDataInputStreamBuilder</tt> instance, the builder parameters are verified and <tt>openFileWithOptions(Path, OpenFileParameters)</tt> invoked.</p>
<p>This (protected) operation returns a <tt>CompletableFuture&lt;FSDataInputStream&gt;</tt> which, when its <tt>get()</tt> method is called, either returns an input stream of the contents of opened file, or raises an exception.</p>
<p>The base implementation of the <tt>openFileWithOptions(PathHandle, OpenFileParameters)</tt> ultimately invokes <tt>open(Path, int)</tt>.</p>
<p>Thus the chain <tt>openFile(path).build().get()</tt> has the same preconditions and postconditions as <tt>open(Path p, int bufferSize)</tt></p>
<p>However, there is one difference which implementations are free to take advantage of:</p>
<p>The returned stream MAY implement a lazy open where file non-existence or access permission failures may not surface until the first <tt>read()</tt> of the actual data.</p>
<p>The <tt>openFile()</tt> operation may check the state of the filesystem during its invocation, but as the state of the filesystem may change between this call and the actual <tt>build()</tt> and <tt>get()</tt> operations, the file-specific preconditions (file exists, file is readable, etc) MUST NOT be checked here.</p>
<p>FileSystem implementations which do not implement <tt>open(Path, int)</tt> MAY postpone raising an <tt>UnsupportedOperationException</tt> until either the <tt>FSDataInputStreamBuilder.build()</tt> or the subsequent <tt>get()</tt> call, else they MAY fail fast in the <tt>openFile()</tt> call.</p></div>
<div class="section">
<h3><a name="Implementors_notes"></a>Implementors notes</h3>
<p>The base implementation of <tt>openFileWithOptions()</tt> actually executes the <tt>open(path)</tt> operation synchronously, yet still returns the result or any failures in the <tt>CompletableFuture&lt;&gt;</tt>, so that user code expecting an asynchronous result continues to work.</p>
<p>Any filesystem where the time to open a file may be significant SHOULD execute it asynchronously by submitting the operation in some executor/thread pool. This is particularly recommended for object stores and other filesystems likely to be accessed over long-haul connections.</p>
<p>Arbitrary filesystem-specific options MAY be supported; these MUST be prefixed with either the filesystem scheme, e.g. <tt>hdfs.</tt>, or with the &#x201c;fs.SCHEMA&#x201d; format of normal configuration settings (e.g. <tt>fs.hdfs.</tt>). The latter style allows the same configuration option to be used for both filesystem configuration and file-specific configuration.</p>
<p>It SHOULD be possible to always open a file without specifying any options, so as to present a consistent model to users. However, an implementation MAY opt to require one or more mandatory options to be set.</p>
<p>The returned stream may perform &#x201c;lazy&#x201d; evaluation of file access. This is relevant for object stores where the probes for existence are expensive, and, even with an asynchronous open, may be considered needless.</p></div>
<div class="section">
<h3><a name="FSDataInputStreamBuilder_openFile.28PathHandle.29"></a><tt>FSDataInputStreamBuilder openFile(PathHandle)</tt></h3>
<p>Creates a <tt>FSDataInputStreamBuilder</tt> to build an operation to open a file. Creates a <a href="fsdatainputstreambuilder.html"><tt>FSDataInputStreamBuilder</tt></a> to construct an operation to open the file identified by the given <tt>PathHandle</tt> for reading.</p>
<p>When <tt>build()</tt> is invoked on the returned <tt>FSDataInputStreamBuilder</tt> instance, the builder parameters are verified and <tt>openFileWithOptions(PathHandle, OpenFileParameters)</tt> invoked.</p>
<p>This (protected) operation returns a <tt>CompletableFuture&lt;FSDataInputStream&gt;</tt> which, when its <tt>get()</tt> method is called, either returns an input stream of the contents of opened file, or raises an exception.</p>
<p>The base implementation of the <tt>openFileWithOptions(PathHandle, OpenFileParameters)</tt> method returns a future which invokes <tt>open(Path, int)</tt>.</p>
<p>Thus the chain <tt>openFile(pathhandle).build().get()</tt> has the same preconditions and postconditions as <tt>open(PathHandle, int)</tt></p>
<p>As with <tt>FSDataInputStreamBuilder openFile(Path path)</tt>, the <tt>openFile()</tt> call must not be where path-specific preconditions are checked &#x2014;that is postponed to the <tt>build()</tt> and <tt>get()</tt> calls.</p>
<p>FileSystem implementations which do not implement <tt>open(PathHandle handle, int bufferSize)</tt> MAY postpone raising an <tt>UnsupportedOperationException</tt> until either the <tt>FSDataInputStreamBuilder.build()</tt> or the subsequent <tt>get()</tt> call, else they MAY fail fast in the <tt>openFile()</tt> call.</p>
<p>The base implementation raises this exception in the <tt>build()</tt> operation; other implementations SHOULD copy this.</p></div>
<div class="section">
<h3><a name="PathHandle_getPathHandle.28FileStatus_stat.2C_HandleOpt..._options.29"></a><tt>PathHandle getPathHandle(FileStatus stat, HandleOpt... options)</tt></h3>
<p>Implementations without a compliant call MUST throw <tt>UnsupportedOperationException</tt></p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">let stat = getFileStatus(Path p)
let FS' where:
(FS.Directories', FS.Files', FS.Symlinks')
p' in paths(FS') where:
exists(FS, stat.path) implies exists(FS', p')
</pre></div></div>
<p>The referent of a <tt>FileStatus</tt> instance, at the time it was resolved, is the same referent as the result of <tt>getPathHandle(FileStatus)</tt>. The <tt>PathHandle</tt> may be used in subsequent operations to ensure invariants hold between calls.</p>
<p>The <tt>options</tt> parameter specifies whether a subsequent call e.g., <tt>open(PathHandle)</tt> will succeed if the referent data or location changed. By default, any modification results in an error. The caller MAY specify relaxations that allow operations to succeed even if the referent exists at a different path and/or its data are changed.</p>
<p>An implementation MUST throw <tt>UnsupportedOperationException</tt> if it cannot support the semantics specified by the caller. The default set of options are as follows.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="right"> </th>
<th align="center"> Unmoved </th>
<th align="center"> Moved </th></tr>
</thead><tbody>
<tr class="b">
<td align="right"> Unchanged </td>
<td align="center"> EXACT </td>
<td align="center"> CONTENT </td></tr>
<tr class="a">
<td align="right"> Changed </td>
<td align="center"> PATH </td>
<td align="center"> REFERENCE </td></tr>
</tbody>
</table>
<p>Changes to ownership, extended attributes, and other metadata are not required to match the <tt>PathHandle</tt>. Implementations can extend the set of <tt>HandleOpt</tt> parameters with custom constraints.</p>
<div class="section">
<h5><a name="Examples"></a>Examples</h5>
<p>A client specifies that the <tt>PathHandle</tt> should track the entity across renames using <tt>REFERENCE</tt>. The implementation MUST throw an <tt>UnsupportedOperationException</tt> when creating the <tt>PathHandle</tt> unless failure to resolve the reference implies the entity no longer exists.</p>
<p>A client specifies that the <tt>PathHandle</tt> should resolve only if the entity is unchanged using <tt>PATH</tt>. The implementation MUST throw an <tt>UnsupportedOperationException</tt> when creating the <tt>PathHandle</tt> unless it can distinguish the entity from an identical entity subsequently located at the same path.</p></div></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = PathHandle(p')
</pre></div></div>
</div>
<div class="section">
<h4><a name="Implementation_notes"></a>Implementation notes</h4>
<p>The referent of a <tt>PathHandle</tt> is the namespace when the <tt>FileStatus</tt> instance was created, <i>not</i> its state when the <tt>PathHandle</tt> is created. An implementation MAY reject attempts to create or resolve <tt>PathHandle</tt> instances that are valid, but expensive to service.</p>
<p>Object stores that implement rename by copying objects MUST NOT claim to support <tt>CONTENT</tt> and <tt>REFERENCE</tt> unless the lineage of the object is resolved.</p>
<p>It MUST be possible to serialize a <tt>PathHandle</tt> instance and reinstantiate it in one or more processes, on another machine, and arbitrarily far into the future without changing its semantics. The implementation MUST refuse to resolve instances if it can no longer guarantee its invariants.</p></div>
<div class="section">
<h4><a name="HDFS_implementation_notes"></a>HDFS implementation notes</h4>
<p>HDFS does not support <tt>PathHandle</tt> references to directories or symlinks. Support for <tt>CONTENT</tt> and <tt>REFERENCE</tt> looks up files by INode. INodes are not unique across NameNodes, so federated clusters SHOULD include enough metadata in the <tt>PathHandle</tt> to detect references from other namespaces.</p></div></div>
<div class="section">
<h3><a name="FSDataInputStream_open.28PathHandle_handle.2C_int_bufferSize.29"></a><tt>FSDataInputStream open(PathHandle handle, int bufferSize)</tt></h3>
<p>Implementations without a compliant call MUST throw <tt>UnsupportedOperationException</tt></p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">let fd = getPathHandle(FileStatus stat)
if stat.isdir : raise IOException
let FS' where:
(FS.Directories', FS.Files', FS.Symlinks')
p' in FS.Files' where:
FS.Files'[p'] = fd
if not exists(FS', p') : raise InvalidPathHandleException
</pre></div></div>
<p>The implementation MUST resolve the referent of the <tt>PathHandle</tt> following the constraints specified at its creation by <tt>getPathHandle(FileStatus)</tt>.</p>
<p>Metadata necessary for the <tt>FileSystem</tt> to satisfy this contract MAY be encoded in the <tt>PathHandle</tt>.</p></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = FSDataInputStream(0, FS.Files'[p'])
</pre></div></div>
<p>The stream returned is subject to the constraints of a stream returned by <tt>open(Path)</tt>. Constraints checked on open MAY continue to hold for the stream, but this is not guaranteed.</p>
<p>For example, a <tt>PathHandle</tt> created with <tt>CONTENT</tt> constraints MAY return a stream that ignores updates to the file after it is opened, if it was unmodified when <tt>open(PathHandle)</tt> was resolved.</p></div>
<div class="section">
<h4><a name="Implementation_notes"></a>Implementation notes</h4>
<p>An implementation MAY check invariants either at the server or before returning the stream to the client. For example, an implementation may open the file, then verify the invariants in the <tt>PathHandle</tt> using <tt>getFileStatus(Path)</tt> to implement <tt>CONTENT</tt>. This could yield false positives and it requires additional RPC traffic.</p></div></div>
<div class="section">
<h3><a name="boolean_delete.28Path_p.2C_boolean_recursive.29"></a><tt>boolean delete(Path p, boolean recursive)</tt></h3>
<p>Delete a path, be it a file, symbolic link or directory. The <tt>recursive</tt> flag indicates whether a recursive delete should take place &#x2014;if unset then a non-empty directory cannot be deleted.</p>
<p>Except in the special case of the root directory, if this API call completed successfully then there is nothing at the end of the path. That is: the outcome is desired. The return flag simply tells the caller whether or not any change was made to the state of the filesystem.</p>
<p><i>Note</i>: many uses of this method surround it with checks for the return value being false, raising exception if so. For example</p>
<div>
<div>
<pre class="source">if (!fs.delete(path, true)) throw new IOException(&quot;Could not delete &quot; + path);
</pre></div></div>
<p>This pattern is not needed. Code SHOULD just call <tt>delete(path, recursive)</tt> and assume the destination is no longer present &#x2014;except in the special case of root directories, which will always remain (see below for special coverage of root directories).</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>A directory with children and <tt>recursive == False</tt> cannot be deleted</p>
<div>
<div>
<pre class="source">if isDir(FS, p) and not recursive and (children(FS, p) != {}) : raise IOException
</pre></div></div>
<p>(HDFS raises <tt>PathIsNotEmptyDirectoryException</tt> here.)</p></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div class="section">
<h5><a name="Nonexistent_path"></a>Nonexistent path</h5>
<p>If the file does not exist the filesystem state does not change</p>
<div>
<div>
<pre class="source">if not exists(FS, p):
FS' = FS
result = False
</pre></div></div>
<p>The result SHOULD be <tt>False</tt>, indicating that no file was deleted.</p></div>
<div class="section">
<h5><a name="Simple_File"></a>Simple File</h5>
<p>A path referring to a file is removed, return value: <tt>True</tt></p>
<div>
<div>
<pre class="source">if isFile(FS, p) :
FS' = (FS.Directories, FS.Files - [p], FS.Symlinks)
result = True
</pre></div></div>
</div>
<div class="section">
<h5><a name="Empty_root_directory.2C_recursive_.3D.3D_False"></a>Empty root directory, <tt>recursive == False</tt></h5>
<p>Deleting an empty root does not change the filesystem state and may return true or false.</p>
<div>
<div>
<pre class="source">if isRoot(p) and children(FS, p) == {} :
FS' = FS
result = (undetermined)
</pre></div></div>
<p>There is no consistent return code from an attempt to delete the root directory.</p>
<p>Implementations SHOULD return true; this avoids code which checks for a false return value from overreacting.</p>
<p><i>Object Stores</i>: see <a href="#object-stores-rm-root">Object Stores: root directory deletion</a>.</p></div>
<div class="section">
<h5><a name="Empty_.28non-root.29_directory_recursive_.3D.3D_False"></a>Empty (non-root) directory <tt>recursive == False</tt></h5>
<p>Deleting an empty directory that is not root will remove the path from the FS and return true.</p>
<div>
<div>
<pre class="source">if isDir(FS, p) and not isRoot(p) and children(FS, p) == {} :
FS' = (FS.Directories - [p], FS.Files, FS.Symlinks)
result = True
</pre></div></div>
</div>
<div class="section">
<h5><a name="Recursive_delete_of_non-empty_root_directory"></a>Recursive delete of non-empty root directory</h5>
<p>Deleting a root path with children and <tt>recursive==True</tt> can generally have three outcomes:</p>
<ol style="list-style-type: decimal">
<li>
<p>The POSIX model assumes that if the user has the correct permissions to delete everything, they are free to do so (resulting in an empty filesystem).</p>
<div>
<div>
<pre class="source">if isDir(FS, p) and isRoot(p) and recursive :
FS' = ({[&quot;/&quot;]}, {}, {})
result = True
</pre></div></div>
</li>
<li>
<p>HDFS never permits the deletion of the root of a filesystem; the filesystem must be taken offline and reformatted if an empty filesystem is desired.</p>
<div>
<div>
<pre class="source">if isDir(FS, p) and isRoot(p) and recursive :
FS' = FS
result = False
</pre></div></div>
</li>
<li>
<p>Object Stores: see <a href="#object-stores-rm-root">Object Stores: root directory deletion</a>.</p>
</li>
</ol>
<p>This specification does not recommend any specific action. Do note, however, that the POSIX model assumes that there is a permissions model such that normal users do not have the permission to delete that root directory; it is an action which only system administrators should be able to perform.</p>
<p>Any filesystem client which interacts with a remote filesystem which lacks such a security model, MAY reject calls to <tt>delete(&quot;/&quot;, true)</tt> on the basis that it makes it too easy to lose data.</p></div></div></div>
<div class="section">
<h3><a name="Object_Stores:_root_directory_deletion"></a><a name="object-stores-rm-root"></a> Object Stores: root directory deletion</h3>
<p>Some of the object store based filesystem implementations always return false when deleting the root, leaving the state of the store unchanged.</p>
<div>
<div>
<pre class="source">if isRoot(p) :
FS' = FS
result = False
</pre></div></div>
<p>This is irrespective of the recursive flag status or the state of the directory.</p>
<p>This is a simplification which avoids the inevitably non-atomic scan and delete of the contents of the store. It also avoids any confusion about whether the operation actually deletes that specific store/container itself, and adverse consequences of the simpler permissions models of stores.</p>
<div class="section">
<div class="section">
<h5><a name="Recursive_delete_of_non-root_directory"></a>Recursive delete of non-root directory</h5>
<p>Deleting a non-root path with children <tt>recursive==true</tt> removes the path and all descendants</p>
<div>
<div>
<pre class="source">if isDir(FS, p) and not isRoot(p) and recursive :
FS' where:
not isDir(FS', p)
and forall d in descendants(FS, p):
not isDir(FS', d)
not isFile(FS', d)
not isSymlink(FS', d)
result = True
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="Atomicity"></a>Atomicity</h4>
<ul>
<li>
<p>Deleting a file MUST be an atomic action.</p>
</li>
<li>
<p>Deleting an empty directory MUST be an atomic action.</p>
</li>
<li>
<p>A recursive delete of a directory tree MUST be atomic.</p>
</li>
</ul></div>
<div class="section">
<h4><a name="Implementation_Notes"></a>Implementation Notes</h4>
<ul>
<li>Object Stores and other non-traditional filesystems onto which a directory tree is emulated, tend to implement <tt>delete()</tt> as recursive listing and entry-by-entry delete operation. This can break the expectations of client applications for O(1) atomic directory deletion, preventing the stores&#x2019; use as drop-in replacements for HDFS.</li>
</ul></div></div>
<div class="section">
<h3><a name="boolean_rename.28Path_src.2C_Path_d.29"></a><tt>boolean rename(Path src, Path d)</tt></h3>
<p>In terms of its specification, <tt>rename()</tt> is one of the most complex operations within a filesystem.</p>
<p>In terms of its implementation, it is the one with the most ambiguity regarding when to return false versus raising an exception.</p>
<p>Rename includes the calculation of the destination path. If the destination exists and is a directory, the final destination of the rename becomes the destination + the filename of the source path.</p>
<div>
<div>
<pre class="source">let dest = if (isDir(FS, d) and d != src) :
d + [filename(src)]
else :
d
</pre></div></div>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<p>All checks on the destination path MUST take place after the final <tt>dest</tt> path has been calculated.</p>
<p>Source <tt>src</tt> must exist:</p>
<div>
<div>
<pre class="source">exists(FS, src) else raise FileNotFoundException
</pre></div></div>
<p><tt>dest</tt> cannot be a descendant of <tt>src</tt>:</p>
<div>
<div>
<pre class="source">if isDescendant(FS, src, dest) : raise IOException
</pre></div></div>
<p>This implicitly covers the special case of <tt>isRoot(FS, src)</tt>.</p>
<p><tt>dest</tt> must be root, or have a parent that exists:</p>
<div>
<div>
<pre class="source">isRoot(FS, dest) or exists(FS, parent(dest)) else raise IOException
</pre></div></div>
<p>The parent path of a destination must not be a file:</p>
<div>
<div>
<pre class="source">if isFile(FS, parent(dest)) : raise IOException
</pre></div></div>
<p>This implicitly covers all the ancestors of the parent.</p>
<p>There must not be an existing file at the end of the destination path:</p>
<div>
<div>
<pre class="source">if isFile(FS, dest) : raise FileAlreadyExistsException, IOException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div class="section">
<h5><a name="Renaming_a_directory_onto_itself"></a>Renaming a directory onto itself</h5>
<p>Renaming a directory onto itself is a no-op; the return value is not specified.</p>
<p>In POSIX the result is <tt>False</tt>; in HDFS the result is <tt>True</tt>.</p>
<div>
<div>
<pre class="source">if isDir(FS, src) and src == dest :
FS' = FS
result = (undefined)
</pre></div></div>
</div>
<div class="section">
<h5><a name="Renaming_a_file_to_self"></a>Renaming a file to self</h5>
<p>Renaming a file to itself is a no-op; the result is <tt>True</tt>.</p>
<div>
<div>
<pre class="source"> if isFile(FS, src) and src == dest :
FS' = FS
result = True
</pre></div></div>
</div>
<div class="section">
<h5><a name="Renaming_a_file_onto_a_nonexistent_path"></a>Renaming a file onto a nonexistent path</h5>
<p>Renaming a file where the destination is a directory moves the file as a child of the destination directory, retaining the filename element of the source path.</p>
<div>
<div>
<pre class="source">if isFile(FS, src) and src != dest:
FS' where:
not exists(FS', src)
and exists(FS', dest)
and data(FS', dest) == data(FS, src)
result = True
</pre></div></div>
</div>
<div class="section">
<h5><a name="Renaming_a_directory_onto_a_directory"></a>Renaming a directory onto a directory</h5>
<p>If <tt>src</tt> is a directory then all its children will then exist under <tt>dest</tt>, while the path <tt>src</tt> and its descendants will no longer exist. The names of the paths under <tt>dest</tt> will match those under <tt>src</tt>, as will the contents:</p>
<div>
<div>
<pre class="source">if isDir(FS, src) and isDir(FS, dest) and src != dest :
FS' where:
not exists(FS', src)
and dest in FS'.Directories
and forall c in descendants(FS, src) :
not exists(FS', c)
and forall c in descendants(FS, src) where isDir(FS, c):
isDir(FS', dest + childElements(src, c))
and forall c in descendants(FS, src) where not isDir(FS, c):
data(FS', dest + childElements(src, c)) == data(FS, c)
result = True
</pre></div></div>
</div>
<div class="section">
<h5><a name="Renaming_into_a_path_where_the_parent_path_does_not_exist"></a>Renaming into a path where the parent path does not exist</h5>
<div>
<div>
<pre class="source"> not exists(FS, parent(dest))
</pre></div></div>
<p>There is no consistent behavior here.</p>
<p><i>HDFS</i></p>
<p>The outcome is no change to FileSystem state, with a return value of false.</p>
<div>
<div>
<pre class="source">FS' = FS; result = False
</pre></div></div>
<p><i>Local Filesystem</i></p>
<p>The outcome is as a normal rename, with the additional (implicit) feature that the parent directories of the destination also exist.</p>
<div>
<div>
<pre class="source">exists(FS', parent(dest))
</pre></div></div>
<p><i>S3A FileSystem</i></p>
<p>The outcome is as a normal rename, with the additional (implicit) feature that the parent directories of the destination then exist: <tt>exists(FS', parent(dest))</tt></p>
<p>There is a check for and rejection if the <tt>parent(dest)</tt> is a file, but no checks for any other ancestors.</p>
<p><i>Other Filesystems (including Swift)</i></p>
<p>Other filesystems strictly reject the operation, raising a <tt>FileNotFoundException</tt></p></div>
<div class="section">
<h5><a name="Concurrency_requirements"></a>Concurrency requirements</h5>
<ul>
<li>
<p>The core operation of <tt>rename()</tt>&#x2014;moving one entry in the filesystem to another&#x2014;MUST be atomic. Some applications rely on this as a way to coordinate access to data.</p>
</li>
<li>
<p>Some FileSystem implementations perform checks on the destination FileSystem before and after the rename. One example of this is <tt>ChecksumFileSystem</tt>, which provides checksummed access to local data. The entire sequence MAY NOT be atomic.</p>
</li>
</ul></div>
<div class="section">
<h5><a name="Implementation_Notes"></a>Implementation Notes</h5>
<p><b>Files open for reading, writing or appending</b></p>
<p>The behavior of <tt>rename()</tt> on an open file is unspecified: whether it is allowed, what happens to later attempts to read from or write to the open stream</p>
<p><b>Renaming a directory onto itself</b></p>
<p>The return code of renaming a directory onto itself is unspecified.</p>
<p><b>Destination exists and is a file</b></p>
<p>Renaming a file atop an existing file is specified as failing, raising an exception.</p>
<ul>
<li>
<p>Local FileSystem : the rename succeeds; the destination file is replaced by the source file.</p>
</li>
<li>
<p>HDFS : The rename fails, no exception is raised. Instead the method call simply returns false.</p>
</li>
</ul>
<p><b>Missing source file</b></p>
<p>If the source file <tt>src</tt> does not exist, <tt>FileNotFoundException</tt> should be raised.</p>
<p>HDFS fails without raising an exception; <tt>rename()</tt> merely returns false.</p>
<div>
<div>
<pre class="source">FS' = FS
result = false
</pre></div></div>
<p>The behavior of HDFS here should not be considered a feature to replicate. <tt>FileContext</tt> explicitly changed the behavior to raise an exception, and the retrofitting of that action to the <tt>DFSFileSystem</tt> implementation is an ongoing matter for debate.</p></div></div></div>
<div class="section">
<h3><a name="void_concat.28Path_p.2C_Path_sources.5B.5D.29"></a><tt>void concat(Path p, Path sources[])</tt></h3>
<p>Joins multiple blocks together to create a single file. This is a little-used operation currently implemented only by HDFS.</p>
<p>Implementations without a compliant call SHOULD throw <tt>UnsupportedOperationException</tt>.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if not exists(FS, p) : raise FileNotFoundException
if sources==[] : raise IllegalArgumentException
</pre></div></div>
<p>All sources MUST be in the same directory:</p>
<div>
<div>
<pre class="source">for s in sources: if parent(s) != parent(p) : raise IllegalArgumentException
</pre></div></div>
<p>All block sizes must match that of the target:</p>
<div>
<div>
<pre class="source">for s in sources: getBlockSize(FS, s) == getBlockSize(FS, p)
</pre></div></div>
<p>No duplicate paths:</p>
<div>
<div>
<pre class="source">not (exists p1, p2 in (sources + [p]) where p1 == p2)
</pre></div></div>
<p>HDFS: All source files except the final one MUST be a complete block:</p>
<div>
<div>
<pre class="source">for s in (sources[0:length(sources)-1] + [p]):
(length(FS, s) mod getBlockSize(FS, p)) == 0
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">FS' where:
(data(FS', p) = data(FS, p) + data(FS, sources[0]) + ... + data(FS, sources[length(sources)-1]))
and for s in sources: not exists(FS', s)
</pre></div></div>
<p>HDFS&#x2019;s restrictions may be an implementation detail of how it implements <tt>concat</tt> by changing the inode references to join them together in a sequence. As no other filesystem in the Hadoop core codebase implements this method, there is no way to distinguish implementation detail from specification.</p></div></div>
<div class="section">
<h3><a name="boolean_truncate.28Path_p.2C_long_newLength.29"></a><tt>boolean truncate(Path p, long newLength)</tt></h3>
<p>Truncate file <tt>p</tt> to the specified <tt>newLength</tt>.</p>
<p>Implementations without a compliant call SHOULD throw <tt>UnsupportedOperationException</tt>.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">if not exists(FS, p) : raise FileNotFoundException
if isDir(FS, p) : raise [FileNotFoundException, IOException]
if newLength &lt; 0 || newLength &gt; len(FS.Files[p]) : raise HadoopIllegalArgumentException
</pre></div></div>
<p>HDFS: The source file MUST be closed. Truncate cannot be performed on a file, which is open for writing or appending.</p></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">FS' where:
len(FS'.Files[p]) = newLength
</pre></div></div>
<p>Return: <tt>true</tt>, if truncation is finished and the file can be immediately opened for appending, or <tt>false</tt> otherwise.</p>
<p>HDFS: HDFS returns <tt>false</tt> to indicate that a background process of adjusting the length of the last block has been started, and clients should wait for it to complete before they can proceed with further file updates.</p></div>
<div class="section">
<h4><a name="Concurrency"></a>Concurrency</h4>
<p>If an input stream is open when truncate() occurs, the outcome of read operations related to the part of the file being truncated is undefined.</p></div></div></div>
<div class="section">
<h2><a name="interface_RemoteIterator"></a><a name="RemoteIterator"></a> interface <tt>RemoteIterator</tt></h2>
<p>The <tt>RemoteIterator</tt> interface is used as a remote-access equivalent to <tt>java.util.Iterator</tt>, allowing the caller to iterate through a finite sequence of remote data elements.</p>
<p>The core differences are</p>
<ol style="list-style-type: decimal">
<li><tt>Iterator</tt>&#x2019;s optional <tt>void remove()</tt> method is not supported.</li>
<li>For those methods which are supported, <tt>IOException</tt> exceptions may be raised.</li>
</ol>
<div>
<div>
<pre class="source">public interface RemoteIterator&lt;E&gt; {
boolean hasNext() throws IOException;
E next() throws IOException;
}
</pre></div></div>
<p>The basic view of the interface is that <tt>hasNext()</tt> being true implies that <tt>next()</tt> will successfully return the next entry in the list:</p>
<div>
<div>
<pre class="source">while hasNext(): next()
</pre></div></div>
<p>Equally, a successful call to <tt>next()</tt> implies that had <tt>hasNext()</tt> been invoked prior to the call to <tt>next()</tt>, it would have been true.</p>
<div>
<div>
<pre class="source">boolean elementAvailable = hasNext();
try {
next();
assert elementAvailable;
} catch (NoSuchElementException e) {
assert !elementAvailable
}
</pre></div></div>
<p>The <tt>next()</tt> operator MUST iterate through the list of available results, <i>even if no calls to <tt>hasNext()</tt> are made</i>.</p>
<p>That is, it is possible to enumerate the results through a loop which only terminates when a <tt>NoSuchElementException</tt> exception is raised.</p>
<div>
<div>
<pre class="source">try {
while (true) {
process(iterator.next());
}
} catch (NoSuchElementException ignored) {
// the end of the list has been reached
}
</pre></div></div>
<p>The output of the iteration is equivalent to the loop</p>
<div>
<div>
<pre class="source">while (iterator.hasNext()) {
process(iterator.next());
}
</pre></div></div>
<p>As raising exceptions is an expensive operation in JVMs, the <tt>while(hasNext())</tt> loop option is more efficient. (see also <a href="#RemoteIteratorConcurrency">Concurrency and the Remote Iterator</a> for a discussion on this topic).</p>
<p>Implementors of the interface MUST support both forms of iterations; authors of tests SHOULD verify that both iteration mechanisms work.</p>
<p>The iteration is required to return a finite sequence; both forms of loop MUST ultimately terminate. All implementations of the interface in the Hadoop codebase meet this requirement; all consumers assume that it holds.</p>
<div class="section">
<h3><a name="boolean_hasNext.28.29"></a><tt>boolean hasNext()</tt></h3>
<p>Returns true if-and-only-if a subsequent single call to <tt>next()</tt> would return an element rather than raise an exception.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4></div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = True ==&gt; next() will succeed.
result = False ==&gt; next() will raise an exception
</pre></div></div>
<p>Multiple calls to <tt>hasNext()</tt>, without any intervening <tt>next()</tt> calls, MUST return the same value.</p>
<div>
<div>
<pre class="source">boolean has1 = iterator.hasNext();
boolean has2 = iterator.hasNext();
assert has1 == has2;
</pre></div></div>
</div></div>
<div class="section">
<h3><a name="E_next.28.29"></a><tt>E next()</tt></h3>
<p>Return the next element in the iteration.</p>
<div class="section">
<h4><a name="Preconditions"></a>Preconditions</h4>
<div>
<div>
<pre class="source">hasNext() else raise java.util.NoSuchElementException
</pre></div></div>
</div>
<div class="section">
<h4><a name="Postconditions"></a>Postconditions</h4>
<div>
<div>
<pre class="source">result = the next element in the iteration
</pre></div></div>
<p>Repeated calls to <tt>next()</tt> return subsequent elements in the sequence, until the entire sequence has been returned.</p></div></div>
<div class="section">
<h3><a name="Concurrency_and_the_Remote_Iterator"></a><a name="RemoteIteratorConcurrency"></a>Concurrency and the Remote Iterator</h3>
<p>The primary use of <tt>RemoteIterator</tt> in the filesystem APIs is to list files on (possibly remote) filesystems. These filesystems are invariably accessed concurrently; the state of the filesystem MAY change between a <tt>hasNext()</tt> probe and the invocation of the <tt>next()</tt> call.</p>
<p>During iteration through a <tt>RemoteIterator</tt>, if the directory is deleted on the remote filesystem, then the <tt>hasNext()</tt> or <tt>next()</tt> call may throw <tt>FileNotFoundException</tt>.</p>
<p>Accordingly, a robust iteration through a <tt>RemoteIterator</tt> would catch and discard <tt>NoSuchElementException</tt> exceptions raised during the process, which could be done through the <tt>while(true)</tt> iteration example above, or through a <tt>hasNext()/next()</tt> sequence with an outer <tt>try/catch</tt> clause to catch a <tt>NoSuchElementException</tt> alongside other exceptions which may be raised during a failure (for example, a <tt>FileNotFoundException</tt>)</p>
<div>
<div>
<pre class="source">try {
while (iterator.hasNext()) {
process(iterator.next());
}
} catch (NoSuchElementException ignored) {
// the end of the list has been reached
}
</pre></div></div>
<p>It is notable that this is <i>not</i> done in the Hadoop codebase. This does not imply that robust loops are not recommended &#x2014;more that the concurrency problems were not considered during the implementation of these loops.</p></div></div>
<div class="section">
<h2><a name="interface_StreamCapabilities"></a><a name="StreamCapability"></a> interface <tt>StreamCapabilities</tt></h2>
<p>The <tt>StreamCapabilities</tt> provides a way to programmatically query the capabilities that <tt>OutputStream</tt>, <tt>InputStream</tt>, or other FileSystem class supports.</p>
<div>
<div>
<pre class="source">public interface StreamCapabilities {
boolean hasCapability(String capability);
}
</pre></div></div>
<div class="section">
<h3><a name="boolean_hasCapability.28capability.29"></a><tt>boolean hasCapability(capability)</tt></h3>
<p>Return true iff the <tt>OutputStream</tt>, <tt>InputStream</tt>, or other FileSystem class has the desired capability.</p>
<p>The caller can query the capabilities of a stream using a string value. Here is a table of possible string values:</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th>String </th>
<th> Constant </th>
<th> Implements </th>
<th> Description</th></tr>
</thead><tbody>
<tr class="b">
<td>hflush </td>
<td> HFLUSH </td>
<td> Syncable </td>
<td> Flush out the data in client&#x2019;s user buffer. After the return of this call, new readers will see the data.</td></tr>
<tr class="a">
<td>hsync </td>
<td> HSYNC </td>
<td> Syncable </td>
<td> Flush out the data in client&#x2019;s user buffer all the way to the disk device (but the disk may have it in its cache). Similar to POSIX fsync.</td></tr>
<tr class="b">
<td>in:readahead </td>
<td> READAHEAD </td>
<td> CanSetReadahead </td>
<td> Set the readahead on the input stream.</td></tr>
<tr class="a">
<td>dropbehind </td>
<td> DROPBEHIND </td>
<td> CanSetDropBehind </td>
<td> Drop the cache.</td></tr>
<tr class="b">
<td>in:unbuffer </td>
<td> UNBUFFER </td>
<td> CanUnbuffer </td>
<td> Reduce the buffering on the input stream.</td></tr>
</tbody>
</table></div></div>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2021
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>