blob: 98e4560bf6abbec80181d20e4dad2e3fa8bea14e [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2021-06-15
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop Azure support &#x2013; Testing the Azure WASB client</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20210615" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xleft">
<a href="http://www.apache.org/" class="externalLink">Apache</a>
&gt;
<a href="http://hadoop.apache.org/" class="externalLink">Hadoop</a>
&gt;
<a href="index.html">Apache Hadoop Azure support</a>
&gt;
Testing the Azure WASB client
</div>
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
&nbsp;| Last Published: 2021-06-15
&nbsp;| Version: 3.3.1
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../hadoop-openstack/index.html">OpenStack Swift</a>
</li>
<li class="none">
<a href="../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>Testing the Azure WASB client</h1>
<ul>
<li><a href="#Policy_for_submitting_patches_which_affect_the_hadoop-azure_module.">Policy for submitting patches which affect the hadoop-azure module.</a>
<ul>
<li><a href="#The_submitter_of_any_patch_is_required_to_run_all_the_integration_tests_and_declare_which_Azure_region_they_used.">The submitter of any patch is required to run all the integration tests and declare which Azure region they used.</a></li>
<li><a href="#What_if_there.E2.80.99s_an_intermittent_failure_of_a_test.3F">What if there&#x2019;s an intermittent failure of a test?</a></li>
<li><a href="#What_if_the_tests_are_timing_out_or_failing_over_my_network_connection.3F">What if the tests are timing out or failing over my network connection?</a></li></ul></li>
<li><a href="#Setting_up_the_tests">Setting up the tests</a></li>
<li><a href="#Testing_the_hadoop-azure_Module">Testing the hadoop-azure Module</a></li>
<li><a href="#Running_the_Tests">Running the Tests</a></li>
<li><a href="#Viewing_the_results">Viewing the results</a></li>
<li><a href="#Scale_Tests">Scale Tests</a>
<ul>
<li><a href="#Enabling_the_Scale_Tests">Enabling the Scale Tests</a></li>
<li><a href="#Scale_test_tuning_options">Scale test tuning options</a>
<ul>
<li><a href="#Scale_test_configuration_options">Scale test configuration options</a>
<ul>
<li><a href="#fs.azure.scale.test.huge.filesize:_size_in_MB_for_.E2.80.9CHuge_file_tests.E2.80.9D.">fs.azure.scale.test.huge.filesize: size in MB for &#x201c;Huge file tests&#x201d;.</a></li></ul></li></ul></li></ul></li>
<li><a href="#Using_the_emulator">Using the emulator</a></li>
<li><a href="#Debugging_Test_failures">Debugging Test failures</a></li>
<li><a href="#Adding_new_tests">Adding new tests</a>
<ul>
<li><a href="#Requirements_of_new_Tests">Requirements of new Tests</a>
<ul>
<li><a href="#Subclasses_Existing_Shared_Base_Blasses">Subclasses Existing Shared Base Classes</a>
<ul>
<li><a href="#org.apache.hadoop.fs.azure.AbstractWasbTestWithTimeout">org.apache.hadoop.fs.azure.AbstractWasbTestWithTimeout</a></li>
<li><a href="#org.apache.hadoop.fs.azure.AbstractWasbTestBase">org.apache.hadoop.fs.azure.AbstractWasbTestBase</a></li>
<li><a href="#org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest">org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest</a></li></ul></li>
<li><a href="#Secure">Secure</a></li>
<li><a href="#Efficient_of_Time_and_Money">Efficient of Time and Money</a></li></ul></li>
<li><a href="#Works_Over_Long-haul_Links">Works Over Long-haul Links</a></li>
<li><a href="#Provides_Diagnostics_and_timing_information">Provides Diagnostics and timing information</a>
<ul>
<li><a href="#Fails_Meaningfully">Fails Meaningfully</a></li></ul></li>
<li><a href="#Cleans_Up_Afterwards">Cleans Up Afterwards</a></li>
<li><a href="#Works_Reliably">Works Reliably</a></li></ul></li>
<li><a href="#Tips">Tips</a>
<ul>
<li><a href="#How_to_keep_your_credentials_really_safe">How to keep your credentials really safe</a></li>
<li><a href="#Cleaning_up_Containers">Cleaning up Containers</a></li></ul></li>
<li><a href="#Testing_the_Azure_ABFS_Client">Testing the Azure ABFS Client</a></li>
<li><a href="#Generating_test_run_configurations_and_test_triggers_over_various_config_combinations">Generating test run configurations and test triggers over various config combinations</a></li></ul>
<p>This module includes both unit tests, which can run in isolation without connecting to the Azure Storage service, and integration tests, which require a working connection to interact with a container. Unit test suites follow the naming convention <tt>Test*.java</tt>. Integration tests follow the naming convention <tt>ITest*.java</tt>.</p>
<div class="section">
<h2><a name="Policy_for_submitting_patches_which_affect_the_hadoop-azure_module."></a>Policy for submitting patches which affect the <tt>hadoop-azure</tt> module.</h2>
<p>The Apache Jenkins infrastructure does not run any cloud integration tests, due to the need to keep credentials secure.</p>
<div class="section">
<h3><a name="The_submitter_of_any_patch_is_required_to_run_all_the_integration_tests_and_declare_which_Azure_region_they_used."></a>The submitter of any patch is required to run all the integration tests and declare which Azure region they used.</h3>
<p>This is important: <b>patches which do not include this declaration will be ignored</b></p>
<p>This policy has proven to be the only mechanism to guarantee full regression testing of code changes. Why the declaration of region? Two reasons:</p>
<ol style="list-style-type: decimal">
<li>It helps us identify regressions which only surface against specific endpoints.</li>
<li>It forces the submitters to be more honest about their testing. It&#x2019;s easy to lie, &#x201c;yes, I tested this&#x201d;. To say &#x201c;yes, I tested this against Azure US-west&#x201d; is a more specific lie and harder to make. And, if you get caught out: you lose all credibility with the project.</li>
</ol>
<p>You don&#x2019;t need to test from a VM within the Azure infrastructure; all you need are credentials.</p>
<p>It&#x2019;s neither hard nor expensive to run the tests; if you can&#x2019;t, there&#x2019;s no guarantee your patch works. The reviewers have enough to do, and don&#x2019;t have the time to do these tests, especially as every failure will simply make for a slow iterative development.</p>
<p>Please: run the tests. And if you don&#x2019;t, we are sorry for declining your patch, but we have to.</p></div>
<div class="section">
<h3><a name="What_if_there.E2.80.99s_an_intermittent_failure_of_a_test.3F"></a>What if there&#x2019;s an intermittent failure of a test?</h3>
<p>Some of the tests do fail intermittently, especially in parallel runs. If this happens, try to run the test on its own to see if the test succeeds.</p>
<p>If it still fails, include this fact in your declaration. We know some tests are intermittently unreliable.</p></div>
<div class="section">
<h3><a name="What_if_the_tests_are_timing_out_or_failing_over_my_network_connection.3F"></a>What if the tests are timing out or failing over my network connection?</h3>
<p>The tests are designed to be configurable for different timeouts. If you are seeing problems and this configuration isn&#x2019;t working, that&#x2019;s a sign that the configuration mechanism isn&#x2019;t complete. If it&#x2019;s happening in the production code, that could be a sign of a problem which may surface over long-haul connections. Please help us identify and fix these problems &#x2014; especially as you are the one best placed to verify the fixes work.</p></div></div>
<div class="section">
<h2><a name="Setting_up_the_tests"></a>Setting up the tests</h2></div>
<div class="section">
<h2><a name="Testing_the_hadoop-azure_Module"></a>Testing the <tt>hadoop-azure</tt> Module</h2>
<p>The <tt>hadoop-azure</tt> module includes a full suite of unit tests. Many of the tests will run without additional configuration by running <tt>mvn test</tt>. This includes tests against mocked storage, which is an in-memory emulation of Azure Storage.</p>
<p>The integration tests are designed to test directly against an Azure storage service, and require an account and credentials in order to run.</p>
<p>This is done by creating the file <tt>src/test/resources/azure-auth-keys.xml</tt> and setting the name of the storage account and its access key.</p>
<p>For example:</p>
<div>
<div>
<pre class="source">&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;?xml-stylesheet type=&quot;text/xsl&quot; href=&quot;configuration.xsl&quot;?&gt;
&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.wasb.account.name&lt;/name&gt;
&lt;value&gt;{ACCOUNTNAME}.blob.core.windows.net&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net&lt;/name&gt;
&lt;value&gt;{ACCOUNT ACCESS KEY}&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
<p>To run contract tests, set the WASB file system URI in <tt>src/test/resources/azure-auth-keys.xml</tt> and the account access key. For example:</p>
<div>
<div>
<pre class="source">&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;?xml-stylesheet type=&quot;text/xsl&quot; href=&quot;configuration.xsl&quot;?&gt;
&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;fs.contract.test.fs.wasb&lt;/name&gt;
&lt;value&gt;wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net&lt;/value&gt;
&lt;description&gt;The name of the azure file system for testing.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net&lt;/name&gt;
&lt;value&gt;{ACCOUNT ACCESS KEY}&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
<p>Overall, to run all the tests using <tt>mvn test</tt>, a sample <tt>azure-auth-keys.xml</tt> looks like the following:</p>
<div>
<div>
<pre class="source">&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;?xml-stylesheet type=&quot;text/xsl&quot; href=&quot;configuration.xsl&quot;?&gt;
&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.wasb.account.name&lt;/name&gt;
&lt;value&gt;{ACCOUNTNAME}.blob.core.windows.net&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net&lt;/name&gt;
&lt;value&gt;{ACCOUNT ACCESS KEY}&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.contract.test.fs.wasb&lt;/name&gt;
&lt;value&gt;wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
<p>DO NOT ADD <tt>azure-auth-keys.xml</tt> TO REVISION CONTROL. The keys to your Azure Storage account are a secret and must not be shared.</p></div>
<div class="section">
<h2><a name="Running_the_Tests"></a>Running the Tests</h2>
<p>After completing the configuration, execute the test run through Maven.</p>
<div>
<div>
<pre class="source">mvn -T 1C clean verify
</pre></div></div>
<p>It&#x2019;s also possible to execute multiple test suites in parallel by passing the <tt>parallel-tests=wasb|abfs|both</tt> property on the command line. The tests spend most of their time blocked on network I/O, so running in parallel tends to complete full test runs faster.</p>
<div>
<div>
<pre class="source">mvn -T 1C -Dparallel-tests=both clean verify
mvn -T 1C -Dparallel-tests=wasb clean verify
mvn -T 1C -Dparallel-tests=abfs clean verify
</pre></div></div>
<p><tt>-Dparallel-tests=wasb</tt> runs the WASB related integration tests from the azure directory<br /> <tt>-Dparallel-tests=abfs</tt> runs the ABFS related integration tests from the azurebfs directory<br /> <tt>-Dparallel-tests=both</tt> runs all the integration tests from both the azure and azurebfs directories<br /></p>
<p>Some tests must run with exclusive access to the storage container, so even with the <tt>parallel-tests</tt> property, several test suites will run in serial in a separate Maven execution step after the parallel tests.</p>
<p>By default, <tt>parallel-tests</tt> runs 4 test suites concurrently. This can be tuned by passing the <tt>testsThreadCount</tt> property.</p>
<div>
<div>
<pre class="source">mvn -T 1C -Dparallel-tests=both -DtestsThreadCount=8 clean verify
</pre></div></div>
<p>To run just unit tests, which do not require Azure connectivity or credentials, use any of the above invocations, but switch the goal to <tt>test</tt> instead of <tt>verify</tt>.</p>
<div>
<div>
<pre class="source">mvn -T 1C clean test
mvn -T 1C -Dparallel-tests clean test
mvn -T 1C -Dparallel-tests -DtestsThreadCount=8 clean test
</pre></div></div>
<p>To run only a specific named subset of tests, pass the <tt>test</tt> property for unit tests or the <tt>it.test</tt> property for integration tests.</p>
<div>
<div>
<pre class="source">mvn -T 1C clean test -Dtest=TestRollingWindowAverage
mvn -T 1C clean verify -Dscale -Dit.test=ITestFileSystemOperationExceptionMessage -Dtest=none
mvn -T 1C clean verify -Dtest=none -Dit.test=ITest*
</pre></div></div>
<p>Note</p>
<ol style="list-style-type: decimal">
<li>
<p>When running a specific subset of tests, the patterns passed in <tt>test</tt> and <tt>it.test</tt> override the configuration of which tests need to run in isolation in a separate serial phase (mentioned above). This can cause unpredictable results, so the recommendation is to avoid passing <tt>parallel-tests</tt> in combination with <tt>test</tt> or <tt>it.test</tt>. If you know that you are specifying only tests that can run safely in parallel, then it will work. For wide patterns, like <tt>ITest*</tt> shown above, it may cause unpredictable test failures.</p>
</li>
<li>
<p>The command line shell may try to expand the &#x201c;*&#x201d; and sometimes the &#x201c;#&#x201d; symbols in test patterns. In such situations, escape the character with a &#x201c;\&#x201d; prefix. Example:</p>
<div>
<div>
<pre class="source"> mvn -T 1C clean verify -Dtest=none -Dit.test=ITest\*
</pre></div></div>
</li>
</ol></div>
<div class="section">
<h2><a name="Viewing_the_results"></a>Viewing the results</h2>
<p>Integration test results and logs are stored in <tt>target/failsafe-reports/</tt>. An HTML report can be generated during site generation, or with the <tt>surefire-report</tt> plugin:</p>
<div>
<div>
<pre class="source"># for the unit tests
mvn -T 1C surefire-report:report-only
# for the integration tests
mvn -T 1C surefire-report:failsafe-report-only
# all reports for this module
mvn -T 1C site:site
</pre></div></div>
</div>
<div class="section">
<h2><a name="Scale_Tests"></a>Scale Tests</h2>
<p>There are a set of tests designed to measure the scalability and performance at scale of the filesystem client, <i>Scale Tests</i>. Tests include: creating and traversing directory trees, uploading large files, renaming them, deleting them, seeking through the files, performing random IO, and others. This makes them a foundational part of the benchmarking.</p>
<p>By their very nature they are slow. And, as their execution time is often limited by bandwidth between the computer running the tests and the Azure endpoint, parallel execution does not speed these tests up.</p>
<div class="section">
<h3><a name="Enabling_the_Scale_Tests"></a>Enabling the Scale Tests</h3>
<p>The tests are enabled if the <tt>scale</tt> property is set in the maven build; this can be done regardless of whether or not the parallel test profile is used.</p>
<div>
<div>
<pre class="source">mvn -T 1C verify -Dscale
mvn -T 1C verify -Dparallel-tests -Dscale -DtestsThreadCount=8
</pre></div></div>
<p>The most bandwidth intensive tests (those which upload data) always run sequentially; those which are slow due to HTTPS setup costs or server-side actions are included in the set of parallelized tests.</p></div>
<div class="section">
<h3><a name="Scale_test_tuning_options"></a>Scale test tuning options</h3>
<p>Some of the tests can be tuned from the maven build or from the configuration file used to run the tests.</p>
<div>
<div>
<pre class="source">mvn -T 1C verify -Dparallel-tests -Dscale -DtestsThreadCount=8 -Dfs.azure.scale.test.huge.filesize=128M
</pre></div></div>
<p>The algorithm is</p>
<ol style="list-style-type: decimal">
<li>The value is queried from the configuration file, using a default value if it is not set.</li>
<li>The value is queried from the JVM System Properties, where it is passed down by maven.</li>
<li>If the system property is null, an empty string, or it has the value <tt>unset</tt>, then the configuration value is used. The <tt>unset</tt> option is used to <a class="externalLink" href="http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven">work round a quirk in maven property propagation</a>.</li>
</ol>
<p>Only a few properties can be set this way; more will be added.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th> Property </th>
<th> Meaning </th></tr>
</thead><tbody>
<tr class="b">
<td> <tt>fs.azure.scale.test.huge.filesize</tt></td>
<td> Size for huge file uploads </td></tr>
<tr class="a">
<td> <tt>fs.azure.scale.test.huge.partitionsize</tt></td>
<td> Size for partitions in huge file uploads </td></tr>
</tbody>
</table>
<p>The file and partition sizes are numeric values with a k/m/g/t/p suffix depending on the desired size. For example: 128M, 128m, 2G, 2g, 4T or even 1P.</p>
<div class="section">
<h4><a name="Scale_test_configuration_options"></a>Scale test configuration options</h4>
<p>Some scale tests perform multiple operations (such as creating many directories).</p>
<p>The exact number of operations to perform is configurable in the option <tt>scale.test.operation.count</tt></p>
<div>
<div>
<pre class="source">&lt;property&gt;
&lt;name&gt;scale.test.operation.count&lt;/name&gt;
&lt;value&gt;10&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
<p>Larger values generate more load, and are recommended when testing locally, or in batch runs.</p>
<p>Smaller values result in faster test runs, especially when the object store is a long way away.</p>
<p>Operations which work on directories have a separate option: this controls the width and depth of tests creating recursive directories. Larger values create exponentially more directories, with consequent performance impact.</p>
<div>
<div>
<pre class="source">&lt;property&gt;
&lt;name&gt;scale.test.directory.count&lt;/name&gt;
&lt;value&gt;2&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
<p>DistCp tests targeting Azure support a configurable file size. The default is 10 MB, but the configuration value is expressed in KB so that it can be tuned smaller to achieve faster test runs.</p>
<div>
<div>
<pre class="source">&lt;property&gt;
&lt;name&gt;scale.test.distcp.file.size.kb&lt;/name&gt;
&lt;value&gt;10240&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
<p>Azure-specific scale test properties are</p>
<div class="section">
<h5><a name="fs.azure.scale.test.huge.filesize:_size_in_MB_for_.E2.80.9CHuge_file_tests.E2.80.9D."></a><tt>fs.azure.scale.test.huge.filesize</tt>: size in MB for &#x201c;Huge file tests&#x201d;.</h5>
<p>The Huge File tests validate Azure storage&#x2019;s ability to handle large files &#x2014;the property <tt>fs.azure.scale.test.huge.filesize</tt> declares the file size to use.</p>
<div>
<div>
<pre class="source">&lt;property&gt;
&lt;name&gt;fs.azure.scale.test.huge.filesize&lt;/name&gt;
&lt;value&gt;200M&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
<p>Tests at this scale are slow: they are best executed from hosts running in the cloud infrastructure where the storage endpoint is based.</p></div></div></div></div>
<div class="section">
<h2><a name="Using_the_emulator"></a>Using the emulator</h2>
<p>A selection of tests can run against the <a class="externalLink" href="http://msdn.microsoft.com/en-us/library/azure/hh403989.aspx">Azure Storage Emulator</a> which is a high-fidelity emulation of live Azure Storage. The emulator is sufficient for high-confidence testing. The emulator is a Windows executable that runs on a local machine.</p>
<p>To use the emulator, install Azure SDK 2.3 and start the storage emulator. Then, edit <tt>src/test/resources/azure-test.xml</tt> and add the following property:</p>
<div>
<div>
<pre class="source">&lt;property&gt;
&lt;name&gt;fs.azure.test.emulator&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
<p>There is a known issue when running tests with the emulator. You may see the following failure message:</p>
<div>
<div>
<pre class="source">com.microsoft.windowsazure.storage.StorageException: The value for one of the HTTP headers is not in the correct format.
</pre></div></div>
<p>To resolve this, restart the Azure Emulator. Ensure it is v3.2 or later.</p></div>
<div class="section">
<h2><a name="Debugging_Test_failures"></a>Debugging Test failures</h2>
<p>Logging at debug level is the standard way to provide more diagnostics output; after setting this, rerun the tests.</p>
<div>
<div>
<pre class="source">log4j.logger.org.apache.hadoop.fs.azure=DEBUG
</pre></div></div>
</div>
<div class="section">
<h2><a name="Adding_new_tests"></a>Adding new tests</h2>
<p>New tests are always welcome. Bear in mind that we need to keep costs and test time down, which is done by</p>
<ul>
<li>Not duplicating tests.</li>
<li>Being efficient in your use of Hadoop API calls.</li>
<li>Isolating large/slow tests into the &#x201c;scale&#x201d; test group.</li>
<li>Designing all tests to execute in parallel (where possible).</li>
<li>Adding new probes and predicates into existing tests, albeit carefully.</li>
</ul>
<p><i>No duplication</i>: if an operation is tested elsewhere, don&#x2019;t repeat it. This applies as much for metadata operations as it does for bulk IO. If a new test case is added which completely obsoletes an existing test, it is OK to cut the previous one &#x2014;after showing that coverage is not worsened.</p>
<p><i>Efficient</i>: prefer calling <tt>getFileStatus()</tt> and examining the results, rather than making calls to <tt>exists()</tt>, <tt>isFile()</tt>, etc.</p>
<p><i>Fail with useful information:</i> provide as much diagnostics as possible on a failure. Using <tt>org.apache.hadoop.fs.contract.ContractTestUtils</tt> to make assertions about the state of a filesystem helps here.</p>
<p><i>Isolating Scale tests</i>. Any test doing large amounts of IO MUST extend the class <tt>AbstractAzureScaleTest</tt>, so only running if <tt>scale</tt> is defined on a build, supporting test timeouts configurable by the user. Scale tests should also support configurability as to the actual size of objects/number of operations, so that behavior at different scale can be verified.</p>
<p><i>Designed for parallel execution</i>. A key need here is for each test suite to work on isolated parts of the filesystem. Subclasses of <tt>AbstractWasbTestBase</tt> SHOULD use the <tt>path()</tt>, <tt>methodpath()</tt> and <tt>blobpath()</tt> methods, to build isolated paths. Tests MUST NOT assume that they have exclusive access to a bucket.</p>
<p><i>Extending existing tests where appropriate</i>. This recommendation goes against normal testing best practise of &#x201c;test one thing per method&#x201d;. Because it is so slow to create directory trees or upload large files, we do not have that luxury. All the tests against real endpoints are integration tests where sharing test setup and teardown saves time and money.</p>
<p>A standard way to do this is to extend existing tests with some extra predicates, rather than write new tests. When doing this, make sure that the new predicates fail with meaningful diagnostics, so any new problems can be easily debugged from test logs.</p>
<div class="section">
<h3><a name="Requirements_of_new_Tests"></a>Requirements of new Tests</h3>
<p>This is what we expect from new tests; they&#x2019;re an extension of the normal Hadoop requirements, based on the need to work with remote servers whose use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical.</p>
<div class="section">
<h4><a name="Subclasses_Existing_Shared_Base_Blasses"></a>Subclasses Existing Shared Base Classes</h4>
<p>There are a set of base classes which should be extended for Azure tests and integration tests.</p>
<div class="section">
<h5><a name="org.apache.hadoop.fs.azure.AbstractWasbTestWithTimeout"></a><tt>org.apache.hadoop.fs.azure.AbstractWasbTestWithTimeout</tt></h5>
<p>This extends the junit <tt>Assert</tt> class with thread names and timeouts, the default timeout being set in <tt>AzureTestConstants.AZURE_TEST_TIMEOUT</tt> to ten minutes. The thread names are set to aid analyzing the stack trace of a test: a <tt>jstack</tt> call can be used to see which test each thread is running.</p></div>
<div class="section">
<h5><a name="org.apache.hadoop.fs.azure.AbstractWasbTestBase"></a><tt>org.apache.hadoop.fs.azure.AbstractWasbTestBase</tt></h5>
<p>The base class for tests which use <tt>AzureBlobStorageTestAccount</tt> to create mock or live Azure clients; in test teardown it tries to clean up store state.</p>
<ol style="list-style-type: decimal">
<li>
<p>This class requires subclasses to implement <tt>createTestAccount()</tt> to create a mock or real test account.</p>
</li>
<li>
<p>The configuration used to create a test account <i>should</i> be that from <tt>createConfiguration()</tt>; this can be extended in subclasses to tune the settings.</p>
</li>
</ol></div>
<div class="section">
<h5><a name="org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest"></a><tt>org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest</tt></h5>
<p>This extends <tt>AbstractWasbTestBase</tt> for scale tests; those tests which only run when <tt>-Dscale</tt> is used to select the &#x201c;scale&#x201d; profile. These tests have a timeout of 30 minutes, so as to support slow test runs.</p>
<p>Having shared base classes helps reduce future maintenance. Please use them.</p></div></div>
<div class="section">
<h4><a name="Secure"></a>Secure</h4>
<p>Don&#x2019;t ever log credentials. The credential tests go out of their way to not provide meaningful logs or assertion messages precisely to avoid this.</p></div>
<div class="section">
<h4><a name="Efficient_of_Time_and_Money"></a>Efficient of Time and Money</h4>
<p>This means efficient in test setup/teardown, and, ideally, making use of existing public datasets to save setup time and tester cost.</p>
<p>The reference example is <tt>ITestAzureHugeFiles</tt>. This marks the test suite as <tt>@FixMethodOrder(MethodSorters.NAME_ASCENDING)</tt> then orders the test cases such that each test case expects the previous test to have completed (here: uploaded a file, renamed a file, &#x2026;). This provides for independent tests in the reports, yet still permits an ordered sequence of operations. Do note the use of <tt>Assume.assume()</tt> to detect when the preconditions for a single test case are not met, hence, the tests become skipped, rather than fail with a trace which is really a false alarm.</p></div></div>
<div class="section">
<h3><a name="Works_Over_Long-haul_Links"></a>Works Over Long-haul Links</h3>
<p>As well as making file size and operation counts scalable, this includes making test timeouts adequate. The Scale tests make this configurable; it&#x2019;s hard coded to ten minutes in <tt>AbstractAzureIntegrationTest()</tt>; subclasses can change this by overriding <tt>getTestTimeoutMillis()</tt>.</p>
<p>Equally importantly: support proxies, as some testers need them.</p></div>
<div class="section">
<h3><a name="Provides_Diagnostics_and_timing_information"></a>Provides Diagnostics and timing information</h3>
<ol style="list-style-type: decimal">
<li>Create logs, log things.</li>
<li>You can use <tt>AbstractWasbTestBase.describe(format-string, args)</tt> here; it adds some newlines so as to be easier to spot.</li>
<li>Use <tt>ContractTestUtils.NanoTimer</tt> to measure the duration of operations, and log the output.</li>
</ol>
<div class="section">
<h4><a name="Fails_Meaningfully"></a>Fails Meaningfully</h4>
<p>The <tt>ContractTestUtils</tt> class contains a whole set of assertions for making statements about the expected state of a filesystem, e.g. <tt>assertPathExists(FS, path)</tt>, <tt>assertPathDoesNotExist(FS, path)</tt>, and others. These do their best to provide meaningful diagnostics on failures (e.g. directory listings, file status, &#x2026;), so help make failures easier to understand.</p>
<p>At the very least, <i>do not use <tt>assertTrue()</tt> or <tt>assertFalse()</tt> without including error messages</i>.</p></div></div>
<div class="section">
<h3><a name="Cleans_Up_Afterwards"></a>Cleans Up Afterwards</h3>
<p>Keeps costs down.</p>
<ol style="list-style-type: decimal">
<li>Do not only clean up if a test case completes successfully; test suite teardown must do it.</li>
<li>That teardown code must check for the filesystem and other fields being null before the cleanup. Why? If test setup fails, the teardown methods still get called.</li>
</ol></div>
<div class="section">
<h3><a name="Works_Reliably"></a>Works Reliably</h3>
<p>We really appreciate this &#x2014; you will too.</p></div></div>
<div class="section">
<h2><a name="Tips"></a>Tips</h2>
<div class="section">
<h3><a name="How_to_keep_your_credentials_really_safe"></a>How to keep your credentials really safe</h3>
<p>Although the <tt>auth-keys.xml</tt> file is marked as ignored in git and subversion, it is still in your source tree, and there&#x2019;s always that risk that it may creep out.</p>
<p>You can avoid this by keeping your keys outside the source tree and using an absolute XInclude reference to it.</p>
<div>
<div>
<pre class="source">&lt;configuration&gt;
&lt;include xmlns=&quot;http://www.w3.org/2001/XInclude&quot;
href=&quot;file:///users/qe/.auth-keys.xml&quot; /&gt;
&lt;/configuration&gt;
</pre></div></div>
</div>
<div class="section">
<h3><a name="Cleaning_up_Containers"></a>Cleaning up Containers</h3>
<p>The Azure tests create containers with the prefix <tt>&quot;wasbtests-&quot;</tt> and delete them after the test runs. If a test run is interrupted, these containers may not get deleted. There is a special test case which can be manually invoked to list and delete these, <tt>CleanupTestContainers</tt></p>
<div>
<div>
<pre class="source">mvn test -Dtest=CleanupTestContainers
</pre></div></div>
<p>This will delete the containers; the output log of the test run will provide the details and summary of the operation.</p></div></div>
<div class="section">
<h2><a name="Testing_the_Azure_ABFS_Client"></a>Testing the Azure ABFS Client</h2>
<p>Azure Data Lake Storage Gen 2 (ADLS Gen 2) is a set of capabilities dedicated to big data analytics, built on top of Azure Blob Storage. The ABFS and ABFSS schemes target the ADLS Gen 2 REST API, and the WASB and WASBS schemes target the Azure Blob Storage REST API. ADLS Gen 2 offers better performance and scalability. ADLS Gen 2 also offers authentication and authorization compatible with the Hadoop Distributed File System permissions model when hierarchical namespace is enabled for the storage account. Furthermore, the metadata and data produced by ADLS Gen 2 REST API can be consumed by Blob REST API, and vice versa.</p></div>
<div class="section">
<h2><a name="Generating_test_run_configurations_and_test_triggers_over_various_config_combinations"></a>Generating test run configurations and test triggers over various config combinations</h2>
<p>To simplify the testing across various authentication and feature combinations that are mandatory for a PR, script <tt>dev-support/testrun-scripts/runtests.sh</tt> should be used. Once the script is updated with relevant config settings for various test combinations, it will:</p>
<ol style="list-style-type: decimal">
<li>Auto-generate configs specific to each test combination.</li>
<li>Run tests for all combinations.</li>
<li>Summarize results across all the test combination runs.</li>
</ol>
<p>As a prerequisite step, fill config values for test accounts and credentials needed for authentication in <tt>src/test/resources/azure-auth-keys.xml.template</tt> and rename as <tt>src/test/resources/azure-auth-keys.xml</tt>.</p>
<p><b>To add a new test combination:</b> Templates for mandatory test combinations for PR validation are present in <tt>dev-support/testrun-scripts/runtests.sh</tt>. If a new one needs to be added, add a combination set within <tt>dev-support/testrun-scripts/runtests.sh</tt> similar to the ones already defined and</p>
<ol style="list-style-type: decimal">
<li>Provide a new combination name.</li>
<li>Update properties and values array which need to be effective for the test combination.</li>
<li>Call generateconfigs.</li>
</ol>
<p><b>To run PR validation:</b> Running command</p>
<ul>
<li><tt>dev-support/testrun-scripts/runtests.sh</tt> will generate configurations for each of the combinations defined and run tests for all the combinations.</li>
<li><tt>dev-support/testrun-scripts/runtests.sh -c {combinationname}</tt> Specific combinations can be provided with -c option. If combinations are provided with -c option, tests for only those combinations will be run.</li>
</ul>
<p><b>Test logs:</b> Test runs will create a folder within dev-support/testlogs to save the test logs. Folder name will be the test start timestamp. The mvn verify command line logs for each combination will be saved into a file as Test-Logs-$combination.txt into this folder. In case of any failures, this file will have the failure exception stack. At the end of the test run, the consolidated results of all the combination runs will be saved into a file as Test-Results.log in the same folder. When run for PR validation, the consolidated test results need to be pasted into the PR comment section.</p>
<p><b>To generate config for use in IDE:</b> Running command with -a (activate) option <tt>dev-support/testrun-scripts/runtests.sh -a {combination name}</tt> will update the effective config relevant for the specific test combination. Hence the same config files used by the mvn test runs can be used for IDE without any manual updates needed within config file.</p>
<p><b>Other command line options:</b></p>
<ul>
<li>-a &lt;COMBINATION_NAME&gt; Specify the combination name which needs to be activated. This is to be used to generate config for use in IDE.</li>
<li>-c &lt;COMBINATION_NAME&gt; Specify the combination name for test runs. If this config is specified, tests for only the specified combinations will run. All combinations of tests will run if this config is not specified.</li>
<li>-t &lt;THREAD_COUNT&gt; ABFS mvn tests are run in parallel mode. Tests by default are run with 8 thread count. It can be changed by providing -t &lt;THREAD_COUNT&gt;</li>
</ul>
<p>In order to test ABFS, please add the following configuration to your <tt>src/test/resources/azure-auth-keys.xml</tt> file. Note that the ABFS tests include compatibility tests which require WASB credentials, in addition to the ABFS credentials.</p>
<div>
<div>
<pre class="source">&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
&lt;?xml-stylesheet type=&quot;text/xsl&quot; href=&quot;configuration.xsl&quot;?&gt;
&lt;configuration xmlns:xi=&quot;http://www.w3.org/2001/XInclude&quot;&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.abfs.account.name&lt;/name&gt;
&lt;value&gt;{ACCOUNT_NAME}.dfs.core.windows.net&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.key.{ACCOUNT_NAME}.dfs.core.windows.net&lt;/name&gt;
&lt;value&gt;{ACCOUNT_ACCESS_KEY}&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.wasb.account.name&lt;/name&gt;
&lt;value&gt;{ACCOUNT_NAME}.blob.core.windows.net&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.key.{ACCOUNT_NAME}.blob.core.windows.net&lt;/name&gt;
&lt;value&gt;{ACCOUNT_ACCESS_KEY}&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.contract.test.fs.abfs&lt;/name&gt;
&lt;value&gt;abfs://{CONTAINER_NAME}@{ACCOUNT_NAME}.dfs.core.windows.net&lt;/value&gt;
&lt;description&gt;A file system URI to be used by the contract tests.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.contract.test.fs.wasb&lt;/name&gt;
&lt;value&gt;wasb://{CONTAINER_NAME}@{ACCOUNT_NAME}.blob.core.windows.net&lt;/value&gt;
&lt;description&gt;A file system URI to be used by the contract tests.&lt;/description&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
<p>To run OAuth and ACL test cases you must use a storage account with the hierarchical namespace enabled, and set the following configuration settings:</p>
<div>
<div>
<pre class="source">&lt;!--=========================== AUTHENTICATION OPTIONS ===================--&gt;
&lt;!--ATTENTION:
TO RUN ABFS &amp; WASB COMPATIBILITY TESTS, YOU MUST SET AUTH TYPE AS SharedKey.
OAUTH IS INTRODUCED TO ABFS ONLY.--&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.auth.type.{YOUR_ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{AUTH TYPE}&lt;/value&gt;
&lt;description&gt;The authorization type can be SharedKey, OAuth, Custom or SAS. The
default is SharedKey.&lt;/description&gt;
&lt;/property&gt;
&lt;!--============================= FOR OAUTH ===========================--&gt;
&lt;!--IF AUTH TYPE IS SET AS OAUTH, FOLLOW THE STEPS BELOW--&gt;
&lt;!--NOTICE: AAD client and tenant related properties can be obtained through Azure Portal--&gt;
&lt;!--1. UNCOMMENT BELOW AND CHOOSE YOUR OAUTH PROVIDER TYPE --&gt;
&lt;!--
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth.provider.type.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.fs.azurebfs.oauth2.{Token Provider Class name}&lt;/value&gt;
&lt;description&gt;The full name of token provider class name.&lt;/description&gt;
&lt;/property&gt;
--&gt;
&lt;!--2. UNCOMMENT BELOW AND SET CREDENTIALS ACCORDING TO THE PROVIDER TYPE--&gt;
&lt;!--2.1. If &quot;ClientCredsTokenProvider&quot; is set as key provider, uncomment below and
set auth endpoint, client id and secret below--&gt;
&lt;!--
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.endpoint.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;https://login.microsoftonline.com/{TENANTID}/oauth2/token&lt;/value&gt;
&lt;description&gt;Token end point, this can be found through Azure portal&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{client id}&lt;/value&gt;
&lt;description&gt;AAD client id.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.secret.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{client secret}&lt;/value&gt;
&lt;/property&gt;
--&gt;
&lt;!--2.2. If &quot;UserPasswordTokenProvider&quot; is set as key provider, uncomment below and
set auth endpoint, user name and password--&gt;
&lt;!--
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.endpoint.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;https://login.microsoftonline.com/{TENANTID}/oauth2/token&lt;/value&gt;
&lt;description&gt;Token end point, this can be found through Azure portal&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.user.name.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{user name}&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.user.password.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{user password}&lt;/value&gt;
&lt;/property&gt;
--&gt;
&lt;!--2.3. If &quot;MsiTokenProvider&quot; is set as key provider, uncomment below and
set tenantGuid and client id.--&gt;
&lt;!--
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.msi.tenant.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{tenantGuid}&lt;/value&gt;
&lt;description&gt;msi tenantGuid.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{client id}&lt;/value&gt;
&lt;description&gt;AAD client id.&lt;/description&gt;
&lt;/property&gt;
--&gt;
&lt;!--2.4. If &quot;RefreshTokenBasedTokenProvider&quot; is set as key provider, uncomment below and
set refresh token and client id.--&gt;
&lt;!--
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.refresh.token.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{refresh token}&lt;/value&gt;
&lt;description&gt;refresh token.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}&lt;/name&gt;
&lt;value&gt;{client id}&lt;/value&gt;
&lt;description&gt;AAD client id.&lt;/description&gt;
&lt;/property&gt;
--&gt;
&lt;!--
&lt;property&gt;
&lt;name&gt;fs.azure.identity.transformer.enable.short.name&lt;/name&gt;
&lt;value&gt;true/false&lt;/value&gt;
&lt;description&gt;
User principal names (UPNs) have the format &#x201c;{alias}@{domain}&#x201d;.
If true, only {alias} is included when a UPN would otherwise appear in the output
of APIs like getFileStatus, getOwner, getAclStatus, etc, default is false.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.identity.transformer.domain.name&lt;/name&gt;
&lt;value&gt;domain name of the user's upn&lt;/value&gt;
&lt;description&gt;
If the domain name is specified and &#x201c;fs.azure.identity.transformer.enable.short.name&#x201d;
is true, then the {alias} part of a UPN can be specified as input to APIs like setOwner,
setAcl, modifyAclEntries, or removeAclEntries, and it will be transformed to a UPN by appending @ and the domain specified by
this configuration property.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.identity.transformer.service.principal.id&lt;/name&gt;
&lt;value&gt;service principal object id&lt;/value&gt;
&lt;description&gt;
An Azure Active Directory object ID (oid) used as the replacement for names contained
in the list specified by &#x201c;fs.azure.identity.transformer.service.principal.substitution.list&#x201d;.
Notice that instead of setting oid, you can also set $superuser here.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.identity.transformer.skip.superuser.replacement&lt;/name&gt;
&lt;value&gt;true/false&lt;/value&gt;
&lt;description&gt;
If false, &#x201c;$superuser&#x201d; is replaced with the current user when it appears as the owner
or owning group of a file or directory. The default is false.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.identity.transformer.service.principal.substitution.list&lt;/name&gt;
&lt;value&gt;mapred,hdfs,yarn,hive,tez&lt;/value&gt;
&lt;description&gt;
A comma separated list of names to be replaced with the service principal ID specified by
&#x201c;fs.azure.identity.transformer.service.principal.id&#x201d;. This substitution occurs
when setOwner, setAcl, modifyAclEntries, or removeAclEntries are invoked with identities
contained in the substitution list. Notice that when in non-secure cluster, asterisk symbol *
can be used to match all user/group.
&lt;/description&gt;
&lt;/property&gt;
--&gt;
</pre></div></div>
<p>To run Delegation SAS test cases you must use a storage account with the hierarchical namespace enabled and set the following configuration settings:</p>
<div>
<div>
<pre class="source">&lt;!--=========================== AUTHENTICATION OPTIONS ===================--&gt;
&lt;!--============================= FOR SAS ===========================--&gt;
&lt;!-- To run ABFS Delegation SAS tests, you must register an app, create the
necessary role assignments, and set the configuration discussed below:
1) Register an app:
a) Login to https://portal.azure.com, select your AAD directory and search for app registrations.
b) Click &quot;New registration&quot;.
c) Provide a display name, such as &quot;abfs-app&quot;.
d) Set the account type to &quot;Accounts in this organizational directory only ({YOUR_Tenant} only - Single tenant)&quot;.
e) For Redirect URI select Web and enter &quot;http://localhost&quot;.
f) Click Register.
2) Create necessary role assignments:
a) Login to https://portal.azure.com and find the Storage account with hierarchical namespace enabled
that you plan to run the tests against.
b) Select &quot;Access Control (IAM)&quot;.
c) Select Role Assignments
d) Click Add and select &quot;Add role assignments&quot;
e) For Role enter &quot;Storage Blob Data Owner&quot;.
f) Under Select enter the name of the app you registered in step 1 and select it.
g) Click Save.
h) Repeat above steps to create a second role assignment for the app but this time for
the &quot;Storage Blob Delegator&quot; role.
3) Generate a new client secret for the application:
a) Login to https://portal.azure.com and find the app registered in step 1.
b) Select &quot;Certificates and secrets&quot;.
c) Click &quot;New client secret&quot;.
d) Enter a description (e.g. Secret1).
e) Set the expiration period. An expiry of 1 year is sufficient.
f) Click Add
g) Copy the secret and expiration to a safe location.
4) Set the following configuration values:
--&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.sas.token.provider.type&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider&lt;/value&gt;
&lt;description&gt;The fully qualified class name of the SAS token provider implementation.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.test.app.service.principal.tenant.id&lt;/name&gt;
&lt;value&gt;{TID}&lt;/value&gt;
&lt;description&gt;Tenant ID for the application's service principal.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.test.app.service.principal.object.id&lt;/name&gt;
&lt;value&gt;{OID}&lt;/value&gt;
&lt;description&gt;Object ID for the application's service principal.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.test.app.id&lt;/name&gt;
&lt;value&gt;{app id}&lt;/value&gt;
&lt;description&gt;The application's ID, also known as the client id.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.test.app.secret&lt;/name&gt;
&lt;value&gt;{client secret}&lt;/value&gt;
&lt;description&gt;The application's secret, also known as the client secret.&lt;/description&gt;
&lt;/property&gt;
</pre></div></div>
<p>To run CheckAccess test cases you must register an app with no RBAC and set the following configurations.</p>
<div>
<div>
<pre class="source">&lt;!--=========================== FOR CheckAccess =========================--&gt;
&lt;!-- To run ABFS CheckAccess SAS tests, you must register an app, with no role
assignments, and set the configuration discussed below:
1) Register a new app with no RBAC
2) As part of the test configs you need to provide the guid for the above
created app. Please follow the below steps to fetch the guid.
a) Get an access token with the above created app. Please refer to the
following documentation:
https://docs.microsoft.com/en-us/azure/active-directory/develop/v2-oauth2-client-creds-grant-flow#get-a-token
b) Decode the token fetched in the above step. You may use
https://jwt.ms/ to decode the token.
c) The oid field in the decoded string is the guid.
3) Set the following configurations:
--&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.enable.check.access&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;description&gt;By default, check access is enabled. CheckAccess can
be turned off by changing this flag to false.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.test.oauth2.client.id&lt;/name&gt;
&lt;value&gt;{client id}&lt;/value&gt;
&lt;description&gt;The client id (app id) for the app created in step 1
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.test.oauth2.client.secret&lt;/name&gt;
&lt;value&gt;{client secret}&lt;/value&gt;
&lt;description&gt;
The client secret (application's secret) for the app created in step 1
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.check.access.testuser.guid&lt;/name&gt;
&lt;value&gt;{guid}&lt;/value&gt;
&lt;description&gt;The guid fetched in step 2&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.azure.account.oauth2.client.endpoint.{account name}.dfs.core.windows.net&lt;/name&gt;
&lt;value&gt;https://login.microsoftonline.com/{TENANTID}/oauth2/token&lt;/value&gt;
&lt;description&gt;
Token endpoint. This can be found through the Azure portal. As part of the
CheckAccess test cases, access will be tested for an FS instance created with
the above-mentioned client credentials, so this configuration is necessary to
create the test FS instance.
&lt;/description&gt;
&lt;/property&gt;
</pre></div></div>
<p>If running tests against an endpoint that uses the URL format http[s]://[ip]:[port]/[account]/[filesystem] instead of http[s]://[account][domain-suffix]/[filesystem], please use the following:</p>
<div>
<div>
<pre class="source">&lt;property&gt;
&lt;name&gt;fs.azure.abfs.endpoint&lt;/name&gt;
&lt;value&gt;{IP}:{PORT}&lt;/value&gt;
&lt;/property&gt;
</pre></div></div></div>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2021
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>