blob: 941e701333b0923e061da19293e1bcc4790cccb5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
import org.apache.hadoop.fs.s3a.performance.AbstractS3ACostTest;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.fs.s3a.performance.OperationCost.*;
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Use metrics to assert about the cost of file API calls.
* Parameterized on guarded vs raw, and on directory marker keep vs delete.
*/
@RunWith(Parameterized.class)
public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
/** Logger for this test class. */
private static final Logger LOG =
LoggerFactory.getLogger(ITestS3AFileOperationCost.class);
/**
 * Supply the parameter rows for the parameterized runner.
 * Each row is {name, s3guard, keepMarkers, authoritative}, in the
 * order of the constructor arguments; the first element names the run.
 * @return the parameter rows.
 */
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> params() {
  final Object[][] rows = {
      {"raw-keep-markers", false, true, false},
      {"raw-delete-markers", false, false, false},
      {"nonauth-keep-markers", true, true, false},
      {"auth-delete-markers", true, false, true},
  };
  return Arrays.asList(rows);
}
/**
 * Create a test instance for one parameter row.
 * @param name label of the parameter set; not read by this constructor.
 * @param s3guard S3Guard flag, forwarded to the superclass.
 * @param keepMarkers directory marker retention flag, forwarded to the
 * superclass.
 * @param authoritative authoritative-mode flag, forwarded to the superclass.
 */
public ITestS3AFileOperationCost(
    String name,
    boolean s3guard,
    boolean keepMarkers,
    boolean authoritative) {
  super(s3guard, keepMarkers, authoritative);
}
/**
 * Measure the cost of {@code listLocatedStatus(file)}.
 * The expected costs show a small inefficiency: with S3Guard
 * (authoritative or not) a LIST operation is still expected,
 * even though the call is made on a file.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfLocatedFileStatusOnFile() throws Throwable {
  describe("performing listLocatedStatus on a file");
  final Path target = file(methodPath());
  final S3AFileSystem s3a = getFileSystem();
  verifyMetrics(
      () -> s3a.listLocatedStatus(target),
      // raw: a file probe plus the list operation
      whenRaw(FILE_STATUS_FILE_PROBE.plus(LIST_LOCATED_STATUS_LIST_OP)),
      whenAuthoritative(LIST_LOCATED_STATUS_LIST_OP),
      whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
}
/**
 * Measure the cost of {@code listLocatedStatus()} on an empty directory.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable {
  describe("performing listLocatedStatus on an empty dir");
  final Path emptyDir = dir(methodPath());
  final S3AFileSystem s3a = getFileSystem();
  verifyMetrics(
      () -> s3a.listLocatedStatus(emptyDir),
      whenRaw(LIST_LOCATED_STATUS_LIST_OP.plus(GET_FILE_STATUS_ON_EMPTY_DIR)),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
}
/**
 * Measure the cost of {@code listLocatedStatus()} on a directory
 * containing one file.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable {
  describe("performing listLocatedStatus on a non empty dir");
  Path dir = dir(methodPath());
  S3AFileSystem fs = getFileSystem();
  // create a child so that the directory is not empty; the returned
  // path is never read, so it is not bound to a local variable.
  file(new Path(dir, "file.txt"));
  verifyMetrics(() ->
      fs.listLocatedStatus(dir),
      whenRaw(LIST_LOCATED_STATUS_LIST_OP),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
}
/**
 * Measure the cost of a recursive {@code listFiles()} on a file.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListFilesOnFile() throws Throwable {
  describe("Performing listFiles() on a file");
  final Path target = path(getMethodName() + ".txt");
  final S3AFileSystem s3a = getFileSystem();
  // the file must exist before the listing is measured
  touch(s3a, target);
  verifyMetrics(
      () -> s3a.listFiles(target, true),
      whenRaw(LIST_LOCATED_STATUS_LIST_OP.plus(GET_FILE_STATUS_ON_FILE)),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
}
/**
 * Measure the cost of a recursive {@code listFiles()} on a directory
 * which was created with {@code mkdirs()} and has no children.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListFilesOnEmptyDir() throws Throwable {
  describe("Performing listFiles() on an empty dir with marker");
  Path dir = path(getMethodName());
  S3AFileSystem fs = getFileSystem();
  // create the directory before measuring the listing
  fs.mkdirs(dir);
  verifyMetrics(() ->
      fs.listFiles(dir, true),
      whenRaw(LIST_FILES_LIST_OP
          .plus(GET_FILE_STATUS_ON_EMPTY_DIR)),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_FILES_LIST_OP));
}
/**
 * Measure the cost of a recursive {@code listFiles()} on a directory
 * containing one file.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListFilesOnNonEmptyDir() throws Throwable {
  describe("Performing listFiles() on a non empty dir");
  final Path parent = path(getMethodName());
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(parent);
  // add a single child so that the directory is not empty
  final Path child = new Path(parent, "file.txt");
  touch(s3a, child);
  verifyMetrics(
      () -> s3a.listFiles(parent, true),
      whenRaw(LIST_FILES_LIST_OP),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_FILES_LIST_OP));
}
/**
 * Measure the cost of a recursive {@code listFiles()} on a path which
 * was never created; a {@link FileNotFoundException} is expected.
 * Only the raw cost is declared.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListFilesOnNonExistingDir() throws Throwable {
  describe("Performing listFiles() on a non existing dir");
  final Path missing = path(getMethodName());
  final S3AFileSystem s3a = getFileSystem();
  verifyMetricsIntercepting(
      FileNotFoundException.class,
      "",
      () -> s3a.listFiles(missing, true),
      whenRaw(LIST_FILES_LIST_OP.plus(GET_FILE_STATUS_FNFE)));
}
/**
 * Measure the cost of {@code listStatus()} on a file.
 * A list operation is expected in every case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListStatusOnFile() throws Throwable {
  describe("Performing listStatus() on a file");
  final Path target = path(getMethodName() + ".txt");
  final S3AFileSystem s3a = getFileSystem();
  // the file must exist before the listing is measured
  touch(s3a, target);
  verifyMetrics(
      () -> s3a.listStatus(target),
      whenRaw(LIST_STATUS_LIST_OP.plus(GET_FILE_STATUS_ON_FILE)),
      whenAuthoritative(LIST_STATUS_LIST_OP),
      whenNonauth(LIST_STATUS_LIST_OP));
}
/**
 * Measure the cost of {@code listStatus()} on a directory created
 * with {@code mkdirs()} which has no children.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListStatusOnEmptyDir() throws Throwable {
  describe("Performing listStatus() on an empty dir");
  final Path emptyDir = path(getMethodName());
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(emptyDir);
  verifyMetrics(
      () -> s3a.listStatus(emptyDir),
      whenRaw(LIST_STATUS_LIST_OP.plus(GET_FILE_STATUS_ON_EMPTY_DIR)),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_STATUS_LIST_OP));
}
/**
 * Measure the cost of {@code listStatus()} on a directory containing
 * one file.
 * No IO is expected in the authoritative case.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListStatusOnNonEmptyDir() throws Throwable {
  describe("Performing listStatus() on a non empty dir");
  final Path parent = path(getMethodName());
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(parent);
  // add a single child so that the directory is not empty
  final Path child = new Path(parent, "file.txt");
  touch(s3a, child);
  verifyMetrics(
      () -> s3a.listStatus(parent),
      whenRaw(LIST_STATUS_LIST_OP),
      whenAuthoritative(NO_IO),
      whenNonauth(LIST_STATUS_LIST_OP));
}
/**
 * Measure the cost of a full getFileStatus probe of a file and
 * check that the returned status is that of a file.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfGetFileStatusOnFile() throws Throwable {
  describe("performing getFileStatus on a file");
  final Path target = file(methodPath());
  final S3AFileStatus st = verifyRawInnerGetFileStatus(
      target,
      true,
      StatusProbeEnum.ALL,
      GET_FILE_STATUS_ON_FILE);
  assertTrue("not a file: " + st, st.isFile());
}
/**
 * Measure the cost of getFileStatus on an empty directory: first with
 * all probes, then with the directory probes only, and finally through
 * the isDir/isFile helpers.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
  describe("performing getFileStatus on an empty directory");
  final Path emptyDir = dir(methodPath());
  final S3AFileStatus st = verifyRawInnerGetFileStatus(
      emptyDir,
      true,
      StatusProbeEnum.ALL,
      GET_FILE_STATUS_ON_DIR_MARKER);
  assertSame("not empty: " + st, Tristate.TRUE,
      st.isEmptyDirectory());

  // restricting the probes to directories leaves only the
  // directory probe cost: the file check is skipped.
  verifyRawInnerGetFileStatus(
      emptyDir,
      false,
      StatusProbeEnum.DIRECTORIES,
      FILE_STATUS_DIR_PROBE);

  // isDir/isFile against the same path
  isDir(emptyDir, true, FILE_STATUS_DIR_PROBE);
  isFile(emptyDir, false, FILE_STATUS_FILE_PROBE);
}
/**
 * Measure the cost of a full getFileStatus probe of a path which was
 * never created; the probe is expected to fail.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfGetFileStatusOnMissingFile() throws Throwable {
  describe("performing getFileStatus on a missing file");
  interceptRawGetFileStatusFNFE(
      methodPath(),
      false,
      StatusProbeEnum.ALL,
      GET_FILE_STATUS_FNFE);
}
/**
 * Measure the cost of the isDir and isFile helpers against a path
 * which was never created; both are expected to report false.
 * @throws Throwable on any failure.
 */
@Test
public void testIsDirIsFileMissingPath() throws Throwable {
  describe("performing isDir and isFile on a missing file");
  final Path missing = methodPath();
  // directory check first, then the file check, on the same path
  isDir(missing, false, FILE_STATUS_DIR_PROBE);
  isFile(missing, false, FILE_STATUS_FILE_PROBE);
}
/**
 * Measure the cost of a full getFileStatus probe of a directory
 * containing one file, and check that the status is marked as
 * not empty.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfGetFileStatusOnNonEmptyDir() throws Throwable {
  describe("performing getFileStatus on a non-empty directory");
  final Path parent = dir(methodPath());
  // a child file makes the directory non-empty
  file(new Path(parent, "simple.txt"));
  final S3AFileStatus st = verifyRawInnerGetFileStatus(
      parent,
      true,
      StatusProbeEnum.ALL,
      GET_FILE_STATUS_ON_DIR);
  assertEmptyDirStatus(st, Tristate.FALSE);
}
/**
 * Measure the cost of {@code copyFromLocalFile()}: one invocation,
 * one PUT request, and as many PUT bytes as the source file holds.
 * The uploaded contents are then verified and the local file deleted.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfCopyFromLocalFile() throws Throwable {
  describe("testCostOfCopyFromLocalFile");
  final File scratchDir = getTestDir("tmp");
  scratchDir.mkdirs();
  final File source = File.createTempFile("tests3acost", ".txt",
      scratchDir);
  // remove the empty temp file; the dataset is written to the
  // same path below.
  source.delete();
  try {
    final URI sourceUri = source.toURI();
    final FileSystem local = FileSystem.get(sourceUri,
        getFileSystem().getConf());
    final Path sourcePath = new Path(sourceUri);
    final int size = 10 * 1024;
    final byte[] payload = dataset(size, 'A', 'Z');
    writeDataset(local, sourcePath, payload, size, 1024, true);

    final S3AFileSystem remote = getFileSystem();
    final Path destPath = methodPath();
    verifyMetrics(
        () -> {
          remote.copyFromLocalFile(false, true, sourcePath, destPath);
          return "copy";
        },
        with(INVOCATION_COPY_FROM_LOCAL_FILE, 1),
        with(OBJECT_PUT_REQUESTS, 1),
        with(OBJECT_PUT_BYTES, size));
    verifyFileContents(remote, destPath, payload);
    // log the filesystem, whose string value carries the final stats
    LOG.info("Filesystem {}", remote);
  } finally {
    source.delete();
  }
}
/**
 * Measure the cost of the individual probes made against an empty
 * directory: HEAD only, LIST only, no probes at all, and then direct
 * calls of {@code s3GetFileStatus()} with a key ending in "/".
 * Skipped unless the filesystem is unguarded.
 * @throws Throwable on any failure.
 */
@Test
public void testDirProbes() throws Throwable {
  describe("Test directory probe cost");
  assumeUnguarded();
  final S3AFileSystem s3a = getFileSystem();
  // the empty directory under test
  final Path emptyDir = dir(methodPath());

  // a HEAD-only probe does not find it
  interceptRawGetFileStatusFNFE(
      emptyDir,
      false,
      StatusProbeEnum.HEAD_ONLY,
      FILE_STATUS_FILE_PROBE);

  // a LIST-only probe finds it and reports it as empty
  S3AFileStatus st = verifyRawInnerGetFileStatus(
      emptyDir,
      true,
      StatusProbeEnum.LIST_ONLY,
      FILE_STATUS_DIR_PROBE);
  assertEmptyDirStatus(st, Tristate.TRUE);

  // with no probes enabled, no IO is expected and the lookup fails
  interceptRawGetFileStatusFNFE(
      emptyDir,
      false,
      EnumSet.noneOf(StatusProbeEnum.class),
      NO_IO);

  // build the key with a trailing slash, for use with the
  // low-level s3GetFileStatus() call.
  final String keyWithSlash = s3a.pathToKey(emptyDir.getParent())
      + "/" + emptyDir.getName() + "/";

  // HEAD only: a key with a trailing "/" is not probed for
  interceptRaw(FileNotFoundException.class, "",
      NO_IO, () ->
          s3a.s3GetFileStatus(emptyDir, keyWithSlash,
              StatusProbeEnum.HEAD_ONLY, null, false));

  // LIST only: the directory entry is found
  st = verifyRaw(FILE_STATUS_DIR_PROBE, () ->
      s3a.s3GetFileStatus(emptyDir,
          keyWithSlash,
          StatusProbeEnum.LIST_ONLY,
          null,
          true));
  assertEquals(emptyDir, st.getPath());
  assertEmptyDirStatus(st, Tristate.TRUE);
}
/**
 * Verify that {@code s3GetFileStatus()} rejects a HEAD-only probe set
 * when its final boolean argument is true.
 * @throws Throwable on any failure.
 */
@Test
public void testNeedEmptyDirectoryProbeRequiresList() throws Throwable {
  final S3AFileSystem s3a = getFileSystem();
  intercept(IllegalArgumentException.class, "", () ->
      s3a.s3GetFileStatus(
          new Path("/something"),
          "/something",
          StatusProbeEnum.HEAD_ONLY,
          null,
          true));
}
/**
 * Measure the cost of creating a file, first without and then with
 * overwrite. Skipped unless the filesystem is unguarded.
 * @throws Throwable on any failure.
 */
@Test
public void testCreateCost() throws Throwable {
  describe("Test file creation cost -raw only");
  assumeUnguarded();
  final Path target = methodPath();
  // overwrite == false: the destination is checked for existence
  create(target, false, CREATE_FILE_NO_OVERWRITE);
  // overwrite == true: only the directory checks are made
  create(target, true, CREATE_FILE_OVERWRITE);
}
/**
 * Measure the cost of a file creation which fails because a file
 * already exists at the path. Skipped unless the filesystem is unguarded.
 * @throws Throwable on any failure.
 */
@Test
public void testCreateCostFileExists() throws Throwable {
  describe("Test cost of create file failing with existing file");
  assumeUnguarded();
  final Path existing = file(methodPath());
  // a file is now present, so a create with overwrite == false
  // is rejected at the cost of the file probe alone.
  interceptRaw(
      FileAlreadyExistsException.class,
      "",
      FILE_STATUS_FILE_PROBE,
      () -> file(existing, false));
}
/**
 * Measure the cost of a file creation which fails because a directory
 * already exists at the path. Skipped unless the filesystem is unguarded.
 * @throws Throwable on any failure.
 */
@Test
public void testCreateCostDirExists() throws Throwable {
  describe("Test cost of create file failing with existing dir");
  assumeUnguarded();
  final Path existingDir = dir(methodPath());
  // a directory is now present, so a create with overwrite == false
  // is rejected; the expected cost is that of finding a directory marker.
  interceptRaw(
      FileAlreadyExistsException.class,
      "",
      GET_FILE_STATUS_ON_DIR_MARKER,
      () -> file(existingDir, false));
}
/**
 * Measure file creation cost through the builder API.
 * The builder checks for the parent unless the caller asks otherwise.
 * Skipped unless the filesystem is unguarded.
 * @throws Throwable on any failure.
 */
@Test
public void testCreateBuilder() throws Throwable {
  describe("Test builder file creation cost -raw only");
  assumeUnguarded();
  final Path target = methodPath();
  dir(target.getParent());

  // default, non-recursive build: a failed lookup of the destination
  // plus a directory probe of the parent.
  buildFile(target, false, false,
      GET_FILE_STATUS_FNFE.plus(FILE_STATUS_DIR_PROBE));

  // both boolean flags are true here (presumably overwrite and
  // recursive; confirm against buildFile()'s signature): only a
  // directory probe of the destination is expected.
  buildFile(target, true, true, FILE_STATUS_DIR_PROBE);

  // the file now exists, so a build without overwrite is rejected
  // at the cost of finding the file.
  interceptRaw(
      FileAlreadyExistsException.class,
      "",
      GET_FILE_STATUS_ON_FILE,
      () -> buildFile(target, false, true,
          GET_FILE_STATUS_ON_FILE));
}
/**
 * Measure the cost of {@code globStatus()} over a directory of ten
 * files. Skipped if the filesystem has a metadata store.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfGlobStatus() throws Throwable {
  describe("Test globStatus has expected cost");
  final S3AFileSystem s3a = getFileSystem();
  assume("Unguarded FS only", !s3a.hasMetadataStore());

  final Path base = path("testCostOfGlobStatus/nextFolder/");

  // populate the directory
  final int fileCount = 10;
  for (int n = 0; n < fileCount; n++) {
    create(base.suffix("/" + n));
  }

  // an unmeasured glob, made before the measured one
  s3a.globStatus(base.suffix("/*"));
  // the measured glob is expected to cost LIST_STATUS_LIST_OP
  verifyRaw(LIST_STATUS_LIST_OP,
      () -> s3a.globStatus(base.suffix("/*")));
}
/**
 * Measure the cost of {@code globStatus()} when the pattern matches a
 * single file, to show that no extra operations are made for symlink
 * resolution. Skipped if the filesystem has a metadata store.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfGlobStatusNoSymlinkResolution() throws Throwable {
  describe("Test globStatus does not attempt to resolve symlinks");
  final S3AFileSystem s3a = getFileSystem();
  assume("Unguarded FS only", !s3a.hasMetadataStore());

  final Path base = path("testCostOfGlobStatusNoSymlinkResolution/f/");

  // a glob returning a single file triggers attempts at symlink
  // resolution if that is configured, so create exactly one file.
  final String childName = "/notASymlinkDOntResolveMeLikeOne";
  create(base.suffix(childName));

  // the expected cost is LIST_STATUS_LIST_OP, with nothing added
  // for symlink resolution.
  verifyRaw(LIST_STATUS_LIST_OP,
      () -> s3a.globStatus(base.suffix("/*")));
}
}