blob: db3c2b6c27462ad21e7fd2a874dd893d59a0b4d5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.stream.Stream;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.fs.s3a.s3guard.DDBPathMetadata;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertRenameOutcome;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeFilesystemHasMetadatastore;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getStatusWithEmptyDirFlag;
/**
 * Test logic around whether or not a directory is empty, with S3Guard enabled.
 * The fact that S3AFileStatus has an isEmptyDirectory flag in it makes caching
 * S3AFileStatus instances really tricky, as the flag can change as a side
 * effect of changes to other paths.
 * After S3Guard is merged to trunk, we should try to remove the
 * isEmptyDirectory flag from S3AFileStatus, or maintain it outside
 * of the MetadataStore.
 */
public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {

  /**
   * Rename an empty directory, verify that the empty dir
   * marker moves in both S3Guard and in the S3A FS.
   */
  @Test
  public void testRenameEmptyDir() throws Throwable {
    S3AFileSystem fs = getFileSystem();
    Path basePath = path(getMethodName());
    Path sourceDir = new Path(basePath, "AAA-source");
    String sourceDirMarker = fs.pathToKey(sourceDir) + "/";
    Path destDir = new Path(basePath, "BBB-dest");
    String destDirMarker = fs.pathToKey(destDir) + "/";

    // set things up.
    mkdirs(sourceDir);
    // probe for the source directory marker; the result is discarded,
    // the call is only here so that a missing marker surfaces as a failure.
    fs.getObjectMetadata(sourceDirMarker);
    S3AFileStatus srcStatus = getEmptyDirStatus(sourceDir);
    assertEquals("Must be an empty dir: " + srcStatus, Tristate.TRUE,
        srcStatus.isEmptyDirectory());

    // do the rename
    assertRenameOutcome(fs, sourceDir, destDir, true);
    S3AFileStatus destStatus = getEmptyDirStatus(destDir);
    assertEquals("Must be an empty dir: " + destStatus, Tristate.TRUE,
        destStatus.isEmptyDirectory());

    // source does not exist.
    intercept(FileNotFoundException.class,
        () -> getEmptyDirStatus(sourceDir));

    // and verify that there's no dir marker hidden under a tombstone.
    // The string is only returned (and so reported by intercept) if the
    // HEAD unexpectedly succeeds.
    intercept(FileNotFoundException.class,
        () -> Invoker.once("HEAD", sourceDirMarker, () -> {
          ObjectMetadata md = fs.getObjectMetadata(sourceDirMarker);
          return String.format("Object %s of length %d",
              sourceDirMarker, md.getInstanceLength());
        }));

    // the parent dir mustn't be confused
    S3AFileStatus baseStatus = getEmptyDirStatus(basePath);
    assertEquals("Must not be an empty dir: " + baseStatus, Tristate.FALSE,
        baseStatus.isEmptyDirectory());

    // and verify the dest dir has a marker
    fs.getObjectMetadata(destDirMarker);
  }

  /**
   * Get the status of a path with the empty directory flag requested,
   * using all status probes.
   * @param dir path to query
   * @return the status returned by the filesystem
   * @throws IOException failure to retrieve the status, including
   * the path not being found.
   */
  private S3AFileStatus getEmptyDirStatus(Path dir) throws IOException {
    return getFileSystem().innerGetFileStatus(dir, true, StatusProbeEnum.ALL);
  }

  /**
   * Verify that the empty directory flag stays correct when a directory
   * holds a file which the MetadataStore never saw being created.
   */
  @Test
  public void testEmptyDirs() throws Exception {
    S3AFileSystem fs = getFileSystem();
    assumeFilesystemHasMetadatastore(fs);
    MetadataStore configuredMs = fs.getMetadataStore();
    Path existingDir = path("existing-dir");
    Path existingFile = path("existing-dir/existing-file");
    try {
      // 1. Simulate files already existing in the bucket before we started our
      // cluster. Temporarily disable the MetadataStore so it doesn't witness
      // us creating these files.
      fs.setMetadataStore(new NullMetadataStore());
      assertTrue(fs.mkdirs(existingDir));
      touch(fs, existingFile);

      // 2. Simulate (from MetadataStore's perspective) starting our cluster and
      // creating a file in an existing directory.
      fs.setMetadataStore(configuredMs);  // "start cluster"
      Path newFile = path("existing-dir/new-file");
      touch(fs, newFile);
      S3AFileStatus status = getEmptyDirStatus(existingDir);
      assertEquals("Should not be empty dir", Tristate.FALSE,
          status.isEmptyDirectory());

      // 3. Assert that removing the only file the MetadataStore witnessed
      // being created doesn't cause it to think the directory is now empty.
      fs.delete(newFile, false);
      status = getEmptyDirStatus(existingDir);
      assertEquals("Should not be empty dir", Tristate.FALSE,
          status.isEmptyDirectory());

      // 4. Assert that removing the final file, that existed "before"
      // MetadataStore started, *does* cause the directory to be marked empty.
      fs.delete(existingFile, false);
      status = getEmptyDirStatus(existingDir);
      assertEquals("Should be empty dir now", Tristate.TRUE,
          status.isEmptyDirectory());
    } finally {
      // Reinstate the configured store first: if step 1 failed, the
      // filesystem would otherwise be left using the NullMetadataStore.
      fs.setMetadataStore(configuredMs);
      configuredMs.forgetMetadata(existingFile);
      configuredMs.forgetMetadata(existingDir);
    }
  }

  /**
   * Test tombstones don't get in the way of a listing of the
   * test's base directory.
   * This test needs to create a path which appears first in the listing,
   * and an entry which can come later. To allow the test to proceed
   * while other tests are running, the filename "0000" is used for that
   * deleted entry.
   */
  @Test
  public void testTombstonesAndEmptyDirectories() throws Throwable {
    S3AFileSystem fs = getFileSystem();
    assumeFilesystemHasMetadatastore(fs);
    // This test inspects DynamoDB entries directly; make that assumption
    // before anything is written, so a skipped run leaves nothing behind.
    DynamoDBMetadataStore ddbMs = getRequiredDDBMetastore(fs);

    // Create the first and last files.
    Path base = path(getMethodName());
    // use something ahead of all the ASCII alphabet characters so
    // even during parallel test runs, this test is expected to work.
    String first = "0000";
    Path firstPath = new Path(base, first);
    // this path is near the bottom of the ASCII string space.
    // This isn't so critical.
    String last = "zzzz";
    Path lastPath = new Path(base, last);
    touch(fs, firstPath);
    touch(fs, lastPath);

    // Delete first entry (+assert tombstone)
    assertDeleted(firstPath, false);
    DDBPathMetadata firstMD = ddbMs.get(firstPath);
    assertNotNull("No MD for " + firstPath, firstMD);
    assertTrue("Not a tombstone " + firstMD,
        firstMD.isDeleted());

    // PUT child to store going past the FS entirely.
    // This is not going to show up on S3Guard.
    Path child = new Path(firstPath, "child");
    StoreContext ctx = fs.createStoreContext();
    String childKey = ctx.pathToKey(child);
    String baseKey = ctx.pathToKey(base) + "/";
    AmazonS3 s3 = fs.getAmazonS3ClientForTesting("LIST");
    String bucket = ctx.getBucket();
    try {
      createEmptyObject(fs, childKey);

      // Do a list
      ListObjectsV2Request listReq = new ListObjectsV2Request()
          .withBucketName(bucket)
          .withPrefix(baseKey)
          .withMaxKeys(10)
          .withDelimiter("/");
      ListObjectsV2Result listing = s3.listObjectsV2(listReq);

      // the listing has the first path as a prefix, because of the child
      Assertions.assertThat(listing.getCommonPrefixes())
          .describedAs("The prefixes of a LIST of %s", base)
          .contains(baseKey + first + "/");

      // and the last file is one of the files
      Stream<String> files = listing.getObjectSummaries()
          .stream()
          .map(S3ObjectSummary::getKey);
      Assertions.assertThat(files)
          .describedAs("The files of a LIST of %s", base)
          .contains(baseKey + last);

      // verify absolutely that the last file exists
      assertPathExists("last file", lastPath);

      // DDB is the metastore (assumed above), so we expect no S3 requests
      // to be made at all while resolving the directory status.
      S3ATestUtils.MetricDiff listMetric = new S3ATestUtils.MetricDiff(fs,
          Statistic.OBJECT_LIST_REQUESTS);
      S3ATestUtils.MetricDiff getMetric = new S3ATestUtils.MetricDiff(fs,
          Statistic.OBJECT_METADATA_REQUESTS);
      // do a getFile status with empty dir flag
      S3AFileStatus status = getStatusWithEmptyDirFlag(fs, base);
      assertNonEmptyDir(status);
      listMetric.assertDiffEquals(
          "FileSystem called S3 LIST rather than use DynamoDB",
          0);
      getMetric.assertDiffEquals(
          "FileSystem called S3 GET rather than use DynamoDB",
          0);
      LOG.info("Verified that DDB directory status was accepted");
    } finally {
      // try to recover from the defective state.
      s3.deleteObject(bucket, childKey);
      fs.delete(lastPath, true);
      ddbMs.forgetMetadata(firstPath);
    }
  }

  /**
   * Assert that a status is explicitly marked as not being
   * an empty directory.
   * @param status status to check
   */
  protected void assertNonEmptyDir(final S3AFileStatus status) {
    assertEquals("Should not be empty dir: " + status, Tristate.FALSE,
        status.isEmptyDirectory());
  }

  /**
   * Get the DynamoDB metastore; assume false if it is of a different
   * type.
   * @param fs filesystem whose metadata store is examined
   * @return extracted and cast metadata store.
   */
  @SuppressWarnings("ConstantConditions")
  private DynamoDBMetadataStore getRequiredDDBMetastore(S3AFileSystem fs) {
    MetadataStore ms = fs.getMetadataStore();
    assume("Not a DynamoDBMetadataStore: " + ms,
        ms instanceof DynamoDBMetadataStore);
    return (DynamoDBMetadataStore) ms;
  }

  /**
   * From {@code S3AFileSystem.createEmptyObject()}.
   * PUTs a zero-length object through the S3 client obtained from the
   * filesystem, rather than through a filesystem write call.
   * @param fs filesystem
   * @param key key
   */
  private void createEmptyObject(S3AFileSystem fs, String key) {
    // a stream which is at end-of-stream from the very first read
    final InputStream im = new InputStream() {
      @Override
      public int read() {
        return -1;
      }
    };

    PutObjectRequest putObjectRequest = fs.newPutObjectRequest(key,
        fs.newObjectMetadata(0L),
        im);
    AmazonS3 s3 = fs.getAmazonS3ClientForTesting("PUT");
    s3.putObject(putObjectRequest);
  }
}