blob: a16e1b5e492fb6b582fa8146a3821c6b977fe8a3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.auth;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.AccessDeniedException;
import java.util.concurrent.Callable;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.Statistic;
import org.apache.hadoop.mapred.LocatedFileStatusFetcher;
import org.apache.hadoop.mapreduce.lib.input.InvalidInputException;
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.Constants.ASSUMED_ROLE_ARN;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.lsR;
import static org.apache.hadoop.fs.s3a.auth.RoleModel.Effects;
import static org.apache.hadoop.fs.s3a.auth.RoleModel.Statement;
import static org.apache.hadoop.fs.s3a.auth.RoleModel.directory;
import static org.apache.hadoop.fs.s3a.auth.RoleModel.statement;
import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*;
import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.bindRolePolicyStatements;
import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.newAssumedRoleConfig;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
import static org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS;
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
import static org.apache.hadoop.test.GenericTestUtils.failif;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* This test creates a client with no read access to the underlying
* filesystem and then tries to perform various read operations on it.
*
* The tests are all bundled into one big test case.
* From a purist unit test perspective, this is utterly wrong as it goes
* against the
* <i>"Each test case tests exactly one thing"</i>
* philosophy of JUnit.
* <p>
 * However, it significantly reduces setup costs,
* as it means that the filesystems and directories only need to be
* created and destroyed once per suite, rather than
* once per individual test.
* <p>
* All the test probes have informative messages so when a test failure
 * does occur, its cause should be discoverable. Its main weaknesses are
* therefore:
* <ol>
* <li>A failure of an assertion blocks all subsequent assertions from
* being checked.</li>
* <li>Maintenance is potentially harder.</li>
* </ol>
* To simplify maintenance, the operations tested are broken up into
* their own methods, with fields used to share the restricted role and
* created paths.
*
*/
public class ITestRestrictedReadAccess extends AbstractS3ATestBase {

  private static final Logger LOG =
      LoggerFactory.getLogger(ITestRestrictedReadAccess.class);

  /** Filter to select everything. */
  private static final PathFilter EVERYTHING = t -> true;

  /** Filter to select .txt files. */
  private static final PathFilter TEXT_FILE =
      path -> path.toUri().toString().endsWith(".txt");

  /** The same path filter used in FileInputFormat. */
  private static final PathFilter HIDDEN_FILE_FILTER =
      (p) -> {
        String n = p.getName();
        return !n.startsWith("_") && !n.startsWith(".");
      };

  /**
   * Text found in LocatedFileStatusFetcher exception when the glob
   * returned "null".
   */
  private static final String DOES_NOT_EXIST = "does not exist";

  /**
   * Text found in LocatedFileStatusFetcher exception when
   * the glob returned an empty list.
   */
  private static final String MATCHES_0_FILES = "matches 0 files";

  /**
   * Text used in files.
   */
  public static final byte[] HELLO = "hello".getBytes(StandardCharsets.UTF_8);

  /**
   * Wildcard scan to find *.txt in the no-read directory.
   */
  private Path noReadWildcard;

  /** Base path of the test; all other paths are under this. */
  private Path basePath;

  /** Directory to which the restricted role has no read access. */
  private Path noReadDir;

  /** Empty directory under {@link #noReadDir}. */
  private Path emptyDir;

  /** Empty file under {@link #noReadDir}. */
  private Path emptyFile;

  /** Subdirectory of {@link #noReadDir}. */
  private Path subDir;

  /** Text file in {@link #subDir}. */
  private Path subdirFile;

  /** Second subdirectory of {@link #noReadDir}. */
  private Path subDir2;

  /** Text file in {@link #subDir2}. */
  private Path subdir2File1;

  /** Non-text (.docx) file in {@link #subDir2}. */
  private Path subdir2File2;

  /** Configuration bonded to the assumed role; shared by the probes. */
  private Configuration roleConfig;

  /**
   * A read-only FS; if non-null it is closed in teardown.
   */
  private S3AFileSystem readonlyFS;

  @Override
  public void setup() throws Exception {
    super.setup();
    assumeRoleTests();
  }

  @Override
  public void teardown() throws Exception {
    try {
      super.teardown();
    } finally {
      // close the restricted FS even if the superclass teardown failed.
      cleanupWithLogger(LOG, readonlyFS);
    }
  }

  /** Skip the suite entirely if no role ARN is configured. */
  private void assumeRoleTests() {
    assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
  }

  private String getAssumedRoleARN() {
    return getContract().getConf().getTrimmed(ASSUMED_ROLE_ARN, "");
  }

  /**
   * Create the assumed role configuration.
   * @return a config bonded to the ARN of the assumed role
   */
  public Configuration createAssumedRoleConfig() {
    return createAssumedRoleConfig(getAssumedRoleARN());
  }

  /**
   * Create a config for an assumed role; it also disables FS caching.
   * @param roleARN ARN of role
   * @return the new configuration
   */
  private Configuration createAssumedRoleConfig(String roleARN) {
    return newAssumedRoleConfig(getContract().getConf(), roleARN);
  }

  /**
   * This is a single test case which invokes the individual test cases
   * in sequence.
   */
  @Test
  public void testNoReadAccess() throws Throwable {
    describe("Test failure handling if the client doesn't"
        + " have read access under a path");
    initNoReadAccess();

    // now move up the API Chain, from the calls made by globStatus,
    // to globStatus itself, and then to LocatedFileStatusFetcher,
    // which invokes globStatus
    checkBasicFileOperations();
    checkGlobOperations();
    checkSingleThreadedLocatedFileStatus();
    checkLocatedFileStatusFourThreads();
    checkLocatedFileStatusScanFile();
    checkLocatedFileStatusNonexistentPath();
    checkDeleteOperations();
  }

  /**
   * Initialize the directory tree and the role filesystem.
   */
  public void initNoReadAccess() throws Throwable {
    describe("Setting up filesystem");

    S3AFileSystem realFS = getFileSystem();

    basePath = methodPath();

    // define the paths and create them.
    describe("Creating test directories and files");

    // this is the directory to which the restricted role has no read
    // access.
    noReadDir = new Path(basePath, "noReadDir");
    // wildcard scan to find *.txt
    noReadWildcard = new Path(noReadDir, "*/*.txt");

    // an empty directory under the noReadDir
    emptyDir = new Path(noReadDir, "emptyDir");
    realFS.mkdirs(emptyDir);

    // an empty file under the noReadDir
    emptyFile = new Path(noReadDir, "emptyFile.txt");
    touch(realFS, emptyFile);

    // a subdirectory
    subDir = new Path(noReadDir, "subDir");

    // and a file in that subdirectory
    subdirFile = new Path(subDir, "subdirFile.txt");
    createFile(realFS, subdirFile, true, HELLO);
    subDir2 = new Path(noReadDir, "subDir2");
    subdir2File1 = new Path(subDir2, "subdir2File1.txt");
    subdir2File2 = new Path(subDir2, "subdir2File2.docx");
    createFile(realFS, subdir2File1, true, HELLO);
    createFile(realFS, subdir2File2, true, HELLO);

    // create a role filesystem which does not have read access under a path
    // it still has write access, which can be explored in the final
    // step to delete files and directories.
    roleConfig = createAssumedRoleConfig();
    bindRolePolicyStatements(roleConfig,
        STATEMENT_ALLOW_SSE_KMS_RW,
        statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS),
        new Statement(Effects.Deny)
            .addActions(S3_ALL_GET)
            .addResources(directory(noReadDir)));
    readonlyFS = (S3AFileSystem) basePath.getFileSystem(roleConfig);
  }

  /**
   * Validate basic IO operations.
   */
  public void checkBasicFileOperations() throws Throwable {

    // this is a LIST call; there's no marker.
    // so the sequence is
    //   - HEAD path -> FNFE
    //   - HEAD path + / -> FNFE
    //   - LIST path -> list results
    // Because the client has list access, this succeeds
    readonlyFS.listStatus(basePath);
    lsR(readonlyFS, basePath, true);

    // this is HEAD + "/" on S3
    readonlyFS.listStatus(emptyDir);

    // a recursive list of the no-read-directory works because
    // there is no directory marker, it becomes a LIST call.
    lsR(readonlyFS, noReadDir, true);

    // similarly, a getFileStatus ends up being a list of the path
    // and so working.
    readonlyFS.getFileStatus(noReadDir);
    readonlyFS.getFileStatus(emptyDir);

    // now look at a file; the HEAD request is denied.
    accessDenied(() ->
        readonlyFS.getFileStatus(subdirFile));

    // the attempt to read the data will also fail.
    accessDenied(() ->
        ContractTestUtils.readUTF8(readonlyFS, subdirFile, HELLO.length));
    accessDenied(() -> readonlyFS.open(emptyFile));
  }

  /**
   * Explore Glob's recursive scan.
   */
  public void checkGlobOperations() throws Throwable {
    describe("Glob Status operations");

    // baseline: the real filesystem on a subdir
    globFS(getFileSystem(), subdirFile, null, false, 1);

    // a file fails
    globFS(readonlyFS, subdirFile, null, true, 1);

    // empty directories don't fail.
    globFS(readonlyFS, emptyDir, null, false, 1);
    globFS(readonlyFS,
        noReadWildcard,
        null, false, 2);

    // there is precisely one .docx file (subdir2File2.docx)
    globFS(readonlyFS,
        new Path(noReadDir, "*/*.docx"),
        null, false, 1);

    // there are no .doc files.
    globFS(readonlyFS,
        new Path(noReadDir, "*/*.doc"),
        null, false, 0);
    globFS(readonlyFS, noReadDir,
        EVERYTHING, false, 1);

    // and a filter without any wildcarded pattern only finds
    // the role dir itself.
    FileStatus[] st2 = globFS(readonlyFS, noReadDir,
        EVERYTHING, false, 1);
    Assertions.assertThat(st2)
        .extracting(FileStatus::getPath)
        .containsExactly(noReadDir);
  }

  /**
   * Run a located file status fetcher against the directory tree.
   */
  public void checkSingleThreadedLocatedFileStatus() throws Throwable {
    describe("LocatedFileStatusFetcher operations");

    // use the same filter as FileInputFormat; single thread.
    roleConfig.setInt(LIST_STATUS_NUM_THREADS, 1);
    LocatedFileStatusFetcher fetcher =
        new LocatedFileStatusFetcher(
            roleConfig,
            new Path[]{basePath},
            true,
            HIDDEN_FILE_FILTER,
            true);
    Assertions.assertThat(fetcher.getFileStatuses())
        .describedAs("result of located scan")
        .flatExtracting(FileStatus::getPath)
        .containsExactlyInAnyOrder(
            emptyFile,
            subdirFile,
            subdir2File1,
            subdir2File2);
  }

  /**
   * Run a located file status fetcher against the directory tree.
   */
  public void checkLocatedFileStatusFourThreads() throws Throwable {

    // four threads and the text filter.
    int threads = 4;
    describe("LocatedFileStatusFetcher with %d", threads);
    roleConfig.setInt(LIST_STATUS_NUM_THREADS, threads);
    LocatedFileStatusFetcher fetcher =
        new LocatedFileStatusFetcher(
            roleConfig,
            new Path[]{noReadWildcard},
            true,
            EVERYTHING,
            true);
    Assertions.assertThat(fetcher.getFileStatuses())
        .describedAs("result of located scan")
        .isNotNull()
        .flatExtracting(FileStatus::getPath)
        .containsExactlyInAnyOrder(subdirFile, subdir2File1);
  }

  /**
   * Run a located file status fetcher against a file; the HEAD request
   * on the file is expected to be denied.
   */
  public void checkLocatedFileStatusScanFile() throws Throwable {

    // pass in a file as the base of the scan.
    describe("LocatedFileStatusFetcher with file %s", subdirFile);
    roleConfig.setInt(LIST_STATUS_NUM_THREADS, 16);
    LocatedFileStatusFetcher fetcher
        = new LocatedFileStatusFetcher(
            roleConfig,
            new Path[]{subdirFile},
            true,
            TEXT_FILE,
            true);
    accessDenied(() -> fetcher.getFileStatuses());
  }

  /**
   * Explore what happens with a path that does not exist.
   */
  public void checkLocatedFileStatusNonexistentPath() throws Throwable {

    // scan a path that doesn't exist
    Path nonexistent = new Path(noReadDir, "nonexistent");
    InvalidInputException ex = intercept(InvalidInputException.class,
        DOES_NOT_EXIST,
        () -> new LocatedFileStatusFetcher(
            roleConfig,
            new Path[]{nonexistent},
            true,
            EVERYTHING,
            true)
            .getFileStatuses());
    // validate nested exception
    assertExceptionContains(DOES_NOT_EXIST, ex.getCause());

    // a file which exists but which doesn't match the pattern
    // is downgraded to not existing.
    intercept(InvalidInputException.class,
        DOES_NOT_EXIST,
        () -> new LocatedFileStatusFetcher(
            roleConfig,
            new Path[]{noReadDir},
            true,
            TEXT_FILE,
            true)
            .getFileStatuses());

    // a pattern under a nonexistent path is considered to not be a match.
    ex = intercept(
        InvalidInputException.class,
        MATCHES_0_FILES,
        () -> new LocatedFileStatusFetcher(
            roleConfig,
            // was "*.txt)": the stray ")" was a typo in the glob pattern;
            // the outcome (0 matches under a nonexistent dir) is unchanged.
            new Path[]{new Path(nonexistent, "*.txt")},
            true,
            TEXT_FILE,
            true)
            .getFileStatuses());
    // validate nested exception
    assertExceptionContains(MATCHES_0_FILES, ex.getCause());
  }

  /**
   * Do some cleanup to see what happens with delete calls.
   * Cleanup happens in test teardown anyway; doing it here
   * just makes use of the delete calls to see how delete failures
   * change with permissions.
   */
  public void checkDeleteOperations() throws Throwable {
    describe("Testing delete operations");

    readonlyFS.delete(emptyDir, true);

    // to fail on HEAD
    accessDenied(() -> readonlyFS.delete(emptyFile, true));

    // this will succeed for both
    readonlyFS.delete(subDir, true);

    // after which it is not there
    fileNotFound(() -> readonlyFS.getFileStatus(subDir));

    // and nor is its child.
    fileNotFound(() -> readonlyFS.getFileStatus(subdirFile));

    // now delete the base path
    readonlyFS.delete(basePath, true);

    // and expect an FNFE
    fileNotFound(() -> readonlyFS.getFileStatus(subDir));
  }

  /**
   * Require an operation to fail with a FileNotFoundException.
   * @param eval closure to evaluate.
   * @param <T> type of callable
   * @return the exception.
   * @throws Exception any other exception
   */
  protected <T> FileNotFoundException fileNotFound(final Callable<T> eval)
      throws Exception {
    return intercept(FileNotFoundException.class, eval);
  }

  /**
   * Require an operation to fail with an AccessDeniedException.
   * @param eval closure to evaluate.
   * @param <T> type of callable
   * @return the exception.
   * @throws Exception any other exception
   */
  protected <T> AccessDeniedException accessDenied(final Callable<T> eval)
      throws Exception {
    return intercept(AccessDeniedException.class, eval);
  }

  /**
   * Assert that a status array has exactly one element and its
   * value is as expected.
   * @param expected expected path
   * @param statuses list of statuses
   */
  protected void assertStatusPathEquals(final Path expected,
      final FileStatus[] statuses) {
    Assertions.assertThat(statuses)
        .describedAs("List of status entries")
        .isNotNull()
        .hasSize(1);
    Assertions.assertThat(statuses[0].getPath())
        .describedAs("Status entry %s", statuses[0])
        .isEqualTo(expected);
  }

  /**
   * Glob under a path with expected outcomes.
   * @param fs filesystem to use
   * @param path path (which can include patterns)
   * @param filter optional filter
   * @param expectAuthFailure is auth failure expected?
   * @param expectedCount expected count of results; -1 means null response
   * @return the result of a successful glob or null if an expected auth
   *         failure was caught.
   * @throws IOException failure.
   */
  protected FileStatus[] globFS(
      final S3AFileSystem fs,
      final Path path,
      final PathFilter filter,
      boolean expectAuthFailure,
      final int expectedCount)
      throws IOException {
    LOG.info("Glob {}", path);
    S3ATestUtils.MetricDiff getMetric = new S3ATestUtils.MetricDiff(fs,
        Statistic.OBJECT_METADATA_REQUESTS);
    S3ATestUtils.MetricDiff listMetric = new S3ATestUtils.MetricDiff(fs,
        Statistic.OBJECT_LIST_REQUEST);
    FileStatus[] st;
    try {
      st = filter == null
          ? fs.globStatus(path)
          : fs.globStatus(path, filter);
      LOG.info("Metrics:\n {},\n {}", getMetric, listMetric);
      if (expectAuthFailure) {
        // should have failed here
        String resultStr;
        if (st == null) {
          resultStr = "A null array";
        } else {
          resultStr = StringUtils.join(st, ",");
        }
        fail(String.format("globStatus(%s) should have raised"
            + " an exception, but returned %s", path, resultStr));
      }
    } catch (AccessDeniedException e) {
      LOG.info("Metrics:\n {},\n {}", getMetric, listMetric);
      failif(!expectAuthFailure, "Access denied in glob of " + path,
          e);
      return null;
    } catch (IOException | RuntimeException e) {
      // anything else is an error: rethrow as an assertion failure.
      throw new AssertionError("Other exception raised in glob:" + e, e);
    }
    if (expectedCount < 0) {
      Assertions.assertThat(st)
          .describedAs("Glob of %s", path)
          .isNull();
    } else {
      Assertions.assertThat(st)
          .describedAs("Glob of %s", path)
          .isNotNull()
          .hasSize(expectedCount);
    }
    return st;
  }
}