#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This file is invoked if TARGET_FILESYSTEM is set to "s3" to check
# preconditions for accessing the specified S3 bucket. It inherits the
# environment variables from its caller and uses them as
# implicit parameters.
#
# The following environment variables are used:
# TARGET_FILESYSTEM
# S3_BUCKET
# AWS_ACCESS_KEY_ID
# AWS_SECRET_ACCESS_KEY
#
# Returns:
# 0 (success): if preconditions for S3 access are satisfied.
# 1 (failure): if S3 access is unsuccessful.
# 2 (error): if the 'aws' executable is not on the path, or other
# environmental problems cause the script to fail.
#
# If tests are to be run against S3 as the backing file system, verify that
# the assigned S3 bucket can be accessed.
# Access can be authorized by AWS_ credentials passed in environment variables
# or an EC2 IAM role assigned to the VM running the tests.
#
# If S3 access is granted via an IAM role assigned to the VM instance,
# then the credentials bound to the IAM role are retrieved automatically
# both by the Hadoop s3a: provider and by AWSCLI.
# In this case AWS keys must not be present in environment variables or in
# core-site.xml because their presence would preempt the IAM-based
# credentials.
#
# For further details on IAM roles refer to the Amazon docs at
# http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials
#
# The assigned IAM role and the security credentials provided
# by the role can be queried through the AWS instance metadata mechanism.
# Instance metadata is served through an HTTP connection to the special
# address 169.254.169.254. Details are described at:
# http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html#instancedata-data-retrieval
AWS_METADATA_IP_ADDRESS='169.254.169.254'
# Defensive guard: this check only applies when the tests target S3. For any
# other (or unset) TARGET_FILESYSTEM there is nothing to verify, so report
# success without printing anything.
[[ "${TARGET_FILESYSTEM-}" == "s3" ]] || exit 0
printf '%s\n' 'Checking S3 access'
# A bucket name is mandatory: an unset or empty S3_BUCKET is reported as an
# S3 access failure (exit status 1).
[[ -n "${S3_BUCKET-}" ]] || {
  printf '%s\n' 'Error: S3_BUCKET cannot be an empty string'
  exit 1
}
# S3 access is authorized either by access keys supplied in the environment or
# by an IAM role that has S3 access privileges. Keys in the environment take
# precedence over the IAM role: invalid keys prevent S3 access even when a
# valid role is present. The IAM role is therefore only probed when both
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are unset or empty.
# The test runs in a subshell with tracing switched off so the key values are
# never written to an xtrace log, and the tracing state of the main shell is
# left untouched.
if (set +x; [[ -z ${AWS_ACCESS_KEY_ID-} && -z ${AWS_SECRET_ACCESS_KEY-} ]]); then
  # Query the instance metadata service for the IAM security credentials
  # listing. wget fails if the metadata address is not reachable (i.e. the
  # script is not running on an EC2 VM) or the IAM role cannot be retrieved.
  # NOTE(review): this is a token-less request; on instances that enforce
  # IMDSv2 session tokens it is presumably rejected and reported as missing
  # credentials -- confirm.
  WGET_ARGS=(
    # Short timeout and a single attempt so the script is not blocked when it
    # is run outside of EC2.
    -T 1
    -t 1
    # No console output and no log file.
    -q
    -o /dev/null
    # Discard the response body. Without -O wget would save the listing as
    # 'index.html' in the current directory on every successful probe.
    -O /dev/null
    "http://${AWS_METADATA_IP_ADDRESS}/latest/meta-data/iam/security-credentials/"
  )
  if ! wget "${WGET_ARGS[@]}"; then
    echo \
"Error: missing valid S3 credentials.
You wanted to access an S3 bucket but you did not supply valid credentials.
The AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables
have to be assigned valid credentials that belong to the owner of the
specified S3 bucket, or an IAM role authorized to access the S3 bucket
has to be assigned to the VM instance if this is run inside an EC2 VM."
    exit 1
  fi
fi
# The bucket probe relies on the AWS CLI. If no executable 'aws' can be
# resolved the check cannot be performed, which is reported as an
# environment error (exit status 2) rather than an S3 access failure.
[[ -x "$(command -v aws)" ]] || {
  printf '%s\n' 'Error: AWS CLI not found, unable to check S3 access.'
  exit 2
}
# List the bucket root and discard the listing itself: only the exit status
# of the AWS CLI decides the outcome. Success ends the script with 0, any
# failure is reported and ends it with 1.
if aws s3 ls "s3://${S3_BUCKET}/" >/dev/null; then
  exit 0
fi
echo "Error: accessing S3_BUCKET '${S3_BUCKET}' failed."
exit 1