| #!/bin/bash |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| # |
| # An integration test for the client, using the vagrant environment as a testbed. |
| |
# When invoked outside the vagrant VM (any user other than "vagrant"), bring the
# VM up, re-run this script inside it, and exit with the status of that run.
# Inside the VM this block is a no-op and execution continues below.
[[ "$USER" == "vagrant" ]] || {
  vagrant up
  time vagrant ssh -c /vagrant/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh "$@"
  exit $?
}
| |
# Strict mode for everything below: unset variables are errors (nounset), any
# failing command aborts the script (errexit), every command is traced
# (xtrace), and a pipeline fails when any of its stages fails (pipefail).
set -o nounset -o errexit -o xtrace
set -o pipefail

readonly TEST_SCHEDULER_IP=192.168.33.7
| |
# curl wrapper: silent, fails on HTTP error responses (--fail), and retries up
# to 4 times with a 10 second delay between attempts.
# Arguments: passed through to curl unchanged.
_curl() {
  curl \
    --silent \
    --fail \
    --retry 4 \
    --retry-delay 10 \
    "$@"
}
| |
# Best-effort cleanup of jobs this suite may have left behind.
# For each listed test job, aborts any in-flight update and kills all
# instances. Output of both commands is discarded and their failures are
# ignored, so a job that is already gone does not stop the remaining jobs from
# being cleaned up (this function runs with `set -e` active).
tear_down() {
  set +x  # Disable command echo, as this makes it more difficult see which command failed.

  local job
  for job in http_example http_example_revocable http_example_docker; do
    # The redirections must precede `|| true`; placed after it they would apply
    # to `true` instead of to the aurora command.
    aurora update abort "devcluster/vagrant/test/$job" >/dev/null 2>&1 || true
    aurora job killall --no-batching "devcluster/vagrant/test/$job" >/dev/null 2>&1 || true
  done
}
| |
# Reports the overall outcome from the global RETCODE and exits with it.
# A non-zero RETCODE prints a failure banner naming $BASH_COMMAND and runs
# tear_down before exiting; zero prints the success banner.
collect_result() {
  if [[ $RETCODE != 0 ]]; then
    echo "!!! FAIL (something returned non-zero) for $BASH_COMMAND"
    # Attempt to clean up any state we left behind.
    tear_down
    exit $RETCODE
  fi

  echo "OK (all tests passed)"
  exit $RETCODE
}
| |
# Succeeds only if fetching the URL (following redirects) ends with HTTP 200.
# Arguments: $1 - URL to probe.
# Returns:   0 when the reported status code is 200, non-zero otherwise.
check_url_live() {
  local _url=$1
  local _status

  # Only the status code curl prints is compared; curl's own exit status is
  # deliberately ignored, as it was when the call sat inside the [[ ]] test.
  _status=$(curl -sL -w '%{http_code}' "$_url" -o /dev/null) || true
  [[ "$_status" == 200 ]]
}
| |
# Waits for a file to disappear, checking up to 10 times with a one second
# pause after each unsuccessful check.
# Arguments: $1 - path that is expected to be removed.
# Returns 0 as soon as the path no longer exists; otherwise prints a message
# and exits the script with status 1.
test_file_removed() {
  local _path=$1
  local _attempt

  for ((_attempt = 1; _attempt <= 10; _attempt++)); do
    if [[ ! -e $_path ]]; then
      return 0
    fi
    sleep 1
  done

  echo "File was not removed."
  exit 1
}
| |
# Checks that `aurora --version` reports exactly the contents of
# /vagrant/.auroraversion.
# Returns: 0 when both strings are identical, non-zero otherwise.
test_version() {
  local _expected
  _expected=$(cat /vagrant/.auroraversion)

  # The version number is written to stderr, making it necessary to redirect the output.
  # The right-hand side is quoted so it is compared literally, not as a glob pattern.
  [[ "$(aurora --version 2>&1)" == "$_expected" ]]
}
| |
# Verifies that localhost:8081/health, fetched through _curl, answers with the
# body "OK".
# Returns: 0 on an exact match, non-zero otherwise.
test_health_check() {
  [[ "$(_curl 'localhost:8081/health')" == OK ]]
}
| |
# Checks that `aurora config list` for a config file mentions the given job key.
# Arguments: $1 - path to the .aurora config file
#            $2 - job key expected to appear in the listing
# Returns:   0 when the job key is found, non-zero otherwise.
test_config() {
  local _config=$1 _jobkey=$2
  local _joblist

  # Non-printable characters are stripped from the listing before matching.
  _joblist=$(aurora config list "$_config" | tr -dc '[[:print:]]')
  [[ "$_joblist" == *"$_jobkey"* ]]
}
| |
# Runs `aurora job inspect` for a job.
# Arguments: $1   - job key
#            $2   - path to the .aurora config file
#            $3.. - optional extra client arguments
test_inspect() {
  local _jobkey=$1 _config=$2
  shift 2
  # The remaining arguments are joined into one string and re-split on
  # whitespace below, so "--bind name=value" may arrive as a single argument.
  local _extra_args="$*"

  # Word splitting of $_extra_args is intentional.
  # shellcheck disable=SC2086
  aurora job inspect "$_jobkey" "$_config" $_extra_args
}
| |
# Runs `aurora job create` for a job.
# Arguments: $1   - job key
#            $2   - path to the .aurora config file
#            $3.. - optional extra client arguments
test_create() {
  local _jobkey=$1 _config=$2
  shift 2
  # The remaining arguments are joined into one string and re-split on
  # whitespace below, so "--bind name=value" may arrive as a single argument.
  local _extra_args="$*"

  # Word splitting of $_extra_args is intentional.
  # shellcheck disable=SC2086
  aurora job create "$_jobkey" "$_config" $_extra_args
}
| |
# Checks that a job shows up in `aurora job list` for its environment and that
# `aurora job status` succeeds for it.
# Arguments: $1 - cluster, $2 - role, $3 - environment, $4 - job name
test_job_status() {
  local _cluster=$1 _role=$2 _env=$3 _job=$4
  local _jobkey="$_cluster/$_role/$_env/$_job"

  echo "== Checking job status"
  # -F matches the job key literally, so characters such as '.' in a name are
  # not treated as regular-expression operators.
  aurora job list "$_cluster/$_role/$_env" | grep -F -- "$_jobkey"
  aurora job status "$_jobkey"
}
| |
# Probes the scheduler UI pages on localhost:8081 with check_url_live: the
# leader health page, the scheduler landing page, the role page and the job page.
# Arguments: $1 - role, $2 - environment, $3 - job name
test_scheduler_ui() {
  local _role=$1 _env=$2 _job=$3
  local _page

  # Check that scheduler UI pages shown
  base_url="localhost:8081"
  for _page in \
    "leaderhealth" \
    "scheduler" \
    "scheduler/$_role" \
    "scheduler/$_role/$_env/$_job"; do
    check_url_live "$base_url/$_page"
  done
}
| |
# Checks that the observer at localhost:1338 is up and eventually serves the
# detail page of the job's running task (instance 0).
# Arguments: $1 - cluster, $2 - role, $3 - job name
# Returns 0 once the task page responds; after 120 unsuccessful polls (one
# second apart) prints a message and exits the script with status 1.
test_observer_ui() {
  local _cluster=$1 _role=$2 _job=$3
  local _observer_url="localhost:1338"
  local _task_id _attempt

  # Check the observer page
  check_url_live "$_observer_url"

  # Poll the observer, waiting for it to receive and show information about the task.
  for ((_attempt = 1; _attempt <= 120; _attempt++)); do
    _task_id=$(aurora_admin query -l '%taskId%' --shards=0 --states=RUNNING \
      "$_cluster" "$_role" "$_job")
    if check_url_live "$_observer_url/task/$_task_id"; then
      return 0
    fi
    sleep 1
  done

  echo "Observer task detail page is not available."
  exit 1
}
| |
# Restarts a job with `aurora job restart`, two instances per batch and a
# 10 second watch period.
# Arguments: $1 - job key
test_restart() {
  local _jobkey=$1

  aurora job restart --batch-size=2 --watch-secs=10 "$_jobkey"
}
| |
# Asserts that the job's active update is in the expected state.
# The state is read from the third column of the first data row (the first
# line is skipped) of `aurora update list --status active`.
# Arguments: $1 - job key
#            $2 - expected state, compared literally
# Exits the script with status 1 and a message on mismatch.
assert_update_state() {
  local _jobkey=$1 _expected_state=$2
  local _state

  # A failing listing leaves the state empty so the explicit mismatch message
  # below is what gets reported.
  _state=$(aurora update list "$_jobkey" --status active | tail -n +2 | awk '{print $3}') || true
  if [[ "$_state" != "$_expected_state" ]]; then
    echo "Expected update to be in state $_expected_state, but found $_state"
    exit 1
  fi
}
| |
# Drives a job update through start, scheduler snapshot + restart, pause and
# resume, then waits for it and requires a final ROLLED_FORWARD status.
# Arguments: $1   - job key
#            $2   - path to the updated .aurora config file
#            $3   - cluster name, used for the scheduler snapshot
#            $4.. - optional extra client arguments
# Exits the script with status 1 if the update does not end in ROLLED_FORWARD.
test_update() {
  local _jobkey=$1 _config=$2 _cluster=$3
  shift 3
  # Joined here and re-split on whitespace where used, so "--bind name=value"
  # may arrive as a single argument.
  local _extra_args="$*"
  local _update_id _status

  # Word splitting of $_extra_args is intentional.
  # shellcheck disable=SC2086
  aurora update start "$_jobkey" "$_config" $_extra_args
  assert_update_state "$_jobkey" 'ROLLING_FORWARD'
  _update_id=$(aurora update list "$_jobkey" --status ROLLING_FORWARD \
    | tail -n +2 | awk '{print $2}')

  # The update is expected to still be rolling forward after the scheduler has
  # been snapshotted and restarted.
  aurora_admin scheduler_snapshot "$_cluster"
  sudo restart aurora-scheduler
  assert_update_state "$_jobkey" 'ROLLING_FORWARD'

  aurora update pause "$_jobkey" --message='hello'
  assert_update_state "$_jobkey" 'ROLL_FORWARD_PAUSED'
  aurora update resume "$_jobkey"
  assert_update_state "$_jobkey" 'ROLLING_FORWARD'
  aurora update wait "$_jobkey" "$_update_id"

  # Check that the update ended in ROLLED_FORWARD state. Assumes the status is the last column.
  # A missing status line leaves the value empty so the message below is reported.
  _status=$(aurora update info "$_jobkey" "$_update_id" \
    | grep 'Current status' | awk '{print $NF}') || true
  if [[ "$_status" != "ROLLED_FORWARD" ]]; then
    echo "Update should have completed in ROLLED_FORWARD state"
    exit 1
  fi
}
| |
# Verifies that an update with a failing health check is rolled back.
# First runs a regular update to completion, then starts one from the bad
# health check config and requires its final status to be ROLLED_BACK.
# Arguments: $1   - job key
#            $2   - path to a working .aurora config file
#            $3   - cluster name (not used by this function)
#            $4   - path to the config whose health check is meant to fail
#            $5.. - optional extra client arguments
# Exits the script with status 1 if the second update does not roll back.
test_update_fail() {
  local _jobkey=$1 _config=$2 _cluster=$3 _bad_healthcheck_config=$4
  shift 4
  # Joined here and re-split on whitespace where used, so "--bind name=value"
  # may arrive as a single argument.
  local _extra_args="$*"
  local _update_id _status

  # Make sure a regular update works.
  # Word splitting of $_extra_args is intentional.
  # shellcheck disable=SC2086
  aurora update start "$_jobkey" "$_config" $_extra_args
  assert_update_state "$_jobkey" 'ROLLING_FORWARD'
  _update_id=$(aurora update list "$_jobkey" --status ROLLING_FORWARD \
    | tail -n +2 | awk '{print $2}')
  # Need to wait until the update finishes before we can start one that we want to fail.
  aurora update wait "$_jobkey" "$_update_id"

  # Starting update with a health check that is meant to fail. Expected behavior is roll back.
  # shellcheck disable=SC2086
  aurora update start "$_jobkey" "$_bad_healthcheck_config" $_extra_args
  _update_id=$(aurora update list "$_jobkey" --status active \
    | tail -n +2 | awk '{print $2}')
  # The `||` keeps a non-zero status from `update wait` from aborting the
  # script under `set -e`; the status is only echoed.
  aurora update wait "$_jobkey" "$_update_id" || echo $?

  # Making sure we rolled back.
  # A missing status line leaves the value empty so the message below is reported.
  _status=$(aurora update info "$_jobkey" "$_update_id" \
    | grep 'Current status' | awk '{print $NF}') || true
  if [[ "$_status" != "ROLLED_BACK" ]]; then
    echo "Update should have completed in ROLLED_BACK state due to failed healthcheck."
    exit 1
  fi
}
| |
# Validates the job's serverset announcement by running validate_serverset.py
# through the aurora client in interpreter mode, with the serverset path
# passed in the SERVERSET environment variable.
# Arguments: $1 - role, $2 - environment, $3 - job name
# Returns 0 when the script exits with status 1, which this function treats as
# "validated"; for any other status prints a diagnosis and exits the script
# with status 1.
test_announce() {
  local _role=$1 _env=$2 _job=$3

  # default python return code
  local retcode=0

  # launch aurora client in interpreter mode to get access to the kazoo client
  env SERVERSET="/aurora/$_role/$_env/$_job" PEX_INTERPRETER=1 \
    aurora /vagrant/src/test/sh/org/apache/aurora/e2e/validate_serverset.py || retcode=$?

  case "$retcode" in
    1)
      echo "Validated announced job."
      return 0
      ;;
    2)
      echo "Job failed to announce in serverset."
      ;;
    3)
      echo "Job failed to re-announce when expired."
      ;;
    *)
      echo "Unknown failure in test script."
      ;;
  esac

  exit 1
}
| |
# Checks `aurora task run` by listing the sandbox of every instance.
# Regenerates ~/.ssh/id_rsa and appends its public key to
# ~/.ssh/authorized_keys, then requires the `ls` output to consist of exactly
# three identical entries named http_example.py.
# Arguments: $1 - job key
# Returns:   0 when the listing matches, non-zero otherwise.
test_run() {
  local _jobkey=$1
  local _sandbox_contents _normalized

  # Create an SSH public key so that local SSH works without a password.
  local _ssh_key=~/.ssh/id_rsa
  rm -f -- "${_ssh_key}"*
  ssh-keygen -t rsa -N "" -f "$_ssh_key"
  cat "${_ssh_key}.pub" >> ~/.ssh/authorized_keys

  # Using the sandbox contents as a proxy for functioning SSH. List sandbox contents, we expect
  # 3 instances of the same thing - our python script.
  _sandbox_contents=$(aurora task run "$_jobkey" 'ls' | awk '{print $2}' | sort | uniq -c)
  echo "$_sandbox_contents"

  # `uniq -c` pads the count column; collapse the whitespace so the comparison
  # does not depend on how wide that padding is.
  _normalized=$(awk '{$1 = $1; print}' <<<"$_sandbox_contents")
  [[ "$_normalized" == "3 http_example.py" ]]
}
| |
# Kills instance 1 of a job with `aurora job kill`, then every remaining
# instance with `aurora job killall`.
# Arguments: $1 - job key
test_kill() {
  local _jobkey=$1

  aurora job kill "$_jobkey/1"
  aurora job killall "$_jobkey"
}
| |
# Runs `aurora quota get` for a role.
# Arguments: $1 - cluster, $2 - role
test_quota() {
  local _cluster=$1 _role=$2

  aurora quota get "$_cluster/$_role"
}
| |
# Checks the discovery info published for a task in the JSON served at
# 192.168.33.7:5050/state (presumably the Mesos master state endpoint --
# confirm).
# Looks up the "TwitterScheduler" framework, selects the tasks whose id
# contains the given prefix, and requires the first such task's discovery
# block to carry the expected name and at least one port.
# Arguments: $1 - task id prefix
#            $2 - expected discovery name
# Returns 0 when the checks pass, or when jq is unavailable (test skipped).
# Exits the script with status 1 on any missing data or mismatch.
test_discovery_info() {
  local _task_id_prefix=$1
  local _discovery_name=$2
  local _framework_info _task_info _discovery_info _name _num_ports

  if ! command -v jq >/dev/null 2>&1; then
    echo "jq is not installed, skipping discovery info test"
    return 0
  fi

  # jq prints "[]" or "null" rather than nothing when a lookup finds no data,
  # so those values are treated as missing alongside the empty string.
  _framework_info=$(curl --silent '192.168.33.7:5050/state' \
    | jq '.frameworks | map(select(.name == "TwitterScheduler"))')
  if [[ -z "$_framework_info" || "$_framework_info" == "[]" || "$_framework_info" == "null" ]]; then
    echo "Cannot get framework info for TwitterScheduler"
    exit 1
  fi

  _task_info=$(jq --arg task_id_prefix "${_task_id_prefix}" \
    '.[0]["tasks"] | map(select(.id | contains($task_id_prefix)))' <<<"$_framework_info")
  if [[ -z "$_task_info" || "$_task_info" == "[]" || "$_task_info" == "null" ]]; then
    echo "Cannot get task blob json for task id prefix ${_task_id_prefix}"
    exit 1
  fi

  _discovery_info=$(jq '.[0]["discovery"]' <<<"$_task_info")
  if [[ -z "$_discovery_info" || "$_discovery_info" == "null" ]]; then
    echo "Cannot get discovery info json for task id prefix ${_task_id_prefix}"
    exit 1
  fi

  # -r prints the bare string, so it can be compared with != as a string.
  _name=$(jq -r '.["name"]' <<<"$_discovery_info")
  if [[ "$_name" != "$_discovery_name" ]]; then
    echo "discovery info name \"$_name\" does not equal to expected \"$_discovery_name\""
    exit 1
  fi

  _num_ports=$(jq '.["ports"]["ports"] | length' <<<"$_discovery_info")
  if ! [[ "$_num_ports" -gt 0 ]]; then
    echo "num of ports in discovery info is $_num_ports which is not greater than zero"
    exit 1
  fi
}
| |
# Runs the full client scenario against one job: config listing, inspect,
# create, status and UI checks, discovery info, restart, update, failing
# update, a second update, announce, task run, kill and quota.
# Arguments: $1 - cluster, $2 - role, $3 - environment
#            $4 - base config file, $5 - updated config file
#            $6 - config file with a bad health check
#            $7 - job name
#            $8 - optional extra client arguments as ONE string, e.g.
#                 "--bind name=value"; it is split on whitespace when passed on
test_http_example() {
  local _cluster=$1 _role=$2 _env=$3
  local _base_config=$4 _updated_config=$5
  local _bad_healthcheck_config=$6
  local _job=$7
  local _bind_parameters=${8:-""}

  local _jobkey="$_cluster/$_role/$_env/$_job"
  local _task_id_prefix="${_role}-${_env}-${_job}-0"
  local _discovery_name="${_job}.${_env}.${_role}"

  # $_bind_parameters is left unquoted on purpose throughout: it must split
  # into separate words and vanish entirely when empty.
  # shellcheck disable=SC2086
  {
    test_config "$_base_config" "$_jobkey"
    test_inspect "$_jobkey" "$_base_config" $_bind_parameters
    test_create "$_jobkey" "$_base_config" $_bind_parameters
    test_job_status "$_cluster" "$_role" "$_env" "$_job"
    test_scheduler_ui "$_role" "$_env" "$_job"
    test_observer_ui "$_cluster" "$_role" "$_job"
    test_discovery_info "$_task_id_prefix" "$_discovery_name"
    test_restart "$_jobkey"
    test_update "$_jobkey" "$_updated_config" "$_cluster" $_bind_parameters
    test_update_fail "$_jobkey" "$_base_config" "$_cluster" \
      "$_bad_healthcheck_config" $_bind_parameters
    # Running test_update second time to change state to success.
    test_update "$_jobkey" "$_updated_config" "$_cluster" $_bind_parameters
    test_announce "$_role" "$_env" "$_job"
    test_run "$_jobkey"
    test_kill "$_jobkey"
    test_quota "$_cluster" "$_role"
  }
}
| |
# Reduced scenario for a job: create it, wait for the observer to show its
# task, then kill it.
# Takes the same positional layout as test_http_example so the same argument
# arrays can be passed; positions 5 and 6 are not used here.
# Arguments: $1 - cluster, $2 - role, $3 - environment
#            $4 - base config file
#            $7 - job name
test_http_example_basic() {
  local _cluster=$1 _role=$2 _env=$3
  local _base_config=$4
  local _job=$7
  local _jobkey="$_cluster/$_role/$_env/$_job"

  test_create "$_jobkey" "$_base_config"
  test_observer_ui "$_cluster" "$_role" "$_job"
  test_kill "$_jobkey"
}
| |
# Smoke test for the admin client: `aurora_admin get_scheduler` must print a
# line containing ":8081".
# Arguments: $1 - cluster
# Returns:   0 when such a line is printed, non-zero otherwise.
test_admin() {
  local _cluster=$1

  echo '== Testing admin commands'
  echo '== Getting leading scheduler'
  aurora_admin get_scheduler "$_cluster" | grep ":8081"
}
| |
# Runs a job that is controlled through a "stop file" bound into its config as
# `stop_file`: the job is created while the file does not exist, the file is
# then created, and it must subsequently disappear again.
# Arguments: $1 - cluster, $2 - role, $3 - environment, $4 - job name
#            $5 - path to the .aurora config file
test_ephemeral_daemon_with_final() {
  local _cluster=$1 _role=$2 _env=$3 _job=$4 _config=$5
  local _jobkey="$_cluster/$_role/$_env/$_job"
  local _stop_file

  # Only a unique, currently unused path is needed, so the file mktemp creates
  # is removed again straight away.
  _stop_file=$(mktemp)
  rm -- "$_stop_file"

  test_create "$_jobkey" "$_config" --bind "stop_file=$_stop_file"
  test_observer_ui "$_cluster" "$_role" "$_job"
  test_job_status "$_cluster" "$_role" "$_env" "$_job"
  touch "$_stop_file"  # Stops 'main_process'.
  test_file_removed "$_stop_file"  # Removed by 'final_process'.
}
| |
# Moves ~/.netrc.bak back to ~/.netrc.
# Output is discarded and failure is ignored (for instance when no backup
# exists), so this always returns 0.
restore_netrc() {
  mv ~/.netrc.bak ~/.netrc &>/dev/null || :
}
| |
# Verifies that creating a job without credentials is rejected: with ~/.netrc
# moved out of the way, `aurora job create` must exit with status 30.
# Takes the same positional layout as test_http_example; positions 5 and 6 are
# not used here.
# Arguments: $1 - cluster, $2 - role, $3 - environment
#            $4 - config file
#            $7 - job name
# Exits the script with status 1 if the exit status is anything other than 30.
test_basic_auth_unauthenticated() {
  local _cluster=$1 _role=$2 _env=$3
  local _config=$4
  local _job=$7
  local _jobkey="$_cluster/$_role/$_env/$_job"
  # Initialised so that a (wrongly) successful create is reported as status 0
  # instead of tripping `set -u` on an unset variable.
  local retcode=0

  mv ~/.netrc ~/.netrc.bak
  # `|| retcode=$?` keeps `set -e` from aborting here, so the credentials can
  # be put back right after the command. No EXIT trap is installed for this:
  # setting one would replace the script-level `trap collect_result EXIT`.
  aurora job create "$_jobkey" "$_config" || retcode=$?
  restore_netrc

  if [[ $retcode != 30 ]]; then
    echo "Expected auth error exit code, got $retcode"
    exit 1
  fi
}
| |
# Converts the http_example docker image into an appc image, unpacks it into
# /tmp/mesos/images/appc/images/<image id>, restarts mesos-slave and then runs
# test_http_example for the appc job with the image id bound as appc_image_id.
# Globals: reads BASE_ARGS and TEST_JOB_APPC; sets TEMP_PATH, APPC_IMAGE_ID,
#          APPC_IMAGE_DIRECTORY and TEST_JOB_APPC_ARGS.
test_appc() {
  local _tarball=http_example-latest.tar
  local _aci=http_example-latest.aci

  # All intermediate artifacts live in a scratch directory that is removed
  # once the image has been unpacked.
  TEMP_PATH=$(mktemp -d)
  pushd "$TEMP_PATH"

  # build the appc image from the docker image
  docker save -o "$_tarball" http_example
  docker2aci "$_tarball"

  # The image id is "sha512-" followed by the sha512 digest of the .aci file.
  APPC_IMAGE_ID="sha512-$(sha512sum "$_aci" | awk '{print $1}')"
  APPC_IMAGE_DIRECTORY="/tmp/mesos/images/appc/images/$APPC_IMAGE_ID"

  sudo mkdir -p "$APPC_IMAGE_DIRECTORY"
  sudo tar -xf "$_aci" -C "$APPC_IMAGE_DIRECTORY"
  # This restart is necessary for mesos to pick up the image from the local store.
  sudo restart mesos-slave

  popd
  rm -rf "$TEMP_PATH"

  # The bind option is passed as a single string; test_http_example splits it.
  TEST_JOB_APPC_ARGS=("${BASE_ARGS[@]}")
  TEST_JOB_APPC_ARGS+=("$TEST_JOB_APPC" "--bind appc_image_id=$APPC_IMAGE_ID")
  test_http_example "${TEST_JOB_APPC_ARGS[@]}"
}
| |
# Overall result read by collect_result; it starts out as failure.
RETCODE=1

# Set up shorthands for test
export TEST_ROOT=/vagrant/src/test/sh/org/apache/aurora/e2e
export EXAMPLE_DIR="${TEST_ROOT}/http"
export DOCKER_DIR="${TEST_ROOT}/docker"

# Coordinates shared by every test job.
TEST_CLUSTER=devcluster
TEST_ROLE=vagrant
TEST_ENV=test

# Job names.
TEST_JOB=http_example
TEST_JOB_REVOCABLE=http_example_revocable
TEST_JOB_GPU=http_example_gpu
TEST_JOB_DOCKER=http_example_docker
TEST_JOB_APPC=http_example_appc
TEST_EPHEMERAL_DAEMON_WITH_FINAL_JOB=ephemeral_daemon_with_final

# Job configuration files.
TEST_CONFIG_FILE="$EXAMPLE_DIR/http_example.aurora"
TEST_CONFIG_UPDATED_FILE="$EXAMPLE_DIR/http_example_updated.aurora"
TEST_BAD_HEALTHCHECK_CONFIG_UPDATED_FILE="$EXAMPLE_DIR/http_example_bad_healthcheck.aurora"
TEST_EPHEMERAL_DAEMON_WITH_FINAL_CONFIG_FILE="$TEST_ROOT/ephemeral_daemon_with_final.aurora"

# First six positional arguments of test_http_example; each job-specific array
# below appends the job name as the seventh.
BASE_ARGS=(
  "$TEST_CLUSTER"
  "$TEST_ROLE"
  "$TEST_ENV"
  "$TEST_CONFIG_FILE"
  "$TEST_CONFIG_UPDATED_FILE"
  "$TEST_BAD_HEALTHCHECK_CONFIG_UPDATED_FILE"
)

TEST_JOB_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB")
TEST_JOB_REVOCABLE_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB_REVOCABLE")
TEST_JOB_GPU_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB_GPU")
TEST_JOB_DOCKER_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB_DOCKER")

TEST_ADMIN_ARGS=("$TEST_CLUSTER")

TEST_JOB_EPHEMERAL_DAEMON_WITH_FINAL_ARGS=(
  "$TEST_CLUSTER"
  "$TEST_ROLE"
  "$TEST_ENV"
  "$TEST_EPHEMERAL_DAEMON_WITH_FINAL_JOB"
  "$TEST_EPHEMERAL_DAEMON_WITH_FINAL_CONFIG_FILE"
)
| |
# From here on every exit is reported through collect_result, which uses
# RETCODE to tell success from failure.
trap collect_result EXIT

aurorabuild all
test_version
test_http_example "${TEST_JOB_ARGS[@]}"
test_health_check

# Reduced create/observe/kill scenario for the revocable and GPU jobs.
test_http_example_basic "${TEST_JOB_REVOCABLE_ARGS[@]}"
test_http_example_basic "${TEST_JOB_GPU_ARGS[@]}"

# build the test docker image
sudo docker build -t http_example "${TEST_ROOT}"
test_http_example "${TEST_JOB_DOCKER_ARGS[@]}"

# This test relies on the docker image having been built above.
test_appc

test_admin "${TEST_ADMIN_ARGS[@]}"
test_basic_auth_unauthenticated "${TEST_JOB_ARGS[@]}"

test_ephemeral_daemon_with_final "${TEST_JOB_EPHEMERAL_DAEMON_WITH_FINAL_ARGS[@]}"

# Follow-up suites kept in their own scripts; a failure in either aborts the
# run through `set -e`.
for _suite in test_kerberos_end_to_end.sh test_bypass_leader_redirect_end_to_end.sh; do
  "/vagrant/src/test/sh/org/apache/aurora/e2e/$_suite"
done

# Reached only when nothing above failed.
RETCODE=0