#!/usr/bin/env bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

###################################
# Starts a docker container of the aws cli.
#
# To improve performance of s3_get_number_of_lines_by_prefix, one docker container will be reused for several aws
# commands. An interactive python shell keeps the container busy such that it can be reused to issue several commands.
#
# Globals:
#   TEST_INFRA_DIR
# Exports:
#   AWSCLI_CONTAINER_ID
# Returns:
#   0 once the container reports that it is running,
#   1 if the container could not be created or did not start running in time
###################################
function aws_cli_start() {
  local container_id
  # Assign separately from 'export': 'export VAR=$(cmd)' would hide a failing 'docker run'.
  container_id=$(docker run -d \
    --network host \
    --mount type=bind,source="$TEST_INFRA_DIR",target=/hostdir \
    -e AWS_REGION -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
    --entrypoint python \
    -it banst/awscli) || container_id=""
  if [[ -z "$container_id" ]]; then
    echo "Failed to start the aws cli docker container" >&2
    return 1
  fi
  export AWSCLI_CONTAINER_ID="$container_id"

  # Register the cleanup before waiting, so the container is removed even if it never starts running.
  on_exit aws_cli_stop

  # Poll every 0.1s for at most ~30s. The comparison must be a string comparison: inside [[ ]] the
  # arithmetic operator -ne evaluates both "true" and "false" to 0 and would never wait at all.
  local attempts_left=300
  while [[ "$(docker inspect -f '{{.State.Running}}' "$AWSCLI_CONTAINER_ID")" != "true" ]]; do
    if ((attempts_left <= 0)); then
      echo "Timed out waiting for the aws cli docker container $AWSCLI_CONTAINER_ID to run" >&2
      return 1
    fi
    attempts_left=$((attempts_left - 1))
    sleep 0.1
  done
}

###################################
# Stops and removes the docker container of the aws cli.
#
# Does nothing when no container is recorded in AWSCLI_CONTAINER_ID, so it is safe to call more than once
# (e.g. when it has been registered as an exit hook several times).
#
# Globals:
#   AWSCLI_CONTAINER_ID (read, then reset to empty)
# Returns:
#   0
###################################
function aws_cli_stop() {
  if [[ -z "${AWSCLI_CONTAINER_ID:-}" ]]; then
    return 0
  fi
  # The container may already have exited; a failing kill must not prevent the removal below.
  docker kill "$AWSCLI_CONTAINER_ID" || true
  docker rm "$AWSCLI_CONTAINER_ID"
  export AWSCLI_CONTAINER_ID=
}

# Always (re)start the container while sourcing, so that AWSCLI_CONTAINER_ID is available from the parent script.
# A container recorded by an earlier sourcing is stopped first. The ':-' default keeps the check
# working when the sourcing script runs with 'set -u' and the variable has never been set.
if [[ -n "${AWSCLI_CONTAINER_ID:-}" ]]; then
  aws_cli_stop
fi
aws_cli_start

###################################
# Runs an aws command on the previously started container.
#
# Globals:
#   AWSCLI_CONTAINER_ID
#   S3_ENDPOINT (optional) - passed to aws as --endpoint-url when set and non-empty
# Arguments:
#   $@ - arguments passed on to aws
# Outputs:
#   stdout of the aws command; an error message on stderr if the command fails
# Returns:
#   0 on success, 1 if the aws command failed
###################################
function aws_cli() {
  # Build the command line as an array so that no argument is subject to word splitting or globbing.
  local -a cmd=(docker exec "$AWSCLI_CONTAINER_ID" aws)
  if [[ -n "${S3_ENDPOINT:-}" ]]; then
    cmd+=(--endpoint-url "$S3_ENDPOINT")
  fi
  cmd+=("$@")
  if ! "${cmd[@]}"; then
    # Report on stderr: callers capture or redirect stdout and expect only aws output there.
    echo "Error executing aws command: $*" >&2
    return 1
  fi
}

###################################
# Download s3 objects to folder by full path prefix.
#
# Globals:
#   IT_CASE_S3_BUCKET
#   TEST_INFRA_DIR
# Arguments:
#   $1 - local path to save folder with files
#   $2 - s3 key full path prefix
#   $3 - s3 file name prefix w/o directory to filter files by name (optional)
#   $4 - recursive? ("true" enables --recursive; optional)
# Returns:
#   exit status of aws_cli
###################################
function s3_get_by_full_path_and_filename_prefix() {
  # TEST_INFRA_DIR is mounted as /hostdir in the container. Quote the prefix so it is stripped
  # literally and not interpreted as a glob pattern.
  local relative_dir=${1#"$TEST_INFRA_DIR"}
  local -a cp_args=(s3 cp --quiet "s3://$IT_CASE_S3_BUCKET/$2" "/hostdir/${relative_dir}")
  if [[ -n "${3:-}" ]]; then
    # The patterns are passed as separate array elements: quotes embedded in a string that is
    # later expanded unquoted would reach aws literally and the filter would match nothing.
    cp_args+=(--exclude '*' --include "*/${3}[!/]*")
  fi
  if [[ "${4:-}" == true ]]; then
    cp_args+=(--recursive)
  fi
  aws_cli "${cp_args[@]}"
}

###################################
# Delete s3 objects by full path prefix.
#
# Globals:
#   IT_CASE_S3_BUCKET
# Arguments:
#   $1 - s3 key full path prefix (must not be empty)
# Returns:
#   1 if no prefix was given, otherwise the exit status of aws_cli
###################################
function s3_delete_by_full_path_prefix() {
  # An empty prefix would turn this into a recursive delete of the whole bucket.
  if [[ -z "${1:-}" ]]; then
    echo "Refusing to delete s3 objects without a key prefix" >&2
    return 1
  fi
  aws_cli s3 rm --quiet "s3://$IT_CASE_S3_BUCKET/$1" --recursive
}

###################################
# Count number of lines in files of s3 objects filtered by prefix.
# The lines have to be simple to comply with CSV format
# because SQL is used to query the s3 objects.
#
# Globals:
#   IT_CASE_S3_BUCKET
# Arguments:
#   $1 - s3 key prefix
#   $2 - s3 file name prefix w/o directory to filter files by name (optional)
# Outputs:
#   total number of lines in the matching part files (0 if nothing matches)
# Returns:
#   0, or 1 if the temporary file for the intermediate results could not be created
###################################
function s3_get_number_of_lines_by_prefix() {
  local file_prefix="${2-}"
  local -r max_parallel=10
  local keys key count counts_file
  local in_flight=0 total=0 monitor_was_on=false

  # find all objects that have the given key prefix, one key per line;
  # '[]?' tolerates a listing without "Contents" (nothing matched)
  keys=$(aws_cli s3api list-objects --bucket "$IT_CASE_S3_BUCKET" --prefix "$1" |
    docker run -i stedolan/jq -r '.Contents[]?.Key')

  # the background queries append their results to a private temporary file
  counts_file=$(mktemp) || return 1

  # turn off job control, so that there is no noise when starting/finishing bg tasks
  if [[ $- == *m* ]]; then
    monitor_was_on=true
  fi
  set +m

  # in parallel (at most max_parallel tasks at a time), query the number of lines of each part;
  # the keys are read from fd 3 so that stdin stays untouched for the commands in the loop
  while IFS= read -r key <&3; do
    [[ -n "$key" ]] || continue
    [[ "$(basename -- "$key")" == "$file_prefix"* ]] || continue
    if ((in_flight >= max_parallel)); then
      wait
      in_flight=0
    fi
    aws_cli s3api select-object-content --bucket "$IT_CASE_S3_BUCKET" --key "$key" \
      --expression "select count(*) from s3object" --expression-type "SQL" \
      --input-serialization='{"CSV": {}}' --output-serialization='{"CSV": {}}' /dev/stdout >>"$counts_file" &
    in_flight=$((in_flight + 1))
  done 3<<<"$keys"
  wait

  # restore the previous job control setting
  if [[ "$monitor_was_on" == true ]]; then
    set -m
  fi

  # add number of lines of each part; anything that is not a plain number is reported and skipped
  while IFS= read -r count; do
    count=${count//[[:space:]]/}
    if [[ "$count" =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so that leading zeros are not read as octal
      total=$((total + 10#$count))
    elif [[ -n "$count" ]]; then
      echo "Ignoring unexpected line count output: $count" >&2
    fi
  done <"$counts_file"
  rm -f -- "$counts_file"

  echo "$total"
}