#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/bin/bash
# Absolute path of the directory this script lives in
_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Remember where the user launched the script from, so we can return there later
_CALLING_DIR="$PWD"
#########################
# The command line help #
#########################
# Print the command-line help for this script, then exit with status 1.
usage() {
  cat <<EOF
Usage: $0
 --spark-command, prints the spark command
 -h | --hadoop, hadoop-version
 -s | --spark, spark version
 -p | --parquet, parquet version
 -a | --avro, avro version
 -i | --hive, hive version
 -l | --scala, scala version
EOF
  exit 1
}
get_spark_command() {
if [ -z "$scala" ]
then
scala="2.11"
else
scala=$scala
fi
echo "spark-submit --packages org.apache.spark:spark-avro_${scala}:2.4.4 \
--master $0 \
--deploy-mode $1 \
--properties-file $2 \
--class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob \
`ls target/hudi-integ-test-*-SNAPSHOT.jar` \
--source-class $3 \
--source-ordering-field $4 \
--input-base-path $5 \
--target-base-path $6 \
--target-table $7 \
--props $8 \
--storage-type $9 \
--payload-class "${10}" \
--workload-yaml-path "${11}" \
--input-file-size "${12}" \
--<use-deltastreamer>"
}
# Handle the standalone long flags before getopts runs (getopts cannot parse
# them). The two back-to-back `case "$1"` statements are merged into one.
# Note: usage() itself exits with status 1, so the old `exit 0` after it was
# dead code and is dropped — observable behavior is unchanged.
case "$1" in
--help)
usage
;;
--spark-command)
get_spark_command
exit 0
;;
esac
while getopts ":h:s:p:a:i:l:-:" opt; do
case $opt in
h) hadoop="$OPTARG"
printf "Argument hadoop is %s\n" "$hadoop"
;;
s) spark="$OPTARG"
printf "Argument spark is %s\n" "$spark"
;;
p) parquet="$OPTARG"
printf "Argument parquet is %s\n" "$parquet"
;;
a) avro="$OPTARG"
printf "Argument avro is %s\n" "$avro"
;;
i) hive="$OPTARG"
printf "Argument hive is %s\n" "$hive"
;;
l) scala="$OPTARG"
printf "Argument scala is %s\n" "$scala"
;;
-)
case "$OPTARG" in
hadoop)
hadoop="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 ))
printf "Argument hadoop is %s\n" "$hadoop"
;;
spark)
spark="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 ))
printf "Argument spark is %s\n" "$spark"
;;
parquet)
parquet="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 ))
printf "Argument parquet is %s\n" "$parquet"
;;
avro)
avro="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 ))
printf "Argument avro is %s\n" "$avro"
;;
hive)
hive="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 ))
printf "Argument hive is %s\n" "$hive"
;;
scala)
scala="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 ))
printf "Argument scala is %s\n" "$scala"
;;
*) echo "Invalid option --$OPTARG" >&2
;;
esac ;;
\?) echo "Invalid option -$OPTARG" >&2
;;
esac
done
# Emit the maven -D flags matching whichever versions the user supplied on
# the command line. Unset or empty versions contribute nothing.
get_versions () {
  local flags=''
  if [ -n "$hadoop" ]; then
    flags+=' -Dhadoop.version='$hadoop
  fi
  if [ -n "$hive" ]; then
    flags+=' -Dhive.version='$hive
  fi
  if [ -n "$scala" ]; then
    flags+=' -Dscala-'$scala
  fi
  # Deliberately unquoted: word-splitting trims the accumulated leading space.
  echo $flags
}
# Assemble and run the maven build with the requested version overrides.
versions=$(get_versions)
final_command="mvn clean install -DskipTests $versions"
# Use a literal printf format string; interpolating $final_command into the
# format would misbehave if the command ever contained a '%'.
printf 'Final command %s\n' "$final_command"
# change to the project root directory to run maven command
# ($final_command is intentionally unquoted so it word-splits into argv)
cd .. && $final_command
# change back to original working directory (quoted: the path may contain
# spaces); bail out if it vanished rather than continuing in the wrong place
cd "$_CALLING_DIR" || exit 1
printf 'A sample spark command to start the integration suite\n'
get_spark_command