| #!/usr/bin/env bash |
| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
# Launch switches. A non-empty value inherited from the environment is kept;
# otherwise the default below is used.

# SYSDS_DISTRIBUTED: set to 1 to run with spark-submit instead of local java.
: "${SYSDS_DISTRIBUTED:=0}"

# SYSDS_QUIET: set to 1 to disable the setup output of this script.
: "${SYSDS_QUIET:=0}"

# SYSDS_EXEC_MODE: singlenode when SYSDS_DISTRIBUTED is 0, hybrid otherwise.
if [[ -z "$SYSDS_EXEC_MODE" ]]; then
  if [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
    SYSDS_EXEC_MODE=singlenode
  else
    SYSDS_EXEC_MODE=hybrid
  fi
fi
| |
#######################################
# Echo toggle for the informational output of this script.
# Globals:   SYSDS_QUIET (read) - the message is printed only when this is 0;
#            an unset or empty value is treated as 0.
# Arguments: $1 - the message to print
# Outputs:   the message followed by a newline on stdout
#######################################
print_out() {
  if [[ "${SYSDS_QUIET:-0}" == 0 ]]; then
    # printf instead of echo so that messages such as "-n" are printed verbatim
    printf '%s\n' "$1"
  fi
}
| |
# Use the current working directory as SYSTEMDS_ROOT when the variable is
# unset or empty.
if [[ "${SYSTEMDS_ROOT}" == "" ]]; then
  SYSTEMDS_ROOT="$(pwd)"
  print_out "SYSTEMDS_ROOT not set defaulting to current dir $(pwd)"
fi
| |
# When using find, look in the directories in this order.
# DIR_SEARCH_ORDER is the historical space-separated form and is kept for
# anything that still reads it. SYSDS_SEARCH_DIRS holds the same entries as an
# array so that a SYSTEMDS_ROOT containing whitespace stays a single entry.
DIR_SEARCH_ORDER="$SYSTEMDS_ROOT/target . $SYSTEMDS_ROOT $SYSTEMDS_ROOT/conf $SYSTEMDS_ROOT/lib $SYSTEMDS_ROOT/src"
SYSDS_SEARCH_DIRS=(
  "$SYSTEMDS_ROOT/target"
  "."
  "$SYSTEMDS_ROOT"
  "$SYSTEMDS_ROOT/conf"
  "$SYSTEMDS_ROOT/lib"
  "$SYSTEMDS_ROOT/src"
)

#######################################
# Return the first file whose name matches a pattern, walking the search
# directories in priority order.
# Globals:   SYSDS_SEARCH_DIRS (read), SYSTEMDS_ROOT (read)
# Arguments: $1 - case-insensitive file name pattern (find -iname syntax)
# Outputs:   the first match on stdout, or an empty line if nothing matched
#######################################
ordered_find() {
  local pattern=$1
  local dir
  local hit=""
  local -a depth_limit

  for dir in "${SYSDS_SEARCH_DIRS[@]}"; do
    # SYSTEMDS_ROOT itself and "." are only searched at top level; the
    # dedicated sub-directories are searched recursively.
    depth_limit=()
    if [[ "$dir" == "$SYSTEMDS_ROOT" || "$dir" == "." ]]; then
      depth_limit=(-maxdepth 1)
    fi
    # Missing directories are expected here, so find's complaints are dropped.
    hit=$(find "$dir" "${depth_limit[@]}" -iname "$pattern" -print -quit 2> /dev/null)
    if [[ -n "$hit" ]]; then
      break
    fi
  done
  printf '%s\n' "$hit"
}
| |
# JVM parameters for running locally. A non-empty SYSTEMDS_STANDALONE_OPTS
# from the environment replaces the defaults below.
if [[ -z "$SYSTEMDS_STANDALONE_OPTS" ]]; then
  # specify parameters to java when running locally here
  SYSTEMDS_STANDALONE_OPTS="-Xmx4g -Xms4g -Xmn400m "
else
  print_out "Overriding SYSTEMDS_STANDALONE_OPTS with env var: $SYSTEMDS_STANDALONE_OPTS"
fi

# JVM agent flag that is appended to the launch options when -r is given.
# A non-empty SYSTEMDS_REMOTE_DEBUGGING from the environment replaces it.
if [[ -z "$SYSTEMDS_REMOTE_DEBUGGING" ]]; then
  SYSTEMDS_REMOTE_DEBUGGING=" -agentlib:jdwp=transport=dt_socket,suspend=y,address=8787,server=y "
else
  print_out "Overriding SYSTEMDS_REMOTE_DEBUGGING with env var: $SYSTEMDS_REMOTE_DEBUGGING"
fi
| |
# Resolve the log4j configuration file. A non-empty LOG4JPROP from the
# environment is kept untouched; otherwise the two obvious locations are
# probed before falling back to a wildcard search.
if [[ -z "$LOG4JPROP" ]]; then
  for log4j_candidate in \
    "$SYSTEMDS_ROOT/conf/log4j.properties" \
    "$SYSTEMDS_ROOT/log4j.properties"; do
    if [[ -f "$log4j_candidate" ]]; then
      LOG4JPROP="$log4j_candidate"
      break
    fi
  done
  unset log4j_candidate

  if [[ -z "$LOG4JPROP" ]]; then
    # wildcard search
    LOG4JPROP=$(ordered_find "log4j*properties")
  fi
fi

# Only build the JVM flag when LOG4JPROP names an existing file; otherwise
# LOG4JPROPFULL is left as it was so SystemDS runs with its own non-fatal
# complaint.
if [[ -f "${LOG4JPROP}" ]]; then
  LOG4JPROPFULL="-Dlog4j.configuration=file:$LOG4JPROP"
fi
| |
#######################################
# Print the default spark-submit options as one string. The string contains
# embedded double quotes and is meant to be parsed by eval together with the
# rest of the launch command.
# The log4j related options (driver/executor -Dlog4j.configuration and
# --files) are only emitted when LOG4JPROP names an existing file; without
# that guard an empty LOG4JPROP would make --files consume the following
# --conf option.
# Globals:   LOG4JPROP (read)
# Arguments: none
# Outputs:   the option string (with a trailing space) on stdout
#######################################
sysds_default_distributed_opts() {
  local log4j_flag=""
  local opts

  if [[ -f "${LOG4JPROP}" ]]; then
    log4j_flag="-Dlog4j.configuration=file:$LOG4JPROP"
  fi

  opts="--master yarn --deploy-mode client --driver-memory 100g "
  opts+="--conf spark.driver.extraJavaOptions=\"-Xms100g -Xmn10g${log4j_flag:+ $log4j_flag}\" "
  if [[ -n "$log4j_flag" ]]; then
    opts+="--conf spark.executor.extraJavaOptions=\"$log4j_flag\" "
  fi
  opts+="--conf spark.executor.heartbeatInterval=100s "
  if [[ -n "$log4j_flag" ]]; then
    opts+="--files \"$LOG4JPROP\" "
  fi
  opts+="--conf spark.network.timeout=512s "
  opts+="--num-executors 4 --executor-memory 64g --executor-cores 16 "

  printf '%s' "$opts"
}

if [[ -n "${SYSTEMDS_DISTRIBUTED_OPTS}" ]]; then
  print_out "Overriding SYSTEMDS_DISTRIBUTED_OPTS with env var $SYSTEMDS_DISTRIBUTED_OPTS"
else
  # specify parameters to pass to spark-submit when running on spark
  # in sysds_default_distributed_opts above
  SYSTEMDS_DISTRIBUTED_OPTS=$(sysds_default_distributed_opts)
fi
| |
| |
#######################################
# Print the usage message of this run script.
# Arguments: none ($0 of the script is interpolated into the text)
# Outputs:   the usage text on stdout
#######################################
printUsage() {
  cat << EOF

Usage: $0 [-r] [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]

    SystemDS.jar : Specify a custom SystemDS.jar file (this will be prepended
                   to the classpath or fed to spark-submit)
    -r           : Spawn a debug server for remote debugging (standalone and
                   spark driver only atm). Default port is 8787 - change within
                   this script if necessary. See SystemDS documentation on how
                   to attach a remote debugger.
    -f           : Optional prefix to the dml-filename for consistency with
                   previous behavior
    dml-filename : The script file to run. This is mandatory unless running
                   as a federated worker (see below).
    arguments    : The arguments specified after the DML script are passed to
                   SystemDS. Specify parameters that need to go to
                   java/spark-submit by editing this run script.
    -help        : Print this usage message and SystemDS parameter info

Worker Usage: $0 [-r] WORKER [SystemDS.jar] <portnumber> [arguments] [-help]

    port         : The port to open for the federated worker.

Federated Monitoring Usage: $0 [-r] FEDMONITORING [SystemDS.jar] <portnumber> [arguments] [-help]

    port         : The port to open for the federated monitoring tool.

Set custom launch configuration by setting/editing SYSTEMDS_STANDALONE_OPTS
and/or SYSTEMDS_DISTRIBUTED_OPTS.

Set the environment variable SYSDS_DISTRIBUTED=1 to run spark-submit instead of
local java. Set SYSDS_QUIET=1 to omit extra information printed by this run
script.

EOF
}
| |
# Abort with the usage text when the first argument is missing or empty.
[[ -n "$1" ]] || {
  printf '%s\n' "Wrong Usage. Add -help for additional parameters." ""
  printUsage
  exit 255 # the status bash reports for 'exit -1'
}
| |
# This loop handles the parameters to the run-script, not the ones passed to
# SystemDS. To not confuse getopts with SystemDS parameters, only the first two
# params are considered here: they are concatenated into one word ("$1$2"), so
# e.g. "-f script.dml" is seen as option f with argument "script.dml".
# If more run-script params are needed, adjust the getopts line accordingly.
PRINT_SYSDS_HELP=0
while getopts ":hr:f:" options "$1$2"; do
  case "$options" in
    h)
      echo "Help requested. Will exit after extended usage message!"
      printUsage
      # the SystemDS help itself is printed later, once the jar is known
      PRINT_SYSDS_HELP=1
      break
      ;;
    \?)
      echo "Unknown parameter -$OPTARG"
      printUsage
      # a usage error must not look like success to the caller
      exit 1
      ;;
    f)
      # silently remove -f (this variant is triggered if there's no
      # jar file or WORKER as first parameter)
      if grep -qi "dml" <<< "$OPTARG"; then
        break
      else
        print_out "No DML Script found after -f option."
      fi
      ;;
    r)
      print_out "Spawning server for remote debugging"
      if [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
        SYSTEMDS_STANDALONE_OPTS=${SYSTEMDS_STANDALONE_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
      else
        # NOTE(review): this appends the debugging flag to the spark-submit
        # option string as is; confirm spark-submit accepts it there.
        SYSTEMDS_DISTRIBUTED_OPTS=${SYSTEMDS_DISTRIBUTED_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
      fi
      shift # remove -r from positional arguments
      ;;
    *)
      # reached e.g. for ':' (an option that is missing its argument)
      print_out "Error: Unexpected error while processing options;"
      printUsage
      exit 1
      ;;
  esac
done
| |
#######################################
# Peel the launcher's own positional arguments off the front of the argument
# list, so that the remainder are the arguments for the DML script.
# Accepted shapes:
#   [x.jar] [-f] <dml-filename> ...
#   WORKER [x.jar] <port> ...
#   FEDMONITORING [x.jar] <port> ...
# Keywords and "-f" are compared exactly and a jar is recognised by its
# ".jar" suffix, so script names such as "my-file.dml" or "MY_WORKER.dml"
# are taken as the script and not as a flag.
# Globals:   SYSTEMDS_JAR_FILE, SCRIPT_FILE, WORKER, FEDMONITORING, PORT
#            (written when the corresponding argument is present),
#            SYSDS_CONSUMED_ARGS (written: number of arguments consumed)
# Arguments: the current positional parameters of the script
# Outputs:   an error plus the usage text on stdout for a non-numeric port
# Returns:   0; exits the script with status 1 on a non-numeric port
#######################################
parse_launch_args() {
  local total=$#
  local is_service=0
  local port_re='^[0-9]+$'

  case "$1" in
    WORKER)
      WORKER=1
      is_service=1
      shift
      ;;
    FEDMONITORING)
      FEDMONITORING=1
      is_service=1
      shift
      ;;
  esac

  if [[ "$1" == *.jar ]]; then
    SYSTEMDS_JAR_FILE=$1
    shift
  fi

  if ((is_service)); then
    PORT=$1
    if ! [[ "$PORT" =~ $port_re ]]; then
      echo "error: Port is not a number"
      printUsage
      exit 1
    fi
  else
    # handle optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
  fi

  # drop the port / script name if there was one
  if (($# > 0)); then
    shift
  fi
  SYSDS_CONSUMED_ARGS=$((total - $#))
}

parse_launch_args "$@"
shift "$SYSDS_CONSUMED_ARGS"

# Default both service flags to 0 unless set above (or by the environment).
WORKER=${WORKER:-0}
FEDMONITORING=${FEDMONITORING:-0}
| |
# Find a SystemDS jar file to run, unless one was given explicitly.
# Emptiness of SYSTEMDS_JAR_FILE is tested (not merely whether it is set):
# every $(...) assignment below sets the variable even when nothing was
# found, so a "is it set" test would never reach the later fallbacks.
if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
  if [[ -d "$SYSTEMDS_ROOT/target" && -d "$SYSTEMDS_ROOT/.git" ]]; then
    # Current path is most likely a build directory of systemds
    SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT/target" -maxdepth 1 -iname "systemds-?.?.?-SNAPSHOT.jar" -print -quit)
  elif [[ -d "$SYSTEMDS_ROOT/lib" ]]; then
    # Most likely a release directory.
    SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT" -maxdepth 1 -iname "systemds-?.?.?-SNAPSHOT.jar" -print -quit)
  fi
fi

# If no jar file is found, start searching --- expected + 70 ms execution time
if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
  # patterns are tried in this order; the first one that yields a file wins
  for jar_pattern in "systemds.jar" "systemds-?.?.?.jar" "systemds-?.?.?-SNAPSHOT.jar"; do
    SYSTEMDS_JAR_FILE=$(ordered_find "$jar_pattern")
    if [[ -n "$SYSTEMDS_JAR_FILE" ]]; then
      break
    fi
  done
  unset jar_pattern

  if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
    echo "WARNING: Unable to find SystemDS jar file to launch" >&2
    exit 255 # the status bash reports for 'exit -1'
  fi
fi
| |
#######################################
# Decide which SystemDS configuration file to use and turn it into the
# "-config <file>" option string (or an empty string).
# Precedence:
#   1. "-config <file>" (or "--config <file>") among the given arguments; the
#      option and its value are removed from the argument list. If the passed
#      file does not exist, a warning is printed and an existing CONFIG_FILE
#      from the environment is used instead.
#   2. CONFIG_FILE from the environment, if it names an existing file.
#   3. SystemDS-config-defaults.xml in $SYSTEMDS_ROOT/conf, then in
#      $SYSTEMDS_ROOT, then via ordered_find; finally SystemDS-config.xml via
#      ordered_find.
# The option is matched exactly, so arguments that merely contain "-config"
# (e.g. "out=my-config.txt") are left alone.
# Globals:   CONFIG_FILE (read/written), SYSTEMDS_ROOT (read),
#            SYSDS_REMAINING_ARGS (written: the arguments without -config)
# Arguments: the arguments that are destined for SystemDS
# Outputs:   a warning on stdout when the passed config file does not exist
#######################################
resolve_config_file() {
  local idx
  local next
  local config_idx=""
  local candidate

  # copy the arguments one to one so values containing spaces stay intact
  SYSDS_REMAINING_ARGS=("$@")

  for idx in "${!SYSDS_REMAINING_ARGS[@]}"; do
    case "${SYSDS_REMAINING_ARGS[idx]}" in
      -config | --config)
        config_idx=$idx
        break
        ;;
    esac
  done

  if [[ -n "$config_idx" ]]; then
    # override config file from env var if given as parameter to SystemDS
    next=$((config_idx + 1))
    candidate=${SYSDS_REMAINING_ARGS[next]}
    if [[ -f "$candidate" ]]; then
      CONFIG_FILE=$candidate
    else
      printf 'Warning! Passed config file %s does not exist.\n' "$candidate"
    fi
    # remove -config
    unset "SYSDS_REMAINING_ARGS[$config_idx]"
    # remove -config param if not starting with -
    if [[ "${candidate:0:1}" != "-" ]]; then
      unset "SYSDS_REMAINING_ARGS[$next]"
    fi

    if [[ -f "$CONFIG_FILE" ]]; then
      CONFIG_FILE="-config $CONFIG_FILE"
    else
      CONFIG_FILE=""
    fi
  elif [[ -z "$CONFIG_FILE" ]]; then
    # default search for config file
    if [[ -f "$SYSTEMDS_ROOT/conf/SystemDS-config-defaults.xml" ]]; then
      CONFIG_FILE="$SYSTEMDS_ROOT/conf/SystemDS-config-defaults.xml"
    elif [[ -f "$SYSTEMDS_ROOT/SystemDS-config-defaults.xml" ]]; then
      # use the file that was actually found, not its conf/ sibling
      CONFIG_FILE="$SYSTEMDS_ROOT/SystemDS-config-defaults.xml"
    else
      # wildcard search
      CONFIG_FILE=$(ordered_find "SystemDS-config-defaults.xml")
    fi

    if [[ -z "$CONFIG_FILE" ]]; then
      # Second search if still not found.
      CONFIG_FILE=$(ordered_find "SystemDS-config.xml")
    fi

    if [[ -n "$CONFIG_FILE" ]]; then
      CONFIG_FILE="-config $CONFIG_FILE"
    fi
  else
    # CONFIG_FILE was set by env var. Unset if that setting is wrong
    if [[ -f "${CONFIG_FILE}" ]]; then
      CONFIG_FILE="-config $CONFIG_FILE"
    else
      CONFIG_FILE=""
    fi
  fi
}

resolve_config_file "$@"
# continue with the script arguments without the passed -config
set -- "${SYSDS_REMAINING_ARGS[@]}"
| |
#######################################
# Override the exec mode if it is given as a parameter to SystemDS
# (e.g. -exec singlenode). Only the first -exec (or --exec) option counts,
# and it is matched exactly so that values which merely contain "-exec" are
# not mistaken for the option. An -exec without a following value leaves the
# current mode untouched.
# Globals:   SYSDS_EXEC_MODE (written when an override is found)
# Arguments: the arguments that are destined for SystemDS
#######################################
detect_exec_mode() {
  local arg
  local take_next=0

  for arg in "$@"; do
    if ((take_next)); then
      if [[ -n "$arg" ]]; then
        SYSDS_EXEC_MODE=$arg
      fi
      return 0
    fi
    case "$arg" in
      -exec | --exec) take_next=1 ;;
    esac
  done
  return 0
}

detect_exec_mode "$@"
| |
#######################################
# Locate the bundled hadoop directory and extend the search paths for the
# launched process.
# The hadoop bin directory is only added when HADOOP_HOME is non-empty, so a
# failed lookup no longer puts a bare "/bin" in front of PATH, and
# LD_LIBRARY_PATH never gets an empty entry.
# Globals:   SYSTEMDS_ROOT, OSTYPE (read); HADOOP_HOME, PATH_SEP, DIR_SEP,
#            NATIVE_LIBS, PATH, LD_LIBRARY_PATH (written)
# Arguments: none
#######################################
setup_hadoop_env() {
  # find absolute path to hadoop home in SYSTEMDS_ROOT
  if [[ -z "$HADOOP_HOME" ]]; then
    HADOOP_HOME="$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1)"
  fi

  # detect operating system to set correct path separator
  case "$OSTYPE" in
    win32 | msys | cygwin)
      PATH_SEP=\;
      DIR_SEP=\\
      ;;
    *)
      # default directory separator unix style
      DIR_SEP=/
      PATH_SEP=:
      ;;
  esac

  NATIVE_LIBS="$SYSTEMDS_ROOT${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"

  if [[ -n "$HADOOP_HOME" ]]; then
    PATH=${HADOOP_HOME}${DIR_SEP}bin${PATH_SEP}${PATH}
    LD_LIBRARY_PATH=${HADOOP_HOME}${DIR_SEP}bin${LD_LIBRARY_PATH:+${PATH_SEP}${LD_LIBRARY_PATH}}
    # exported so the launched process sees it even if the variable was not
    # part of the environment before
    export LD_LIBRARY_PATH
  fi
  PATH=${PATH}${PATH_SEP}${NATIVE_LIBS}
}

setup_hadoop_env
| |
| |
# When help was requested on the command line, print the SystemDS argument
# help from the jar and stop. The exit status is 1 in that case.
# The jar path is quoted so that a location containing spaces still works.
if [[ "${PRINT_SYSDS_HELP:-0}" == 1 ]]; then
  echo "----------------------------------------------------------------------"
  echo "Further help on SystemDS arguments:"
  java -jar "$SYSTEMDS_JAR_FILE" org.apache.sysds.api.DMLScript -help
  exit 1
fi
| |
# Print the resolved launch settings (skipped entirely in quiet mode).
if [[ "$SYSDS_QUIET" == 0 ]]; then
  for summary_line in \
    "###############################################################################" \
    "# SYSTEMDS_ROOT= $SYSTEMDS_ROOT" \
    "# SYSTEMDS_JAR_FILE= $SYSTEMDS_JAR_FILE" \
    "# SYSDS_EXEC_MODE= $SYSDS_EXEC_MODE" \
    "# CONFIG_FILE= $CONFIG_FILE" \
    "# LOG4JPROP= $LOG4JPROPFULL" \
    "# HADOOP_HOME= $HADOOP_HOME" \
    "#"; do
    print_out "$summary_line"
  done
  unset summary_line
fi
| |
#######################################
# Build the command to run and store it in CMD. CMD is a single string that
# is meant to be passed to eval: the option strings (SYSTEMDS_STANDALONE_OPTS,
# SYSTEMDS_DISTRIBUTED_OPTS, LOG4JPROPFULL, CONFIG_FILE) are inserted as is so
# that eval splits them into words, while the jar, the script file and the
# user arguments are shell-quoted with printf %q so that eval reproduces each
# of them as exactly one word (spaces and special characters included).
# Globals:   WORKER, FEDMONITORING, SYSDS_DISTRIBUTED, SYSDS_EXEC_MODE, PORT,
#            SYSTEMDS_JAR_FILE, SCRIPT_FILE, CONFIG_FILE, LOG4JPROPFULL,
#            SYSTEMDS_STANDALONE_OPTS, SYSTEMDS_DISTRIBUTED_OPTS (read);
#            CMD (written)
# Arguments: the arguments to hand over to SystemDS
# Outputs:   one informational line via print_out
#######################################
build_launch_command() {
  local quoted_jar
  local quoted_script
  local quoted_args=""

  printf -v quoted_jar '%q' "$SYSTEMDS_JAR_FILE"
  printf -v quoted_script '%q' "$SCRIPT_FILE"
  # with no arguments printf %q would emit '' (an empty word), so skip it
  if (($# > 0)); then
    printf -v quoted_args '%q ' "$@"
  fi

  if [[ "$WORKER" == 1 ]]; then
    print_out "# starting Federated worker on port $PORT"
    CMD="java $SYSTEMDS_STANDALONE_OPTS $LOG4JPROPFULL -jar $quoted_jar -w $PORT $CONFIG_FILE $quoted_args"
  elif [[ "$FEDMONITORING" == 1 ]]; then
    print_out "# starting Federated backend monitoring on port $PORT"
    CMD="java $SYSTEMDS_STANDALONE_OPTS $LOG4JPROPFULL -jar $quoted_jar -fedMonitoring $PORT $CONFIG_FILE $quoted_args"
  elif [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
    print_out "# Running script $SCRIPT_FILE locally with opts: $*"
    CMD="java $SYSTEMDS_STANDALONE_OPTS $LOG4JPROPFULL -jar $quoted_jar -f $quoted_script -exec $SYSDS_EXEC_MODE $CONFIG_FILE $quoted_args"
  else
    print_out "# Running script $SCRIPT_FILE distributed with opts: $*"
    CMD="spark-submit $SYSTEMDS_DISTRIBUTED_OPTS $quoted_jar -f $quoted_script -exec $SYSDS_EXEC_MODE $CONFIG_FILE $quoted_args"
  fi
}

build_launch_command "$@"
| |
# Show the assembled command unless running in quiet mode.
if [[ "$SYSDS_QUIET" == 0 ]]; then
  for trailer_line in \
    "# Executing command: $CMD" \
    "###############################################################################"; do
    print_out "$trailer_line"
  done
  unset trailer_line
fi

# run
# NOTE(review): CMD is evaluated as shell code, so everything that was put
# into it is parsed by the shell again.
eval "$CMD"