| #!/usr/bin/env bash |
| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| |
| ############################################################## |
| # This script is part of the SystemDS binary release. It is |
| # meant to work out of the box when unzipping the |
| # systemds-<version>.zip (or tbz) file. |
| # |
| # Make configuration changes here: |
| ############################################################## |
| |
# Execution mode switch, taken from the environment when present:
#   0 (default) - run with a local java process
#   1           - run spark-submit instead; this should be used if
#                 "-exec SPARK" or "-exec HYBRID" is required
: "${SYSDS_DISTRIBUTED:=0}"

# Output switch, taken from the environment when present:
#   0 (default) - print the setup output of this script
#   1           - disable the setup output of this script
: "${SYSDS_QUIET:=0}"
| |
#######################################
# An echo toggle: print a message unless quiet mode is switched on.
# Globals:   SYSDS_QUIET (read) - the message is printed only when this is 0
# Arguments: $1 - the message to print
# Outputs:   the message followed by a newline on stdout
# Returns:   0
#######################################
print_out()
{
  # Quoted comparison: an empty or multi-word SYSDS_QUIET simply counts as
  # "not 0" instead of making the test itself fail with an error message.
  if [[ "$SYSDS_QUIET" == 0 ]]; then
    # printf prints the message verbatim, even if it looks like an echo
    # option such as "-n" or "-e"
    printf '%s\n' "$1"
  fi
}
| |
# Options for the local java process. A non-empty SYSTEMDS_STANDALONE_OPTS
# from the environment wins over the defaults below.
if [[ -n "$SYSTEMDS_STANDALONE_OPTS" ]]; then
  print_out "Overriding SYSTEMDS_STANDALONE_OPTS with env var: $SYSTEMDS_STANDALONE_OPTS"
else
  # specify parameters to java when running locally here
  # (one option per line; the separating blank is part of each string, so the
  # value does not depend on how the lines are indented)
  SYSTEMDS_STANDALONE_OPTS="-Xmx4g"
  SYSTEMDS_STANDALONE_OPTS+=" -Xms4g"
  SYSTEMDS_STANDALONE_OPTS+=" -Xmn400m"
fi

# Options for spark-submit. A non-empty SYSTEMDS_DISTRIBUTED_OPTS from the
# environment wins over the defaults below.
if [[ -n "${SYSTEMDS_DISTRIBUTED_OPTS}" ]]; then
  print_out "Overriding SYSTEMDS_DISTRIBUTED_OPTS with env var $SYSTEMDS_DISTRIBUTED_OPTS"
else
  # specify parameters to pass to spark-submit when running on spark here
  SYSTEMDS_DISTRIBUTED_OPTS="--master yarn"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --deploy-mode client"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --driver-memory 96g"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --num-executors 4"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --executor-memory 64g"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --executor-cores 16"
fi
| |
| |
| ############################################################## |
| # No need to touch the content below. These commands launch |
| # SystemDS based on the settings above. |
| ############################################################## |
| |
| |
| #------------------------------------------------------------- |
| # some helper functions |
| |
#######################################
# Error help print: show the usage message of this run script and terminate.
# Arguments: none
# Outputs:   the usage text on stdout ($0 is expanded to the script name)
# Returns:   never returns; exits the shell with status 1
#######################################
printUsageExit()
{
cat << EOF
Usage: $0 [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]

    SystemDS.jar - Specify a custom SystemDS.jar file (this will be prepended to the
                   classpath or fed to spark-submit)
    dml-filename - The script file to run
    arguments    - The arguments specified after the DML script are passed to SystemDS.
                   Specify parameters that need to go to java/spark-submit by editing this
                   run script.
    -help        - Print this usage message and exit

Worker Usage: $0 WORKER [SystemDS.jar] <portnumber> [arguments] [-help]

    portnumber   - The port to open for the federated worker.

Set custom launch configuration by setting/editing SYSTEMDS_STANDALONE_OPTS and/or SYSTEMDS_DISTRIBUTED_OPTS

Set the environment variable SYSDS_DISTRIBUTED=1 to run spark-submit instead of local java
Set SYSDS_QUIET=1 to omit extra information printed by this run script.
EOF
  exit 1
}
| |
# print an error if no argument is supplied.
if [[ -z "$1" ]]; then
  echo "Wrong Usage."
  printUsageExit
fi

# Inspect only the FIRST argument for an option of this run script. The
# arguments specified after the DML script are passed to SystemDS, so no option
# scan may run over them: shifting the arguments in the middle of a getopts
# loop leaves its argument index stale, which made a call such as
# "-f script.dml -stats -explain" end in the usage message.
case "$1" in
  -f)
    if (( $# < 2 )); then
      # '-f' has to be followed by the DML file
      echo "Warning: Help requested. Will exit after usage message"
      printUsageExit
    fi
    # silently remove -f (this variant is triggered if there's no
    # jar file or WORKER as first parameter)
    shift
    ;;
  -?*)
    # -h, -help and every other leading option end in the usage message
    echo "Warning: Help requested. Will exit after usage message"
    printUsageExit
    ;;
esac
| |
#######################################
# Peel the launcher's own leading arguments off a command line.
# Accepted layouts:
#   [SystemDS.jar] [-f] <dml-filename> [arguments]
#   WORKER [SystemDS.jar] <portnumber> [arguments]
# Globals:
#   SYSTEMDS_JAR_FILE (written) - custom jar file, if the first argument
#                                 (second after WORKER) ends in ".jar"
#   SCRIPT_FILE       (written) - the DML script (not set in WORKER mode)
#   WORKER, PORT      (written) - WORKER mode only: 1 and the port number
#   PEELED_ARG_COUNT  (written) - number of leading arguments consumed
# Arguments: the command line of this script ("$@")
# Returns:   0; calls printUsageExit if the WORKER port is not a number
#######################################
peel_launch_args()
{
  local given=$#
  local re='^[0-9]+$'

  # Exact / suffix matches only: a DML script whose name merely contains
  # "jar", "WORKER" or "-f" (e.g. my-first.dml) is still treated as the script.
  if [[ "$1" == *.jar ]]; then
    SYSTEMDS_JAR_FILE=$1
    shift
    # handle optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
    shift
  elif [[ "$1" == "WORKER" ]]; then
    WORKER=1
    shift
    if [[ "$1" == *.jar ]]; then
      SYSTEMDS_JAR_FILE=$1
      shift
    fi
    PORT=$1
    if ! [[ "$PORT" =~ $re ]]; then
      echo "error: Port is not a number"
      printUsageExit
    fi
    shift
  else
    # handle optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
    shift
  fi

  # the shifts above only moved this function's own argument list; report how
  # many arguments were used so the caller can drop them from the script's list
  PEELED_ARG_COUNT=$(( given - $# ))
}

# Peel off first and/or second argument so that $@ contains arguments to DML script
peel_launch_args "$@"
shift "$PEELED_ARG_COUNT"

if [[ -z "$WORKER" ]]; then
  WORKER=0
fi
| |
# Resolve SYSTEMDS_ROOT: default to the current directory, otherwise turn the
# location given in the environment into a path relative to the current
# directory.
if [[ -z "$SYSTEMDS_ROOT" ]]; then
  SYSTEMDS_ROOT=.
  print_out "SYSTEMDS_ROOT not set defaulting to current dir $(pwd)"
else
  # construct a relative path
  # (quoted, so that a root containing blanks is passed as ONE argument)
  SYSTEMDS_ROOT=$(realpath --relative-to=. "${SYSTEMDS_ROOT}")
fi

# when using find, look in the directories in this order
# (kept as one blank-separated string because the find calls of this script
# expand it unquoted to obtain the individual directories)
DIR_SEARCH_ORDER="conf lib $SYSTEMDS_ROOT/conf $SYSTEMDS_ROOT/target"

# find me a SystemDS jar file to run; the candidates are tried in this order:
#   systemds.jar, systemds-<x.y.z>.jar, systemds-<x.y.z>-SNAPSHOT.jar
if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
  # shellcheck disable=SC2086 # DIR_SEARCH_ORDER is a deliberate word list
  SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds.jar" 2> /dev/null | tail -n 1)
  if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
    # shellcheck disable=SC2086
    SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds-?.?.?.jar" 2> /dev/null | head -n 1)
  fi
  if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
    # shellcheck disable=SC2086
    SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds-?.?.?-SNAPSHOT.jar" 2> /dev/null | head -n 1)
  fi
else
  print_out "Using user supplied systemds jar file $SYSTEMDS_JAR_FILE"
fi
| |
# check if log4j config file exists, otherwise unset
# to run with a non fatal complaint by SystemDS
if [[ -z "$LOG4JPROP" ]]; then
  # nothing given: search the usual directories and take the first hit
  # shellcheck disable=SC2086 # DIR_SEARCH_ORDER is a deliberate word list
  LOG4JPROP=$(find $DIR_SEARCH_ORDER -iname "log4j*properties" 2> /dev/null | head -n 1)
  if [[ -n "$LOG4JPROP" ]]; then
    LOG4JPROP="-Dlog4j.configuration=file:$LOG4JPROP"
  fi
elif [[ -f "$LOG4JPROP" ]]; then
  # L4J was set by env var and names an existing file.
  # (a plain file test: no error output for a missing path, and a directory
  # is not mistaken for a configuration file)
  LOG4JPROP="-Dlog4j.configuration=file:$LOG4JPROP"
else
  # L4J was set by env var. Unset if that setting is wrong
  LOG4JPROP=""
fi

# same as above: set config file param if the file exists
if [[ -z "$CONFIG_FILE" ]]; then
  # shellcheck disable=SC2086
  CONFIG_FILE=$(find $DIR_SEARCH_ORDER -iname "SystemDS*config*.xml" 2> /dev/null | head -n 1)
  if [[ -n "$CONFIG_FILE" ]]; then
    CONFIG_FILE="--config $CONFIG_FILE"
  fi
elif [[ -f "$CONFIG_FILE" ]]; then
  # CONFIG_FILE was set by env var and names an existing file
  CONFIG_FILE="--config $CONFIG_FILE"
else
  # CONFIG_FILE was set by env var. Unset if that setting is wrong
  CONFIG_FILE=""
fi
| |
# find absolute path to hadoop home in SYSTEMDS_ROOT
if [[ -z "$HADOOP_HOME" ]]; then
  hadoop_dir=$(find "$SYSTEMDS_ROOT" -iname hadoop 2> /dev/null | tail -n 1)
  # only resolve and export a location that was actually found
  if [[ -n "$hadoop_dir" ]]; then
    HADOOP_HOME=$(realpath "$hadoop_dir")
    export HADOOP_HOME
  fi
fi

# hadoop home relative to the current directory; it is added to path and lib
# path below for loading hadoop jni, and stays empty without a hadoop home
HADOOP_REL=""
if [[ -n "$HADOOP_HOME" ]]; then
  HADOOP_REL=$(realpath --relative-to=. "$HADOOP_HOME")
fi

# default directory separator unix style
DIR_SEP=/
# detect operating system to set correct path separator
if [[ "$OSTYPE" == "win32" || "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
  PATH_SEP=\;
  DIR_SEP=\\
  # turn every '/' of the hadoop path into a backslash
  HADOOP_REL="${HADOOP_REL////\\}"
else
  PATH_SEP=:
fi

# make the jar path relative to skip issues with Windows paths
JARNAME=$(basename "$SYSTEMDS_JAR_FILE")

# relative path to jar file
SYSTEMDS_JAR_FILE=$(realpath --relative-to=. "$(dirname "$SYSTEMDS_JAR_FILE")")${DIR_SEP}${JARNAME}

NATIVE_LIBS="$SYSTEMDS_ROOT${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"
if [[ -n "$HADOOP_REL" ]]; then
  hadoop_bin="${HADOOP_REL}${DIR_SEP}bin"
  PATH="${hadoop_bin}${PATH_SEP}${PATH}"
  # append the previous value only if there is one, so that no empty entry
  # ends up in the library path
  LD_LIBRARY_PATH="${hadoop_bin}${LD_LIBRARY_PATH:+${PATH_SEP}${LD_LIBRARY_PATH}}"
  export LD_LIBRARY_PATH
fi
export PATH="${PATH}${PATH_SEP}${NATIVE_LIBS}"
| |
# set java class path: the SystemDS jar first, then the lib and target/lib
# directories below SYSTEMDS_ROOT, each with a trailing '*' entry.
# The value is assembled piece by piece without any line-break whitespace, so
# nothing has to be trimmed afterwards and blanks that belong to a path survive.
CLASSPATH="${SYSTEMDS_JAR_FILE}${PATH_SEP}"
CLASSPATH+="${SYSTEMDS_ROOT}${DIR_SEP}lib${DIR_SEP}*${PATH_SEP}"
CLASSPATH+="${SYSTEMDS_ROOT}${DIR_SEP}target${DIR_SEP}lib${DIR_SEP}*"

# summary of the resolved setup (printed unless quiet mode is on)
print_out "###############################################################################"
print_out "# SYSTEMDS_ROOT= $SYSTEMDS_ROOT"
print_out "# SYSTEMDS_JAR_FILE= $SYSTEMDS_JAR_FILE"
print_out "# CONFIG_FILE= $CONFIG_FILE"
print_out "# LOG4JPROP= $LOG4JPROP"
print_out "# CLASSPATH= $CLASSPATH"
print_out "# HADOOP_HOME= $HADOOP_HOME"
| |
# build the command to run
# The command is collected in an array, one element per argument, and the DML
# arguments are taken from "$@": an argument that contains blanks or glob
# characters reaches java/spark-submit exactly as the user passed it.
# The option strings (SYSTEMDS_*_OPTS, CONFIG_FILE) hold several blank-separated
# words and are therefore expanded unquoted on purpose.
declare -a cmd
if [[ "$WORKER" == 1 ]]; then
  print_out "#"
  print_out "# starting Federated worker on port $PORT"
  print_out "###############################################################################"
  # shellcheck disable=SC2206 # deliberate word splitting of the option string
  cmd=(java $SYSTEMDS_STANDALONE_OPTS
    -cp "$CLASSPATH"
    ${LOG4JPROP:+"$LOG4JPROP"}
    org.apache.sysds.api.DMLScript
    -w "$PORT"
    "$@")
elif [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
  print_out "#"
  print_out "# Running script $SCRIPT_FILE locally with opts: $*"
  print_out "###############################################################################"
  # shellcheck disable=SC2206 # deliberate word splitting of the option strings
  cmd=(java $SYSTEMDS_STANDALONE_OPTS
    -cp "$CLASSPATH"
    ${LOG4JPROP:+"$LOG4JPROP"}
    org.apache.sysds.api.DMLScript
    -f "$SCRIPT_FILE"
    -exec singlenode
    $CONFIG_FILE
    "$@")
else
  print_out "#"
  print_out "# Running script $SCRIPT_FILE distributed with opts: $*"
  print_out "###############################################################################"
  export SPARK_MAJOR_VERSION=2
  # shellcheck disable=SC2206 # deliberate word splitting of the option string
  cmd=(spark-submit $SYSTEMDS_DISTRIBUTED_OPTS
    "$SYSTEMDS_JAR_FILE"
    -f "$SCRIPT_FILE"
    "$@")
fi
print_out "Executing command: ${cmd[*]}"
print_out ""

# run; the exit status of the launched command is the exit status of this script
"${cmd[@]}"