#!/usr/bin/env bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
| |
##############################################################
# This script is part of the SystemDS binary release. It is
# meant to work out of the box when unzipping the
# systemds-<version>.zip (or tbz) file.
#
# Make configuration changes here:
##############################################################
| |
# Execution mode. If not set (or empty) in the environment it defaults to 0,
# which runs SystemDS with a local java process. Set it to 1 to run
# spark-submit instead; this should be used if "-exec SPARK" or "-exec HYBRID"
# is required.
: "${SYSDS_DISTRIBUTED:=0}"

# Verbosity. If not set (or empty) in the environment it defaults to 0.
# Set it to 1 to disable the setup output of this script.
: "${SYSDS_QUIET:=0}"
| |
# An echo toggle: print a message unless quiet mode is active.
# Globals:   SYSDS_QUIET (read) - "0", empty or unset prints the message;
#            any other value suppresses it
# Arguments: $1 - the message to print
# Outputs:   the message followed by a newline on stdout
print_out() {
  # The expansion is quoted and defaulted so an empty or unset SYSDS_QUIET
  # cannot turn the test into a syntax error.
  if [ "${SYSDS_QUIET:-0}" == 0 ]; then
    # printf instead of echo: a message such as "-n" must be printed verbatim
    printf '%s\n' "$1"
  fi
}
| |
# JVM options used when SystemDS runs as a local java process. A non-empty
# SYSTEMDS_STANDALONE_OPTS from the environment replaces the defaults.
if [ -n "$SYSTEMDS_STANDALONE_OPTS" ]; then
  print_out "Overriding SYSTEMDS_STANDALONE_OPTS with env var: $SYSTEMDS_STANDALONE_OPTS"
else
  # specify parameters to java when running locally here
  # Every option carries its own separating blank, so the value does not
  # depend on how continuation lines happen to be indented. The trailing blank
  # keeps options that are appended later apart from the last one.
  SYSTEMDS_STANDALONE_OPTS="-Xmx4g"
  SYSTEMDS_STANDALONE_OPTS+=" -Xms4g"
  SYSTEMDS_STANDALONE_OPTS+=" -Xmn400m "
fi

# JVM agent option that opens the remote debugging server (port 8787).
if [ -n "$SYSTEMDS_REMOTE_DEBUGGING" ]; then
  print_out "Overriding SYSTEMDS_REMOTE_DEBUGGING with env var: $SYSTEMDS_REMOTE_DEBUGGING"
else
  SYSTEMDS_REMOTE_DEBUGGING=" -agentlib:jdwp=transport=dt_socket,suspend=y,address=8787,server=y "
fi

# Options handed to spark-submit when running distributed.
if [ -n "${SYSTEMDS_DISTRIBUTED_OPTS}" ]; then
  print_out "Overriding SYSTEMDS_DISTRIBUTED_OPTS with env var $SYSTEMDS_DISTRIBUTED_OPTS"
else
  # specify parameters to pass to spark-submit when running on spark here
  SYSTEMDS_DISTRIBUTED_OPTS="--master yarn"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --deploy-mode client"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --driver-memory 96g"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --num-executors 4"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --executor-memory 64g"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --executor-cores 16 "
fi
| |
| |
##############################################################
# No need to touch the content below. These commands launch
# SystemDS based on the settings above.
##############################################################


#-------------------------------------------------------------
# some helper functions
| |
# Controls how printUsage ends: 0 makes it exit the script after printing,
# any other number makes it return so further help can be printed afterwards.
PRINT_SYSDS_HELP=0

# Print the usage message of this run script.
# Globals:   PRINT_SYSDS_HELP (read)
# Arguments: none
# Outputs:   the usage text on stdout
# Returns:   does not return when PRINT_SYSDS_HELP is 0 (or unset): the shell
#            exits with status 0 regardless of why the usage was requested.
#            Otherwise returns to the caller.
printUsage() {
  cat << EOF

Usage: $0 [-r] [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]

    SystemDS.jar : Specify a custom SystemDS.jar file (this will be prepended
                   to the classpath or fed to spark-submit)
    -r           : Spawn a debug server for remote debugging (standalone and
                   spark driver only atm). Default port is 8787 - change within
                   this script if necessary. See SystemDS documentation on how
                   to attach a remote debugger.
    -f           : Optional prefix to the dml-filename for consistency with
                   previous behavior
    dml-filename : The script file to run. This is mandatory unless running
                   as a federated worker (see below).
    arguments    : The arguments specified after the DML script are passed to
                   SystemDS. Specify parameters that need to go to
                   java/spark-submit by editing this run script.
    -help        : Print this usage message and SystemDS parameter info

Worker Usage: $0 [-r] WORKER [SystemDS.jar] <portnumber> [arguments] [-help]

    portnumber   : The port to open for the federated worker.

Set custom launch configuration by setting/editing SYSTEMDS_STANDALONE_OPTS
and/or SYSTEMDS_DISTRIBUTED_OPTS.

Set the environment variable SYSDS_DISTRIBUTED=1 to run spark-submit instead of
local java. Set SYSDS_QUIET=1 to omit extra information printed by this run
script.

EOF
  # quoted and defaulted so an unset flag cannot break the numeric test
  if [ "${PRINT_SYSDS_HELP:-0}" -eq 0 ]; then
    exit 0
  fi
}
| |
# Print an error and the usage message if no (or an empty) first argument is
# supplied.
if [ -z "$1" ]; then
  echo "Wrong Usage. Add -help for additional parameters."
  echo ""
  printUsage
fi
| |
# This loop handles the parameters to the run-script, not the ones passed to
# SystemDS. To not confuse getopts with SystemDS parameters, only the first two
# params are considered here. They are glued into a single word, so "-r x.jar"
# is seen as option r with the argument "x.jar" and "-help" as option h.
# If more run-script params are needed, adjust the next line accordingly.
while getopts ":hr:f:" options "$1$2"; do
  case "$options" in
    h)
      echo "Help requested. Will exit after extended usage message!"
      # with the flag set, printUsage returns instead of exiting
      PRINT_SYSDS_HELP=1
      printUsage
      break
      ;;
    \?)
      echo "Unknown parameter -$OPTARG"
      printUsage
      exit
      ;;
    f)
      # silently remove -f (this variant is triggered if there's no
      # jar file or WORKER as first parameter)
      if grep -qi "dml" <<< "$OPTARG"; then
        break
      else
        print_out "No DML Script found after -f option."
      fi
      ;;
    r)
      print_out "Spawning server for remote debugging"
      # append the debug agent to the options of the selected execution mode
      if [ "${SYSDS_DISTRIBUTED:-0}" == 0 ]; then
        SYSTEMDS_STANDALONE_OPTS=${SYSTEMDS_STANDALONE_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
      else
        SYSTEMDS_DISTRIBUTED_OPTS=${SYSTEMDS_DISTRIBUTED_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
      fi
      shift # remove -r from positional arguments
      ;;
    *)
      # also reached for ":" (an option that is missing its argument)
      print_out "Error: Unexpected error while processing options;"
      printUsage
      exit
      ;;
  esac
done
| |
# Classify the leading launch arguments (custom jar, WORKER keyword and port,
# optional -f, DML script).
# Arguments are matched exactly ("-f", "WORKER") or by suffix ("*.jar"); a
# substring match would misread script names such as "my-first.dml" (contains
# "-f") or "jarvis.dml" (contains "jar").
# Globals:   SYSTEMDS_JAR_FILE, SCRIPT_FILE, WORKER, PORT (written)
#            _SYSDS_CONSUMED_ARGS (written) - number of leading arguments used
# Arguments: the positional parameters of the run script
# Outputs:   an error plus the usage text if the worker port is not a number
# Returns:   0
_sysds_parse_launch_args() {
  local total=$#
  local port_re='^[0-9]+$'

  if [[ "$1" == *.jar ]]; then
    SYSTEMDS_JAR_FILE=$1
    shift
    # handle optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
    shift
  elif [[ "$1" == "WORKER" ]]; then
    WORKER=1
    shift
    if [[ "$1" == *.jar ]]; then
      SYSTEMDS_JAR_FILE=$1
      shift
    fi
    PORT=$1
    if ! [[ "$PORT" =~ $port_re ]]; then
      echo "error: Port is not a number"
      printUsage
    fi
    shift
  else
    # handle optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
    shift
  fi

  # a shift on an empty argument list fails without consuming anything, so
  # the difference is exactly the number of arguments that were used
  _SYSDS_CONSUMED_ARGS=$((total - $#))
  return 0
}

# Peel off first and/or second argument so that $@ contains arguments to DML script
_sysds_parse_launch_args "$@"
shift "$_SYSDS_CONSUMED_ARGS"

if [ -z "$WORKER" ]; then
  WORKER=0
fi
| |
# Root directory of the SystemDS installation, stored relative to the
# current directory.
if [ -z "$SYSTEMDS_ROOT" ]; then
  SYSTEMDS_ROOT=.
  print_out "SYSTEMDS_ROOT not set defaulting to current dir $(pwd)"
else
  # construct a relative path (quoted so a root containing blanks survives)
  SYSTEMDS_ROOT=$(realpath --relative-to=. "${SYSTEMDS_ROOT}")
fi

# when using find, look in the directories in this order
# (a blank separated list, because it is expanded unquoted as find start points)
DIR_SEARCH_ORDER="conf lib $SYSTEMDS_ROOT/conf $SYSTEMDS_ROOT/target"

# find me a SystemDS jar file to run
if [ -n "$SYSTEMDS_JAR_FILE" ]; then
  print_out "Using user supplied systemds jar file $SYSTEMDS_JAR_FILE"
else
  # preference: systemds.jar, then a release jar, then a snapshot jar
  SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds.jar" 2> /dev/null | tail -n 1)
  if [ -z "$SYSTEMDS_JAR_FILE" ]; then
    SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds-?.?.?.jar" 2> /dev/null | head -n 1)
  fi
  if [ -z "$SYSTEMDS_JAR_FILE" ]; then
    SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds-?.?.?-SNAPSHOT.jar" 2> /dev/null | head -n 1)
  fi
fi
| |
# Print "<prefix><file>" for the file that should back an option, or nothing
# if no usable file exists.
# Globals:   DIR_SEARCH_ORDER (read) - blank separated list of directories
# Arguments: $1 - text to put in front of the file (e.g. "--config ")
#            $2 - file supplied by the user (may be empty)
#            $3 - case-insensitive name pattern searched for when $2 is empty
# Outputs:   the assembled option on stdout
# Returns:   0
_sysds_file_option() {
  local prefix=$1 user_file=$2 pattern=$3
  local chosen=""

  if [ -n "$user_file" ]; then
    # the value was set by an env var; drop it if it is not an existing file
    if [ -f "$user_file" ]; then
      chosen=$user_file
    fi
  elif [ -n "$DIR_SEARCH_ORDER" ]; then
    # DIR_SEARCH_ORDER is deliberately unquoted: each word is a start point.
    # Without any start point the search is skipped altogether.
    chosen=$(find $DIR_SEARCH_ORDER -iname "$pattern" 2> /dev/null | head -n 1)
  fi

  if [ -n "$chosen" ]; then
    printf '%s%s\n' "$prefix" "$chosen"
  fi
  return 0
}

# check if log4j config file exists, otherwise unset
# to run with a non fatal complaint by SystemDS
LOG4JPROP=$(_sysds_file_option "-Dlog4j.configuration=file:" "$LOG4JPROP" "log4j*properties")

# same as above: set config file param if the file exists
CONFIG_FILE=$(_sysds_file_option "--config " "$CONFIG_FILE" "SystemDS*config*.xml")
| |
# find absolute path to hadoop home in SYSTEMDS_ROOT
if [ -z "$HADOOP_HOME" ]; then
  hadoop_dir=$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1)
  # only resolve and export a directory that was actually found
  if [ -n "$hadoop_dir" ]; then
    HADOOP_HOME=$(realpath "$hadoop_dir")
    export HADOOP_HOME
  fi
fi

# hadoop home relative to the current directory; stays empty without hadoop
HADOOP_REL=""
if [ -n "$HADOOP_HOME" ]; then
  HADOOP_REL=$(realpath --relative-to=. "$HADOOP_HOME")
fi

# default directory separator unix style
DIR_SEP=/
# detect operating system to set correct path separator
case "$OSTYPE" in
  win32 | msys | cygwin)
    PATH_SEP=\;
    DIR_SEP=\\
    # turn every forward slash into a backslash
    HADOOP_REL="${HADOOP_REL////\\}"
    ;;
  *)
    PATH_SEP=:
    ;;
esac

# make the jar path relative to skip issues with Windows paths
JARNAME=$(basename "$SYSTEMDS_JAR_FILE")

# relative path to jar file
SYSTEMDS_JAR_FILE=$(realpath --relative-to=. "$(dirname "$SYSTEMDS_JAR_FILE")")${DIR_SEP}${JARNAME}

NATIVE_LIBS="$SYSTEMDS_ROOT${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"

# add hadoop home to path and lib path for loading hadoop jni
if [ -n "$HADOOP_REL" ]; then
  hadoop_bin="${HADOOP_REL}${DIR_SEP}bin"
  export PATH="${hadoop_bin}${PATH_SEP}${PATH}${PATH_SEP}${NATIVE_LIBS}"
  # the separator is only added in front of an existing value: a trailing
  # empty entry would make the loader search the current directory
  export LD_LIBRARY_PATH="${hadoop_bin}${LD_LIBRARY_PATH:+${PATH_SEP}${LD_LIBRARY_PATH}}"
else
  # no hadoop directory known: do not prepend a bogus "/bin" entry
  export PATH="${PATH}${PATH_SEP}${NATIVE_LIBS}"
fi
| |
# Join class path entries with a separator.
# The entries are joined directly, so no whitespace has to be trimmed
# afterwards and blanks that belong to a path are preserved.
# Arguments: $1 - separator, $2.. - class path entries
# Outputs:   the joined class path on stdout (nothing without entries)
# Returns:   0
_sysds_join_classpath() {
  if [ "$#" -lt 2 ]; then
    return 0
  fi
  local sep=$1 joined=$2 entry
  shift 2
  for entry in "$@"; do
    joined+="${sep}${entry}"
  done
  printf '%s\n' "$joined"
}

# set java class path
CLASSPATH=$(_sysds_join_classpath "$PATH_SEP" \
  "${SYSTEMDS_JAR_FILE}" \
  "${SYSTEMDS_ROOT}${DIR_SEP}lib${DIR_SEP}*" \
  "${SYSTEMDS_ROOT}${DIR_SEP}target${DIR_SEP}lib${DIR_SEP}*")
| |
# After a help request, also print the argument help of SystemDS itself and
# leave with status 1.
if [ "${PRINT_SYSDS_HELP:-0}" == 1 ]; then
  echo "----------------------------------------------------------------------"
  echo "Further help on SystemDS arguments:"
  # quoted so the class path reaches java as one word with its wildcards intact
  java -cp "$CLASSPATH" org.apache.sysds.api.DMLScript -help
  exit 1
fi
| |
print_out "###############################################################################"
print_out "# SYSTEMDS_ROOT= $SYSTEMDS_ROOT"
print_out "# SYSTEMDS_JAR_FILE= $SYSTEMDS_JAR_FILE"
print_out "# CONFIG_FILE= $CONFIG_FILE"
print_out "# LOG4JPROP= $LOG4JPROP"
print_out "# CLASSPATH= $CLASSPATH"
print_out "# HADOOP_HOME= $HADOOP_HOME"

# Build the command to run as an array, so that every argument given to this
# script ("$@") reaches SystemDS as exactly one word, even if it contains
# blanks or glob characters. The option variables (SYSTEMDS_*_OPTS, LOG4JPROP,
# CONFIG_FILE) hold several blank separated words and are therefore expanded
# unquoted on purpose.
if [ "${WORKER:-0}" == 1 ]; then
  print_out "#"
  print_out "# starting Fedederated worker on port $PORT"
  print_out "###############################################################################"
  # shellcheck disable=SC2206
  CMD=(
    java $SYSTEMDS_STANDALONE_OPTS
    -cp "$CLASSPATH"
    $LOG4JPROP
    org.apache.sysds.api.DMLScript
    -w "$PORT"
    "$@"
  )
elif [ "${SYSDS_DISTRIBUTED:-0}" == 0 ]; then
  print_out "#"
  print_out "# Running script $SCRIPT_FILE locally with opts: $*"
  print_out "###############################################################################"
  # shellcheck disable=SC2206
  CMD=(
    java $SYSTEMDS_STANDALONE_OPTS
    -cp "$CLASSPATH"
    $LOG4JPROP
    org.apache.sysds.api.DMLScript
    -f "$SCRIPT_FILE"
    -exec singlenode
    $CONFIG_FILE
    "$@"
  )
else
  print_out "#"
  print_out "# Running script $SCRIPT_FILE distributed with opts: $*"
  print_out "###############################################################################"
  export SPARK_MAJOR_VERSION=2
  # shellcheck disable=SC2206
  CMD=(
    spark-submit $SYSTEMDS_DISTRIBUTED_OPTS
    "$SYSTEMDS_JAR_FILE"
    -f "$SCRIPT_FILE"
    "$@"
  )
fi
print_out "Executing command: ${CMD[*]}"
print_out ""

# run; the exit status of the launched program becomes the status of the script
"${CMD[@]}"