#!/usr/bin/env bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
##############################################################
# This script is part of the SystemDS binary release. It is
# meant to work out of the box when unzipping the
# systemds-<version>.zip (or tbz) file.
#
# Make configuration changes here:
##############################################################
# Launch mode selector: 0 (the default) runs SystemDS with local java, any
# other value launches it through spark-submit. A value exported by the
# caller takes precedence over the default.
: "${SYSDS_DISTRIBUTED:=0}"
# Verbosity of this run script: 0 (the default) prints setup information,
# any other value suppresses it.
: "${SYSDS_QUIET:=0}"
# Execution mode passed to SystemDS unless the caller already chose one:
# local runs default to singlenode, everything else to hybrid.
if [[ -z "$SYSDS_EXEC_MODE" ]]; then
  if [[ "$SYSDS_DISTRIBUTED" == "0" ]]; then
    SYSDS_EXEC_MODE=singlenode
  else
    SYSDS_EXEC_MODE=hybrid
  fi
fi
#######################################
# Print a message unless quiet mode is active.
# Globals:   SYSDS_QUIET (read) - the message is printed only when it is 0;
#            an unset or empty value is treated as 0.
# Arguments: $1 - the text to print
# Outputs:   the text followed by a newline on stdout
#######################################
print_out()
{
  # Quoted comparison: an empty or multi-word SYSDS_QUIET must not turn the
  # test into a syntax error.
  if [[ "${SYSDS_QUIET:-0}" == 0 ]]; then
    # printf prints the text verbatim, even if it looks like an echo option.
    printf '%s\n' "$1"
  fi
}
# when using find, look in the directories in this order
# SYSTEMDS_ROOT may still be unset at this point (it is only defaulted further
# down), so fall back to the current directory here. Without the fallback the
# list would degrade to "/conf /lib /src /target" at the filesystem root.
DIR_SEARCH_ORDER=". ${SYSTEMDS_ROOT:-.} ${SYSTEMDS_ROOT:-.}/conf ${SYSTEMDS_ROOT:-.}/lib ${SYSTEMDS_ROOT:-.}/src ${SYSTEMDS_ROOT:-.}/target"

#######################################
# Find the first file matching a name pattern, honoring DIR_SEARCH_ORDER.
# Globals:   DIR_SEARCH_ORDER (read) - space separated list of directories
#            SYSTEMDS_ROOT (read)    - searched non-recursively, like "."
# Arguments: $1 - case-insensitive file name pattern (find -iname syntax)
# Outputs:   path of the first match on stdout, or an empty line if none
#######################################
ordered_find() {
  local pattern=$1
  local root=${SYSTEMDS_ROOT:-.}
  local dir hit=""
  local -a search_dirs
  # Split the list on whitespace without subjecting the entries to globbing.
  read -r -a search_dirs <<< "$DIR_SEARCH_ORDER"
  for dir in "${search_dirs[@]}"; do
    if [[ "$dir" == "$root" || "$dir" == "." ]]; then
      # Top-level directories are only inspected themselves, not recursively.
      hit=$(find "$dir" -maxdepth 1 -iname "$pattern" -print -quit)
    else
      # Sub-directories may not exist in every layout; stay silent about that.
      hit=$(find "$dir" -iname "$pattern" -print -quit 2> /dev/null)
    fi
    if [[ -n "$hit" ]]; then
      break
    fi
  done
  echo "$hit"
}
# JVM options for local (non-Spark) runs. A value exported by the caller wins.
if [[ -n "$SYSTEMDS_STANDALONE_OPTS" ]]; then
  print_out "Overriding SYSTEMDS_STANDALONE_OPTS with env var: $SYSTEMDS_STANDALONE_OPTS"
else
  # specify parameters to java when running locally here
  # Keep the flags separated by literal spaces: a backslash-newline inside
  # double quotes removes the line break, which would fuse adjacent flags
  # into a single invalid JVM option.
  SYSTEMDS_STANDALONE_OPTS="-Xmx4g -Xms4g -Xmn400m "
fi
# JVM agent options appended when -r is given (see the option loop below).
# With suspend=y the JVM waits until a debugger attaches on port 8787.
if [[ -n "$SYSTEMDS_REMOTE_DEBUGGING" ]]; then
  print_out "Overriding SYSTEMDS_REMOTE_DEBUGGING with env var: $SYSTEMDS_REMOTE_DEBUGGING"
else
  SYSTEMDS_REMOTE_DEBUGGING=" -agentlib:jdwp=transport=dt_socket,suspend=y,address=8787,server=y "
fi
# Resolve the log4j configuration and turn it into a JVM system property.
# If no usable file is found LOG4JPROP ends up empty, so SystemDS runs with a
# non fatal complaint instead of a broken -D flag.
if [[ -z "$LOG4JPROP" ]]; then
  # Nothing given by the environment: look for a properties file ourselves.
  LOG4JPROP=$(ordered_find "log4j*properties")
  if [[ -n "$LOG4JPROP" ]]; then
    LOG4JPROP="-Dlog4j.configuration=file:$LOG4JPROP"
  fi
elif [[ -f "$LOG4JPROP" ]]; then
  # L4J was set by env var and points to an existing file.
  LOG4JPROP="-Dlog4j.configuration=file:$LOG4JPROP"
else
  # L4J was set by env var but the setting is wrong (missing, or not a regular
  # file): unset it. A plain file test is used instead of find so that a
  # missing path stays silent and a directory cannot expand to many lines.
  LOG4JPROP=""
fi
#######################################
# Emit the default spark-submit options as one space separated string.
# Arguments: $1 - log4j setting; either a plain file path or the complete
#                 "-Dlog4j.configuration=file:<path>" JVM flag. May be empty.
# Outputs:   the option string (with a trailing space) on stdout. The string
#            contains literal double quotes and is meant to be re-parsed by
#            eval when the final command is executed.
#######################################
sysds_default_distributed_opts() {
  # LOG4JPROP has usually been rewritten into a -D flag by the time this is
  # called; strip that prefix so it is not emitted twice and so that --files
  # receives a path rather than a JVM flag.
  local log4j_file=${1#-Dlog4j.configuration=file:}
  local driver_java_opts="-Xms100g -Xmn10g"
  local -a opts=(--master yarn --deploy-mode client --driver-memory 100g)

  # Only reference the log4j file when there is one: an empty --files value
  # would make spark-submit consume the following option as its argument.
  if [[ -n "$log4j_file" ]]; then
    driver_java_opts+=" -Dlog4j.configuration=file:$log4j_file"
    opts+=(--conf "spark.executor.extraJavaOptions=\"-Dlog4j.configuration=file:$log4j_file\"")
    opts+=(--files "$log4j_file")
  fi
  opts+=(--conf "spark.driver.extraJavaOptions=\"$driver_java_opts\"")
  opts+=(--conf spark.executor.heartbeatInterval=100s)
  opts+=(--conf spark.network.timeout=512s)
  opts+=(--num-executors 4)
  # Spark memory sizes need a unit suffix; a bare number is read as bytes.
  opts+=(--executor-memory 64g)
  opts+=(--executor-cores 16)

  printf '%s ' "${opts[@]}"
}

if [[ -n "${SYSTEMDS_DISTRIBUTED_OPTS}" ]]; then
  print_out "Overriding SYSTEMDS_DISTRIBUTED_OPTS with env var $SYSTEMDS_DISTRIBUTED_OPTS"
else
  # specify parameters to pass to spark-submit when running on spark here
  # (edit sysds_default_distributed_opts above)
  SYSTEMDS_DISTRIBUTED_OPTS=$(sysds_default_distributed_opts "$LOG4JPROP")
fi
##############################################################
# No need to touch the content below. These commands launch
# SystemDS based on the settings above.
##############################################################
#-------------------------------------------------------------
# some helper functions
# error help print
# PRINT_SYSDS_HELP is 0 by default; the option loop sets it to 1 when help is
# requested so that printUsage returns and the SystemDS parameter help can be
# printed afterwards.
PRINT_SYSDS_HELP=0
#######################################
# Print the usage text of this run script.
# Globals:   PRINT_SYSDS_HELP (read)
# Outputs:   usage text on stdout
# Returns:   does not return when PRINT_SYSDS_HELP is 0 - the script exits
#            with status 0 right after printing; returns to the caller
#            otherwise.
#######################################
function printUsage {
  cat << EOF
Usage: $0 [-r] [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]
    SystemDS.jar : Specify a custom SystemDS.jar file (this will be prepended
                   to the classpath or fed to spark-submit)
    -r           : Spawn a debug server for remote debugging (standalone and
                   spark driver only atm). Default port is 8787 - change within
                   this script if necessary. See SystemDS documentation on how
                   to attach a remote debugger.
    -f           : Optional prefix to the dml-filename for consistency with
                   previous behavior
    dml-filename : The script file to run.
                   This is mandatory unless running as a federated worker
                   (see below).
    arguments    : The arguments specified after the DML script are passed to
                   SystemDS. Specify parameters that need to go to
                   java/spark-submit by editing this run script.
    -help        : Print this usage message and SystemDS parameter info
Worker Usage: $0 [-r] WORKER [SystemDS.jar] <portnumber> [arguments] [-help]
    portnumber   : The port to open for the federated worker.
Set custom launch configuration by setting/editing SYSTEMDS_STANDALONE_OPTS
and/or SYSTEMDS_DISTRIBUTED_OPTS.
Set the environment variable SYSDS_DISTRIBUTED=1 to run spark-submit instead of
local java. Set SYSDS_QUIET=1 to omit extra information printed by this run
script.
EOF
  # Plain usage errors stop here; an explicit help request keeps going.
  if [[ "${PRINT_SYSDS_HELP}" -eq 0 ]]; then
    exit 0
  fi
}
# Without a (non-empty) first argument there is nothing to run: complain and
# show the usage text.
if [[ -z "$1" ]]; then
  printf '%s\n' "Wrong Usage. Add -help for additional parameters." ""
  printUsage
fi
#This loop handles the parameters to the run-script, not the ones passed to SystemDS.
#To not confuse getopts with SystemDS parameters, only the first two params are considered
#here. If more run-script params are needed, adjust the next line accordingly
#
# The first two parameters are glued into ONE word ("$1$2") before getopts sees
# them, e.g. "-r" "foo.dml" is parsed as "-rfoo.dml". Because "r:" and "f:" are
# declared as taking an argument, the second parameter ends up in OPTARG.
# The leading ":" in the option string selects getopts' silent error mode: an
# unknown option is reported as "?" (option character in OPTARG), a missing
# argument as ":" - the latter has no arm of its own and lands in "*" below.
while getopts ":hr:f:" options "$1$2"; do
case $options in
# -h / -help: PRINT_SYSDS_HELP=1 makes printUsage return instead of exiting,
# so the script continues and prints the SystemDS parameter help later on.
h ) echo "Help requested. Will exit after extended usage message!"
PRINT_SYSDS_HELP=1
printUsage
break
;;
# Unknown option: printUsage already exits while PRINT_SYSDS_HELP is 0; the
# explicit exit covers the case where it returns.
\? ) echo "Unknown parameter -$OPTARG"
printUsage
exit
;;
f )
# silently remove -f (this variant is triggered if there's no
# jar file or WORKER as first parameter)
# OPTARG is the second parameter here; it only has to contain "dml"
# (case-insensitive) to be accepted. The positional parameters are left
# untouched - "-f" itself is removed further down.
if echo "$OPTARG" | grep -qi "dml"; then
break
else
print_out "No DML Script found after -f option."
fi
;;
r )
print_out "Spawning server for remote debugging"
# Append the debug agent options to whichever option set will be used.
if [ $SYSDS_DISTRIBUTED == 0 ]; then
SYSTEMDS_STANDALONE_OPTS=${SYSTEMDS_STANDALONE_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
else
SYSTEMDS_DISTRIBUTED_OPTS=${SYSTEMDS_DISTRIBUTED_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
fi
# NOTE(review): the loop is expected to end on the next getopts call because
# OPTIND has already moved past the single synthetic word - confirm.
shift # remove -r from positional arguments
;;
# Anything else, including "-r" or "-f" given without a following parameter.
* )
print_out "Error: Unexpected error while processing options;"
printUsage
exit
esac
done
# Peel off first and/or second argument so that $@ contains arguments to DML script
#
# Accepted layouts (after an optional -r has been removed above):
#   <file>.jar [-f] <dml-filename> [arguments]
#   WORKER [<file>.jar] <portnumber> [arguments]
#   [-f] <dml-filename> [arguments]
# The keywords are compared exactly (and the jar by its .jar suffix) rather
# than by substring, so that a script such as "my-file.dml" is not mistaken
# for the "-f" flag and a path like "jars/run.dml" is not taken for a jar.
if [[ "$1" == *.jar ]]; then
  SYSTEMDS_JAR_FILE=$1
  shift
  # handle optional '-f' before DML file (for consistency)
  if [[ "$1" == "-f" ]]; then
    shift
  fi
  SCRIPT_FILE=$1
  shift
elif [[ "$1" == "WORKER" ]]; then
  WORKER=1
  shift
  if [[ "$1" == *.jar ]]; then
    SYSTEMDS_JAR_FILE=$1
    shift
  fi
  PORT=$1
  # The worker port must consist of digits only. printUsage ends the script
  # unless help was requested.
  if ! [[ "$PORT" =~ ^[0-9]+$ ]]; then
    echo "error: Port is not a number"
    printUsage
  fi
  shift
else
  # handle optional '-f' before DML file (for consistency)
  if [[ "$1" == "-f" ]]; then
    shift
  fi
  SCRIPT_FILE=$1
  shift
fi
# Everything below relies on WORKER being either 0 or 1.
: "${WORKER:=0}"
# Locate the SystemDS installation: default to the current directory, or
# rewrite a user supplied location relative to it.
if [[ -z "$SYSTEMDS_ROOT" ]]; then
  SYSTEMDS_ROOT=.
  print_out "SYSTEMDS_ROOT not set defaulting to current dir $(pwd)"
else
  # construct a relative path
  # (quoted, so that a root directory containing spaces or glob characters is
  # passed to realpath as a single argument)
  SYSTEMDS_ROOT=$(realpath --relative-to=. "${SYSTEMDS_ROOT}")
fi
# Pick the SystemDS jar: a jar named on the command line wins, otherwise the
# candidate names are tried from most to least specific until one is found.
if [[ -n "$SYSTEMDS_JAR_FILE" ]]; then
  print_out "Using user supplied systemds jar file $SYSTEMDS_JAR_FILE"
else
  for jar_pattern in "systemds.jar" "systemds-?.?.?.jar" "systemds-?.?.?-SNAPSHOT.jar"; do
    SYSTEMDS_JAR_FILE=$(ordered_find "$jar_pattern")
    if [[ -n "$SYSTEMDS_JAR_FILE" ]]; then
      break
    fi
  done
fi
# Select the SystemDS configuration file. Precedence:
#   1. "-config <file>" among the script arguments (removed from "$@" here,
#      since it is re-added through CONFIG_FILE when the command is built),
#   2. the CONFIG_FILE environment variable,
#   3. SystemDS-config-defaults.xml found via ordered_find.
# CONFIG_FILE ends up either empty or as "-config <file>".

# Copy the arguments one-to-one; re-reading them from an echo would split
# arguments containing whitespace before they are written back with "set --".
myArray=("$@")
config_opt_idx=""
for INDEX in "${!myArray[@]}"; do
  # Exact match only, so that values which merely contain "-config"
  # (e.g. "X=my-config.txt") are left alone.
  if [[ "${myArray[INDEX]}" == "-config" ]]; then
    config_opt_idx=$INDEX
    break
  fi
done

if [[ -n "$config_opt_idx" ]]; then
  # override config file from env var if given as parameter to SystemDS
  config_val_idx=$((config_opt_idx + 1))
  config_val="${myArray[config_val_idx]}"
  if [[ -f "$config_val" ]]; then
    CONFIG_FILE="$config_val"
  else
    # Keep whatever the environment provided as a fallback.
    echo "Warning! Passed config file $config_val does not exist."
  fi
  # remove -config
  unset "myArray[$config_opt_idx]"
  # remove -config param if not starting with -
  if [[ "${config_val:0:1}" != "-" ]]; then
    unset "myArray[$config_val_idx]"
  fi
  # setting the script arguments without the passed -config for further processing
  set -- "${myArray[@]}"
  if [[ -f "$CONFIG_FILE" ]]; then
    CONFIG_FILE="-config $CONFIG_FILE"
  else
    CONFIG_FILE=""
  fi
elif [[ -z "$CONFIG_FILE" ]]; then
  # same as above: set config file param if the file exists
  CONFIG_FILE=$(ordered_find "SystemDS-config-defaults.xml")
  if [[ -n "$CONFIG_FILE" ]]; then
    CONFIG_FILE="-config $CONFIG_FILE"
  fi
else
  # CONFIG_FILE was set by env var. Unset if that setting is wrong
  if [[ -f "${CONFIG_FILE}" ]]; then
    CONFIG_FILE="-config $CONFIG_FILE"
  else
    CONFIG_FILE=""
  fi
fi
# override exec mode if given as parameter to SystemDS (e.g. -exec singlenode)
# The arguments are copied one-to-one (no re-splitting on whitespace) and the
# option name is compared exactly, so text inside another argument's value
# cannot be mistaken for "-exec".
myArray=("$@")
for INDEX in "${!myArray[@]}"; do
  if [[ "${myArray[INDEX]}" == "-exec" ]]; then
    # The word following -exec is the mode (empty if -exec is the last word).
    SYSDS_EXEC_MODE="${myArray[INDEX + 1]}"
    break
  fi
done
# Same string comparison against 0 that the rest of the script uses to decide
# between local java and spark-submit.
if [[ "$SYSDS_DISTRIBUTED" != 0 && "$SYSDS_EXEC_MODE" == "singlenode" ]]; then
  echo "Error: Can not run on Spark with execution mode singlenode"
  exit 1
fi
# If HADOOP_HOME is not provided, use the last directory entry named "hadoop"
# (any case) found below SYSTEMDS_ROOT, as an absolute path, and export it.
if [[ -z "$HADOOP_HOME" ]]; then
  hadoop_dir=$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1)
  HADOOP_HOME=$(realpath "$hadoop_dir")
  export HADOOP_HOME
fi
# Hadoop location relative to the current directory; used below for the
# PATH and library path entries needed to load the hadoop jni libraries.
HADOOP_REL=$(realpath --relative-to=. "$HADOOP_HOME")
# Unix style separators by default, switched on Windows-like shells.
DIR_SEP=/
case "$OSTYPE" in
  win32 | msys | cygwin)
    PATH_SEP=\;
    DIR_SEP=\\
    # turn every forward slash of the relative hadoop path into a backslash
    HADOOP_REL="${HADOOP_REL////\\}"
    ;;
  *)
    PATH_SEP=:
    ;;
esac
# make the jar path relative to skip issues with Windows paths
JARNAME=$(basename -- "$SYSTEMDS_JAR_FILE")
# relative path to jar file
SYSTEMDS_JAR_FILE=$(realpath --relative-to=. "$(dirname -- "$SYSTEMDS_JAR_FILE")")${DIR_SEP}${JARNAME}
# directory expected to hold native libraries inside a source build
NATIVE_LIBS="$SYSTEMDS_ROOT${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"
# hadoop's bin directory goes first, the native library directory last
export PATH="${HADOOP_REL}${DIR_SEP}bin${PATH_SEP}${PATH}${PATH_SEP}${NATIVE_LIBS}"
export LD_LIBRARY_PATH="${HADOOP_REL}${DIR_SEP}bin${PATH_SEP}${LD_LIBRARY_PATH}"
# set java class path: the SystemDS jar followed by the jars in lib and
# target/lib ("*" is left for java to expand).
# The entries are concatenated directly; building the value with filler
# whitespace and deleting all whitespace afterwards would also strip blanks
# that belong to a path component.
CLASSPATH="${SYSTEMDS_JAR_FILE}${PATH_SEP}"
CLASSPATH+="${SYSTEMDS_ROOT}${DIR_SEP}lib${DIR_SEP}*${PATH_SEP}"
CLASSPATH+="${SYSTEMDS_ROOT}${DIR_SEP}target${DIR_SEP}lib${DIR_SEP}*"
# Help was requested on the command line: the usage text of this script has
# already been printed, now add the parameter help of SystemDS itself.
if [[ "$PRINT_SYSDS_HELP" == 1 ]]; then
  echo "----------------------------------------------------------------------"
  echo "Further help on SystemDS arguments:"
  # Quoted so the class path (which contains "*") reaches java as one
  # unexpanded argument.
  java -cp "$CLASSPATH" org.apache.sysds.api.DMLScript -help
  # NOTE(review): exits with status 1 even though help was asked for
  # explicitly - kept as is, callers may rely on it.
  exit 1
fi
# Summary of the resolved settings (suppressed in quiet mode).
print_out "###############################################################################"
print_out "# SYSTEMDS_ROOT= $SYSTEMDS_ROOT"
print_out "# SYSTEMDS_JAR_FILE= $SYSTEMDS_JAR_FILE"
print_out "# SYSDS_EXEC_MODE= $SYSDS_EXEC_MODE"
print_out "# CONFIG_FILE= $CONFIG_FILE"
print_out "# LOG4JPROP= $LOG4JPROP"
print_out "# CLASSPATH= $CLASSPATH"
print_out "# HADOOP_HOME= $HADOOP_HOME"
#build the command to run
# CMD is one flat string: a leading blank followed by the parts below joined
# with single blanks (empty parts therefore show up as double blanks). The
# mode tests are quoted so an empty or multi-word value selects a branch
# instead of raising a test syntax error.
if [[ "$WORKER" == 1 ]]; then
  print_out "#"
  print_out "# starting Fedederated worker on port $PORT"
  print_out "###############################################################################"
  cmd_parts=(
    java "$SYSTEMDS_STANDALONE_OPTS"
    -cp "$CLASSPATH"
    "$LOG4JPROP"
    org.apache.sysds.api.DMLScript
    -w "$PORT"
    "$CONFIG_FILE"
    "$*"
  )
elif [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
  print_out "#"
  print_out "# Running script $SCRIPT_FILE locally with opts: $*"
  print_out "###############################################################################"
  cmd_parts=(
    java "$SYSTEMDS_STANDALONE_OPTS"
    -cp "$CLASSPATH"
    "$LOG4JPROP"
    org.apache.sysds.api.DMLScript
    -f "$SCRIPT_FILE"
    -exec "$SYSDS_EXEC_MODE"
    "$CONFIG_FILE"
    "$*"
  )
else
  print_out "#"
  print_out "# Running script $SCRIPT_FILE distributed with opts: $*"
  print_out "###############################################################################"
  export SPARK_MAJOR_VERSION=2
  cmd_parts=(
    spark-submit "$SYSTEMDS_DISTRIBUTED_OPTS"
    "$SYSTEMDS_JAR_FILE"
    -f "$SCRIPT_FILE"
    -exec "$SYSDS_EXEC_MODE"
    "$CONFIG_FILE"
    "$*"
  )
fi
# "${cmd_parts[*]}" joins the parts with the first character of IFS (a blank
# by default).
CMD=" ${cmd_parts[*]}"
print_out "Executing command: $CMD"
print_out ""
# run
# CMD is a single flat string, so it has to be re-parsed by the shell: eval
# performs the word splitting and honors the escaped double quotes embedded
# in the default SYSTEMDS_DISTRIBUTED_OPTS. Being the last statement, its
# status is the exit status of this script.
# NOTE(review): eval also re-interprets the user supplied script arguments
# that were pasted into CMD via $* - arguments containing blanks are split and
# shell metacharacters in them are expanded/executed. Only pass trusted
# arguments, or rework the command into an array before relying on this for
# untrusted input.
eval "$CMD"