#!/bin/bash
#
# The Mahout command script
#
# Environment Variables
#
#   MAHOUT_JAVA_HOME   The java implementation to use.  Overrides JAVA_HOME.
#
#   MAHOUT_HEAPSIZE    The maximum amount of heap to use, in MB.
#                      Default is 3 GB (-Xmx3g).
#
#   HADOOP_CONF_DIR    The location of a hadoop config directory
#
#   MAHOUT_OPTS        Extra Java runtime options.
#
#   MAHOUT_CONF_DIR    The location of the program short-name to class name
#                      mappings and the default properties files;
#                      defaults to "$MAHOUT_HOME/src/conf", falling back to
#                      "$MAHOUT_HOME/conf" if that directory does not exist
#
#   MAHOUT_LOCAL       set to anything other than an empty string to force
#                      mahout to run locally even if
#                      HADOOP_CONF_DIR and HADOOP_HOME are set
#
#   MAHOUT_CORE        set to anything other than an empty string to force
#                      mahout to run in developer 'core' mode, just as if the
#                      -core option was presented on the command-line
#
# Command-line Options
#
#   -core              -core is used to switch into 'developer mode' when
#                      running mahout locally. If specified, the classes
#                      from the 'target/classes' directories in each project
#                      are used. Otherwise classes will be retrieved from
#                      jars in the binary release collection or *-job.jar files
#                      found in build directories. When running on hadoop
#                      the job files will always be used.

#
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements.  See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License.  You may obtain a copy of the License at
# *
# *     http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */

# Record whether we are running under Cygwin; the flag is later executed as
# a command (true/false) to decide whether paths need cygpath translation.
cygwin=false
if [[ "$(uname)" == CYGWIN* ]]; then
  cygwin=true
fi

# resolve links - $0 may be a softlink

#######################################
# Follow a chain of symbolic links and print the final path.
# An absolute link target replaces the path outright; a relative target
# (with or without a slash in it) is interpreted relative to the directory
# that contains the link, which is how the filesystem resolves it.
# Arguments: $1 - path that may be a symbolic link
# Outputs:   the resolved path on stdout
#######################################
mahout_resolve_link() {
  local path=$1 target
  while [ -h "$path" ]; do
    # readlink prints the link target verbatim; stop if it cannot be read
    target=$(readlink "$path") || break
    case "$target" in
      /*) path=$target ;;
      *) path=$(dirname "$path")/$target ;;
    esac
  done
  printf '%s\n' "$path"
}

THIS=$(mahout_resolve_link "$0")

# Developer 'core' mode is enabled either by a leading -core argument
# (which is consumed here) or by a non-empty MAHOUT_CORE variable.
IS_CORE=0
if [[ "$1" == "-core" ]]; then
  shift
  IS_CORE=1
fi

if [[ -n "$MAHOUT_CORE" ]]; then
  IS_CORE=1
fi

# some directories
# THIS_DIR is the directory holding the (link-resolved) script; MAHOUT_HOME
# is its parent, made absolute.
THIS_DIR=$(dirname "$THIS")
# CDPATH is cleared for the cd so it can neither redirect a relative
# THIS_DIR nor make cd echo the directory into the captured output; '&&'
# keeps a failed cd from silently reporting the current directory instead.
MAHOUT_HOME=$(CDPATH= cd -- "$THIS_DIR/.." && pwd)
if [ -z "$MAHOUT_HOME" ]; then
  echo "Error: cannot determine MAHOUT_HOME from $THIS_DIR" >&2
  exit 1
fi

# some Java parameters
# MAHOUT_JAVA_HOME, when non-empty, takes precedence over JAVA_HOME.
if [ -n "$MAHOUT_JAVA_HOME" ]; then
  JAVA_HOME=$MAHOUT_JAVA_HOME
fi

# Without a JDK location nothing below can work; report on stderr so the
# message is not mixed into output a caller may be capturing.
if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set." >&2
  exit 1
fi

# Java launcher taken from the selected JDK.
JAVA="${JAVA_HOME}/bin/java"

# Maximum heap: MAHOUT_HEAPSIZE (a number of megabytes) wins when set,
# otherwise the built-in default of 3 GB is used.
if [[ -n "$MAHOUT_HEAPSIZE" ]]; then
  JAVA_HEAP_MAX="-Xmx${MAHOUT_HEAPSIZE}m"
else
  JAVA_HEAP_MAX=-Xmx3g
fi

# Pick a default MAHOUT_CONF_DIR when the caller did not provide one:
# prefer the source-tree layout (src/conf), then the release layout (conf).
# Paths are quoted so a MAHOUT_HOME containing spaces is tested correctly.
if [ -z "$MAHOUT_CONF_DIR" ]; then
  if [ -d "$MAHOUT_HOME/src/conf" ]; then
    MAHOUT_CONF_DIR=$MAHOUT_HOME/src/conf
  elif [ -d "$MAHOUT_HOME/conf" ]; then
    MAHOUT_CONF_DIR=$MAHOUT_HOME/conf
  else
    echo No MAHOUT_CONF_DIR found
  fi
fi

# Seed the classpath: whatever the caller exported, then the Mahout conf dir.
CLASSPATH="${CLASSPATH}:${MAHOUT_CONF_DIR}"

# The Hadoop configuration directory is only added when local mode has not
# been forced through MAHOUT_LOCAL.
if [[ -n "$MAHOUT_LOCAL" ]]; then
  echo "MAHOUT_LOCAL is set, so we don't add HADOOP_CONF_DIR to classpath."
elif [[ -n "$HADOOP_CONF_DIR" ]]; then
  echo "MAHOUT_LOCAL is not set; adding HADOOP_CONF_DIR to classpath."
  CLASSPATH+=":${HADOOP_CONF_DIR}"
fi

# JDK tools jar
CLASSPATH+=":${JAVA_HOME}/lib/tools.jar"

# so that filenames w/ spaces are handled correctly in loops below
IFS=

#######################################
# Append each argument that exists on disk to CLASSPATH.
# A glob that matched nothing reaches us as the literal pattern; it does not
# exist, so it is skipped instead of being put on the classpath verbatim.
# Globals:   CLASSPATH (appended to)
# Arguments: candidate paths, typically the result of a glob
#######################################
mahout_classpath_add() {
  local entry
  for entry in "$@"; do
    [ -e "$entry" ] || continue
    CLASSPATH=${CLASSPATH}:${entry}
  done
}

if [ "$IS_CORE" == 0 ]; then
  # add release dependencies to CLASSPATH
  mahout_classpath_add "$MAHOUT_HOME"/mahout-*.jar

  # add dev targets if they exist
  mahout_classpath_add \
    "$MAHOUT_HOME"/examples/target/mahout-examples-*-job.jar \
    "$MAHOUT_HOME"/mahout-examples-*-job.jar

  # add release dependencies to CLASSPATH
  mahout_classpath_add "$MAHOUT_HOME"/lib/*.jar
else
  # developer 'core' mode: use the compiled class directories of each module
  CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/math/target/classes
  CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/core/target/classes
  CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/integration/target/classes
  CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/examples/target/classes
  #CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/core/src/main/resources
fi

# add development dependencies to CLASSPATH
mahout_classpath_add "$MAHOUT_HOME"/examples/target/dependency/*.jar


# Under Cygwin, rewrite the assembled path list in Windows form
# (cygpath -p treats the argument as a path list, -w emits Windows paths).
$cygwin && CLASSPATH=$(cygpath -p -w "$CLASSPATH")

# classpath assembly is finished: put IFS back to its default behaviour
unset IFS

# Log location defaults: a 'logs' directory under MAHOUT_HOME and a file
# named mahout.log, each used only when the variable is unset or empty.
: "${MAHOUT_LOG_DIR:=$MAHOUT_HOME/logs}"
: "${MAHOUT_LOGFILE:=mahout.log}"

# Under Cygwin the log directory is rewritten in Windows form as well
if $cygwin; then
  MAHOUT_LOG_DIR=$(cygpath -p -w "$MAHOUT_LOG_DIR")
fi

# Extend MAHOUT_OPTS with -D system properties. Within this script the
# variable is only expanded on the local-mode java command line.
# Log destination:
MAHOUT_OPTS+=" -Dhadoop.log.dir=${MAHOUT_LOG_DIR}"
MAHOUT_OPTS+=" -Dhadoop.log.file=${MAHOUT_LOGFILE}"
# Fixed job tuning values.
# NOTE(review): "512MB" is passed as a literal string and 32786 is not a
# power of two (32768 would be) - confirm both values are intended.
MAHOUT_OPTS+=" -Dmapred.min.split.size=512MB"
MAHOUT_OPTS+=" -Dmapred.map.child.java.opts=-Xmx4096m"
MAHOUT_OPTS+=" -Dmapred.reduce.child.java.opts=-Xmx4096m"
MAHOUT_OPTS+=" -Dmapred.output.compress=true"
MAHOUT_OPTS+=" -Dmapred.compress.map.output=true"
MAHOUT_OPTS+=" -Dmapred.map.tasks=1"
MAHOUT_OPTS+=" -Dmapred.reduce.tasks=1"
MAHOUT_OPTS+=" -Dio.sort.factor=30"
MAHOUT_OPTS+=" -Dio.sort.mb=1024"
MAHOUT_OPTS+=" -Dio.file.buffer.size=32786"

# Forward a caller-supplied native library path to the JVM
if [[ -n "$JAVA_LIBRARY_PATH" ]]; then
  MAHOUT_OPTS+=" -Djava.library.path=${JAVA_LIBRARY_PATH}"
fi

# Driver class started in both local and hadoop mode
CLASS=org.apache.mahout.driver.MahoutDriver

# Locate the examples job jar: build output first, then the release layout.
# When several files match, the last one wins. MAHOUT_HOME is quoted because
# IFS is back to its default here and an unquoted path with spaces would be
# split into separate words.
for f in "$MAHOUT_HOME"/examples/target/mahout-examples-*-job.jar \
         "$MAHOUT_HOME"/mahout-examples-*-job.jar; do
  if [ -e "$f" ]; then
    MAHOUT_JOB=$f
  fi
done

# run it

#######################################
# Print the path of the hadoop launcher, if any.
# $HADOOP_HOME/bin (or $HADOOP_PREFIX/bin when HADOOP_HOME is empty) is
# searched ahead of the regular PATH. Uses the shell's own 'command -v'
# rather than the external 'which', which is not guaranteed to be installed.
# Globals:   HADOOP_HOME, HADOOP_PREFIX, PATH (read)
# Outputs:   path of the hadoop executable on stdout, nothing if not found
#######################################
mahout_find_hadoop() {
  PATH="${HADOOP_HOME:-${HADOOP_PREFIX}}/bin:$PATH" command -v hadoop 2>/dev/null
}

HADOOP_BINARY=$(mahout_find_hadoop)
if [ -x "$HADOOP_BINARY" ]; then
  # NOTE(review): HADOOP_BINARY_CLASSPATH is not referenced anywhere else in
  # the visible script - confirm whether this call is still needed.
  HADOOP_BINARY_CLASSPATH=$("$HADOOP_BINARY" classpath)
fi

# Dispatch: run locally with plain java when no hadoop launcher was found or
# MAHOUT_LOCAL is set, otherwise hand the job jar to hadoop.
# In both modes a first argument of 'classpath' prints the classpath instead
# of starting anything; in hadoop mode a first argument of 'hadoop' passes
# the remaining arguments straight to the hadoop launcher.
if [ ! -x "$HADOOP_BINARY" ] || [ "$MAHOUT_LOCAL" != "" ]; then
  if [ ! -x "$HADOOP_BINARY" ]; then
    echo "hadoop binary is not in PATH,HADOOP_HOME/bin,HADOOP_PREFIX/bin, running locally"
  elif [ "$MAHOUT_LOCAL" != "" ]; then
    echo "MAHOUT_LOCAL is set, running locally"
  fi
  # echo "CLASSPATH: $CLASSPATH"
  # Java expands a trailing '/*' classpath entry to every jar in that
  # directory. The braces must close right after MAHOUT_HOME: written as
  # ${MAHOUT_HOME/lib/hadoop/*} it is a pattern substitution on the value
  # of MAHOUT_HOME, not a path below it.
  CLASSPATH="${CLASSPATH}:${MAHOUT_HOME}/lib/hadoop/*"
  case "$1" in
    classpath)
      # quoted so the '*' entry and any spaces are printed verbatim
      printf '%s\n' "$CLASSPATH"
      ;;
    *)
      # MAHOUT_OPTS is deliberately unquoted: it holds several options
      # separated by spaces.
      exec "$JAVA" "$JAVA_HEAP_MAX" $MAHOUT_OPTS -classpath "$CLASSPATH" "$CLASS" "$@"
      ;;
  esac
else
  echo "Running on hadoop, using $HADOOP_BINARY and HADOOP_CONF_DIR=$HADOOP_CONF_DIR"

  if [ "$MAHOUT_JOB" = "" ]; then
    echo "ERROR: Could not find mahout-examples-*.job in $MAHOUT_HOME or $MAHOUT_HOME/examples/target, please run 'mvn install' to create the .job file" >&2
    exit 1
  else
    case "$1" in
      hadoop)
        shift
        export HADOOP_CLASSPATH="$MAHOUT_CONF_DIR:${HADOOP_CLASSPATH}:$CLASSPATH"
        exec "$HADOOP_BINARY" "$@"
        ;;
      classpath)
        printf '%s\n' "$CLASSPATH"
        ;;
      *)
        echo "MAHOUT-JOB: $MAHOUT_JOB"
        export HADOOP_CLASSPATH="$MAHOUT_CONF_DIR:${HADOOP_CLASSPATH}"
        exec "$HADOOP_BINARY" jar "$MAHOUT_JOB" "$CLASS" "$@"
        ;;
    esac
  fi
fi