#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Path this script was invoked with.
TIKA_SCRIPT="$0"

# Flags that command-line options may switch on later.
verbose=false
stop_all=false

# Kernel name as reported by uname; used for the platform check below.
THIS_OS="$(uname -s)"

# What version of Java is required to run this version of Tika.
JAVA_VER_REQ="8"

# for now, we don't support running this script from cygwin due to problems
# like not having lsof, ps auxww, curl, and awkward directory handling
case "$THIS_OS" in
  CYGWIN*)
    echo -e "This script does not support cygwin due to severe limitations and lack of adherence\nto BASH standards, such as lack of lsof, curl, and ps options."
    exit 1
    ;;
esac

#######################################
# Follows a chain of symbolic links and prints the path it ends at.
# Uses readlink instead of parsing `ls -ld` output, which breaks when a
# file name itself contains " -> ".
# Arguments: $1 - path that may be a symbolic link
# Outputs:   the final path on stdout ($1 unchanged when it is not a link)
# Returns:   non-zero if a link cannot be read or the chain exceeds 40 hops
#######################################
resolve_script_path() {
  local path="$1"
  local target
  local hops=0
  while [ -h "$path" ]; do
    hops=$((hops + 1))
    # guard against symlink loops, which would otherwise spin forever
    if [ "$hops" -gt 40 ]; then
      return 1
    fi
    target="$(readlink "$path")" || return 1
    case "$target" in
      /*) path="$target" ;;                     # absolute link target
      *) path="$(dirname "$path")/$target" ;;   # relative to the link's directory
    esac
  done
  printf '%s\n' "$path"
}

# Resolve symlinks to this script
if ! TIKA_SCRIPT="$(resolve_script_path "$TIKA_SCRIPT")"; then
  echo >&2 "ERROR: Unable to resolve the location of this script."
  exit 1
fi

# The Tika root ("tip") is the parent of the directory holding this script.
TIKA_TIP="$(dirname "$TIKA_SCRIPT")/.."
# cd && pwd: fail loudly instead of silently falling back to the current directory
if ! TIKA_TIP="$(cd "$TIKA_TIP" && pwd)"; then
  echo >&2 "ERROR: Unable to determine the Tika root directory."
  exit 1
fi
DEFAULT_SERVER_DIR="$TIKA_TIP/"

# Load the settings include file. A TIKA_INCLUDE given in the environment
# is sourced when it is readable; otherwise the first readable candidate
# from the list below is sourced and remembered in TIKA_INCLUDE.
if [[ -n "$TIKA_INCLUDE" ]]; then
  if [[ -r "$TIKA_INCLUDE" ]]; then
    . "$TIKA_INCLUDE"
  fi
else
  # Locations (in order) to use when searching for an include file.
  for include in \
    "$(dirname "$0")/tika.in.sh" \
    "$HOME/.tika.in.sh" \
    /usr/share/tika/tika.in.sh \
    /usr/local/share/tika/tika.in.sh \
    /etc/default/tika.in.sh \
    /opt/tika/tika.in.sh; do
    [[ -r "$include" ]] || continue
    TIKA_INCLUDE="$include"
    . "$include"
    break
  done
fi

# PID files live in the script directory unless configured otherwise.
: "${TIKA_PID_DIR:=$TIKA_TIP/bin}"

echo "Default server $DEFAULT_SERVER_DIR"

# Pick the java executable: TIKA_JAVA_HOME wins, then JAVA_HOME (probing
# bin/amd64/java before bin/java), and finally whatever "java" is on PATH.
if [[ -n "$TIKA_JAVA_HOME" ]]; then
  JAVA="$TIKA_JAVA_HOME/bin/java"
elif [[ -z "$JAVA_HOME" ]]; then
  JAVA=java
else
  for java_candidate in "$JAVA_HOME/bin/amd64/java" "$JAVA_HOME/bin/java"; do
    if [[ -x "$java_candidate" ]]; then
      JAVA="$java_candidate"
      break
    fi
  done
  if [[ -z "$JAVA" ]]; then
    {
      echo "The currently defined JAVA_HOME ($JAVA_HOME) refers"
      echo "to a location where Java could not be found. Aborting."
      echo "Either fix the JAVA_HOME variable or remove it from the"
      echo "environment so that the system PATH will be searched."
    } >&2
    exit 1
  fi
fi

# Seconds to wait for the server before giving up (used by the start logic).
if [ -z "$TIKA_STOP_WAIT" ]; then
  TIKA_STOP_WAIT=180
fi

#######################################
# Extracts the major Java version from `java -version` output.
# Only the first line containing `version "` is considered, so other lines
# that happen to contain double quotes (for example a "Picked up
# JAVA_TOOL_OPTIONS" line) cannot be mistaken for the version string.
# Arguments: $1 - complete output of `java -version`
# Outputs:   major version on stdout, e.g. 8 for "1.8.0_292", 11 for "11.0.2";
#            nothing when no version string is found
#######################################
java_major_version() {
  printf '%s\n' "$1" \
    | awk -F '"' '/version "/ {print $2; exit}' \
    | sed -e 's/^1\.//' -e 's/[._-].*$//'
}

# Prints the environment details that help diagnose a broken Java setup.
print_java_debug_info() {
  echo >&2 "Debug information:"
  echo >&2 "JAVA_HOME: ${JAVA_HOME:-N/A}"
  echo >&2 "Active Path:"
  echo >&2 "${PATH}"
}

# test that Java exists, is executable and correct version
if ! JAVA_VER=$("$JAVA" -version 2>&1); then
  echo >&2 "Java not found, or an error was encountered when running java."
  echo >&2 "A working Java $JAVA_VER_REQ JRE is required to run Tika!"
  echo >&2 "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly."
  echo >&2 "Command that we tried: '${JAVA} -version', with response:"
  echo >&2 "${JAVA_VER}"
  echo >&2
  print_java_debug_info
  exit 1
fi

JAVA_VER_NUM="$(java_major_version "$JAVA_VER")"

# A version we cannot parse must not be silently compared as if it were 0.
if [[ ! "$JAVA_VER_NUM" =~ ^[0-9]+$ ]]; then
  echo >&2 "Unable to determine the Java version, using command '${JAVA} -version', with response:"
  echo >&2 "${JAVA_VER}"
  echo >&2
  print_java_debug_info
  exit 1
fi

if [[ "$JAVA_VER_NUM" -lt "$JAVA_VER_REQ" ]]; then
  echo >&2 "Your current version of Java is too old to run this version of Tika."
  echo >&2 "We found major version $JAVA_VER_NUM, using command '${JAVA} -version', with response:"
  echo >&2 "${JAVA_VER}"
  echo >&2
  echo >&2 "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly."
  echo >&2
  print_java_debug_info
  exit 1
fi

# Vendor defaults to Oracle unless the version banner identifies IBM J9.
JAVA_VENDOR="Oracle"
if grep -qi "IBM J9" <<<"$JAVA_VER"; then
  JAVA_VENDOR="IBM J9"
fi


#######################################
# Prints usage information, optionally preceded by an error message.
# Arguments:
#   $1 - command to describe: "start", "stop", "status", or empty for the
#        general overview; any other value prints no usage text
#   $2 - optional error message, shown first as "ERROR: <message>"
# Outputs:   usage text on stdout
#######################################
print_usage() {
  # local, so calling this never clobbers caller variables of the same name
  local CMD="$1"
  local ERROR_MSG="$2"

  if [ "$ERROR_MSG" != "" ]; then
    echo -e "\nERROR: $ERROR_MSG\n"
  fi

  case "$CMD" in
    "")
      echo ""
      echo "Usage: tika COMMAND OPTIONS"
      echo " where COMMAND is one of: start, stop, status"
      echo ""
      echo " Standalone server example (start Tika running in the background on port 9998):"
      echo ""
      echo " ./tika start -p 9998"
      echo ""
      echo "Pass -help after any COMMAND to see command-specific usage information,"
      echo " such as: ./tika start -help or ./tika stop -help"
      echo ""
      ;;
    start)
      echo ""
      # synopsis lists every option the start command accepts, including -j
      echo "Usage: tika $CMD [-f] [-h hostname] [-p port] [-d directory] [-j jar] [-V]"
      echo ""
      echo " -f Start Tika in foreground; default starts Tika in the background"
      echo " and sends stdout / stderr to tika-PORT-console.log"
      echo ""
      echo " -h <host> Specify the hostname the Tika HTTP listener binds to; default is 0.0.0.0"
      echo ""
      echo " -p <port> Specify the port to start the Tika HTTP listener on; default is 9998"
      echo ""
      echo " -d Specify the Tika server directory; defaults to ../"
      echo ""
      echo " -j/--jar Specify the tika-server.jar; defaults to tika-server.jar"
      echo ""
      echo " -V/--verbose Verbose messages from this script"
      echo ""
      ;;
    stop)
      echo ""
      echo "Usage: tika stop [-p port] [--all] [-V]"
      echo ""
      echo " -p <port> Specify the port the Tika HTTP listener is bound to"
      echo ""
      echo " --all Find and stop all running Tika servers on this host"
      echo ""
      echo " -V/--verbose Verbose messages from this script"
      echo ""
      echo " NOTE: To see if any Tika servers are running, do: tika status"
      echo ""
      ;;
    status)
      echo ""
      echo "Usage: tika status"
      echo ""
      echo " This command will show the status of all running Tika servers."
      echo " It can only detect those Tika servers running on the current host."
      echo ""
      ;;
  esac
} # end print_usage

#######################################
# Shows a rotating spinner while a process is alive, so the user can see
# the script is still working while it waits.
# Liveness is probed with `kill -0`, so this is meant for processes owned
# by the current user (such as this script's own background jobs).
# Arguments: $1 - PID to watch; returns immediately if empty or not running
# Outputs:   spinner frames on stdout, erased again with backspaces
#######################################
spinner() {
  local pid="$1"
  local delay=0.5
  local spinstr='|/-\'
  local rest
  while [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; do
    rest="${spinstr#?}"
    # each frame is exactly 6 columns wide ...
    printf " [%c]  " "$spinstr"
    # rotate: move the first character to the end
    spinstr="$rest${spinstr%"$rest"}"
    sleep "$delay"
    # ... and is rewound by exactly 6 backspaces, so the cursor never drifts
    # into the text printed before the spinner
    printf "\b\b\b\b\b\b"
  done
  # blank out what is left of the last frame and return the cursor
  printf "    \b\b\b\b"
}

# Returns 0 if a process with the given PID exists. `kill -0` fails for
# processes owned by another user, so `ps -p` is consulted as a fallback.
tika_pid_alive() {
  kill -0 "$1" 2>/dev/null || ps -p "$1" >/dev/null 2>&1
}

#######################################
# Stops a Tika process: sends TERM first so the JVM can shut down cleanly,
# and only falls back to kill -9 if the process is still alive after the
# grace period.
# Globals:
#   TIKA_PORT       (read) port used in messages and in the PID file name
#   TIKA_PID_DIR    (read) directory holding tika-PORT.pid
#   TIKA_STOP_GRACE (read) seconds to wait after TERM; default 10
# Arguments:
#   $1 - server directory (accepted for compatibility; not used)
#   $2 - PID of the Tika process
# Exits the shell with status 1 when no PID is given or the process
# survives kill -9.
#######################################
stop_tika() {
  local pid="$2"
  local grace="${TIKA_STOP_GRACE:-10}"
  local ticks=0

  if [[ -z "$pid" ]]; then
    echo "ERROR: No PID found for Tika running on port $TIKA_PORT ... script fails."
    exit 1
  fi

  echo "Sending terminate command to Tika running on port $TIKA_PORT with process $pid"
  kill "$pid" 2>/dev/null

  # poll five times a second until the process is gone or the grace period ends
  while tika_pid_alive "$pid" && (( ticks < grace * 5 )); do
    sleep 0.2
    ticks=$((ticks + 1))
  done

  if tika_pid_alive "$pid"; then
    echo "Tika process $pid did not exit within $grace seconds; sending kill -9"
    kill -9 "$pid" 2>/dev/null
    sleep 1
  fi

  # without a port there is no well-defined PID file name to remove
  if [[ -n "$TIKA_PORT" ]]; then
    rm -f "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"
  fi

  if tika_pid_alive "$pid"; then
    echo "ERROR: Failed to terminate previous Tika Java process $pid ... script fails."
    exit 1
  fi
} # end stop_tika


# A single help or status argument is answered right away.
if (( $# == 1 )); then
  case "$1" in
    -help | -usage | -h | --help)
      print_usage ""
      exit
      ;;
    -info | -i | status)
      #get_info
      echo "To be done"
      exit $?
      ;;
  esac
fi

# no args - just show usage and exit
if (( $# == 0 )); then
  print_usage ""
  exit
fi

# if first arg starts with a dash (and it's not -help or -info),
# then assume they are starting Tika, such as: tika -f
case "$1" in
  -*)
    SCRIPT_CMD="start"
    ;;
  *)
    SCRIPT_CMD="$1"
    shift
    ;;
esac

# verify the command given is supported
case "$SCRIPT_CMD" in
  start | stop) ;;
  *)
    print_usage "" "$SCRIPT_CMD is not a valid command!"
    exit 1
    ;;
esac


# Run in foreground (default is to run in the background)
FG="false"

# Parse the options that follow the command. Every option that takes a value
# checks that the value is present before "shift 2": when fewer than two
# arguments remain, "shift 2" fails without shifting anything, which would
# make this loop spin forever.
while [ $# -gt 0 ]; do
  case "$1" in
    -d|-dir)
      if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
        print_usage "$SCRIPT_CMD" "Server directory is required when using the $1 option!"
        exit 1
      fi

      if [[ "$2" == "." || "$2" == "./" || "$2" == ".." || "$2" == "../" ]]; then
        TIKA_SERVER_DIR="$(pwd)/$2"
      else
        # see if the arg value is relative to the tip vs full path
        if [[ "$2" != /* ]] && [[ -d "$TIKA_TIP/$2" ]]; then
          TIKA_SERVER_DIR="$TIKA_TIP/$2"
        else
          TIKA_SERVER_DIR="$2"
        fi
      fi
      # resolve it to an absolute path; a directory we cannot enter is an
      # error rather than a silent fallback to the current directory
      if ! TIKA_SERVER_DIR="$(cd "$TIKA_SERVER_DIR" 2>/dev/null && pwd)"; then
        echo -e "\nTika server directory $2 not found!\n"
        exit 1
      fi
      shift 2
      ;;
    -f|-foreground)
      FG="true"
      shift
      ;;
    -p|-port)
      if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
        print_usage "$SCRIPT_CMD" "Port number is required when using the $1 option!"
        exit 1
      fi
      TIKA_PORT="$2"
      shift 2
      ;;
    -h|-host)
      if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
        print_usage "$SCRIPT_CMD" "Hostname is required when using the $1 option!"
        exit 1
      fi
      TIKA_HOST="$2"
      shift 2
      ;;
    -j|--jar)
      if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
        print_usage "$SCRIPT_CMD" "Jar file is required when using the $1 option!"
        exit 1
      fi
      TIKA_SERVER_JAR="$2"
      shift 2
      ;;
    -help|-usage)
      print_usage "$SCRIPT_CMD"
      exit 0
      ;;
    -V|--verbose)
      verbose=true
      shift
      ;;
    --all)
      stop_all=true
      shift
      ;;
    --)
      shift
      break
      ;;
    *)
      if [ "${1:0:2}" == "-D" ]; then
        # pass thru any opts that begin with -D (java system props)
        TIKA_OPTS+=("$1")
        PASS_TO_RUN_EXAMPLE+=" $1"
        shift
      elif [ "$1" != "" ]; then
        print_usage "$SCRIPT_CMD" "$1 is not supported by this script"
        exit 1
      else
        break # empty argument, stop looping
      fi
      ;;
  esac
done

# Fall back to the default server directory when -d was not given.
if [ -z "$TIKA_SERVER_DIR" ]; then
  TIKA_SERVER_DIR="$DEFAULT_SERVER_DIR"
fi

if [ ! -e "$TIKA_SERVER_DIR" ]; then
  echo -e "\nTika server directory $TIKA_SERVER_DIR not found!\n"
  exit 1
fi

############# start/stop logic below here ################

if $verbose ; then
  echo "Using Tika root directory: $TIKA_TIP"
  echo "Using Java: $JAVA"
  "$JAVA" -version
fi

# Prints the port encoded in a PID file name of the form tika-PORT.pid.
# Arguments: $1 - path to the PID file
tika_port_from_pid_file() {
  local name="${1##*/}"
  name="${name#tika-}"
  printf '%s\n' "${name%.pid}"
}

#######################################
# Stops every Tika server that has a PID file below TIKA_PID_DIR.
# The loop reads from a process substitution rather than a pipe, so it runs
# in the current shell and its bookkeeping survives the loop.
# Globals:
#   TIKA_PID_DIR    (read)    directory searched for tika-*.pid
#   TIKA_SERVER_DIR (read)    passed through to stop_tika
#   TIKA_PORT       (written) set per PID file so stop_tika reports the port
# Returns:   0 if at least one PID file was found, 1 otherwise
#######################################
stop_all_tika_servers() {
  local pid_file
  local pid
  local found=false
  while IFS= read -r pid_file; do
    found=true
    pid="$(cat "$pid_file")"
    TIKA_PORT="$(tika_port_from_pid_file "$pid_file")"
    if [[ -z "$pid" ]]; then
      # an empty PID file cannot identify a process; just clean it up
      rm -f "$pid_file"
      continue
    fi
    echo "About to stop Tika on port $TIKA_PORT with process $pid"
    stop_tika "$TIKA_SERVER_DIR" "$pid"
    rm -f "$pid_file"
  done < <(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)
  $found
}

# stop all if no port specified
if [[ "$SCRIPT_CMD" == "stop" ]]; then
  if $stop_all; then
    if ! stop_all_tika_servers; then
      echo -e "\nNo Tika servers found to stop.\n"
    fi
  else
    # not stopping all and don't have a port, but if exactly one pid file
    # exists, then use that
    none_stopped=true
    pid_files=()
    while IFS= read -r PIDF; do
      pid_files+=("$PIDF")
    done < <(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)
    numTikas=${#pid_files[@]}

    if [ -z "$TIKA_PORT" ]; then
      if [ "$numTikas" -eq 1 ]; then
        # only do this if there is only 1 server running, otherwise they must provide the -p or --all
        PIDF="${pid_files[0]}"
        PID="$(cat "$PIDF")"
        if [ -n "$PID" ] && { kill -0 "$PID" 2>/dev/null || ps -p "$PID" >/dev/null 2>&1; }; then
          # take the port from the file name so stop_tika reports it and
          # removes the right PID file
          TIKA_PORT="$(tika_port_from_pid_file "$PIDF")"
          stop_tika "$TIKA_SERVER_DIR" "$PID"
          rm -f "$PIDF"
          none_stopped=false
        else
          # the recorded process is gone; drop the stale file so it is not
          # reported as a running server
          echo "Removing stale PID file $PIDF"
          rm -f "$PIDF"
          numTikas=0
        fi
      fi
    else
      PIDF="$TIKA_PID_DIR/tika-$TIKA_PORT.pid"
      PID=""
      if [ -r "$PIDF" ]; then
        PID="$(cat "$PIDF")"
      fi
      # stop_tika reports the error and exits when PID is empty
      stop_tika "$TIKA_SERVER_DIR" "$PID"
      none_stopped=false
    fi

    if $none_stopped; then
      if [ "$numTikas" -gt 0 ]; then
        echo -e "\nFound $numTikas Tika servers running! Must either specify a port using -p or --all to stop all Tika servers on this host.\n"
      else
        echo -e "\nNo Tika servers found to stop.\n"
      fi
      exit 1
    fi
  fi
  exit
fi


# Defaults for the start command; each applies only when the value is
# unset or empty at this point.
: "${TIKA_PORT:=9998}"
: "${TIKA_HOST:=0.0.0.0}"
: "${TIKA_SERVER_JAR:=tika-server.jar}"
: "${TIKA_LOGS_DIR:=$TIKA_SERVER_DIR/logs}"

#######################################
# Launches Tika in foreground/background depending on parameters.
# Globals (read): JAVA, verbose, TIKA_SERVER_DIR, TIKA_SERVER_JAR, TIKA_HOST,
#   TIKA_PORT, TIKA_LOGS_DIR, TIKA_PID_DIR, TIKA_STOP_WAIT, TIKA_FORKED_OPTS,
#   JAVA_MEM_OPTS, GC_LOG_OPTS, TIKA_HOST_ARG, LOG4J_CONFIG, TIKA_OPTS,
#   TIKA_DATA_HOME
# Globals (written): GC_TUNE (converted to an array), TIKA_START_OPTS
# Arguments:
#   $1 - "true" to run in the foreground (the shell is replaced via exec)
#   $2 - additional JVM arguments, word-split on whitespace
# Outputs:   progress messages on stdout; in background mode the server's
#            console output goes to TIKA_LOGS_DIR/tika-PORT-console.log and
#            its PID to TIKA_PID_DIR/tika-PORT.pid
# Returns:   in background mode with lsof available, non-zero when the
#            server exits early or is not listening within TIKA_STOP_WAIT
#            seconds. Exits the shell with 1 on setup errors.
#######################################
start_tika() {
  local run_in_foreground="$1"
  local TIKA_ADDL_ARGS="$2"

  # define default GC_TUNE
  if [ -z "${GC_TUNE+x}" ]; then
    GC_TUNE=('-XX:+UseG1GC' \
      '-XX:+PerfDisableSharedMem' \
      '-XX:+ParallelRefProcEnabled' \
      '-XX:MaxGCPauseMillis=250' \
      '-XX:+UseLargePages' \
      '-XX:+AlwaysPreTouch')
  else
    # deliberately unquoted: split a user-supplied string into separate flags
    GC_TUNE=($GC_TUNE)
  fi

  if $verbose ; then
    echo -e "\nStarting Tika using the following settings:"
    echo -e " JAVA = $JAVA"
    echo -e " TIKA_SERVER_DIR = $TIKA_SERVER_DIR"
    echo -e " TIKA_SERVER_JAR = $TIKA_SERVER_JAR"
    echo -e " TIKA_HOST = $TIKA_HOST"
    echo -e " TIKA_PORT = $TIKA_PORT"
    echo -e " JAVA_MEM_OPTS = ${JAVA_MEM_OPTS[@]}"
    echo -e " GC_TUNE = ${GC_TUNE[@]}"
    echo -e " GC_LOG_OPTS = ${GC_LOG_OPTS[@]}"
    echo -e " TIKA_FORKED_OPTS = $TIKA_FORKED_OPTS"

    if [ "$TIKA_OPTS" != "" ]; then
      echo -e " TIKA_OPTS = ${TIKA_OPTS[@]}"
    fi

    if [ "$TIKA_ADDL_ARGS" != "" ]; then
      echo -e " TIKA_ADDL_ARGS = $TIKA_ADDL_ARGS"
    fi

    if [ "$TIKA_DATA_HOME" != "" ]; then
      echo -e " TIKA_DATA_HOME = $TIKA_DATA_HOME"
    fi

    echo -e "\n"
  fi

  # need to launch tika from the server dir
  if ! cd "$TIKA_SERVER_DIR"; then
    echo -e "\nERROR: Cannot change to Tika server directory $TIKA_SERVER_DIR\n"
    exit 1
  fi
  if [ ! -e "$TIKA_SERVER_DIR/$TIKA_SERVER_JAR" ]; then
    echo -e "\nERROR: $TIKA_SERVER_JAR file not found in $TIKA_SERVER_DIR\nPlease check your -d and -j parameters to set the correct Tika server directory and jar.\n"
    exit 1
  fi

  TIKA_START_OPTS=('-server' "${JAVA_MEM_OPTS[@]}" "${GC_TUNE[@]}" "${GC_LOG_OPTS[@]}" \
    "${TIKA_HOST_ARG[@]}" \
    "${LOG4J_CONFIG[@]}" "${TIKA_OPTS[@]}")

  mkdir -p "$TIKA_LOGS_DIR" 2>/dev/null
  if [ $? -ne 0 ]; then
    echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR could not be created. Exiting"
    exit 1
  fi
  if [ ! -w "$TIKA_LOGS_DIR" ]; then
    echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is not writable. Exiting"
    exit 1
  fi
  case "$TIKA_LOGS_DIR" in
    contexts|etc|lib|modules|resources|scripts|tika|tika-webapp)
      echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is invalid. Reserved for the system. Exiting"
      exit 1
      ;;
  esac

  # The one command line used by both modes. TIKA_ADDL_ARGS and
  # TIKA_FORKED_OPTS are deliberately unquoted so each may carry several
  # whitespace-separated arguments.
  local -a launch_cmd=("$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS
    -jar "$TIKA_SERVER_JAR" -p "$TIKA_PORT" -h "$TIKA_HOST" $TIKA_FORKED_OPTS)

  if [ "$run_in_foreground" == "true" ]; then
    if $verbose ; then
      echo "Startup command"
      echo "$JAVA ${TIKA_START_OPTS[@]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_FORKED_OPTS"
    fi
    exec "${launch_cmd[@]}"
  else
    # run Tika in the background
    local console_log="$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log"
    local pid_file="$TIKA_PID_DIR/tika-$TIKA_PORT.pid"
    local server_pid
    local waiter_pid

    if $verbose ; then
      echo "Startup command"
      echo "$JAVA ${TIKA_START_OPTS[@]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_FORKED_OPTS $TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log $TIKA_PID_DIR/tika-$TIKA_PORT.pid"
    fi
    nohup "${launch_cmd[@]}" 1>"$console_log" 2>&1 &
    # the PID of the process we just launched is the server's PID; no need
    # to rediscover it later by grepping ps for the jar name
    server_pid=$!
    echo "$server_pid" > "$pid_file"

    # no lsof on cygwin though
    if hash lsof 2>/dev/null ; then # hash returns true if lsof is on the path
      echo -n "Waiting up to $TIKA_STOP_WAIT seconds to see Tika running on port $TIKA_PORT"
      # Launch in a subshell to show the spinner
      (
        loops=0
        while true; do
          # give up immediately if the JVM has already died
          if ! kill -0 "$server_pid" 2>/dev/null; then
            echo -e "\nERROR: Tika process $server_pid exited before listening on port $TIKA_PORT!"
            tail -30 "$console_log"
            exit 1 # subshell!
          fi
          running="$(lsof -PniTCP:"$TIKA_PORT" -sTCP:LISTEN)"
          if [ -n "$running" ]; then
            echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$server_pid). Happy extracting!\n"
            exit 0 # subshell!
          fi
          slept=$((loops * 2))
          if [ "$slept" -lt "$TIKA_STOP_WAIT" ]; then
            sleep 2
            loops=$((loops + 1))
          else
            echo -e "Still not seeing Tika listening on $TIKA_PORT after $TIKA_STOP_WAIT seconds!"
            tail -30 "$console_log"
            exit 1 # subshell!
          fi
        done
      ) &
      waiter_pid=$!
      spinner "$waiter_pid"
      # hand the waiter's verdict (0 = listening) back to the caller
      wait "$waiter_pid"
      return $?
    else
      echo -e "NOTE: Please install lsof as this script needs it to determine if Tika is listening on port $TIKA_PORT."
      sleep 10
      echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$server_pid). Happy extracting!\n"
      return
    fi
  fi
} # end start_tika



# Run the start command and exit with its status, so a successful start is
# not reported to the caller as a failure.
if [[ "$SCRIPT_CMD" == "start" ]]; then
  start_tika "$FG" "$ADDITIONAL_CMD_OPTS"
  exit $?
fi