| #!/usr/bin/env bash |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| # ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc) |
| |
THIS_SCRIPT="$0"

# Resolve symlinks to this script so that SOLR_TIP is derived from the real
# file location rather than from the location of a link pointing at it.
# NOTE(review): `ls -ld` output is parsed instead of calling readlink,
# presumably for portability -- confirm before replacing.
while [ -h "$THIS_SCRIPT" ]; do
  ls=$(ls -ld "$THIS_SCRIPT")
  # Drop everything prior to ->
  link=$(expr "$ls" : '.*-> \(.*\)$')
  if expr "$link" : '/.*' > /dev/null; then
    # Absolute link target: use it as-is.
    THIS_SCRIPT="$link"
  else
    # Relative link target: interpret it relative to the link's directory.
    THIS_SCRIPT="$(dirname "$THIS_SCRIPT")/$link"
  fi
done

# SOLR_TIP is the parent of the directory that contains this script, made
# absolute. CDPATH is cleared for the cd so an exported CDPATH can neither
# redirect the cd nor add output to the captured value, and a failing cd
# aborts instead of silently yielding the current working directory.
SOLR_TIP="$(dirname "$THIS_SCRIPT")/.."
SOLR_TIP=$(CDPATH= cd -- "$SOLR_TIP" && pwd) || {
  echo >&2 "Unable to resolve the Solr installation directory from $THIS_SCRIPT"
  exit 1
}

# Pick the java executable: SOLR_JAVA_HOME wins, then JAVA_HOME, then whatever
# `java` resolves to on the PATH. JAVA is reset first so that a JAVA variable
# exported by the caller's environment is never picked up by accident.
JAVA=""
if [ -n "$SOLR_JAVA_HOME" ]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [ -n "$JAVA_HOME" ]; then
  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
    if [ -x "$java" ]; then
      JAVA="$java"
      break
    fi
  done
  if [ -z "$JAVA" ]; then
    echo >&2 "JAVA_HOME ($JAVA_HOME) does not contain an executable bin/java. Please fix or unset JAVA_HOME before running this script."
    exit 1
  fi
else
  JAVA=java
fi

# test that Java exists and is executable on this server
"$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; }


# ===== post specific code

# Candidate solr-core jars under dist/; element 0 is later used as the
# classpath. If nothing matches, bash leaves the unexpanded pattern in place.
TOOL_JAR=("$SOLR_TIP/dist"/solr-core-*.jar)
| |
#######################################
# Print the command-line help text for this tool.
# Globals:   THIS_SCRIPT (read) - shown as the command name in the examples
# Arguments: none
# Outputs:   usage text on stdout
#######################################
print_usage() {
  # A here-document is used so that quotes inside the examples are emitted
  # verbatim; in particular the JSON in the stdin example keeps its double
  # quotes ('{"commit": {}}').
  # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
  cat <<EOF

Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>
 or post -help

 collection name defaults to DEFAULT_SOLR_COLLECTION if not specified

OPTIONS
=======
 Solr options:
 -url <base Solr update URL> (overrides collection, host, and port)
 -host <host> (default: localhost)
 -p or -port <port> (default: 8983)
 -commit yes|no (default: yes)
 -u or -user <user:pass> (sets BasicAuth credentials)

 Web crawl options:
 -recursive <depth> (default: 1)
 -delay <seconds> (default: 10)

 Directory crawl options:
 -delay <seconds> (default: 0)

 stdin/args options:
 -type <content/type> (default: application/xml)

 Other options:
 -filetypes <type>[,<type>,...] (default: xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)
 -params "<key>=<value>[&<key>=<value>...]" (values must be URL-encoded; these pass through to Solr update request)
 -out yes|no (default: no; yes outputs Solr response to console)
 -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs)


Examples:

* JSON file: $THIS_SCRIPT -c wizbang events.json
* XML files: $THIS_SCRIPT -c records article*.xml
* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv
* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents
* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucene.apache.org/solr -recursive 1 -delay 1
* Standard input (stdin): echo '{"commit": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d
* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d \$'id,value\n1,0.47'

EOF
} # end print_usage
| |
# When the only argument is one of the help flags, show the usage text and
# stop; any other invocation falls through to normal argument parsing.
if [[ $# -eq 1 ]]; then
  case "$1" in
    -help | -h | -usage)
      print_usage
      exit
      ;;
  esac
fi
| |
| |
#######################################
# Parse the command line into the globals used by the rest of the script.
# Globals written:
#   COLLECTION - target collection (-c), defaults to $DEFAULT_SOLR_COLLECTION
#   PROPS      - array of -Dkey=value properties for the Java tool
#   RECURSIVE  - "yes" once a directory argument has been seen
#   FILES      - array of existing file and directory arguments
#   URLS       - array of arguments starting with "http"
#   ARGS       - array of literal data strings following -d (args mode)
#   MODE       - "stdin" or "args" when -d/--data/- was given, else empty
#   SOLR_URL   - value of -url / -Durl=..., else empty
# Arguments: the script's command-line arguments
# Outputs:   an error message on stdout for an unrecognized argument
# Returns:   0; exits the script with status 1 on an unrecognized argument
#######################################
_post_parse_args() {
  COLLECTION="$DEFAULT_SOLR_COLLECTION"
  PROPS=('-Dauto=yes')
  RECURSIVE=""
  FILES=()
  URLS=()
  ARGS=()
  # Reset explicitly: both are tested later on, so a MODE or SOLR_URL value
  # exported in the caller's environment must not be mistaken for an option
  # given on the command line.
  MODE=""
  SOLR_URL=""
  local key

  while [ $# -gt 0 ]; do
    # TODO: natively handle the optional parameters to SPT
    # but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com

    if [[ -d "$1" ]]; then
      # Directory: posted recursively
      RECURSIVE=yes
      FILES+=("$1")
    elif [[ -f "$1" ]]; then
      # File
      FILES+=("$1")
    elif [[ "$1" == http* ]]; then
      # URL
      URLS+=("$1")
    elif [[ "$1" == -* ]]; then
      case "$1" in
        -c)
          # Special case, pull out collection name
          shift
          COLLECTION="$1"
          ;;
        -p)
          # -p alias for -port for convenience and compatibility with `bin/solr start`
          shift
          PROPS+=("-Dport=$1")
          ;;
        -d | --data | -)
          if [[ ! -t 0 ]]; then
            # stdin is not a terminal: the data is read from stdin
            MODE="stdin"
          else
            # when no stdin exists and -d specified, the rest of the arguments
            # are assumed to be strings to post as-is
            MODE="args"
            shift
            if [[ $# -gt 0 ]]; then
              ARGS=("$@")
              shift $#
            else
              # SPT needs a valid args string, useful for 'bin/post -c foo -d' to force a commit
              ARGS+=("<add/>")
            fi
          fi
          ;;
        -u | -user)
          shift
          PROPS+=("-Dbasicauth=$1")
          ;;
        -D*)
          # Already in -Dkey=value form: pass through unchanged
          PROPS+=("$1")
          if [[ "${1:2:4}" == "url=" ]]; then
            SOLR_URL=${1:6}
          fi
          ;;
        *)
          # Generic "-key value" pair becomes -Dkey=value
          key="${1:1}"
          shift
          PROPS+=("-D$key=$1")
          if [[ "$key" == "url" ]]; then
            SOLR_URL=$1
          fi
          ;;
      esac
    else
      echo -e "\nUnrecognized argument: $1\n"
      echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
      exit 1
    fi
    # Consume the argument (or option value) just handled. This is a harmless
    # no-op failure when an option was last on the line without its value.
    shift
  done
  return 0
}

_post_parse_args "$@"
| |
# Check for errors: a target is mandatory, either a collection name or an
# explicit update URL.
if [[ -z "$COLLECTION" && -z "$SOLR_URL" ]]; then
  echo -e "\nCollection or URL must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment, or use -url instead.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# Unsupported: bin/post -c foo
# (nothing to post: no files/directories, no URLs, and neither stdin nor -d strings)
if (( ${#FILES[@]} + ${#URLS[@]} == 0 )) \
    && [[ "$MODE" != "stdin" && "$MODE" != "args" ]]; then
  echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
# The following are unsupported constructs:
# bin/post -c foo existing_file.csv http://example.com
# echo '<xml.../>' | bin/post -c foo existing_file.csv
# bin/post -c foo existing_file.csv -d 'anything'
#
# Two ways to conflict: stdin/args data combined with any file or URL, or
# (without stdin/args data) files combined with URLs.
if { [[ "$MODE" == "stdin" || "$MODE" == "args" ]] \
      && (( ${#FILES[@]} > 0 || ${#URLS[@]} > 0 )); } \
    || { [[ "$MODE" != "stdin" && "$MODE" != "args" ]] \
      && (( ${#FILES[@]} > 0 && ${#URLS[@]} > 0 )); }; then
  echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
  exit 1
fi
| |
# Positional arguments handed to SimplePostTool. Always an array, so an empty
# list expands to zero words rather than to a single empty string.
PARAMS=()

# Settle the data mode and the matching argument list. stdin/args mode was
# already chosen during option parsing; otherwise the mode follows from what
# was collected (URLs are checked last, so they take precedence over files).
if [[ "$MODE" == "stdin" || "$MODE" == "args" ]]; then
  PARAMS=("${ARGS[@]}")
else
  if (( ${#FILES[@]} > 0 )); then
    MODE="files"
    PARAMS=("${FILES[@]}")
  fi

  if (( ${#URLS[@]} > 0 )); then
    MODE="web"
    PARAMS=("${URLS[@]}")
  fi
fi

PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ -n "$RECURSIVE" ]]; then
  PROPS+=('-Drecursive=yes')
fi

# An unmatched glob leaves the literal pattern in TOOL_JAR[0]; fail with a
# clear message here instead of letting the JVM report a missing main class.
if [[ ! -e "${TOOL_JAR[0]}" ]]; then
  echo >&2 "Unable to locate the Solr core jar needed to run this tool (looked for: ${TOOL_JAR[0]})"
  exit 1
fi

# Show the exact command line, then run it. The script's exit status is the
# exit status of the Java process.
# NOTE(review): the echoed line includes every entry of PROPS verbatim, so a
# -Dbasicauth=<user:pass> property is printed in clear text.
echo "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
"$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
| |
| # post smoker: |
| # bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]' |
| # bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>' |