| #!/usr/bin/env bash |
| |
| ## Licensed to the Apache Software Foundation (ASF) under one |
| ## or more contributor license agreements. See the NOTICE file |
| ## distributed with this work for additional information |
| ## regarding copyright ownership. The ASF licenses this file |
| ## to you under the Apache License, Version 2.0 (the |
| ## "License"); you may not use this file except in compliance |
| ## with the License. You may obtain a copy of the License at |
| ## |
| ## http://www.apache.org/licenses/LICENSE-2.0 |
| ## |
| ## Unless required by applicable law or agreed to in writing, software |
| ## distributed under the License is distributed on an "AS IS" BASIS, |
| ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ## See the License for the specific language governing permissions and |
| ## limitations under the License. |
| |
# Prints the command line help summary for tdbloader2 to standard output.
# Takes no arguments. The delimiter is quoted so the help text is emitted
# literally, with no parameter or command expansion applied to it.
printUsage() {
  cat <<'USAGE'
tdbloader2 - TDB Bulk Loader

Usage: tdbloader2 --loc <Directory> [Options] <Data> ...

Bulk loader for TDB which manipulates the data files directly and so
can only be used to create new databases. This command relies on
POSIX utilities so will only work on POSIX operating systems.

If you wish to bulk load to an existing database please use tdbloader
instead.

Required options are as follows:

-l <DatabaseDirectory>
--loc <DatabaseDirectory>
Sets the location in which the database should be created.

This location must be a directory and must be empty, if a
non-existent path is specified it will be created as a new
directory.

<Data>
Specifies the path to one/more data files to load

Common additional options are as follows:

-h
--help
Prints this help summary and exits

Advanced additional options are as follows:

-d
--debug
Enable debug mode, adds extra debug output

-j <JvmArgs>
--jvm-args <JvmArgs>
Sets the arguments that should be passed to the JVM for the
JVM based portions of the build.

Generally it is best to not change these unless you have been
specifically advised to. The scripts will use appropriate
defaults if this is not specified.

In particular be careful increasing the heap size since many
parts of TDB actually use memory mapped files that live
outside the heap so if the heap is too large the heap may
conflict with the memory mapped files for memory space.

-k
--keep-work
Keeps the temporary work files around after they are no longer
needed. May be useful for debugging.

-p <Phase>
--phase <Phase>
Sets the phase of the build to run, supported values are:

all Full bulk load
data Data phase only
index Index phase only, requires the data phase to
previously have been run

When no phase is specified it defaults to all

-s <SortArgs>
--sort-args <SortArgs>
Sets the arguments that should be passed to sort for the sort
based portions of the build.

Generally it is best not to change these as the scripts will
use appropriate defaults for your system.

-t
--trace
Enable trace mode, essentially sets -x within the scripts

USAGE
}
| |
# Resolves a symbolic link and prints the result on standard output.
#
# Arguments:
#   $1 - path that may be a symbolic link
# Outputs:
#   $1 unchanged when it is not a symbolic link. Otherwise the link target:
#   on darwin/bsd the links are followed one readlink call at a time (the
#   printed value is whatever the last readlink returned, which may be
#   relative), elsewhere the result of readlink -f is printed.
resolveLink() {
  local target=$1

  # Nothing to resolve for anything that is not a symbolic link
  if [ ! -L "$target" ]; then
    echo "$target"
    return
  fi

  if [[ "$OSTYPE" == darwin* || "$OSTYPE" == bsd* ]]; then
    # BSD style readlink behaves differently to GNU readlink so the chain
    # of links is followed manually
    until [ ! -L "$target" ]; do
      target=$(readlink -- "$target")
    done
  else
    # Assuming standard GNU readlink where -f canonicalizes the path
    target=$(readlink -f -- "$target")
  fi

  echo "$target"
}
| |
# When the caller has not provided JENA_HOME, derive it from the location of
# this script: the parent of the directory that contains the script.
if [[ -z "$JENA_HOME" ]]; then
  SCRIPT=$0

  # Catch common issue: the script has been invoked through a symlink
  if [[ -L "$SCRIPT" ]]; then
    SCRIPT=$(resolveLink "$0")
    # A relative link target is anchored at the directory holding the link
    if [[ "$SCRIPT" != /* ]]; then
      SCRIPT="$(dirname "$0")/${SCRIPT}"
    fi
  fi

  # Work out the installation root from the script location
  JENA_HOME=$(cd "$(dirname "$SCRIPT")/.." && pwd)
  export JENA_HOME
fi
| |
# Prints the location that the symbolic link $1 points at, as reported by
# resolveLink. An absolute result is printed unchanged. A relative result is
# prefixed with the directory containing the link, because a relative link
# target is interpreted relative to the link's own directory rather than
# the current working directory.
#
# Arguments:
#   $1 - path of the symbolic link
# Outputs:
#   the resolved location on standard output
function resolveLinkFrom() {
  local LINK=$1
  local TARGET

  TARGET=$(resolveLink "$LINK")
  case "$TARGET" in
    /*)
      # Already absolute
      ;;
    *)
      # Relative, anchor at the directory holding the link
      TARGET="$(dirname "$LINK")/$TARGET"
      ;;
  esac

  echo "$TARGET"
}

# If JENA_HOME is a symbolic link need to resolve
if [ -L "${JENA_HOME}" ]; then
  JENA_HOME=$(resolveLinkFrom "$JENA_HOME")
  export JENA_HOME
  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
fi
| |
# The helper functions shared by the tdbloader2 scripts live in
# tdbloader2common under JENA_HOME/bin; give up if it cannot be found
if [ ! -e "${JENA_HOME}/bin/tdbloader2common" ]; then
  echo "Unable to locate common functions script tdbloader2common"
  exit 1
fi

# Can source common functions
source "${JENA_HOME}/bin/tdbloader2common"
| |
# ---- Setup
# Default JVM arguments, applied only when JVM_ARGS is unset or empty.
# NOTE(review): the argument processing further down resets JVM_ARGS to
# empty, so this default does not appear to reach the child scripts -
# confirm whether that is intended.
: "${JVM_ARGS:=-Xmx1024M}"
# Classpath wildcard: JENA_HOME is expanded but the trailing * stays literal
JENA_CP="${JENA_HOME}/lib/*"
SOCKS=""
LOGGING="-Dlog4j.configuration=file:${JENA_HOME}/jena-log4j.properties"

# Platform specific fixup
# On CYGWIN convert the classpath to its Windows form and end it with a ';'
if [[ "$(uname)" == CYGWIN* ]]; then
  JENA_CP="$(cygpath -wp "$JENA_CP");"
fi

export JENA_CP
| |
| |
# Process arguments

# Parses the leading command line options into the globals LOC, PHASE,
# KEEP_WORK, DEBUG, TRACE, JVM_ARGS and SORT_ARGS.
#
# Parsing stops at "--" (which is consumed) or at the first argument that
# does not start with "-"; everything from there on is left untouched so
# the caller can treat it as the list of data files.
#
# Arguments:
#   the script's command line, i.e. "$@"
# Globals written:
#   LOC, PHASE, KEEP_WORK, DEBUG, TRACE, JVM_ARGS - reset, then set from options
#   SORT_ARGS - set only when -s/--sort-args is given
#   PARSED_ARG_COUNT - number of leading arguments that were consumed
# Exits:
#   0 after printing the usage for -h/--help; via abort for an unknown
#   option or for an option that is missing its value
function parseArguments() {
  local TOTAL=$#
  local ARG

  LOC=
  PHASE=
  KEEP_WORK=0
  DEBUG=0
  TRACE=0
  # Any JVM_ARGS value present before this point is discarded here; only a
  # value given with -j/--jvm-args survives
  JVM_ARGS=
  # SORT_ARGS is not reset, so a value already present in the environment
  # stays in effect unless -s/--sort-args replaces it

  while [ $# -gt 0 ]; do
    ARG=$1
    case "$ARG" in
      -d|--debug)
        # Debug Mode
        shift
        DEBUG=1
        ;;
      -h|--help)
        # Help
        printUsage
        exit 0
        ;;
      -j|--jvm-args)
        # JVM Arguments
        [ $# -ge 2 ] || abort 1 "Option $ARG requires a value"
        shift
        JVM_ARGS="$1"
        shift
        ;;
      -k|--keep-work)
        # Keep work files
        shift
        KEEP_WORK=1
        ;;
      -l|--loc|-loc)
        # Location space separated
        [ $# -ge 2 ] || abort 1 "Option $ARG requires a value"
        shift
        LOC="$1"
        shift
        ;;
      -*loc=*)
        # Location = separated. Strip only up to the first '=' so that a
        # path which itself contains "loc=" is kept intact
        LOC=${ARG#*=}
        shift
        ;;
      -p|--phase)
        # Phase space separated
        [ $# -ge 2 ] || abort 1 "Option $ARG requires a value"
        shift
        PHASE="$1"
        shift
        ;;
      -s|--sort-args)
        # Sort arguments
        [ $# -ge 2 ] || abort 1 "Option $ARG requires a value"
        shift
        SORT_ARGS=$1
        shift
        ;;
      -t|--trace)
        # Trace mode, set -x stays in effect after this function returns
        shift
        TRACE=1
        set -x
        ;;
      --)
        # Arguments separator
        # All further arguments are treated as data files
        shift
        break
        ;;
      -*)
        # Looks like an option but not known
        abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
        ;;
      *)
        # Once we see an unrecognized argument that doesn't look like an option treat as start of files to process
        break
        ;;
    esac
  done

  PARSED_ARG_COUNT=$(( TOTAL - $# ))
}

parseArguments "$@"
# Drop the options that were consumed so that "$@" holds only the data files
shift "$PARSED_ARG_COUNT"
| |
# When no phase was requested run the full bulk load
if [ -z "$PHASE" ]; then
  PHASE="all"
fi

# Prepare arguments to pass to children
# These are arrays rather than flat strings so that an option value which
# contains whitespace (for example --jvm-args "-Xmx2G -Xms1G") reaches the
# child script as one argument instead of being split into several
COMMON_ARGS=()
DATA_ARGS=()
INDEX_ARGS=()
if [ "$KEEP_WORK" = 1 ]; then
  COMMON_ARGS+=(--keep-work)
fi
if [ "$DEBUG" = 1 ]; then
  COMMON_ARGS+=(--debug)
fi
if [ "$TRACE" = 1 ]; then
  COMMON_ARGS+=(--trace)
fi
if [ -n "$JVM_ARGS" ]; then
  COMMON_ARGS+=(--jvm-args "$JVM_ARGS")
fi
if [ -n "$SORT_ARGS" ]; then
  INDEX_ARGS+=(--sort-args "$SORT_ARGS")
fi

# Runs tdbloader2data from TOOL_DIR against the database location LOC.
# Arguments: the data files to load
# Globals read: TOOL_DIR, COMMON_ARGS, DATA_ARGS, LOC
# On a non-zero exit status calls abort with that status.
function runDataPhase() {
  local STATUS

  "${TOOL_DIR}/tdbloader2data" "${COMMON_ARGS[@]}" "${DATA_ARGS[@]}" --loc "$LOC" -- "$@"
  STATUS=$?
  if [ "$STATUS" -ne 0 ]; then
    abort "$STATUS" "Failed during data phase"
  fi
}

# Runs tdbloader2index from TOOL_DIR against the database location LOC.
# Takes no arguments.
# Globals read: TOOL_DIR, COMMON_ARGS, INDEX_ARGS, LOC
# On a non-zero exit status calls abort with that status.
function runIndexPhase() {
  local STATUS

  "${TOOL_DIR}/tdbloader2index" "${COMMON_ARGS[@]}" "${INDEX_ARGS[@]}" --loc "$LOC"
  STATUS=$?
  if [ "$STATUS" -ne 0 ]; then
    abort "$STATUS" "Failed during index phase"
  fi
}

# ---- Start
info "-- TDB Bulk Loader Start"
TIME1="$(date +%s)"

TOOL_DIR="$JENA_HOME/bin"
case "$PHASE" in
  all)
    # All Phases, data first and then index
    runDataPhase "$@"
    runIndexPhase
    ;;
  data)
    # Data Phase
    runDataPhase "$@"
    ;;
  index)
    # Index Phase
    runIndexPhase
    ;;
  *)
    abort 1 "Unrecognized phase $PHASE"
    ;;
esac
| |
# ---- End
# Report completion and the wall clock duration, measured in whole seconds
# between the TIME1 stamp taken at the start and now
TIME2=$(date +%s)
info "-- TDB Bulk Loader Finish"
ELAPSED=$(( $TIME2 - $TIME1 ))
info "-- ${ELAPSED} seconds"