blob: 530aca83fdd9c6aea73327bdb4b0a3be4ce1217d [file] [log] [blame]
#!/usr/bin/env bash
## Licensed to the Apache Software Foundation (ASF) under one
## or more contributor license agreements. See the NOTICE file
## distributed with this work for additional information
## regarding copyright ownership. The ASF licenses this file
## to you under the Apache License, Version 2.0 (the
## "License"); you may not use this file except in compliance
## with the License. You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
# Prints the tdbloader2 help summary on standard output.
# Takes no arguments. The text contains nothing to expand, so it is
# emitted literally.
printUsage() {
  cat <<'EOF'
tdbloader2 - TDB Bulk Loader
Usage: tdbloader2 --loc <Directory> [Options] <Data> ...
Bulk loader for TDB which manipulates the data files directly and so
can only be used to create new databases. This command relies on
POSIX utilities so will only work on POSIX operating systems.
If you wish to bulk load to an existing database please use tdbloader
instead.
Required options are as follows:
-l <DatabaseDirectory>
--loc <DatabaseDirectory>
Sets the location in which the database should be created.
This location must be a directory and must be empty, if a
non-existent path is specified it will be created as a new
directory.
<Data>
Specifies the path to one/more data files to load
Common additional options are as follows:
-h
--help
Prints this help summary and exits
Advanced additional options are as follows:
-d
--debug
Enable debug mode, adds extra debug output
-j <JvmArgs>
--jvm-args <JvmArgs>
Sets the arguments that should be passed to the JVM for the
JVM based portions of the build.
Generally it is best to not change these unless you have been
specifically advised to. The scripts will use appropriate
defaults if this is not specified.
In particular be careful increasing the heap size since many
parts of TDB actually use memory mapped files that live
outside the heap so if the heap is too large the heap may
conflict with the memory mapped files for memory space.
-k
--keep-work
Keeps the temporary work files around after they are no longer
needed. May be useful for debugging.
-p <Phase>
--phase <Phase>
Sets the phase of the build to run, supported values are:
all Full bulk load
data Data phase only
index Index phase only, requires the data phase to
previously have been run
When no phase is specified it defaults to all
-s <SortArgs>
--sort-args <SortArgs>
Sets the arguments that should be passed to sort for the sort
based portions of the build.
Generally it is best not to change these as the scripts will
use appropriate defaults for your system.
-t
--trace
Enable trace mode, essentially sets -x within the scripts
EOF
}
# Resolves a symbolic link and prints the path it points at.
# Arguments: $1 - path that may be a symbolic link
# Outputs:   the resolved path on stdout; $1 unchanged when it is not a link.
#            On darwin/bsd the result is whatever readlink reports, which
#            may be a relative path.
resolveLink() {
  local path=$1

  # Not a link: hand the argument straight back
  if ! [ -L "$path" ]; then
    echo "$path"
    return
  fi

  if [[ "$OSTYPE" == darwin* || "$OSTYPE" == bsd* ]]; then
    # BSD style readlink behaves differently to GNU readlink, so the
    # chain of links is followed manually, one hop at a time
    while [ -L "$path" ]; do
      path=$(readlink "$path")
    done
  else
    # Everything else is assumed to have GNU readlink, where -f
    # canonicalizes the path
    path=$(readlink -f "$path")
  fi

  echo "$path"
}
# When the caller has not set JENA_HOME, derive it from this script's location
if [[ -z "$JENA_HOME" ]]; then
  SCRIPT="$0"
  # Catch a common setup: the script was invoked through a symbolic link
  if [[ -L "$SCRIPT" ]]; then
    SCRIPT="$(resolveLink "$0")"
    # An absolute target is used as is; a relative one is anchored at the
    # directory of the path the script was invoked by
    if [[ "$SCRIPT" != /* ]]; then
      SCRIPT="$(dirname "$0")/${SCRIPT}"
    fi
  fi
  # The root is the parent of the directory that holds the script
  JENA_HOME="$(cd "$(dirname "$SCRIPT")/.." && pwd)"
  export JENA_HOME
fi
# If JENA_HOME is a symbolic link, replace it with the location it points at
if [ -L "${JENA_HOME}" ]; then
  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
  case "$JENA_HOME_TARGET" in
    /*)
      # Already absolute
      ;;
    *)
      # A relative link target is relative to the directory that holds the
      # link, so anchor it there rather than discarding the target name.
      # NOTE(review): if resolveLink followed a chain of several links the
      # result is relative to the last link in the chain; only the
      # single-link case is handled here.
      JENA_HOME_TARGET="$(dirname "$JENA_HOME")/$JENA_HOME_TARGET"
      ;;
  esac
  JENA_HOME="$JENA_HOME_TARGET"
  # Temporary only; do not leave it behind for later code
  unset JENA_HOME_TARGET
  export JENA_HOME
  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
fi
# Pull in the shared helper script from the installation's bin directory,
# giving up if it cannot be found
if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then
  # Can source common functions
  source "${JENA_HOME}/bin/tdbloader2common"
else
  # Diagnostics belong on stderr so they are not mixed into captured stdout
  echo "Unable to locate common functions script tdbloader2common" >&2
  exit 1
fi
# ---- Setup
# Fall back to a 1024M heap unless JVM_ARGS already holds a value
: "${JVM_ARGS:=-Xmx1024M}"
# Classpath entry: JENA_HOME is expanded, the trailing * stays literal
JENA_CP="${JENA_HOME}/lib/*"
SOCKS=
LOGGING="-Dlog4j.configuration=file:${JENA_HOME}/jena-log4j.properties"
# Platform specific fixup
# On CYGWIN convert the path with cygpath and end it with a ';'
if [[ "$(uname)" == CYGWIN* ]]; then
  JENA_CP="$(cygpath -wp "$JENA_CP");"
fi
export JENA_CP
# Process arguments
# Options are consumed from the front of the positional parameters; whatever
# is left in "$@" once the loop ends is treated as the data files to load.
LOC=
PHASE=
KEEP_WORK=0
DEBUG=0
TRACE=0
# Cleared here, so afterwards it only holds a value given via -j/--jvm-args
JVM_ARGS=
SORT_ARGS=
while [ $# -gt 0 ]
do
  ARG=$1
  case "$ARG" in
    -d|--debug)
      # Debug Mode
      shift
      DEBUG=1
      ;;
    -h|--help)
      # Help
      printUsage
      exit 0
      ;;
    -j|--jvm-args)
      # JVM Arguments
      shift
      JVM_ARGS="$1"
      shift
      ;;
    -k|--keep-work)
      # Keep work files
      shift
      KEEP_WORK=1
      ;;
    -l|--loc|-loc)
      # Location space separated
      shift
      LOC="$1"
      shift
      ;;
    -*loc=*)
      # Location = separated
      # Strip only the shortest leading "-...loc=" prefix, so a location
      # whose own path contains "loc=" is kept intact
      LOC=${ARG#-*loc=}
      shift
      ;;
    -p|--phase)
      # Phase space separated
      shift
      PHASE="$1"
      shift
      ;;
    -s|--sort-args)
      # Sort arguments
      shift
      SORT_ARGS="$1"
      shift
      ;;
    -t|--trace)
      # Trace mode
      shift
      TRACE=1
      set -x
      ;;
    --)
      # Arguments separator
      # All further arguments are treated as data files
      shift
      break
      ;;
    -*)
      # Looks like an option but not known
      abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
      ;;
    *)
      # Once we see an unrecognized argument that doesn't look like an option treat as start of files to process
      break
      ;;
  esac
done
# Run every phase when none was requested
if [ -z "$PHASE" ]; then
  PHASE="all"
fi
# Prepare arguments to pass to children
# These are arrays so that each forwarded value stays a single word: a
# --jvm-args or --sort-args value containing spaces is handed to the child
# script as one argument instead of being split apart.
COMMON_ARGS=()
DATA_ARGS=()
INDEX_ARGS=()
if [ "$KEEP_WORK" = 1 ]; then
  COMMON_ARGS+=(--keep-work)
fi
if [ "$DEBUG" = 1 ]; then
  COMMON_ARGS+=(--debug)
fi
if [ "$TRACE" = 1 ]; then
  COMMON_ARGS+=(--trace)
fi
if [ -n "$JVM_ARGS" ]; then
  COMMON_ARGS+=(--jvm-args "$JVM_ARGS")
fi
if [ -n "$SORT_ARGS" ]; then
  INDEX_ARGS+=(--sort-args "$SORT_ARGS")
fi
# ---- Start
info "-- TDB Bulk Loader Start"
TIME1="$(date +%s)"
TOOL_DIR="$JENA_HOME/bin"
case "$PHASE" in
  all)
    # All Phases
    # Data Phase
    "${TOOL_DIR}/tdbloader2data" "${COMMON_ARGS[@]}" "${DATA_ARGS[@]}" --loc "$LOC" -- "$@"
    RET=$?
    if [ $RET -ne 0 ]; then
      abort $RET "Failed during data phase"
    fi
    # Index Phase
    "${TOOL_DIR}/tdbloader2index" "${COMMON_ARGS[@]}" "${INDEX_ARGS[@]}" --loc "$LOC"
    RET=$?
    if [ $RET -ne 0 ]; then
      # Name the phase that actually failed
      abort $RET "Failed during index phase"
    fi
    ;;
  data)
    # Data Phase
    "${TOOL_DIR}/tdbloader2data" "${COMMON_ARGS[@]}" "${DATA_ARGS[@]}" --loc "$LOC" -- "$@"
    RET=$?
    if [ $RET -ne 0 ]; then
      abort $RET "Failed during data phase"
    fi
    ;;
  index)
    # Index Phase
    "${TOOL_DIR}/tdbloader2index" "${COMMON_ARGS[@]}" "${INDEX_ARGS[@]}" --loc "$LOC"
    RET=$?
    if [ $RET -ne 0 ]; then
      abort $RET "Failed during index phase"
    fi
    ;;
  *)
    abort 1 "Unrecognized phase $PHASE"
    ;;
esac
# ---- End
TIME2="$(date +%s)"
info "-- TDB Bulk Loader Finish"
# Wall-clock duration in whole seconds, from the two epoch timestamps
ELAPSED=$((TIME2 - TIME1))
info "-- $ELAPSED seconds"