# blob: 488457a826925e7e6757b36287fbb729cbeb864a [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# -*- mode: ruby -*-
# vi: set ft=ruby :
#
# Builds a single node Rya on Accumulo on Hadoop and Zookeeper.
# Deploys Rya, and the Eclipse rdf4j-workbench on Tomcat accessible from the host's browser.
# See the accompanying readme for URLs, verification, and troubleshooting.
#
# Note: Machine's ip is 192.168.33.10
# username : vagrant
# password : vagrant
#
# accumulo instance : dev
# accumulo username : root
# accumulo password : root
Vagrant.configure(2) do |config|
# Guest image plus the name and fixed address the host uses to reach the box.
config.vm.box = "bento/ubuntu-18.04"
config.vm.hostname = "rya-example-box"
config.vm.network :private_network, ip: "192.168.33.10"
# VirtualBox-specific machine name and sizing.
config.vm.provider "virtualbox" do |vbox|
  vbox.name = "rya-example-box"
  vbox.cpus = 4
  vbox.memory = "4096"
end
config.vm.provision "shell", inline: <<-SHELL
# Echo every command, with its variables expanded, while provisioning.
set -x
# Versions of the components installed below.
# IMPORTANT: confirm that the Rya artifacts for RYA_EXAMPLE_VERSION are actually
# published in the Maven repository; this script was written before that release existed.
export RYA_EXAMPLE_VERSION="4.0.1"
export RDF4J_VERSION="2.5.5"
export ACCUMULO_VERSION="1.6.6"
export HADOOP_VERSION="2.7.2"
export ZOOKEEPER_VERSION="3.4.5-cdh5.16.1"
# Artifact paths are appended directly to this URL, so the trailing slash is required.
mavenRepoUrl="https://repo1.maven.org/maven2/"
echo "Updating host file with permanent ip"
# Point the loopback alias Ubuntu assigns to the hostname at the fixed private address.
sudo sed -i 's/127.0.1.1/192.168.33.10/' /etc/hosts
# Add the cluster aliases only once: the script is meant to be re-run with
# 'vagrant provision', and an unconditional append would stack duplicate entries.
if ! grep -qF '192.168.33.10 zoo1 zoo2 zoo3 leader1' /etc/hosts ; then
cat >> /etc/hosts <<EOF
127.0.0.1 localhost
192.168.33.10 zoo1 zoo2 zoo3 leader1
EOF
fi
# Refresh the package index, then install the JRE, Tomcat 9 and unzip.
# Every install uses -y so apt never waits for confirmation during unattended
# provisioning, and aborts the script with apt's own status on failure.
sudo -E apt-get -qq update
echo "Installing Java installer..."
# if you want dev tools like javac, change this to openjdk-8-jdk-headless
sudo -E apt-get install -y openjdk-8-jre || exit $?
echo "Installing Tomcat..."
# Create the tomcat system user with home /opt/tomcat unless it already exists
# (it does on a re-provision, where useradd would only report an error).
id -u tomcat > /dev/null 2>&1 || sudo useradd -r -m -U -d /opt/tomcat -s /bin/false tomcat
sudo -E apt-get install -y tomcat9 || exit $?
# Let the vagrant user into the tomcat group.
sudo usermod -a -G tomcat vagrant
echo "Installing Unzip..."
sudo -E apt-get install -y unzip || exit $?
echo "Setting up environment..."
# Java
export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64/jre"
# Hadoop: install location and the configuration directory derived from it.
export HADOOP_HOME="/home/vagrant/hadoop-${HADOOP_VERSION}"
export HADOOP_PREFIX="${HADOOP_HOME}"
export HADOOP_CONF_DIR="${HADOOP_PREFIX}/etc/hadoop"
# Zookeeper
export ZOOKEEPER_HOME="/home/vagrant/zookeeper-${ZOOKEEPER_VERSION}"
export ZOO_LOG_DIR="${ZOOKEEPER_HOME}/logs/"
# Accumulo: install location, log directory and JVM options per process type.
export ACCUMULO_HOME="/home/vagrant/accumulo-${ACCUMULO_VERSION}"
export ACCUMULO_LOG_DIR="${ACCUMULO_HOME}/logs"
export ACCUMULO_TSERVER_OPTS="-Xmx384m -Xms384m "
export ACCUMULO_MASTER_OPTS="-Xmx128m -Xms128m"
export ACCUMULO_MONITOR_OPTS="-Xmx64m -Xms64m"
export ACCUMULO_GC_OPTS="-Xmx64m -Xms64m"
export ACCUMULO_GENERAL_OPTS="-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -Djava.net.preferIPv4Stack=true"
export ACCUMULO_OTHER_OPTS="-Xmx128m -Xms64m"
export ACCUMULO_KILL_CMD='kill -9 %p'
# Tool directories appended to PATH; PATHADD is kept separately because the
# generated rc file needs just this suffix.
export PATHADD="${JAVA_HOME}/bin:${ZOOKEEPER_HOME}/bin:${ACCUMULO_HOME}/bin:${HADOOP_HOME}/bin"
export PATH="${PATH}:${PATHADD}"
# Shell environment includes Accumulo resources.
# Write the variables exported above into an rc file that login shells can source.
# Two layers of expansion apply to the here-document below:
#  1. this whole script is a Ruby heredoc, which consumes one level of backslashes, so a
#     backslash-quote reaches the shell as a plain single quote and a doubled backslash
#     reaches it as a single backslash;
#  2. the shell here-document is unquoted, so the shell substitutes the current values of
#     the variables while writing the file. Only PATH, whose dollar sign is still escaped
#     at that point, is written unexpanded so that it is resolved when the file is sourced.
# The file also defines the ryaps helper described in the comment embedded in it.
ACCUMULO_RC=/home/vagrant/.accumulo_rc.sh
cat > ${ACCUMULO_RC} <<EOF
export JAVA_HOME=\'$JAVA_HOME\'
export HADOOP_HOME=\'$HADOOP_HOME\'
export ZOOKEEPER_HOME=\'$ZOOKEEPER_HOME\'
export ZOO_LOG_DIR=\'$ZOO_LOG_DIR\'
export ACCUMULO_HOME=\'$ACCUMULO_HOME\'
export PATH=\\$PATH:$PATHADD
export HADOOP_PREFIX=\'$HADOOP_PREFIX\'
export HADOOP_CONF_DIR=\'$HADOOP_CONF_DIR\'
export ACCUMULO_LOG_DIR=\'$ACCUMULO_LOG_DIR\'
export ACCUMULO_TSERVER_OPTS=\'$ACCUMULO_TSERVER_OPTS\'
export ACCUMULO_MASTER_OPTS=\'$ACCUMULO_MASTER_OPTS\'
export ACCUMULO_MONITOR_OPTS=\'$ACCUMULO_MONITOR_OPTS\'
export ACCUMULO_GC_OPTS=\'$ACCUMULO_GC_OPTS\'
export ACCUMULO_GENERAL_OPTS=\'$ACCUMULO_GENERAL_OPTS\'
export ACCUMULO_OTHER_OPTS=\'$ACCUMULO_OTHER_OPTS\'
export ACCUMULO_KILL_CMD=\'$ACCUMULO_KILL_CMD\'
### command to list the 7 correct java processes: tomcat-catalina, zookeeper, and 5 Accumulo: tracer, master, monitor, tserver, gc.
function ryaps() { ps -ef | grep java | tr ' ' '\\n' | egrep '^org\\.apache|^tracer|^master|^monitor|^tserver|^gc' | sed '/\\.Main/ N ; s/\\n/ /' ; }
EOF
# Load the generated file right away; this also verifies that it parses. Abort with 151 if not.
source ${ACCUMULO_RC} || exit 151
# Make the vagrant user's shells load the Accumulo environment by placing
# "source <rc file>" at the top of both bash start-up files.
# The line is added only when it is not already present, so re-running
# 'vagrant provision' no longer stacks one more copy on every run.
# Exits with 152 if a start-up file cannot be rewritten.
for BASHRC in /home/vagrant/.bashrc /home/vagrant/.bash_profile ;
do
  touch ${BASHRC}
  if ! grep -qxF "source ${ACCUMULO_RC}" ${BASHRC} ; then
    # "cat -" reads the here-document first, then the existing file, i.e. it prepends.
    cat - ${BASHRC} > ${BASHRC}.new <<EOF && mv ${BASHRC}.new ${BASHRC} || exit 152
source ${ACCUMULO_RC}
EOF
  fi
done
echo "Acquiring and Extracting ..."
# echoerr ARGS... : print the arguments as a single line on stderr.
function echoerr() { printf "%s\n" "$*" >&2; }
# download CURL_ARGS... : fetch a URL with curl; all arguments are passed to curl unchanged.
#   --fail treats an HTTP status >= 400 as an error, --location follows redirects.
# Returns 0 on success. On failure it prints a banner on stderr and returns curl's
# non-zero status, leaving the decision to abort to the caller: every call site
# already ends in "|| exit <code>", so each failure keeps its own exit code.
# (The previous "exit 800" pre-empted those codes and, being outside the 0-255
# range, was reported by the shell as 32.)
function download {
  local status
  curl --silent --show-error --fail --location "$@"
  status=$?
  if [ $status -ne 0 ]; then
    echoerr "--------------------------"
    echoerr "-"
    echoerr "- download failed" "$@"
    echoerr "-"
    echoerr "-" exiting ...
    echoerr "-"
    echoerr "--------------------------"
    return $status
  fi
}
### waitForDeploy DIR : block until DIR exists, checking every 5 seconds.
### Gives up after 60 seconds, prints a message and terminates the script via "exit 401".
### Sets the global variables waitfordir and timeout.
function waitForDeploy {
  waitfordir="$1"
  timeout=60
  until [[ -d "$waitfordir" ]] ; do
    sleep 5
    timeout=$(( timeout - 5 ))
    if (( timeout <= 0 )) ; then
      echo "Timeout waiting for war to deploy, $waitfordir still does not exist.";
      exit 401
    fi
  done
}
# fetchTarball LABEL URL INSTALL_DIR EXIT_CODE
# Announce LABEL; unless INSTALL_DIR already exists, stream the gzipped tarball at URL
# straight into tar, unpacking under /home/vagrant. Terminates the script with
# EXIT_CODE when the download/extract pipeline fails.
function fetchTarball {
  echo "- $1"
  if [[ ! -d $3 ]] ; then
    echo "Downloading $2"
    download $2 | tar -zxC /home/vagrant || exit $4
  fi
}
hadoopUrl=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
fetchTarball "Hadoop" "${hadoopUrl}" "${HADOOP_HOME}" 101
zookeeperUrl=http://archive.cloudera.com/cdh5/cdh/5/zookeeper-${ZOOKEEPER_VERSION}.tar.gz
fetchTarball "Zookeeper" "${zookeeperUrl}" "${ZOOKEEPER_HOME}" 102
accumuloUrl=https://archive.apache.org/dist/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz
fetchTarball "Accumulo" "${accumuloUrl}" "${ACCUMULO_HOME}" 103
echo "Configuring Zookeeper..."
# Data directory for Zookeeper, owned by the vagrant user.
sudo mkdir --parents /var/zookeeper
sudo chown vagrant:vagrant /var/zookeeper
# Start from the sample configuration shipped in the Zookeeper tarball.
sudo cp ${ZOOKEEPER_HOME}/conf/zoo_sample.cfg ${ZOOKEEPER_HOME}/conf/zoo.cfg
# Change the dataDir to /var/zookeeper. The backslashes are doubled because this script
# is a Ruby heredoc, which consumes one level of backslash escaping before the shell
# (and sed) see the expression. Exit with 104 if sed fails.
sudo sed -i 's/^\\s*dataDir\\s*=.*$/dataDir=\\/var\\/zookeeper/' ${ZOOKEEPER_HOME}/conf/zoo.cfg || exit 104
# Conflicts with Accumulo and maybe Zookeeper
# (--force keeps rm quiet when the jars are already gone, e.g. on a re-provision)
sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-api-1.7.10.jar
sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar
# Assure logs are creatable and writeable
sudo mkdir --parents ${ZOO_LOG_DIR}
sudo touch "${ZOO_LOG_DIR}/zookeeper.out"
sudo chmod -R a+wX ${ZOO_LOG_DIR}
echo "Running Zookeeper..."
# -E passes the environment exported earlier in this script (JAVA_HOME, ZOO_LOG_DIR, ...) to the start script.
sudo -E ${ZOOKEEPER_HOME}/bin/zkServer.sh start
echo "Configuring Accumulo..."
# Start from the 1GB standalone example configuration, then replace accumulo-site.xml
# with the single-node settings below.
cp ${ACCUMULO_HOME}/conf/examples/1GB/standalone/* ${ACCUMULO_HOME}/conf/
rm --force ${ACCUMULO_HOME}/conf/accumulo-site.xml
# The here-document is unquoted, so the shell substitutes HADOOP_HOME, ACCUMULO_HOME,
# ZOOKEEPER_HOME and HADOOP_CONF_DIR with their current values while writing the file.
# ZOOKEEPER_HOME is already an absolute path under /home/vagrant, so the Zookeeper
# classpath entry uses it as-is; prefixing it with /home/vagrant/ again produced a
# path that does not exist.
cat >> ${ACCUMULO_HOME}/conf/accumulo-site.xml <<EOF
<configuration>
<property><name>instance.dfs.uri</name><value>file:///</value></property>
<property><name>instance.dfs.dir</name><value>/data/accumulo</value></property>
<property><name>instance.zookeeper.host</name><value>localhost:2181</value></property>
<property><name>instance.secret</name><value>DONTTELL</value></property>
<property><name>tserver.port.search</name><value>true</value></property>
<property><name>logger.dir.walog</name><value>/data/accumulo/walogs</value></property>
<property><name>tserver.cache.data.size</name><value>15M</value></property>
<property><name>tserver.cache.index.size</name><value>15M</value></property>
<property><name>tserver.memory.maps.max</name><value>256M</value></property>
<property><name>tserver.walog.max.size</name><value>256M</value></property>
<property><name>tserver.memory.maps.native.enabled</name><value>false</value></property>
<property><name>trace.token.property.password</name><value>root</value></property>
<property><name>gc.cycle.delay</name><value>4s</value></property>
<property><name>gc.cycle.start</name><value>0s</value></property>
<property><name>tserver.compaction.major.delay</name><value>3</value></property>
<property><name>general.classpaths</name><value>
/data/accumulo/lib/[^.].*.jar,
${HADOOP_HOME}/share/hadoop/common/.*.jar,
${HADOOP_HOME}/share/hadoop/common/lib/.*.jar,
${HADOOP_HOME}/share/hadoop/hdfs/.*.jar,
${HADOOP_HOME}/share/hadoop/mapreduce/.*.jar,
${HADOOP_HOME}/share/hadoop/yarn/.*.jar,
${ACCUMULO_HOME}/server/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-server.jar,
${ACCUMULO_HOME}/core/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-core.jar,
${ACCUMULO_HOME}/start/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-start.jar,
${ACCUMULO_HOME}/fate/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-fate.jar,
${ACCUMULO_HOME}/proxy/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-proxy.jar,
${ACCUMULO_HOME}/lib/[^.].*.jar,
${ZOOKEEPER_HOME}/zookeeper[^.].*.jar,
$HADOOP_CONF_DIR,
${HADOOP_HOME}/[^.].*.jar,
${HADOOP_HOME}/lib/[^.].*.jar,
</value></property>
<property><name>general.dynamic.classpaths</name><value>/data/accumulo/lib/ext/[^.].*.jar</value></property>
<property><name>trace.port.client</name><value>0</value></property>
<property><name>monitor.port.client</name><value>0</value></property>
<property><name>master.port.client</name><value>0</value></property>
<property><name>tserver.port.client</name><value>0</value></property>
<property><name>gc.port.client</name><value>0</value></property>
</configuration>
EOF
# Both Accumulo host lists contain only this machine.
for hostList in masters slaves ; do
  echo "rya-example-box" > ${ACCUMULO_HOME}/conf/${hostList}
done
# Storage root for Accumulo (instance.dfs.dir is /data/accumulo), owned by vagrant.
sudo mkdir --parents /data
sudo chown vagrant:vagrant /data
mkdir --parents /data/accumulo/lib/ext
# Open up the log directory before and after start-up.
sudo chmod -R a+rwX ${ACCUMULO_HOME}/logs/
echo "Starting Accumulo..."
echo "Init will fail during a re-provision, but you can ignore it: 'FATAL: It appears the directories [...] were previously initialized.'"
echo "Also, you may see an indefinitely repeating: 'Waiting for accumulo to be initialized' which means Accumulo won't start."
echo "Either issue can be resolved by removing the directory: 'sudo rm -r /data/accumulo' then re-provision. Warning: this will erase all Rya/Accumulo data."
# The init status is deliberately not checked (see the messages above); a failed start aborts with 107.
${ACCUMULO_HOME}/bin/accumulo init --instance-name dev --password root
${ACCUMULO_HOME}/bin/start-all.sh || exit 107
sudo chmod -R a+rwX ${ACCUMULO_HOME}/logs/
echo 'Done!'
# --------------------
echo "Installing RDF4J Server"
# this is the rdf4j data dir. It might need to be registered with tomcat to write there.
export rdf4jServerBase=/opt/tomcat/
sudo install -d -o tomcat -m u=rwx,go=rx ${rdf4jServerBase}
sudo -u tomcat mkdir --parents ${rdf4jServerBase}/server/logs
sudo -u tomcat mkdir --parents ${rdf4jServerBase}/.RDF4J/server/logs
# tomcat v9 requires explicit permissions to the file system, so a systemd drop-in
# grants the service write access to the data dir. The drop-in is overwritten rather
# than appended to, so a re-provision does not add another duplicate [Service] section.
sudo mkdir --parents /etc/systemd/system/tomcat9.service.d
cat <<EOF | sudo tee /etc/systemd/system/tomcat9.service.d/logging-allow.conf
[Service]
ReadWritePaths=${rdf4jServerBase}
EOF
echo 'Restarting Tomcat (or starting)'
# NOTE: exit statuses are limited to 0-255, so the shell reports the three codes
# below as 56, 57 and 58 respectively.
sudo -E systemctl daemon-reload || exit 1080
sudo -E systemctl enable tomcat9 || exit 1081
sudo -E systemctl restart tomcat9 || exit 1082
# Fetch the server war into Tomcat's webapps directory unless a non-empty copy is already there.
rdf4jwar=/var/lib/tomcat9/webapps/rdf4j-server.war
if [[ ! -s $rdf4jwar ]] ; then
  echo "Downloading RDF4J Server"
  download --output $rdf4jwar ${mavenRepoUrl}org/eclipse/rdf4j/rdf4j-http-server/${RDF4J_VERSION}/rdf4j-http-server-${RDF4J_VERSION}.war || exit 110
fi
echo "RDF4J http server deployed at http://rya-example-box:8080/rdf4j-server"
# --------------------
echo "Installing RDF4J Workbench"
# Fetch the workbench war into Tomcat's webapps directory unless a non-empty copy
# is already there; a failed download aborts with 111.
workbench=/var/lib/tomcat9/webapps/rdf4j-workbench.war
[[ -s $workbench ]] || {
  echo "Downloading RDF4J Workbench"
  download --output $workbench ${mavenRepoUrl}org/eclipse/rdf4j/rdf4j-http-workbench/${RDF4J_VERSION}/rdf4j-http-workbench-${RDF4J_VERSION}.war || exit 111
}
echo "RDF4J workbench deployed at http://rya-example-box:8080/rdf4j-workbench"
# ----------------------
echo "Installing Rya"
ryaIndexing=rya.indexing.example-${RYA_EXAMPLE_VERSION}-distribution
# A locally built distribution takes precedence over the Maven download.
# Place in the vagrant folder from your dev project, example: extras/indexingExample/target/rya.indexing.example-4.0.1-distribution.zip
if [[ -s "/vagrant/${ryaIndexing}.zip" ]] ; then
  echo "Using found local file: /vagrant/${ryaIndexing}.zip "
  cp "/vagrant/${ryaIndexing}.zip" ./
fi
if [[ ! -s ${ryaIndexing}.zip ]] ; then
  echo "Downloading ${ryaIndexing}.zip quietly, this will take some minutes with no output..."
  download --output ${ryaIndexing}.zip ${mavenRepoUrl}org/apache/rya/rya.indexing.example/${RYA_EXAMPLE_VERSION}/${ryaIndexing}.zip || exit 112
fi
sudo mkdir --parents ${ryaIndexing}
sudo unzip -q -o ${ryaIndexing}.zip -d ${ryaIndexing}
# before continuing, wait for tomcat to deploy wars:
waitForDeploy /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/
waitForDeploy /var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/
# soft linking the files doesn't seem to work in tomcat, so we copy them instead :(
sudo -u tomcat cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/ || exit 113
sudo -u tomcat cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/ || exit 114
# These are older libs that break tomcat and rdf4j that come with Rya above.
sudo rm --force /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/servlet-api-2.5.jar
sudo rm --force /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/jsp-api-2.1.jar
# s already names the server's WEB-INF/lib directory, so the jars sit directly under it.
# (The jsp-api jar was previously looked up under an extra lib/ level and never removed.)
s=/var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/
sudo rm --force $s/servlet-api-2.5.jar $s/jsp-api-2.1.jar
sudo find "$s" -name 'spring-*-3.*.jar' -exec sudo rm --force {} +
# --force here as well: on a re-provision these jars may already be gone.
sudo rm --force $s/spring-aop-4.2.1.RELEASE.jar $s/rdf4j-http-server-spring-2.3.1.jar
sudo rm --force $s/slf4j-log4j12-1.7.25.jar $s/slf4j-api-1.7.25.jar $s/jcabi-log-0.14.jar $s/jcl-over-slf4j-1.7.25.jar $s/minlog-1.3.0.jar $s/commons-logging-1.1.1.jar $s/log4j-1.2.16.jar
# Hand the library directories back to the tomcat user.
sudo chown -R tomcat:tomcat /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/
sudo chown -R tomcat:tomcat /var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/
# ----------------------
echo "Downloading and installing new templates for RDF4J WorkBench"
ryaVagrant=rya.vagrant.example-${RYA_EXAMPLE_VERSION}
transformations=/var/lib/tomcat9/webapps/rdf4j-workbench/transformations
# A locally built jar takes precedence over the Maven download.
# Place in the vagrant folder from your dev project: extras/vagrantExample/target/rya.vagrant.example-*.jar
[[ -s "/vagrant/${ryaVagrant}.jar" ]] && {
  echo "Using found local file: /vagrant/${ryaVagrant}.jar "
  cp "/vagrant/${ryaVagrant}.jar" ./
}
[[ -s ${ryaVagrant}.jar ]] || {
  echo "Downloading ${ryaVagrant}.jar"
  download --output ${ryaVagrant}.jar ${mavenRepoUrl}org/apache/rya/rya.vagrant.example/${RYA_EXAMPLE_VERSION}/${ryaVagrant}.jar || exit 120
}
# Unpack the jar and copy its .xsl files into the deployed workbench.
sudo mkdir --parents ${ryaVagrant}
sudo unzip -q -o ${ryaVagrant}.jar -d ${ryaVagrant}
waitForDeploy ${transformations}
sudo -u tomcat cp ${ryaVagrant}/*.xsl ${transformations}/
sudo chown tomcat:tomcat ${transformations}/*
# ----------------------
echo "Deploying Rya Web"
ryaWar=web.rya-${RYA_EXAMPLE_VERSION}.war
ryaWebapp=/var/lib/tomcat9/webapps/web.rya
# A locally built war takes precedence over the Maven download.
# Place in the vagrant folder from your dev project: web/web.rya/target/web.rya.war rename to add version.
[[ -s "/vagrant/${ryaWar}" ]] && {
  echo "Using found local file: /vagrant/${ryaWar} "
  cp "/vagrant/${ryaWar}" ./
}
[[ -s ${ryaWar} ]] || {
  echo "Downloading ${ryaWar}"
  # (an earlier revision fetched this war from a Dropbox link instead of Maven)
  download ${mavenRepoUrl}org/apache/rya/web.rya/${RYA_EXAMPLE_VERSION}/${ryaWar} --output ${ryaWar} || exit 121
}
# Remove the exploded classes directory so its reappearance signals that Tomcat
# has deployed the war copied in below.
sudo rm -rf ${ryaWebapp}/WEB-INF/classes
sudo -u tomcat cp ${ryaWar} ${ryaWebapp}.war
waitForDeploy ${ryaWebapp}/WEB-INF/classes/
# These are older libs that break tomcat
sudo rm --force ${ryaWebapp}/WEB-INF/lib/servlet-api-2.5*.jar ${ryaWebapp}/WEB-INF/lib/jsp-api-2.1.jar
echo "Modify Rya Web Config"
sudo chmod -R a+rwX ${ryaWebapp}/
# Point the web app at the local Accumulo instance (dev) and Zookeeper created above.
cat > ${ryaWebapp}/WEB-INF/classes/environment.properties <<EOF
instance.name=dev
instance.zk=localhost:2181
instance.username=root
instance.password=root
rya.tableprefix=rya_
rya.displayqueryplan=true
EOF
echo "Rya web deployed at http://rya-example-box:8080/web.rya/sparqlQuery.jsp"
# Restart Tomcat; abort with systemctl's status on failure.
systemctl restart tomcat9 || exit $?
echo "Finished and ready to use!"
echo "You can re-apply these settings without losing data by running the command 'vagrant provision'"
SHELL
end