blob: 2330bd9f5e2d9728112982aeba7a76ff407b83c2 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# -*- mode: ruby -*-
# vi: set ft=ruby :
#
# Builds a single node Rya on Accumulo on Hadoop and Zookeeper.
# Deploys Rya and the Eclipse rdf4j-workbench on Tomcat, accessible from the host's browser.
# See the accompanying readme for URLs, verification, and troubleshooting.
#
# Note: Machine's ip is 192.168.33.10
# username : vagrant
# password : vagrant
#
# accumulo instance : dev
# accumulo username : root
# accumulo password : root
Vagrant.configure(2) do |config|
# Base image for the guest and the location it is fetched from.
config.vm.box     = "ubuntu/trusty64"
config.vm.box_url = "https://cloud-images.ubuntu.com/vagrant/trusty/current/trusty-server-cloudimg-amd64-vagrant-disk1.box"

# Guest identity: hostname plus a fixed address on a private network.
# The provisioning script below hard-codes the same address and hostname.
config.vm.hostname = "rya-example-box"
config.vm.network(:private_network, ip: "192.168.33.10")

# VirtualBox-specific name and resource sizing for the guest.
config.vm.provider("virtualbox") do |virtualbox|
  virtualbox.name   = "rya-example-box"
  virtualbox.memory = "4096"
  virtualbox.cpus   = 4
end
config.vm.provision "shell", inline: <<-SHELL
###set -x ## turn on command echo with expanded variables
# Versions of the components this script downloads and installs.
export ACCUMULO_VERSION=1.6.5
###export ACCUMULO_VERSION=1.7.1
export HADOOP_VERSION=2.7.2
export RYA_EXAMPLE_VERSION=4.0.0-incubating
export RDF4J_VERSION=2.3.1
export ZOOKEEPER_VERSION=3.4.5-cdh4.5.0
# Base URL (with trailing slash) of the Maven repository that the RDF4J and Rya
# artifacts are fetched from. Maven Central rejects plain-HTTP requests, so this
# has to be an https URL.
mavenRepoUrl=https://repo1.maven.org/maven2/
echo "Updating host file with permanent ip"
# Point the 127.0.1.1 entry at the box's fixed private IP.
# [.] matches a literal dot without needing a backslash inside the enclosing Ruby heredoc.
sudo sed -i 's/127[.]0[.]1[.]1/192.168.33.10/' /etc/hosts
# Alias zoo1, zoo2 and zoo3 to this box. Append only when the exact line is missing,
# so that re-running 'vagrant provision' does not pile up duplicate entries.
if ! grep -qxF '192.168.33.10 zoo1 zoo2 zoo3' /etc/hosts ; then
cat >> /etc/hosts <<EOF
192.168.33.10 zoo1 zoo2 zoo3
EOF
fi
# Install Oracle Java 8 from the webupd8team PPA; any failing step aborts the
# provisioning run with that step's exit status.
sudo -E apt-get -qq update
echo "Installing Java installer..."
# NOTE(review): this depends on the third-party PPA still publishing
# oracle-java8-installer -- confirm it is still available before relying on this step.
sudo -E add-apt-repository ppa:webupd8team/java || exit $?
# Refresh the package index again so packages from the new PPA are visible.
sudo -E apt-get -qq update || exit $?
# Pre-seed debconf with the Oracle license acceptance (both 'select' and 'seen')
# so the install below does not stop to ask for it.
echo debconf shared/accepted-oracle-license-v1-1 select true | \
sudo -E /usr/bin/debconf-set-selections
echo debconf shared/accepted-oracle-license-v1-1 seen true | \
sudo -E /usr/bin/debconf-set-selections
# Append verbose=off to a wgetrc in the installer's cache directory
# (presumably read by the installer's wget to quiet download output -- verify).
sudo mkdir --parents /var/cache/oracle-jdk8-installer || exit $?
echo verbose=off >> /var/cache/oracle-jdk8-installer/wgetrc || exit $?
echo "Installing Java..."
sudo -E apt-get -qq install -y oracle-java8-installer || exit $?
# Make /usr/lib/jvm/default-java a symlink to the Oracle JDK directory.
sudo ln --force -s /usr/lib/jvm/java-8-oracle/ /usr/lib/jvm/default-java
# Install Tomcat 7 (hosts the RDF4J and Rya web applications) and unzip
# (used later to unpack the Rya archives). Abort with apt's status on failure.
echo "Installing Tomcat..."
sudo -E apt-get install -y tomcat7 || exit $?
echo "Installing Unzip..."
# Same flags as the other installs: -y so apt never waits for a confirmation
# in this non-interactive run, sudo -E so it runs as root with the same environment.
sudo -E apt-get install -y unzip || exit $?
echo "Setting up environment..."
# Install locations; the version suffixes come from the *_VERSION variables set earlier in this script.
export JAVA_HOME="/usr/lib/jvm/java-8-oracle"
export HADOOP_HOME="/home/vagrant/hadoop-${HADOOP_VERSION}"
export ZOOKEEPER_HOME="/home/vagrant/zookeeper-${ZOOKEEPER_VERSION}"
export ZOO_LOG_DIR="${ZOOKEEPER_HOME}/logs/"
export ACCUMULO_HOME="/home/vagrant/accumulo-${ACCUMULO_VERSION}"
# PATHADD is kept as its own variable because it is also written into the rc file generated below.
export PATHADD="${JAVA_HOME}/bin:${ZOOKEEPER_HOME}/bin:${ACCUMULO_HOME}/bin:${HADOOP_HOME}/bin"
export PATH="${PATH}:${PATHADD}"
# Hadoop and Accumulo directories derived from the homes above.
export HADOOP_PREFIX="${HADOOP_HOME}"
export HADOOP_CONF_DIR="${HADOOP_PREFIX}/etc/hadoop"
export ACCUMULO_LOG_DIR="${ACCUMULO_HOME}/logs"
# JVM options for each Accumulo process type (the trailing space in TSERVER_OPTS is kept as-is).
export ACCUMULO_TSERVER_OPTS='-Xmx384m -Xms384m '
export ACCUMULO_MASTER_OPTS='-Xmx128m -Xms128m'
export ACCUMULO_MONITOR_OPTS='-Xmx64m -Xms64m'
export ACCUMULO_GC_OPTS='-Xmx64m -Xms64m'
export ACCUMULO_GENERAL_OPTS='-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -Djava.net.preferIPv4Stack=true'
export ACCUMULO_OTHER_OPTS='-Xmx128m -Xms64m'
export ACCUMULO_KILL_CMD="kill -9 %p"
# Shell environment includes Accumulo resources.
# Write the variables exported above, with their current values, into an rc file
# so that login shells get the same environment, plus a 'ryaps' helper function.
#
# The text between the EOF markers is expanded twice: first by the enclosing Ruby
# heredoc, which consumes one level of backslash escaping, and then by this unquoted
# shell heredoc, which substitutes the variable values. A backslash-escaped quote
# therefore ends up as a literal single quote around each value, and the doubly
# escaped PATH reference is written out unexpanded so that it is evaluated when the
# rc file is sourced. The doubled backslashes inside ryaps follow the same rule.
ACCUMULO_RC=/home/vagrant/.accumulo_rc.sh
cat > ${ACCUMULO_RC} <<EOF
export JAVA_HOME=\'$JAVA_HOME\'
export HADOOP_HOME=\'$HADOOP_HOME\'
export ZOOKEEPER_HOME=\'$ZOOKEEPER_HOME\'
export ZOO_LOG_DIR=\'$ZOO_LOG_DIR\'
export ACCUMULO_HOME=\'$ACCUMULO_HOME\'
export PATH=\\$PATH:$PATHADD
export HADOOP_PREFIX=\'$HADOOP_PREFIX\'
export HADOOP_CONF_DIR=\'$HADOOP_CONF_DIR\'
export ACCUMULO_LOG_DIR=\'$ACCUMULO_LOG_DIR\'
export ACCUMULO_TSERVER_OPTS=\'$ACCUMULO_TSERVER_OPTS\'
export ACCUMULO_MASTER_OPTS=\'$ACCUMULO_MASTER_OPTS\'
export ACCUMULO_MONITOR_OPTS=\'$ACCUMULO_MONITOR_OPTS\'
export ACCUMULO_GC_OPTS=\'$ACCUMULO_GC_OPTS\'
export ACCUMULO_GENERAL_OPTS=\'$ACCUMULO_GENERAL_OPTS\'
export ACCUMULO_OTHER_OPTS=\'$ACCUMULO_OTHER_OPTS\'
export ACCUMULO_KILL_CMD=\'$ACCUMULO_KILL_CMD\'
### command to list the 7 correct java processes: tomcat-catalina, zookeeper, and 5 Accumulo: tracer, master, monitor, tserver, gc.
function ryaps() { ps -ef | grep java | tr ' ' '\\n' | egrep '^org\\.apache|^tracer|^master|^monitor|^tserver|^gc' | sed '/\\.Main/ N ; s/\\n/ /' ; }
EOF
# Load the generated file into this provisioning shell as well; abort with status 151 if that fails.
source ${ACCUMULO_RC} || exit 151
# Source the Accumulo rc file from the beginning of both bash startup files,
# creating each file first if it does not exist. Files that already contain the
# line are left alone, so repeated 'vagrant provision' runs do not stack up
# duplicate entries. Aborts with status 152 if a file cannot be rewritten.
for BASHRC in /home/vagrant/.bashrc /home/vagrant/.bash_profile ; do
  touch ${BASHRC}
  if ! grep -qxF "source ${ACCUMULO_RC}" ${BASHRC} ; then
    # Prepend by writing the new line plus the old content to a sibling file, then swapping it in.
    { echo "source ${ACCUMULO_RC}" ; cat ${BASHRC} ; } > ${BASHRC}.new && mv ${BASHRC}.new ${BASHRC} || exit 152
  fi
done
echo "Acquiring and Extracting ..."
# Print all arguments, joined by spaces, as a single line on stderr.
function echoerr() { printf "%s\n" "$*" >&2; }
# Fetch a URL with curl; every argument is passed straight through to curl.
# On failure, prints a banner to stderr and returns curl's exit status.
# It returns rather than exits so that each caller's '|| exit N' decides the
# script's exit code; an exit here would pre-empt those handlers, and a value
# such as 800 is outside the 0-255 range that a shell can report.
function download {
  ### curl --fail treat http status >= 400 as an error. --location follow redirects status>=300
  curl --silent --show-error --fail --location "$@"
  local status=$?
  if [ $status -ne 0 ]; then
    echoerr "--------------------------"
    echoerr "-"
    echoerr "- download failed" "$@"
    echoerr "-"
    echoerr "-" exiting ...
    echoerr "-"
    echoerr "--------------------------"
    return $status
  fi
}
### Block until the directory given as the first argument exists, checking every
### 5 seconds. After 60 seconds without success, report the timeout and exit.
function waitForDeploy {
  local deployDir="$1"
  local secondsLeft=60
  until [[ -d "$deployDir" ]]
  do
    sleep 5
    secondsLeft=$(( secondsLeft - 5 ))
    if (( secondsLeft <= 0 )); then
      echo "Timeout waiting for war to deploy, $deployDir still does not exist.";
      exit 401
    fi
  done
}
# Download and unpack Hadoop, Zookeeper and Accumulo into /home/vagrant.
# Each archive is streamed from curl straight into tar, and is skipped when its
# home directory already exists (for example on a re-provision).
# A failed fetch or extract aborts with status 101, 102 or 103 respectively.
#
# Hadoop and Accumulo come from archive.apache.org, which keeps every past
# release; individual mirror hosts only carry current releases.
echo "- Hadoop"
hadoopUrl=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
if [[ ! -d ${HADOOP_HOME} ]] ; then
  echo "Downloading $hadoopUrl"
  download $hadoopUrl \
  | tar -zxC /home/vagrant || exit 101
fi
echo "- Zookeeper"
# NOTE(review): confirm this Cloudera archive host still serves the CDH4 tarball.
zookeeperUrl=http://archive-primary.cloudera.com/cdh4/cdh/4/zookeeper-${ZOOKEEPER_VERSION}.tar.gz
if [[ ! -d ${ZOOKEEPER_HOME} ]] ; then
  echo "Downloading $zookeeperUrl"
  download $zookeeperUrl \
  | tar -zxC /home/vagrant || exit 102
fi
echo "- Accumulo"
accumuloUrl=https://archive.apache.org/dist/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz
if [[ ! -d ${ACCUMULO_HOME} ]] ; then
  echo "Downloading $accumuloUrl"
  download $accumuloUrl \
  | tar -zxC /home/vagrant || exit 103
fi
echo "Configuring Zookeeper..."
# Data directory for Zookeeper, owned by the vagrant user.
sudo mkdir --parents /var/zookeeper
sudo chown vagrant:vagrant /var/zookeeper
# Start from the sample configuration in the Zookeeper conf directory.
sudo cp ${ZOOKEEPER_HOME}/conf/zoo_sample.cfg ${ZOOKEEPER_HOME}/conf/zoo.cfg
# Change the dataDir to /var/zookeeper. The backslashes are doubled because the
# enclosing Ruby heredoc consumes one level of escaping before the shell sees the
# sed expression. Abort with status 104 if the edit fails.
sudo sed -i 's/^\\s*dataDir\\s*=.*$/dataDir=\\/var\\/zookeeper/' ${ZOOKEEPER_HOME}/conf/zoo.cfg || exit 104
# Conflicts with Accumulo and maybe Zookeeper
sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-api-1.7.10.jar
sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar
# Ensure the log directory and zookeeper.out exist and are writable by every user.
sudo mkdir --parents ${ZOO_LOG_DIR}
sudo touch "${ZOO_LOG_DIR}/zookeeper.out"
sudo chmod -R a+wX ${ZOO_LOG_DIR}
echo "Running Zookeeper..."
# -E passes the environment exported earlier (ZOOKEEPER_HOME, ZOO_LOG_DIR, ...) through sudo.
sudo -E ${ZOOKEEPER_HOME}/bin/zkServer.sh start
echo "Configuring Accumulo..."
# Start from Accumulo's 1GB standalone example configuration, then replace its
# accumulo-site.xml with the single-node settings written below.
cp ${ACCUMULO_HOME}/conf/examples/1GB/standalone/* ${ACCUMULO_HOME}/conf/
rm --force ${ACCUMULO_HOME}/conf/accumulo-site.xml
# The shell variables in the heredoc are expanded now, so the file contains
# absolute paths. ZOOKEEPER_HOME is already an absolute path under /home/vagrant
# and is therefore used as-is in general.classpaths, with no extra prefix.
cat >> ${ACCUMULO_HOME}/conf/accumulo-site.xml <<EOF
<configuration>
<property><name>instance.dfs.uri</name><value>file:///</value></property>
<property><name>instance.dfs.dir</name><value>/data/accumulo</value></property>
<property><name>instance.zookeeper.host</name><value>localhost:2181</value></property>
<property><name>instance.secret</name><value>DONTTELL</value></property>
<property><name>tserver.port.search</name><value>true</value></property>
<property><name>logger.dir.walog</name><value>/data/accumulo/walogs</value></property>
<property><name>tserver.cache.data.size</name><value>15M</value></property>
<property><name>tserver.cache.index.size</name><value>15M</value></property>
<property><name>tserver.memory.maps.max</name><value>256M</value></property>
<property><name>tserver.walog.max.size</name><value>256M</value></property>
<property><name>tserver.memory.maps.native.enabled</name><value>false</value></property>
<property><name>trace.token.property.password</name><value>root</value></property>
<property><name>gc.cycle.delay</name><value>4s</value></property>
<property><name>gc.cycle.start</name><value>0s</value></property>
<property><name>tserver.compaction.major.delay</name><value>3</value></property>
<property><name>general.classpaths</name><value>
/data/accumulo/lib/[^.].*.jar,
${HADOOP_HOME}/share/hadoop/common/.*.jar,
${HADOOP_HOME}/share/hadoop/common/lib/.*.jar,
${HADOOP_HOME}/share/hadoop/hdfs/.*.jar,
${HADOOP_HOME}/share/hadoop/mapreduce/.*.jar,
${HADOOP_HOME}/share/hadoop/yarn/.*.jar,
${ACCUMULO_HOME}/server/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-server.jar,
${ACCUMULO_HOME}/core/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-core.jar,
${ACCUMULO_HOME}/start/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-start.jar,
${ACCUMULO_HOME}/fate/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-fate.jar,
${ACCUMULO_HOME}/proxy/target/classes/,
${ACCUMULO_HOME}/lib/accumulo-proxy.jar,
${ACCUMULO_HOME}/lib/[^.].*.jar,
${ZOOKEEPER_HOME}/zookeeper[^.].*.jar,
$HADOOP_CONF_DIR,
${HADOOP_HOME}/[^.].*.jar,
${HADOOP_HOME}/lib/[^.].*.jar,
</value></property>
<property><name>general.dynamic.classpaths</name><value>/data/accumulo/lib/ext/[^.].*.jar</value></property>
<property><name>trace.port.client</name><value>0</value></property>
<property><name>monitor.port.client</name><value>0</value></property>
<property><name>master.port.client</name><value>0</value></property>
<property><name>tserver.port.client</name><value>0</value></property>
<property><name>gc.port.client</name><value>0</value></property>
</configuration>
EOF
# List this box (by the hostname set in the Vagrant config) as the only entry in
# both the masters and the slaves file.
cat > ${ACCUMULO_HOME}/conf/masters <<EOF
rya-example-box
EOF
cat > ${ACCUMULO_HOME}/conf/slaves <<EOF
rya-example-box
EOF
# /data holds the Accumulo instance directory configured in accumulo-site.xml
# and the lib/ext directory named in general.dynamic.classpaths.
sudo mkdir --parents /data
sudo chown vagrant:vagrant /data
mkdir --parents /data/accumulo/lib/ext
# Make the Accumulo log directory readable and writable by every user.
sudo chmod -R a+rwX ${ACCUMULO_HOME}/logs/
echo "Starting Accumulo..."
echo "Init will fail during a re-provision, but you can ignore it: 'FATAL: It appears the directories [...] were previously initialized.'"
echo "Also, you may see an indefinitely repeating: 'Waiting for accumulo to be initialized' which means Accumulo won't start."
echo "Either issue can be resolved by removing the directory: 'sudo rm -r /data/accumulo' then re-provision. Warning: this will erase all Rya/Accumulo data."
# Deliberately unchecked: init is expected to fail when the instance already exists (see the messages above).
${ACCUMULO_HOME}/bin/accumulo init --instance-name dev --password root
# Start all Accumulo processes; abort with status 107 if the start script fails.
${ACCUMULO_HOME}/bin/start-all.sh || exit 107
# Re-apply permissions so log files created during start-up are accessible too.
sudo chmod -R a+rwX ${ACCUMULO_HOME}/logs/
echo 'Done!'
echo "Installing RDF4J Server"
# Create the .RDF4J directory under /usr/share/tomcat7, hand that tree to the
# tomcat7 user, and link its Server/logs directory into /var/log/tomcat7.
rdf4jDataDir=/usr/share/tomcat7/.RDF4J
sudo mkdir --parents ${rdf4jDataDir}
sudo chown -R tomcat7:tomcat7 /usr/share/tomcat7
sudo ln --force -s ${rdf4jDataDir}/Server/logs /var/log/tomcat7/rdf4j-server
# Fetch the server war into Tomcat's webapps directory unless a non-empty copy
# is already there. A failed download aborts with status 110.
tomcatWebapps=/var/lib/tomcat7/webapps
rdf4jwar=${tomcatWebapps}/rdf4j-server.war
[[ -s $rdf4jwar ]] || {
  echo "Downloading RDF4J Server"
  download --output $rdf4jwar ${mavenRepoUrl}org/eclipse/rdf4j/rdf4j-http-server/${RDF4J_VERSION}/rdf4j-http-server-${RDF4J_VERSION}.war || exit 110
}
echo "RDF4J http server deployed at http://rya-example-box:8080/rdf4j-server"
echo "Installing RDF4J Workbench"
# Same for the workbench war. A failed download aborts with status 111.
workbench=${tomcatWebapps}/rdf4j-workbench.war
[[ -s $workbench ]] || {
  echo "Downloading RDF4J Workbench"
  download --output $workbench ${mavenRepoUrl}org/eclipse/rdf4j/rdf4j-http-workbench/${RDF4J_VERSION}/rdf4j-http-workbench-${RDF4J_VERSION}.war || exit 111
}
echo "RDF4J workbench deployed at http://rya-example-box:8080/rdf4j-workbench"
echo "Installing Rya"
# The zip and its extraction directory use relative names, so they are created
# in the provisioning shell's current working directory.
ryaIndexing=rya.indexing.example-${RYA_EXAMPLE_VERSION}-distribution
# Download only when no non-empty copy of the zip is present; abort with status 112 on failure.
if [[ ! -s ${ryaIndexing}.zip ]] ; then
# Eventually it'll be on maven...
echo "Downloading ${ryaIndexing} quietly, this will take some minutes with no output..."
download --output ${ryaIndexing}.zip ${mavenRepoUrl}org/apache/rya/rya.indexing.example/${RYA_EXAMPLE_VERSION}/${ryaIndexing}.zip?raw=1 || exit 112
fi
# Unpack quietly, overwriting files left by an earlier run.
sudo mkdir --parents ${ryaIndexing}
sudo unzip -q -o ${ryaIndexing}.zip -d ${ryaIndexing}
# Before continuing, wait for Tomcat to deploy the wars:
waitForDeploy /var/lib/tomcat7/webapps/rdf4j-workbench/WEB-INF/lib/
waitForDeploy /var/lib/tomcat7/webapps/rdf4j-server/WEB-INF/lib/
# Soft-linking the files doesn't seem to work in Tomcat, so we copy them instead :(
sudo cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat7/webapps/rdf4j-workbench/WEB-INF/lib/ || exit 113
sudo cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat7/webapps/rdf4j-server/WEB-INF/lib/ || exit 114
# These are older libs that break Tomcat 7
sudo rm --force /var/lib/tomcat7/webapps/rdf4j-workbench/WEB-INF/lib/servlet-api-2.5.jar
sudo rm --force /var/lib/tomcat7/webapps/rdf4j-workbench/WEB-INF/lib/jsp-api-2.1.jar
sudo rm --force /var/lib/tomcat7/webapps/rdf4j-server/WEB-INF/lib/servlet-api-2.5.jar
sudo rm --force /var/lib/tomcat7/webapps/rdf4j-server/WEB-INF/lib/jsp-api-2.1.jar
# Give the copied libraries to the tomcat7 user.
sudo chown -R tomcat7:tomcat7 /var/lib/tomcat7/webapps/rdf4j-workbench/WEB-INF/lib/
sudo chown -R tomcat7:tomcat7 /var/lib/tomcat7/webapps/rdf4j-server/WEB-INF/lib/
echo "Downloading and installing new templates for RDF4J WorkBench"
# Fetch the rya.vagrant.example jar unless a non-empty copy already exists in
# the working directory. A failed download aborts with status 120.
ryaVagrant=rya.vagrant.example-${RYA_EXAMPLE_VERSION}
[[ -s ${ryaVagrant}.jar ]] || {
  echo "Downloading ${ryaVagrant}"
  download --output ${ryaVagrant}.jar ${mavenRepoUrl}org/apache/rya/rya.vagrant.example/${RYA_EXAMPLE_VERSION}/${ryaVagrant}.jar?raw=1 || exit 120
}
# Unpack the jar and copy its top-level .xsl files into the workbench's
# transformations directory, owned by the tomcat7 user.
sudo mkdir --parents ${ryaVagrant}
sudo unzip -q -o ${ryaVagrant}.jar -d ${ryaVagrant}
workbenchTransformations=/var/lib/tomcat7/webapps/rdf4j-workbench/transformations
sudo cp ${ryaVagrant}/*.xsl ${workbenchTransformations}/
sudo chown tomcat7:tomcat7 ${workbenchTransformations}/*
echo "Deploying Rya Web"
# Fetch the web.rya war unless a non-empty copy already exists in the working
# directory. A failed download aborts with status 121.
ryaWar=web.rya-${RYA_EXAMPLE_VERSION}.war
if [[ ! -s ${ryaWar} ]] ; then
  echo "Downloading ${ryaWar}"
  # The repository file name is exactly ${ryaWar}: it already ends in .war, so no further extension is appended.
  download --output ${ryaWar} ${mavenRepoUrl}org/apache/rya/web.rya/${RYA_EXAMPLE_VERSION}/${ryaWar}?raw=1 || exit 121
fi
# Copy under a version-free name so the web application is served at /web.rya.
sudo cp ${ryaWar} /var/lib/tomcat7/webapps/web.rya.war
# Wait for the war to deploy
waitForDeploy /var/lib/tomcat7/webapps/web.rya/WEB-INF/classes/
# These are older libs that break Tomcat 7
sudo rm --force /var/lib/tomcat7/webapps/web.rya/WEB-INF/lib/servlet-api-2.5*.jar
sudo rm --force /var/lib/tomcat7/webapps/web.rya/WEB-INF/lib/jsp-api-2.1.jar
echo "Modify Rya Web Config"
# Overwrite the deployed web application's environment.properties with the
# Accumulo instance name, Zookeeper address and credentials used by this box
# (the same instance name and password passed to 'accumulo init' earlier).
cat > /var/lib/tomcat7/webapps/web.rya/WEB-INF/classes/environment.properties <<EOF
instance.name=dev
instance.zk=localhost:2181
instance.username=root
instance.password=root
rya.tableprefix=rya_
rya.displayqueryplan=true
EOF
echo "Rya web deployed at http://rya-example-box:8080/web.rya/sparqlQuery.jsp"
# Restart Tomcat (presumably so the copied libraries and the new properties file take effect).
sudo -E service tomcat7 restart
echo "Finished and ready to use!"
echo "You can re-apply these settings without losing data by running the command 'vagrant provision'"
SHELL
end