# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# -*- mode: ruby -*-
# vi: set ft=ruby :

#
# Builds a single node Rya on Accumulo on Hadoop and Zookeeper.
# Deploys Rya and the Sesame openrdf-workbench on Tomcat, accessible from the host's browser.
# See the accompanying readme for URLs, verification, and troubleshooting.
#
# Note: Machine's ip is 192.168.33.10
#       username : vagrant
#       password : vagrant
#
#       accumulo instance : dev
#       accumulo username : root
#       accumulo password : root
Vagrant.configure(2) do |config|
  # Base image: Ubuntu Trusty, 64-bit.
  config.vm.box = "ubuntu/trusty64"
  config.vm.box_url = "https://cloud-images.ubuntu.com/vagrant/trusty/current/trusty-server-cloudimg-amd64-vagrant-disk1.box"

  # VirtualBox sizing for the guest.
  config.vm.provider "virtualbox" do |vb|
    vb.name = "rya-example-box"
    vb.memory = "4096"
    vb.cpus = 4
  end

  # Fixed private address; the provisioning script writes the same address
  # into the guest's /etc/hosts.
  config.vm.network :private_network, ip: "192.168.33.10"
  config.vm.hostname = "rya-example-box"

  # Inline bash provisioning script.
  #
  # NOTE: this heredoc is processed like a double-quoted Ruby string before the
  # script ever reaches the shell. Backslash escapes are consumed by Ruby, so a
  # backslash that must survive into the shell (or into sed/tr/egrep) has to be
  # doubled, and a backslash at the end of a line joins it with the next line.
  # The nested shell heredocs (<<EOF) need their terminator at column 0, which
  # is why their bodies are not indented.
  config.vm.provision "shell", inline: <<-SHELL

    ###set -x  ## turn on command echo with expanded variables
    # List of dependency versions
    export ACCUMULO_VERSION=1.6.5
    ###export ACCUMULO_VERSION=1.7.1
    export HADOOP_VERSION=2.7.2
    export RYA_EXAMPLE_VERSION=3.2.10-SNAPSHOT
    export SESAME_VERSION=2.7.6
    export ZOOKEEPER_VERSION=3.4.5-cdh4.5.0

    echo "Updating host file with permanent ip"
    sudo sed -i 's/127.0.1.1/192.168.33.10/' /etc/hosts
    cat >> /etc/hosts <<EOF
192.168.33.10 zoo1 zoo2 zoo3
EOF

    sudo -E apt-get -qq update

    echo "Installing Java installer..."
    # -y keeps the repository registration non-interactive.
    sudo -E add-apt-repository -y ppa:webupd8team/java || exit $?
    sudo -E apt-get -qq update || exit $?
    # Pre-accept the Oracle license so the installer does not prompt.
    echo debconf shared/accepted-oracle-license-v1-1 select true | \
      sudo -E /usr/bin/debconf-set-selections
    echo debconf shared/accepted-oracle-license-v1-1 seen true | \
      sudo -E /usr/bin/debconf-set-selections
    sudo mkdir --parents /var/cache/oracle-jdk8-installer || exit $?
    echo verbose=off >> /var/cache/oracle-jdk8-installer/wgetrc || exit $?

    echo "Installing Java..."
    sudo -E apt-get -qq install -y oracle-java8-installer || exit $?
    sudo ln --force -s /usr/lib/jvm/java-8-oracle/ /usr/lib/jvm/default-java

    echo "Installing Tomcat..."
    sudo -E apt-get install -y tomcat7 || exit $?

    echo "Installing Unzip..."
    # -y added so this install cannot block on a confirmation prompt.
    sudo -E apt-get install -y unzip || exit $?

    echo "Setting up environment..."
    export JAVA_HOME=/usr/lib/jvm/java-8-oracle
    export HADOOP_HOME=/home/vagrant/hadoop-${HADOOP_VERSION}
    export ZOOKEEPER_HOME=/home/vagrant/zookeeper-${ZOOKEEPER_VERSION}
    export ZOO_LOG_DIR=${ZOOKEEPER_HOME}/logs/
    export ACCUMULO_HOME=/home/vagrant/accumulo-${ACCUMULO_VERSION}
    export PATHADD=$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin:$ACCUMULO_HOME/bin:$HADOOP_HOME/bin
    export PATH=$PATH:$PATHADD

    export HADOOP_PREFIX="$HADOOP_HOME"
    export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
    export ACCUMULO_LOG_DIR=$ACCUMULO_HOME/logs
    export ACCUMULO_TSERVER_OPTS="-Xmx384m -Xms384m "
    export ACCUMULO_MASTER_OPTS="-Xmx128m -Xms128m"
    export ACCUMULO_MONITOR_OPTS="-Xmx64m -Xms64m"
    export ACCUMULO_GC_OPTS="-Xmx64m -Xms64m"
    export ACCUMULO_GENERAL_OPTS="-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -Djava.net.preferIPv4Stack=true"
    export ACCUMULO_OTHER_OPTS="-Xmx128m -Xms64m"
    export ACCUMULO_KILL_CMD='kill -9 %p'
    # Shell environment includes Accumulo resources.
    # The values exported above are expanded now and written, single-quoted,
    # into a file that login shells source; only PATH stays unexpanded so it
    # extends whatever PATH the sourcing shell already has.
    ACCUMULO_RC=/home/vagrant/.accumulo_rc.sh
    cat > ${ACCUMULO_RC} <<EOF

export JAVA_HOME=\'$JAVA_HOME\'
export HADOOP_HOME=\'$HADOOP_HOME\'
export ZOOKEEPER_HOME=\'$ZOOKEEPER_HOME\'
export ZOO_LOG_DIR=\'$ZOO_LOG_DIR\'
export ACCUMULO_HOME=\'$ACCUMULO_HOME\'
export PATH=\\$PATH:$PATHADD

export HADOOP_PREFIX=\'$HADOOP_PREFIX\'
export HADOOP_CONF_DIR=\'$HADOOP_CONF_DIR\'
export ACCUMULO_LOG_DIR=\'$ACCUMULO_LOG_DIR\'
export ACCUMULO_TSERVER_OPTS=\'$ACCUMULO_TSERVER_OPTS\'
export ACCUMULO_MASTER_OPTS=\'$ACCUMULO_MASTER_OPTS\'
export ACCUMULO_MONITOR_OPTS=\'$ACCUMULO_MONITOR_OPTS\'
export ACCUMULO_GC_OPTS=\'$ACCUMULO_GC_OPTS\'
export ACCUMULO_GENERAL_OPTS=\'$ACCUMULO_GENERAL_OPTS\'
export ACCUMULO_OTHER_OPTS=\'$ACCUMULO_OTHER_OPTS\'
export ACCUMULO_KILL_CMD=\'$ACCUMULO_KILL_CMD\'

### command to list the 7 correct java processes: tomcat-catalina, zookeeper, and 5 Accumulo: tracer, master, monitor, tserver, gc.
function ryaps() { ps -ef | grep java | tr ' ' '\\n' | egrep '^org\\.apache|^tracer|^master|^monitor|^tserver|^gc' | sed '/\\.Main/ N ; s/\\n/ /' ; }
EOF
    source ${ACCUMULO_RC} || exit 151

    # include it at the beginning of both shell configuration files.
    # The grep guard keeps a re-provision from stacking up duplicate lines.
    for BASHRC in /home/vagrant/.bashrc /home/vagrant/.bash_profile ; do
      touch ${BASHRC}
      if ! grep --quiet --fixed-strings "source ${ACCUMULO_RC}" ${BASHRC} ; then
        cat - ${BASHRC} > ${BASHRC}.new <<EOF && mv ${BASHRC}.new ${BASHRC} || exit 152
source ${ACCUMULO_RC}
EOF
      fi
    done
    echo "Acquiring and Extracting ..."

    # Print all arguments on stderr.
    function echoerr() { printf "%s\n" "$*" >&2; }

    # Fetch a URL with curl; all arguments are passed straight to curl.
    # Returns non-zero on failure so that the caller's own exit code
    # (the trailing "exit NNN" at each call site) is the one reported.
    function download {
      ### curl --fail treat http status >= 400 as an error. --location follow redirects status>=300
      if ! curl --silent --show-error --fail --location "$@" ; then
        echoerr "--------------------------"
        echoerr "-"
        echoerr "- download failed" "$@"
        echoerr "-"
        echoerr "-" exiting ...
        echoerr "-"
        echoerr "--------------------------"
        return 1
      fi
    }

    ### wait for a directory to exist or 60 seconds timeout
    # Polls every 5 seconds; returns non-zero once the timeout is used up.
    function waitForDeploy {
      local waitfordir="$1"
      local timeout=60
      while [[ ! -d "$waitfordir" ]]
      do
        sleep 5
        timeout=$((timeout - 5))
        if [[ $timeout -le 0 ]]; then
          echo "Timeout waiting for war to deploy, $waitfordir still does not exist.";
          return 1
        fi
      done
    }

    # Each archive is only fetched when its install directory is missing,
    # so a re-provision does not download it again.
    echo "- Hadoop"
    hadoopUrl=http://apache.mirrors.tds.net/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
    if [[ ! -d ${HADOOP_HOME} ]] ; then
      echo "Downloading $hadoopUrl"
      download $hadoopUrl \
        | tar -zxC /home/vagrant || exit 101
    fi

    echo "- Zookeeper"
    zookeeperUrl=http://archive-primary.cloudera.com/cdh4/cdh/4/zookeeper-${ZOOKEEPER_VERSION}.tar.gz
    if [[ ! -d ${ZOOKEEPER_HOME} ]] ; then
      echo "Downloading $zookeeperUrl"
      download $zookeeperUrl \
        | tar -zxC /home/vagrant || exit 102
    fi

    echo "- Accumulo"
    accumuloUrl=http://apache.mirrors.pair.com/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz
    if [[ ! -d ${ACCUMULO_HOME} ]] ; then
      echo "Downloading $accumuloUrl"
      download $accumuloUrl \
        | tar -zxC /home/vagrant || exit 103
    fi

    echo "Configuring Zookeeper..."
    sudo mkdir --parents /var/zookeeper
    sudo chown vagrant:vagrant /var/zookeeper
    sudo cp ${ZOOKEEPER_HOME}/conf/zoo_sample.cfg ${ZOOKEEPER_HOME}/conf/zoo.cfg
    # Change the dataDir to /var/zookeeper. Backslashes are doubled here because
    # the enclosing Ruby heredoc consumes one level of escaping before sed sees them.
    sudo sed -i 's/^\\s*dataDir\\s*=.*$/dataDir=\\/var\\/zookeeper/' ${ZOOKEEPER_HOME}/conf/zoo.cfg || exit 104
    # Conflicts with Accumulo and maybe Zookeeper
    sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-api-1.7.10.jar
    sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar

    # Assure logs are creatable and writeable
    sudo mkdir --parents ${ZOO_LOG_DIR}
    sudo touch "${ZOO_LOG_DIR}/zookeeper.out"
    sudo chmod -R a+wX ${ZOO_LOG_DIR}

    echo "Running Zookeeper..."
    sudo -E ${ZOOKEEPER_HOME}/bin/zkServer.sh start

    echo "Configuring Accumulo..."
    cp ${ACCUMULO_HOME}/conf/examples/1GB/standalone/* ${ACCUMULO_HOME}/conf/
    # Replace the example site file with the one generated below.
    rm --force ${ACCUMULO_HOME}/conf/accumulo-site.xml
    cat >> ${ACCUMULO_HOME}/conf/accumulo-site.xml <<EOF
<configuration>
  <property><name>instance.dfs.uri</name><value>file:///</value></property>
  <property><name>instance.dfs.dir</name><value>/data/accumulo</value></property>
  <property><name>instance.zookeeper.host</name><value>localhost:2181</value></property>
  <property><name>instance.secret</name><value>DONTTELL</value></property>
  <property><name>tserver.port.search</name><value>true</value></property>
  <property><name>logger.dir.walog</name><value>/data/accumulo/walogs</value></property>
  <property><name>tserver.cache.data.size</name><value>15M</value></property>
  <property><name>tserver.cache.index.size</name><value>15M</value></property>
  <property><name>tserver.memory.maps.max</name><value>256M</value></property>
  <property><name>tserver.walog.max.size</name><value>256M</value></property>
  <property><name>tserver.memory.maps.native.enabled</name><value>false</value></property>
  <property><name>trace.token.property.password</name><value>root</value></property>
  <property><name>gc.cycle.delay</name><value>4s</value></property>
  <property><name>gc.cycle.start</name><value>0s</value></property>
  <property><name>tserver.compaction.major.delay</name><value>3</value></property>
  <property><name>general.classpaths</name><value>
    /data/accumulo/lib/[^.].*.jar,
    ${HADOOP_HOME}/share/hadoop/common/.*.jar,
    ${HADOOP_HOME}/share/hadoop/common/lib/.*.jar,
    ${HADOOP_HOME}/share/hadoop/hdfs/.*.jar,
    ${HADOOP_HOME}/share/hadoop/mapreduce/.*.jar,
    ${HADOOP_HOME}/share/hadoop/yarn/.*.jar,
    ${ACCUMULO_HOME}/server/target/classes/,
    ${ACCUMULO_HOME}/lib/accumulo-server.jar,
    ${ACCUMULO_HOME}/core/target/classes/,
    ${ACCUMULO_HOME}/lib/accumulo-core.jar,
    ${ACCUMULO_HOME}/start/target/classes/,
    ${ACCUMULO_HOME}/lib/accumulo-start.jar,
    ${ACCUMULO_HOME}/fate/target/classes/,
    ${ACCUMULO_HOME}/lib/accumulo-fate.jar,
    ${ACCUMULO_HOME}/proxy/target/classes/,
    ${ACCUMULO_HOME}/lib/accumulo-proxy.jar,
    ${ACCUMULO_HOME}/lib/[^.].*.jar,
    ${ZOOKEEPER_HOME}/zookeeper[^.].*.jar,
    $HADOOP_CONF_DIR,
    ${HADOOP_HOME}/[^.].*.jar,
    ${HADOOP_HOME}/lib/[^.].*.jar,
  </value></property>
  <property><name>general.dynamic.classpaths</name><value>/data/accumulo/lib/ext/[^.].*.jar</value></property>
  <property><name>trace.port.client</name><value>0</value></property>
  <property><name>monitor.port.client</name><value>0</value></property>
  <property><name>master.port.client</name><value>0</value></property>
  <property><name>tserver.port.client</name><value>0</value></property>
  <property><name>gc.port.client</name><value>0</value></property>
</configuration>
EOF
    cat > ${ACCUMULO_HOME}/conf/masters <<EOF
rya-example-box
EOF

    cat > ${ACCUMULO_HOME}/conf/slaves <<EOF
rya-example-box
EOF
    sudo mkdir --parents /data
    sudo chown vagrant:vagrant /data
    mkdir --parents /data/accumulo/lib/ext

    sudo chmod -R a+rwX ${ACCUMULO_HOME}/logs/

    echo "Starting Accumulo..."
    echo "Init will fail during a re-provision, but you can ignore it: 'FATAL: It appears the directories [...] were previously initialized.'"
    echo "Also, you may see an indefinitely repeating: 'Waiting for accumulo to be initialized' which means Accumulo won't start."
    echo "Either issue can be resolved by removing the directory: 'sudo rm -r /data/accumulo' then re-provision. Warning: this will erase all Rya/Accumulo data."
    # No exit on failure here: init is expected to fail on a re-provision.
    ${ACCUMULO_HOME}/bin/accumulo init --instance-name dev --password root
    ${ACCUMULO_HOME}/bin/start-all.sh || exit 107

    sudo chmod -R a+rwX ${ACCUMULO_HOME}/logs/

    echo 'Done!'

    echo "Installing Sesame Server"
    # creating log dir sesame-http-server-${SESAME_VERSION}
    sudo mkdir --parents /usr/share/tomcat7/.aduna
    sudo chown -R tomcat7:tomcat7 /usr/share/tomcat7
    sudo ln --force -s /usr/share/tomcat7/.aduna/openrdf-sesame/logs /var/log/tomcat7/openrdf-sesame
    sesamewar=/var/lib/tomcat7/webapps/openrdf-sesame.war
    if [[ ! -s $sesamewar ]] ; then
      echo "Downloading"
      # Maven Central is fetched over https.
      download --output $sesamewar https://repo1.maven.org/maven2/org/openrdf/sesame/sesame-http-server/${SESAME_VERSION}/sesame-http-server-${SESAME_VERSION}.war || exit 110
    fi
    echo "Sesame http server deployed at http://rya-example-box:8080/openrdf-sesame"

    echo "Installing Sesame Workbench"
    workbench=/var/lib/tomcat7/webapps/openrdf-workbench.war
    if [[ ! -s $workbench ]] ; then
      echo "Downloading"
      download --output $workbench https://repo1.maven.org/maven2/org/openrdf/sesame/sesame-http-workbench/${SESAME_VERSION}/sesame-http-workbench-${SESAME_VERSION}.war || exit 111
    fi
    echo "Sesame workbench deployed at http://rya-example-box:8080/openrdf-workbench"

    echo "Installing Rya"
    ryaIndexing=rya.indexing.example-${RYA_EXAMPLE_VERSION}-distribution
    if [[ ! -s ${ryaIndexing}.zip ]] ; then
      # Right now it's on dropbox, but eventually it'll be on maven...
      echo "Downloading quietly, this will take some minutes with no output..."
      download --output ${ryaIndexing}.zip https://dl.dropbox.com/s/7e74yiuq4jmu0od/${ryaIndexing}.zip?raw=1 || exit 112
    fi
    sudo mkdir --parents ${ryaIndexing}
    sudo unzip -q -o ${ryaIndexing}.zip -d ${ryaIndexing}

    # before continuing, wait for tomcat to deploy wars:
    waitForDeploy /var/lib/tomcat7/webapps/openrdf-workbench/WEB-INF/lib/ || exit 115
    waitForDeploy /var/lib/tomcat7/webapps/openrdf-sesame/WEB-INF/lib/ || exit 116

    # soft linking the files doesn't seem to work in tomcat, so we copy them instead :(
    sudo cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat7/webapps/openrdf-workbench/WEB-INF/lib/ || exit 113
    sudo cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat7/webapps/openrdf-sesame/WEB-INF/lib/ || exit 114

    # These are older libs that break tomcat 7
    sudo rm --force /var/lib/tomcat7/webapps/openrdf-workbench/WEB-INF/lib/servlet-api-2.5.jar
    sudo rm --force /var/lib/tomcat7/webapps/openrdf-workbench/WEB-INF/lib/jsp-api-2.1.jar
    sudo rm --force /var/lib/tomcat7/webapps/openrdf-sesame/WEB-INF/lib/servlet-api-2.5.jar
    sudo rm --force /var/lib/tomcat7/webapps/openrdf-sesame/WEB-INF/lib/jsp-api-2.1.jar

    sudo chown -R tomcat7:tomcat7 /var/lib/tomcat7/webapps/openrdf-workbench/WEB-INF/lib/
    sudo chown -R tomcat7:tomcat7 /var/lib/tomcat7/webapps/openrdf-sesame/WEB-INF/lib/

    echo "Downloading and installing new templates for OpenRdf WorkBench"
    ryaVagrant=rya.vagrant.example-${RYA_EXAMPLE_VERSION}
    if [[ ! -s ${ryaVagrant}.jar ]] ; then
      echo "Downloading"
      download --output ${ryaVagrant}.jar https://dl.dropbox.com/s/dgw63m66nubyy4z/${ryaVagrant}.jar?raw=1 || exit 120
    fi
    sudo mkdir --parents ${ryaVagrant}
    sudo unzip -q -o ${ryaVagrant}.jar -d ${ryaVagrant}
    sudo cp ${ryaVagrant}/*.xsl /var/lib/tomcat7/webapps/openrdf-workbench/transformations/
    sudo chown tomcat7:tomcat7 /var/lib/tomcat7/webapps/openrdf-workbench/transformations/*

    echo "Deploying Rya Web"
    ryaWar=web.rya-${RYA_EXAMPLE_VERSION}.war
    if [[ ! -s ${ryaWar} ]] ; then
      echo "Downloading"
      download https://dl.dropbox.com/s/332wr4b2f34dp6e/${ryaWar}?raw=1 --output ${ryaWar} || exit 121
    fi
    sudo cp ${ryaWar} /var/lib/tomcat7/webapps/web.rya.war
    # Wait for the war to deploy
    waitForDeploy /var/lib/tomcat7/webapps/web.rya/WEB-INF/classes/ || exit 122

    # These are older libs that break tomcat 7
    sudo rm --force /var/lib/tomcat7/webapps/web.rya/WEB-INF/lib/servlet-api-2.5*.jar
    sudo rm --force /var/lib/tomcat7/webapps/web.rya/WEB-INF/lib/jsp-api-2.1.jar

    echo "Modify Rya Web Config"
    cat > /var/lib/tomcat7/webapps/web.rya/WEB-INF/classes/environment.properties <<EOF
instance.name=dev
instance.zk=localhost:2181
instance.username=root
instance.password=root
rya.tableprefix=rya_
rya.displayqueryplan=true
EOF

    echo "Rya web deployed at http://rya-example-box:8080/web.rya/sparqlQuery.jsp"

    # restart tomcat
    sudo -E service tomcat7 restart

    echo "Finished and ready to use!"
    echo "You can re-apply these settings without losing data by running the command 'vagrant provision'"
  SHELL
end