Updates for starting MapReduce jobs from randomwalk (#49)
* Set additional MapReduce configuration and Hadoop username
* These settings work if running randomwalk on a user machine
or in Docker
diff --git a/Dockerfile b/Dockerfile
index 4038764..d543d8e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,6 +15,11 @@
FROM centos:7
+ARG HADOOP_HOME
+ARG HADOOP_USER_NAME
+ENV HADOOP_HOME ${HADOOP_HOME}
+ENV HADOOP_USER_NAME ${HADOOP_USER_NAME:-hadoop}
+
RUN yum install -y java-1.8.0-openjdk-devel
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk
diff --git a/README.md b/README.md
index d659f8d..1a758a1 100644
--- a/README.md
+++ b/README.md
@@ -50,10 +50,11 @@
* `conf/accumulo-testing.properties` - Copy this from the example file and configure it
* `target/accumulo-testing-2.0.0-SNAPSHOT-shaded.jar` - Can be created using `./bin/build`
- Run the following command to create the image:
+ Run the following command to create the image. `HADOOP_HOME` should be where Hadoop is installed on your cluster.
+ `HADOOP_USER_NAME` should match the user running Hadoop on your cluster.
```
- docker build -t accumulo-testing .
+ docker build --build-arg HADOOP_HOME=$HADOOP_HOME --build-arg HADOOP_USER_NAME=$(whoami) -t accumulo-testing .
```
2. The `accumulo-testing` image can run a single command:
diff --git a/bin/rwalk b/bin/rwalk
index 6bd7299..30cf0a7 100755
--- a/bin/rwalk
+++ b/bin/rwalk
@@ -34,7 +34,7 @@
. "$at_home"/conf/env.sh.example
fi
-export CLASSPATH="$TEST_JAR_PATH:$HADOOP_API_JAR:$HADOOP_RUNTIME_JAR:$HADOOP_CONF_DIR:$CLASSPATH"
+export CLASSPATH="$TEST_JAR_PATH:$HADOOP_API_JAR:$HADOOP_RUNTIME_JAR:$CLASSPATH"
randomwalk_main="org.apache.accumulo.testing.randomwalk.Framework"
diff --git a/conf/env.sh.example b/conf/env.sh.example
index a48451c..bd372c3 100644
--- a/conf/env.sh.example
+++ b/conf/env.sh.example
@@ -18,8 +18,6 @@
## Hadoop installation
export HADOOP_HOME="${HADOOP_HOME:-/path/to/hadoop}"
-## Hadoop configuration
-export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_HOME}/etc/hadoop}"
## Accumulo installation
export ACCUMULO_HOME="${ACCUMULO_HOME:-/path/to/accumulo}"
## Path to Accumulo client properties
diff --git a/src/main/docker/docker-entry b/src/main/docker/docker-entry
index 5a98525..6f23e00 100755
--- a/src/main/docker/docker-entry
+++ b/src/main/docker/docker-entry
@@ -36,6 +36,11 @@
exit 1
fi
+if [ -z "$HADOOP_HOME" ]; then
+ echo "HADOOP_HOME must be set!"
+ exit 1
+fi
+
case "$1" in
cingest|rwalk)
"${at_home}"/bin/"$1" "${@:2}"
diff --git a/src/main/java/org/apache/accumulo/testing/TestEnv.java b/src/main/java/org/apache/accumulo/testing/TestEnv.java
index e5ffa1a..601db3c 100644
--- a/src/main/java/org/apache/accumulo/testing/TestEnv.java
+++ b/src/main/java/org/apache/accumulo/testing/TestEnv.java
@@ -97,6 +97,13 @@
hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
hadoopConfig.set("mapreduce.framework.name", "yarn");
hadoopConfig.set("yarn.resourcemanager.hostname", getYarnResourceManager());
+ String hadoopHome = System.getenv("HADOOP_HOME");
+ if (hadoopHome == null) {
+ throw new IllegalArgumentException("HADOOP_HOME must be set in env");
+ }
+ hadoopConfig.set("yarn.app.mapreduce.am.env", "HADOOP_MAPRED_HOME=" + hadoopHome);
+ hadoopConfig.set("mapreduce.map.env", "HADOOP_MAPRED_HOME=" + hadoopHome);
+ hadoopConfig.set("mapreduce.reduce.env", "HADOOP_MAPRED_HOME=" + hadoopHome);
}
return hadoopConfig;
}