Cleaned up dependency handling.
* Removed Spark and Hadoop from the shaded jar; these are assumed to be provided by the Spark runtime environment.
* Included the versions of Accumulo and Fluo installed on the system in the shaded jar.
* Use the Maven copy-dependencies goal instead of dependency:get to avoid duplicating dependency coordinates in the script.
* Cleaned up some duplication in the shaded jar by excluding transitive deps.
Fixes #91
Fixes #93
diff --git a/bin/impl/base.sh b/bin/impl/base.sh
index 5117ca5..6aec7a7 100755
--- a/bin/impl/base.sh
+++ b/bin/impl/base.sh
@@ -54,7 +54,7 @@
echo "Building $WI_DATA_DEP_JAR"
cd $WI_HOME
- : ${SPARK_VERSION?"SPARK_VERSION must be set in bash env or conf/webindex-env.sh"}
- : ${HADOOP_VERSION?"HADOOP_VERSION must be set in bash env or conf/webindex-env.sh"}
- mvn clean package -Pcreate-shade-jar -DskipTests -Dspark.version=$SPARK_VERSION -Dhadoop.version=$HADOOP_VERSION
+ : ${ACCUMULO_VERSION?"ACCUMULO_VERSION must be set in bash env or conf/webindex-env.sh"}
+ : ${FLUO_VERSION?"FLUO_VERSION must be set in bash env or conf/webindex-env.sh"}
+ mvn clean package -Pcreate-shade-jar -DskipTests -Dfluo.version=$FLUO_VERSION -Daccumulo.version=$ACCUMULO_VERSION
fi
diff --git a/bin/impl/init.sh b/bin/impl/init.sh
index e9a854b..ce28a1e 100755
--- a/bin/impl/init.sh
+++ b/bin/impl/init.sh
@@ -45,19 +45,11 @@
FLUO_APP_LIB=$FLUO_APP_HOME/lib
cp $WI_DATA_JAR $FLUO_APP_LIB
-mvn dependency:get -Dartifact=org.apache.fluo:fluo-recipes-core:1.0.0-incubating-SNAPSHOT:jar -Ddest=$FLUO_APP_LIB
-mvn dependency:get -Dartifact=org.apache.fluo:fluo-recipes-kryo:1.0.0-incubating-SNAPSHOT:jar -Ddest=$FLUO_APP_LIB
-mvn dependency:get -Dartifact=org.apache.fluo:fluo-recipes-accumulo:1.0.0-incubating-SNAPSHOT:jar -Ddest=$FLUO_APP_LIB
-# Add kryo and its dependencies
-mvn dependency:get -Dartifact=com.esotericsoftware:kryo:3.0.3:jar -Ddest=$FLUO_APP_LIB
-mvn dependency:get -Dartifact=com.esotericsoftware:minlog:1.3.0:jar -Ddest=$FLUO_APP_LIB
-mvn dependency:get -Dartifact=com.esotericsoftware:reflectasm:1.10.1:jar -Ddest=$FLUO_APP_LIB
-mvn dependency:get -Dartifact=org.objenesis:objenesis:2.1:jar -Ddest=$FLUO_APP_LIB
+mvn package -Pcopy-dependencies -DskipTests -DoutputDirectory=$FLUO_APP_LIB
# Add webindex core and its dependencies
cp $WI_HOME/modules/core/target/webindex-core-$WI_VERSION.jar $FLUO_APP_LIB
-mvn dependency:get -Dartifact=commons-validator:commons-validator:1.4.1:jar -Ddest=$FLUO_APP_LIB
-java -cp $WI_DATA_DEP_JAR webindex.data.Configure $WI_CONFIG
+$FLUO_CMD exec $FLUO_APP webindex.data.Configure $WI_CONFIG
$FLUO_CMD init $FLUO_APP --force
diff --git a/conf/webindex-env.sh.example b/conf/webindex-env.sh.example
index 74791b3..e7d7d93 100644
--- a/conf/webindex-env.sh.example
+++ b/conf/webindex-env.sh.example
@@ -20,18 +20,19 @@
# ======
test -z "$HADOOP_PREFIX" && export HADOOP_PREFIX=/path/to/hadoop
test -z "$HADOOP_CONF_DIR" && export HADOOP_CONF_DIR=/path/to/hadoop/etc/hadoop
-# Set to version of Hadoop installed on system
-export HADOOP_VERSION=2.7.2
# Fluo
# ====
test -z "$FLUO_HOME" && export FLUO_HOME=/path/to/fluo
+# Set the Accumulo and Fluo versions that should be included in the shaded jar created for Spark.
+export FLUO_VERSION=`$FLUO_HOME/bin/fluo version`
+export ACCUMULO_VERSION=`accumulo version`
+
# Spark
# =====
test -z "$SPARK_HOME" && export SPARK_HOME=/path/to/spark
-# Set to version of Spark installed on system
-export SPARK_VERSION=1.6.2
+
# Number of Spark executor instances
export WI_EXECUTOR_INSTANCES=2
# Amount of memory given to each Spark executor
diff --git a/modules/data/pom.xml b/modules/data/pom.xml
index df01114..67abec8 100644
--- a/modules/data/pom.xml
+++ b/modules/data/pom.xml
@@ -81,24 +81,14 @@
<artifactId>fluo-recipes-spark</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <exclusions>
- <exclusion>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_2.10</artifactId>
- </dependency>
- <dependency>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
<exclusions>
<exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
@@ -120,6 +110,23 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
+ <!-- Provided scope is used so Hadoop and Spark do not end up in the shaded jar used for Spark. It is assumed the Spark runtime environment will provide these. -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_2.10</artifactId>
+ <scope>provided</scope>
+ </dependency>
<!-- Test Dependencies -->
<dependency>
<groupId>junit</groupId>
@@ -145,6 +152,30 @@
</dependencies>
<profiles>
<profile>
+ <id>copy-dependencies</id>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>2.10</version>
+ <executions>
+ <execution>
+ <id>copy</id>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <phase>package</phase>
+ <configuration>
+ <!-- Define the specific dependencies to copy into the Fluo application dir -->
+ <includeArtifactIds>fluo-recipes-core,fluo-recipes-accumulo,fluo-recipes-kryo,kryo,minlog,reflectasm,objenesis,commons-validator,yamlbeans</includeArtifactIds>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ <profile>
<id>create-shade-jar</id>
<build>
<plugins>
diff --git a/pom.xml b/pom.xml
index c53ed1e..d550a59 100644
--- a/pom.xml
+++ b/pom.xml
@@ -195,7 +195,7 @@
<dependency>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
- <version>1.1.2</version>
+ <version>1.1.7</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>