// Much of this file is a variation on the Apache Samza build.gradle file
// Build bootstrap: buildscript repositories, build plugins, the Scala-version-specific
// dependency versions file, and the Scala plugin.
// NOTE(review): no closing braces are visible for the blocks opened below — confirm the
// intended nesting of buildscript / repositories / plugins.
buildscript {
repositories {
plugins {
// NOTE(review): the plugin id is an empty string. The `Download` task type and the
// `download { ... }` calls used further down presumably come from this plugin — confirm the id.
id "" version "3.4.3"
// Applies gradle/dependency-versions-scala-<scalaVersion>.gradle. scalaVersion is not set in the
// visible part of this file, so it must already be defined when this line is evaluated.
apply from: file("gradle/dependency-versions-scala-" + scalaVersion + ".gradle")
apply plugin: 'scala'
// Settings applied through allprojects: Scala compiler options and the archive base name.
allprojects {
// For every Scala compilation task, add the extra compiler options taken from the
// version-specific dependency-versions-scala file applied above.
tasks.withType(ScalaCompile) {
// scalaOptions is wrapped in a single-element list before being assigned.
scalaCompileOptions.additionalParameters = [ scalaOptions ]
// Archive base name has the form datafu-spark_<scalaVersion>_<sparkVersion>.
archivesBaseName = 'datafu-spark_' + scalaVersion + '_' + sparkVersion
// Extends the cleanEclipse task: after its own actions have run (doLast), delete the
// additional paths listed below (presumably IDE-generated files and output — confirm).
cleanEclipse {
doLast {
delete ".apt_generated"
delete ".settings"
delete ".factorypath"
delete "bin"
// Dependencies that do not depend on the Hadoop version: the Scala library for compilation,
// plus spark-testing-base and scalatest for tests.
dependencies {
compile "org.scala-lang:scala-library:$scalaLibVersion"
// Resolves to spark-testing-base_<scalaVersion>:<sparkVersion>_<sparkTestingBaseVersion>.
testCompile "com.holdenkarau:spark-testing-base_" + scalaVersion + ":" + sparkVersion + "_" + sparkTestingBaseVersion
// NOTE(review): the scalatest artifact version is taken from sparkVersion, not from a
// scalatest-specific version property — confirm this is intentional.
testCompile "org.scalatest:scalatest_" + scalaVersion + ":" + sparkVersion
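// TODO: we still need to set up the build for hadoop 3. For now, hadoop versions starting
// with "2." get the split hadoop artifacts plus spark; any other version falls back to hadoop-core.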
// Hadoop-version-dependent compile dependencies, selected by the prefix of hadoopVersion.
if (hadoopVersion.startsWith("2.")) {
// Versions starting with "2.": hadoop-common, hadoop-hdfs and the mapreduce job client,
// plus spark-core and spark-hive for the configured Scala/Spark versions.
dependencies {
compile "org.apache.hadoop:hadoop-common:$hadoopVersion"
compile "org.apache.hadoop:hadoop-hdfs:$hadoopVersion"
compile "org.apache.hadoop:hadoop-mapreduce-client-jobclient:$hadoopVersion"
compile "org.apache.spark:spark-core_" + scalaVersion + ":" + sparkVersion
compile "org.apache.spark:spark-hive_" + scalaVersion + ":" + sparkVersion
} else {
// Any other version: only hadoop-core is added.
// NOTE(review): no spark-core / spark-hive dependency is declared on this branch — confirm.
dependencies {
compile "org.apache.hadoop:hadoop-core:$hadoopVersion"
// Locations used by the PySpark tasks below.
// sparkFile is a File for the downloaded zip: build/spark-zips/spark-<sparkVersion>.zip
project.ext.sparkFile = file("build/spark-zips/spark-" + sparkVersion + ".zip")
// sparkUnzipped is a path String (not a File): build/spark-unzipped/spark-<sparkVersion>
project.ext.sparkUnzipped = "build/spark-unzipped/spark-" + sparkVersion
// Download pyspark for testing. This is not shipped with datafu-spark.
// The archive is written to project.sparkFile.
task downloadPySpark (type: Download) {
// NOTE(review): the URL prefix is an empty string here, so the source evaluates to
// "<sparkVersion>.zip" — confirm the intended download location.
src '' + sparkVersion + '.zip'
dest project.sparkFile
onlyIfNewer true
// Skip the whole task when the zip is already present. Because of this guard the task never
// runs against an existing file, so onlyIfNewer above presumably never takes effect — confirm.
downloadPySpark.onlyIf {
! project.sparkFile.exists()
// Extract the zip produced by downloadPySpark into build/spark-unzipped/.
task unzipPySpark(dependsOn: downloadPySpark, type: Copy) {
from zipTree(downloadPySpark.dest)
into file("build/spark-unzipped/")
// Skip extraction when the path in project.sparkUnzipped already exists.
unzipPySpark.onlyIf {
! file(project.sparkUnzipped).exists()
// Repackage the pyspark/ tree found under <sparkUnzipped>/python/ into
// data/pysparks/pyspark-<sparkVersion>.zip.
task zipPySpark(dependsOn: unzipPySpark, type: Zip) {
archiveName = "pyspark-" + sparkVersion + ".zip"
// Only files under pyspark/ (relative to the `from` directory) are included.
include "pyspark/**/*"
destinationDir = file("data/pysparks/")
from file(project.sparkUnzipped + "/python/")
// Skip when the target archive already exists. This path repeats destinationDir and
// archiveName from above, so the three must be kept in sync.
zipPySpark.onlyIf {
! file("data/pysparks/pyspark-" + sparkVersion + ".zip").exists()
// Download py4j for testing. This is not shipped with datafu-spark.
// Map used by downloadPy4js: each key is the file name written under data/py4js/ and each
// value is the download source.
// NOTE(review): all keys and values are empty strings here, and no closing bracket is
// visible — confirm the intended entries.
project.ext.py4js = [
"" : "",
"" : "",
"" : "",
"" : ""
// Downloads every entry of py4js into data/py4js/<key> when the task executes (doLast).
task downloadPy4js {
doLast {
for (s in py4js) {
download {
src s.value
dest file("data/py4js/" + s.key)
// Skip when data/py4js already exists. Only the existence of that path is checked,
// not whether each individual file is present.
downloadPy4js.onlyIf {
! file("data/py4js").exists()
// The downloads of pyspark and py4j must succeed in order to test the Scala Python bridge in Eclipse or Gradle.
// NOTE(review): no dependsOn on the download tasks is visible in this block — confirm how they are triggered.
test {
// System properties passed to the test run.
systemProperty 'datafu.jar.dir', file('build/libs')
// NOTE(review): the property name is an empty string; its value is the data directory — confirm the intended name.
systemProperty '', file('data')
systemProperty 'datafu.spark.version', sparkVersion
// Maximum heap size for the test run.
maxHeapSize = "2G"