* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
// Language and IDE plugins used by this build.
apply plugin: "java"
apply plugin: "eclipse"
// TODO add idea module config.
apply plugin: "idea"
apply plugin: "scala"
// Shadow plugin; its artifact is put on the build-script classpath by the buildscript block.
apply plugin: 'com.github.johnrengelman.shadow'
// Build-script classpath: declares the Shadow plugin artifact (version 1.0.2) and
// resolves it from JCenter.
buildscript {
repositories { jcenter() }
dependencies {
classpath 'com.github.jengelman.gradle.plugins:shadow:1.0.2'
// Read the groupId and version properties from the "parent" bigtop project.
// It would be nicer to have a cleaner way of doing this. However, at this point
// we have to do this (or some variation thereof), since gradle projects can't have
// maven projects as parents (AFAIK; if there is a way to do it, it doesn't seem to
// be well-documented).
def setProjectProperties() {
// Parse the Maven POM two directories up. NOTE(review): the path is relative, so it
// presumably resolves against the working directory of the build process -- confirm.
Node xml = new XmlParser().parse("../../pom.xml")
// Take the text of the first <groupId> and <version> children of the POM root and
// assign them to `group` and `version`.
group = xml.groupId.first().value().first()
version = xml.version.first().value().first()
// The project description is an empty string.
description = """"""
// We are using 1.7 as gradle can't play well when java 8 and scala are combined.
// There is an open issue here:
// NOTE(review): the issue link that should follow the line above is missing.
// There is talk of this being resolved in the next version of gradle. Till then,
// we are stuck with java 7. But we do have scala if we want more syntactic sugar.
sourceCompatibility = 1.7
targetCompatibility = 1.7
// Specify any additional project properties.
// These version strings are interpolated into the coordinates in the dependencies block.
ext {
slf4jVersion = "1.7.5"
guavaVersion = "15.0"
datanucleusVersion = "3.2.2"
datanucleusJpaVersion = "3.2.1"
bonecpVersion = "0.8.0.RELEASE"
// NOTE(review): empty version string; it is interpolated into the derby dependency
// coordinate -- confirm the intended version.
derbyVersion = ""
// The hadoop and mahout artifacts come from the horton-works repo, which compiles
// mahout-core against hadoop2.x (this mahout build is compiled against 2.4.0).
// NOTE(review): both version strings are empty here -- confirm the intended versions.
hadoopVersion = ""
mahoutVersion = ""
// Additional Maven repository used for dependency resolution.
repositories {
maven {
// NOTE(review): the repository URL is an empty string -- confirm the intended address.
url ""
// Applies to every compile task: treat sources as UTF-8 and pass -Xlint:all to the compiler.
tasks.withType(AbstractCompile) {
options.encoding = 'UTF-8'
options.compilerArgs << "-Xlint:all"
// Applies to Scala compile tasks only.
tasks.withType(ScalaCompile) {
// Enables incremental compilation (by switching off the Ant-based compiler).
scalaCompileOptions.useAnt = false
// Logging for every Test task: report passed, skipped and failed test events.
tasks.withType(Test) {
testLogging {
// Uncomment this if you want to see the console output from the tests.
// showStandardStreams = true
events "passed", "skipped", "failed"
// show standard out and standard error of the test JVM(s) on the console
//showStandardStreams = true
// Configuration of the unit-test task.
test {
// NOTE(review): all four exclude patterns are "**/*", which matches every path, so as
// written nothing is left for this task to run -- confirm the intended patterns.
exclude "**/*", "**/*", "**/*", "**/*"
// Create a separate source-set for the src/integrationTest set of classes. The convention here
// is that gradle will look for a directory with the same name as that of the specified source-set
// under the 'src' directory. So, in this case, it will look for a directory named 'src/integrationTest'
// since the name of the source-set is 'integrationTest'
sourceSets {
main {
// The java source set is emptied and src/main/java is listed under the scala source
// dirs instead, so both directories belong to the scala source set (presumably to allow
// joint java/scala compilation -- confirm).
java.srcDirs = [];
scala.srcDirs = ["src/main/scala", "src/main/java"]
// The main and test source-sets are configured by both java and scala plugins. They contain
// all the src/main and src/test classes. The following statements make all of those classes
// available on the classpath for the integration-tests, for both java and scala.
integrationTest {
java {
compileClasspath += main.output + test.output
runtimeClasspath += main.output + test.output
scala {
compileClasspath += main.output + test.output
runtimeClasspath += main.output + test.output
// Creating a source-set automatically adds a couple of corresponding configurations (when java/scala
// plugins are applied). The convention for these configurations is <sourceSetName>Compile and
// <sourceSetName>Runtime. The following statements declare that all the dependencies from the
// testCompile configuration will now be available for integrationTestCompile, and all the
// dependencies (and other configuration that we might have provided) for testRuntime will be
// available for integrationTestRuntime. For ex. the testCompile configuration has a dependency on
// jUnit and scalatest. This makes them available for the integration tests as well.
configurations {
// Compile-time dependencies of the integration tests include everything in testCompile.
integrationTestCompile {
extendsFrom testCompile
// Runtime dependencies are the integration-test compile set plus everything in testRuntime.
integrationTestRuntime {
extendsFrom integrationTestCompile, testRuntime
// To see the API that is being used here, consult the following docs
// Forces the version of a single dependency while it is being resolved.
//
// dependencyDetails: the per-dependency details object handed to
//                    resolutionStrategy.eachDependency; it must expose `requested`
//                    (with `group` and `name`) and `useVersion`.
// dependencyString:  the override, written as "group:name:version".
//
// Throws IllegalArgumentException when dependencyString has fewer than three
// colon-separated parts.
def updateDependencyVersion(dependencyDetails, dependencyString) {
    def parts = dependencyString.split(':')
    if (parts.length < 3) {
        throw new IllegalArgumentException(
                "Expected 'group:name:version' but got '${dependencyString}'")
    }
    def group = parts[0]
    def name = parts[1]
    def version = parts[2]
    // Only the dependency whose requested coordinates match the override is touched;
    // every other dependency keeps the version it asked for.
    if (dependencyDetails.requested.group == group
            && dependencyDetails.requested.name == name) {
        dependencyDetails.useVersion version
    }
}
// Dependency-version overrides for the "pig" integration-test profile.
// dependencyResolveDetails is forwarded unchanged to updateDependencyVersion.
def setupPigIntegrationTestDependencyVersions(dependencyResolveDetails) {
// This is the way we override the dependencies.
// Here joda-time:joda-time is requested at version 2.2.
updateDependencyVersion dependencyResolveDetails, "joda-time:joda-time:2.2"
// Hook for the "crunch" integration-test profile; no overrides are defined in it here.
def setupCrunchIntegrationTestDependencyVersions(dependencyResolveDetails) {
// Specify any dependencies that you want to override for crunch integration tests.
// Hook for the "mahout" integration-test profile; no overrides are defined in it here.
def setupMahoutIntegrationTestDependencyVersions(dependencyResolveDetails) {
// Specify any dependencies that you want to override for mahout integration tests.
// Runs the classes of the integrationTest source-set after the unit tests.
// The tests to run are chosen with the project property ITProfile
// (e.g. -PITProfile=pig); valid values are "pig", "crunch" and "mahout".
// With no profile, or an unknown one, the task is skipped.
task integrationTest(type: Test, dependsOn: test) {
    testClassesDir = sourceSets.integrationTest.output.classesDir
    classpath = sourceSets.integrationTest.runtimeClasspath

    if (!project.hasProperty('ITProfile')) {
        // skip integration-tests if no profile has been specified.
        integrationTest.onlyIf { false }
        // Stop configuring here: project.ITProfile cannot be read when it is not set.
        return
    }

    def patternsToInclude
    def dependencyConfigClosure
    def skipDependencyUpdates = false

    // Select the pattern for test classes that should be executed, and the dependency
    // configuration function to be called based on the profile name specified at the command line.
    // Every case ends with `break`, so a profile never falls through into the next one
    // (and finally into `default`, which would disable the task).
    switch (project.ITProfile) {
        case "pig":
            patternsToInclude = "*PigIT*"
            dependencyConfigClosure = { setupPigIntegrationTestDependencyVersions(it) }
            //In pig integration tests, the custom pig script seems to have high
            //memory requirements.
            minHeapSize = "1000m"
            maxHeapSize = "4000m"
            // set JVM arguments for the test JVM(s)
            jvmArgs '-XX:MaxPermSize=256m'
            break
        case "crunch":
            patternsToInclude = "*CrunchIT*"
            dependencyConfigClosure = { setupCrunchIntegrationTestDependencyVersions(it) }
            break
        case "mahout":
            patternsToInclude = "*MahoutIT*"
            dependencyConfigClosure = { setupMahoutIntegrationTestDependencyVersions(it) }
            break
        // skip integration-tests if the passed in profile-name is not valid
        default:
            integrationTest.onlyIf { false }
            return
    }

    filter { includeTestsMatching patternsToInclude }

    // This is the standard way gradle allows overriding each specific dependency.
    // see: the Gradle DSL documentation for ResolutionStrategy.
    project.configurations.all {
        resolutionStrategy {
            eachDependency { details ->
                // Hand every dependency to the override function of the selected profile.
                if (!skipDependencyUpdates) {
                    dependencyConfigClosure(details)
                }
            }
        }
    }
}
// Compile-time and test dependencies of the project. Versions that are shared or likely
// to change are taken from the `ext` project properties.
dependencies {
    compile "org.kohsuke:graphviz-api:1.0"
    compile "org.apache.crunch:crunch-core:0.9.0-hadoop2"
    compile "com.jolbox:bonecp:${project.bonecpVersion}"
    compile "org.apache.derby:derby:${project.derbyVersion}"
    // The coordinate needs group and name as well as the version; a bare version string
    // is not a resolvable dependency notation.
    compile "com.google.guava:guava:${project.guavaVersion}"
    compile "commons-lang:commons-lang:2.6"
    compile "joda-time:joda-time:2.3"
    compile "org.apache.commons:commons-lang3:3.1"
    // NOTE(review): empty dependency notation -- the intended coordinate cannot be
    // determined from this file; confirm and fill in.
    compile ""
    compile "commons-logging:commons-logging:1.1.3"
    // NOTE(review): "+" selects whatever the latest version is at resolution time, so
    // builds are not reproducible for the next three artifacts -- consider pinning.
    compile "com.thoughtworks.xstream:xstream:+"
    compile "org.apache.lucene:lucene-core:+"
    compile "org.apache.lucene:lucene-analyzers-common:+"
    compile "org.apache.solr:solr-commons-csv:3.5.0"
    compile group: "org.apache.pig", name: "pig", version: "0.12.0", classifier:"h2"
    compile "dk.brics.automaton:automaton:1.11-8"
    compile "org.slf4j:slf4j-api:${project.slf4jVersion}"
    compile "log4j:log4j:1.2.12"
    compile "org.slf4j:slf4j-log4j12:${project.slf4jVersion}"
    compile "org.datanucleus:datanucleus-core:${project.datanucleusVersion}"
    compile "org.datanucleus:datanucleus-rdbms:${project.datanucleusJpaVersion}"
    compile "org.datanucleus:datanucleus-api-jdo:${project.datanucleusJpaVersion}"
    compile "org.datanucleus:datanucleus-accessplatform-jdo-rdbms:${project.datanucleusJpaVersion}"
    compile group: "org.apache.mrunit", name: "mrunit", version: "1.0.0", classifier:"hadoop2"
    compile "org.jfairy:jfairy:0.2.4"
    // from horton-works repo. They compile mahout-core against hadoop2.x
    compile "org.apache.hadoop:hadoop-client:${project.hadoopVersion}"
    compile "org.apache.mahout:mahout-core:${project.mahoutVersion}"
    compile 'org.scala-lang:scala-library:2.11.0'
    testCompile "junit:junit:4.11"
    testCompile "org.hamcrest:hamcrest-all:1.3"
    testCompile "org.scalatest:scalatest_2.11:2.1.7"
}
configurations {
// Configuration whose files copyDependencyJarsForHadoopCluster copies into build/libs.
hadoopClusterRuntime {
// extendsFrom integrationTestRuntime
// When the 'for-cluster' project property is set, the four artifacts below are excluded --
// presumably because the cluster already provides them; confirm.
if(project.hasProperty('for-cluster')) {
// Each rule is a map of closures coerced to ExcludeRule, so getGroup() and getModule()
// return the given strings.
excludeRules += [getGroup: { 'org.apache.crunch' }, getModule: { 'crunch-core' } ] as ExcludeRule
excludeRules += [getGroup: { 'org.apache.pig' }, getModule: { 'pig' } ] as ExcludeRule
excludeRules += [getGroup: { 'org.apache.mahout' }, getModule: { 'mahout-core' } ] as ExcludeRule
excludeRules += [getGroup: { 'org.apache.hadoop' }, getModule: { 'hadoop-client' } ] as ExcludeRule
// Prints every file of the `shadow` configuration, one per line.
task listJars {
    doLast {
        // A bare `println` is a property reference rather than a call, so the current
        // file is passed explicitly.
        configurations.shadow.each { println it }
    }
}
// Copies every file of the hadoopClusterRuntime configuration into build/libs.
def copyDependencyJarsForHadoopCluster() {
copy {
from configurations.hadoopClusterRuntime
into 'build/libs'
// Adds an action that runs at the end of the build task.
build {
doLast {
// NOTE(review): no statements are visible inside this doLast -- presumably it should
// call copyDependencyJarsForHadoopCluster(); confirm.
// Settings for the project files generated by the eclipse plugin.
eclipse {
classpath {
// Add the dependencies and the src dirs for the integrationTest source-set to the
// .classpath file that will be generated by the eclipse plugin.
plusConfigurations += [configurations.integrationTestCompile]
// Comment out the following two lines if you want to generate an eclipse project quickly.
// As set here, source jars are downloaded and javadoc jars are not.
downloadSources = true
downloadJavadoc = false
// shadowJar can merge files whose paths differ only in case into a single jar,
// which causes problems on case-insensitive platforms such as Mac OS X.
// The following function removes one of each pair of duplicated paths.
shadowJar {