// Gradle build script for the datafu-pig artifact (see archivesBaseName below).
// NOTE(review): the closing braces of the buildscript/repositories/plugins blocks are not
// visible in this section — confirm the block structure against the full file.
buildscript {
repositories {
plugins {
// NOTE(review): the plugin id is an empty string — confirm which plugin version "3.3.0" belongs to.
id "" version "3.3.0"
// Apply the Java plugin and the datafu.autojar.GradleAutojarPlugin class.
apply plugin: 'java'
apply plugin: datafu.autojar.GradleAutojarPlugin
// Base name for the archives produced by this project.
archivesBaseName = 'datafu-pig'
import groovy.xml.MarkupBuilder
// The autojarred configuration lists every JAR that will be bundled into
// the final JAR via autojar.
configurations {
compile {
// compile inherits everything declared under autojarred.
extendsFrom autojarred
// Customizes the properties of the Eclipse JDT settings file.
eclipse {
jdt {
file {
withProperties { properties ->
// Enable annotation processing, which the tests use to get multi-line strings.
properties.setProperty("org.eclipse.jdt.core.compiler.processAnnotations", "enabled")
// The build plugin must be assembled for Eclipse because the tests use the annotation processor defined there.
// More annotation processor setup: before the eclipseJdt task runs, write a
// .factorypath XML file containing a single EXTJAR factorypathentry.
eclipseJdt {
doFirst {
def f = file(".factorypath")
// NOTE(review): no close() of this FileWriter is visible in this section — confirm the
// writer is closed so the XML is flushed to disk and the file handle is released.
def w = new FileWriter(f)
def xml = new MarkupBuilder(w)
xml."factorypath"() {
"factorypathentry" (
kind: "EXTJAR",
// NOTE(review): this find closure looks truncated (it opens with '{' but ends with ')');
// presumably it selects the build-plugin jar from testCompile — verify.
id: configurations.testCompile.find {"build-plugin")
enabled: true,
runInBatchMode: false
// After cleanEclipse runs, also delete the paths listed below, including the
// .factorypath file written for eclipseJdt.
cleanEclipse {
doLast {
delete ".apt_generated"
delete ".settings"
delete ".factorypath"
delete "bin"
// The initial jar contains only datafu's main classes, without the dependencies.
// It is not the jar that gets published.
// NOTE(review): the enclosing block header is not visible in this section — presumably this is the jar task configuration; confirm.
classifier = "core"
// Point autojarBuildDir at the jar task's destination directory.
autojarBuildDir = tasks.jar.destinationDir
// Autojar task that builds a jar containing the dependencies under their original package names.
task jarWithDependencies(type: Autojar) {
description 'Creates a jar that includes the dependencies (under their own namespaces)'
// NOTE(review): the list assigned to autojarFiles is cut off in this section — confirm its contents.
autojarFiles = [
// Dependencies are taken from the autojarred configuration.
targetConfiguration = configurations.autojarred
// NOTE(review): the meaning of the '-baeq' flags is not shown here — check the autojar documentation.
autojarExtra = '-baeq'
// Output path for the namespaced jar: the jar task's archive path with "-core" replaced by "-jarjar".
def outputFile = file(tasks.jar.getArchivePath().absoluteFile.toString().replace("-core","-jarjar"))
// Runs the jarjar Ant task over the autojar output and writes the result to outputFile,
// applying the package rename rules listed below.
task jarWithDependenciesNamespaced(dependsOn: jarWithDependencies) {
description 'Creates the jar that includes dependencies (under a datafu namespace)'
doLast {
project.ant {
// Register the jarjar Ant task using the classpath of the jarjar configuration.
taskdef name: "jarjar", classname: "com.tonicsystems.jarjar.JarJarTask", classpath: configurations.jarjar.asPath
jarjar(jarfile: outputFile, filesetmanifest: "merge") {
// Input is the jar produced by jarWithDependencies.
zipfileset(src: tasks.jarWithDependencies.autojarOutput)
// NOTE(review): the next four rules have empty result strings, and two of them share the
// catch-all pattern "**" — these look truncated; confirm the intended patterns and targets.
rule pattern: "it.unimi.dsi.fastutil.**", result: ""
rule pattern: "org.apache.commons.math.**", result: ""
rule pattern: "**", result: ""
rule pattern: "**", result: ""
// Classes under opennlp.** are renamed to datafu.opennlp.<matched suffix>.
rule pattern: "opennlp.**", result: "datafu.opennlp.@1"
// Final jar task; depends on the namespaced jar having been built.
task finalJar(type: Jar, dependsOn: jarWithDependenciesNamespaced) {
description 'Creates the final jar'
// Do not publish the core archive, because it does not contain the dependencies.
configurations.archives.artifacts.removeAll { return it.classifier == "core"; }
// Register finalJar as an artifact of the archives configuration.
artifacts {
archives finalJar
// Dependency declarations, grouped by configuration: autojarred (bundled),
// jarjar (build tooling), compile, and testCompile.
dependencies {
// Dependencies that are packaged into the jar using autojar.
// Autojar includes only the classes that are actually needed.
autojarred "it.unimi.dsi:fastutil:$fastutilVersion"
autojarred "org.apache.commons:commons-math:$commonsMathVersion"
// NOTE(review): the next two coordinates consist only of a version variable — the
// group:artifact part appears to be missing; confirm the intended coordinates.
autojarred "$streamVersion"
autojarred "$guavaVersion"
autojarred "org.apache.opennlp:opennlp-tools:$openNlpVersion"
autojarred "org.apache.opennlp:opennlp-uima:$openNlpVersion"
autojarred "org.apache.opennlp:opennlp-maxent:$openNlpMaxEntVersion"
// Needed to run jarjar.
jarjar "com.googlecode.jarjar:jarjar:1.3"
// Not included in autojar because it is already a pig dependency and so
// should be available.
compile "joda-time:joda-time:$jodaTimeVersion"
// Test-only dependencies.
testCompile "org.apache.pig:pigunit:$pigVersion"
testCompile "log4j:log4j:$log4jVersion"
testCompile "jline:jline:$jlineVersion"
testCompile "org.antlr:antlr:$antlrVersion"
testCompile "commons-io:commons-io:$commonsIoVersion"
testCompile "org.testng:testng:$testngVersion"
// The sibling build-plugin project is on the test compile classpath.
testCompile project(":build-plugin")
// Customizes the POM's dependencies section via modifyPom.
modifyPom {
project {
dependencies {
// Intentionally empty: everything we need is autojarred.
// The only exception is joda-time, but it is already a pig dependency,
// so it should already be available.
// Select the pig and hadoop compile dependencies based on hadoopVersion:
// versions starting with "2." or "0.23." use the pig artifact with the "h2"
// classifier and the split hadoop modules; all other versions use hadoop-core.
if (hadoopVersion.startsWith("2.") || hadoopVersion.startsWith("0.23.")) {
dependencies {
// Needed for compilation only, so there is no need to autojar these.
compile "org.apache.pig:pig:$pigVersion:h2"
compile "org.apache.hadoop:hadoop-common:$hadoopVersion"
compile "org.apache.hadoop:hadoop-hdfs:$hadoopVersion"
compile "org.apache.hadoop:hadoop-mapreduce-client-jobclient:$hadoopVersion"
} else {
dependencies {
// Needed for compilation only, so there is no need to autojar these.
compile "org.apache.pig:pig:$pigVersion"
compile "org.apache.hadoop:hadoop-core:$hadoopVersion"
// Just before test sources are compiled, configure javac to run the MultilineProcessor annotation processor.
compileTestJava.doFirst {
// This assignment replaces any compilerArgs configured earlier rather than appending to them.
options.compilerArgs = ['-processor', 'org.adrianwalker.multilinestring.MultilineProcessor']
// OpenNLP models used for testing. These are not shipped with datafu.
// Each download block writes one model file under data/ with onlyIfNewer enabled.
task downloadOpenNlpModels {
doLast {
// NOTE(review): src is an empty string in all three download blocks — confirm the model URLs.
download {
src ''
dest file('data/en-pos-maxent.bin')
onlyIfNewer true
download {
src ''
dest file('data/en-sent.bin')
onlyIfNewer true
download {
src ''
dest file('data/en-token.bin')
onlyIfNewer true
// Download the models so that tests can also be run from Eclipse or TestNG.
// NOTE(review): the statement this comment referred to is not visible in this section.
test {
// Enable TestNG support (the default is JUnit).
// NOTE(review): no useTestNG() call is visible in this section — confirm it is present in the full file.
testLogging {
// Show the tests' standard output and error streams in the build output.
showStandardStreams true
// Expose the jar output directory to the tests as the datafu.jar.dir system property.
systemProperty 'datafu.jar.dir', file('build/libs')
// NOTE(review): the system property name is an empty string — confirm the intended name for the data directory.
systemProperty '', file('data')
maxHeapSize = "2G"