/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import groovy.json.JsonOutput
import java.util.stream.Collectors
plugins { id 'org.apache.beam.module' }
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.hadoop.format',
)
provideIntegrationTestingDependencies()
enableJavaPerformanceTesting()
description = "Apache Beam :: SDKs :: Java :: IO :: Hadoop Format"
ext.summary = "IO to read data from sources and to write data to sinks that implement Hadoop MapReduce Format."
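// Hadoop versions exercised by the cross-version test tasks defined below.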
def hadoopVersions = [
"285": "2.8.5",
"292": "2.9.2",
"2102": "2.10.2",
"324": "3.2.4",
]
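// Create a dedicated dependency configuration per Hadoop version, e.g. "hadoopVersion285".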
hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
def elastic_search_version = "7.12.0"
// Ban dependencies from the test runtime classpath
configurations.testRuntimeClasspath {
// Prevent a StackOverflow because of wiring LOG4J -> SLF4J -> LOG4J
exclude group: "org.slf4j", module: "log4j-over-slf4j"
// Force the old version of JAMM that Cassandra relies on
resolutionStrategy.force 'com.github.jbellis:jamm:0.3.0'
}
dependencies {
implementation project(path: ":sdks:java:core", configuration: "shadow")
implementation library.java.vendored_guava_32_1_2_jre
implementation library.java.slf4j_api
implementation project(":sdks:java:io:hadoop-common")
implementation library.java.joda_time
provided library.java.hadoop_common
provided library.java.hadoop_hdfs
permitUnusedDeclared library.java.hadoop_hdfs
provided library.java.hadoop_hdfs_client
provided library.java.hadoop_mapreduce_client_core
testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
testImplementation project(":sdks:java:io:jdbc")
testImplementation "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version"
testImplementation library.java.testcontainers_elasticsearch
testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:$elastic_search_version"
testImplementation "org.elasticsearch:elasticsearch:$elastic_search_version"
testImplementation ("org.elasticsearch:elasticsearch-hadoop:$elastic_search_version") {
// TODO(https://issues.apache.org/jira/browse/BEAM-3715)
// These are all optional deps of elasticsearch-hadoop. Why do they have to be excluded?
exclude group: "cascading", module: "cascading-local"
exclude group: "cascading", module: "cascading-hadoop"
exclude group: "org.apache.hive", module: "hive-service"
exclude group: "org.apache.pig", module: "pig"
exclude group: "org.apache.spark", module: "spark-core_2.10"
exclude group: "org.apache.spark", module: "spark-streaming_2.10"
exclude group: "org.apache.spark", module: "spark-sql_2.10"
exclude group: "org.apache.storm", module: "storm-core"
}
testImplementation "org.apache.httpcomponents:httpclient:4.5.13"
testImplementation library.java.commons_lang3
testImplementation library.java.commons_io
testImplementation library.java.cassandra_driver_core
testImplementation library.java.cassandra_driver_mapping
// TODO(yathu) bump to cassandra-all 5.x (which uses a newer jamm) once it is released and Beam runs tests on Java 11
testImplementation "org.apache.cassandra:cassandra-all:3.11.10"
testImplementation library.java.hadoop_common
testImplementation library.java.hadoop_hdfs
testImplementation library.java.hadoop_mapreduce_client_core
testImplementation library.java.postgres
testImplementation library.java.junit
testImplementation library.java.hamcrest
testImplementation library.java.testcontainers_postgresql
testImplementation library.java.netty_all
testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
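// Add the version-specific Hadoop artifacts (and httpclient) to each per-version configuration.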
hadoopVersions.each {kv ->
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs:$kv.value"
"hadoopVersion$kv.key" "org.apache.httpcomponents:httpclient:4.5.13"
}
}
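// Pin each per-version configuration to its exact Hadoop version so transitive dependencies cannot pull in a different one.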
hadoopVersions.each {kv ->
configurations."hadoopVersion$kv.key" {
resolutionStrategy {
force "org.apache.hadoop:hadoop-common:$kv.value"
force "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
force "org.apache.hadoop:hadoop-hdfs:$kv.value"
// Without forcing httpclient, httpcore resolves below 4.4.9, which has an incompatible API
force "org.apache.httpcomponents:httpclient:4.5.13"
}
}
}
// Hadoop dependencies require an old version of Guava (BEAM-11626)
configurations.all (Configuration it) -> {
// error-prone requires a newer Guava; don't override it for annotation processing
// https://github.com/google/error-prone/issues/2745
if (it.name == "annotationProcessor" || it.name == "testAnnotationProcessor") {
return
}
resolutionStrategy {
force 'com.google.guava:guava:25.1-jre'
}
}
// The cassandra.yaml file currently assumes "target/..." exists.
// TODO: Update cassandra.yaml to inject new properties representing
// the root path. Also migrate cassandra.yaml to use an open port
// instead of a static port.
task createTargetDirectoryForCassandra() {
doLast {
if (!project.file("target").exists()) {
project.file("target").mkdirs()
}
}
}
test.dependsOn createTargetDirectoryForCassandra
task hadoopVersionsTest(group: "Verification") {
description = "Runs Hadoop format tests with different Hadoop versions"
dependsOn createTaskNames(hadoopVersions, "Test")
dependsOn createTaskNames(hadoopVersions, "IT")
dependsOn createTaskNames(hadoopVersions, "ElasticIT")
}
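// A sketch of how to invoke the aggregate task (the Gradle project path is assumed
// from the module layout, not stated in this file):
//   ./gradlew :sdks:java:io:hadoop-format:hadoopVersionsTest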
hadoopVersions.each {kv ->
task "runHadoopFormatIO${kv.key}ElasticIT"(type: Test, group: "Verification") {
description = "Runs HadoopFormatIOElasticIT with Hadoop '${kv.value}'"
outputs.upToDateWhen { false }
testClassesDirs = sourceSets.test.output.classesDirs
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
def pipelineOptions = JsonOutput.toJson(["--withTestcontainers=true"])
systemProperty "beamTestPipelineOptions", pipelineOptions
include '**/HadoopFormatIOElasticIT.class'
}
}
hadoopVersions.each {kv ->
task "runHadoopFormatIO${kv.key}IT"(type: Test, group: "Verification") {
description = "Runs HadoopFormatIOIT with Hadoop '${kv.value}'"
outputs.upToDateWhen { false }
testClassesDirs = sourceSets.test.output.classesDirs
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
def pipelineOptions = JsonOutput.toJson([
"--postgresServerName=dummy_value",
"--postgresUsername=postgres",
"--postgresDatabaseName=postgres",
"--postgresPassword=postgres",
"--numberOfRecords=1000",
"--withTestcontainers=true",
"--postgresSsl=false",
])
systemProperty "beamTestPipelineOptions", pipelineOptions
include '**/HadoopFormatIOIT.class'
}
}
hadoopVersions.each {kv ->
task "runHadoopFormatIO${kv.key}Test"(type: Test, group: "Verification") {
description = "Runs HadoopFormatIO tests with Hadoop '${kv.value}'"
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
outputs.upToDateWhen { false }
include '**/*Test.class'
// Cassandra tests cannot run in parallel
exclude '**/HadoopFormatIOCassandraTest.class'
}
}
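// Builds the per-version task names for a given suffix,
// e.g. "runHadoopFormatIO285Test" for Hadoop 2.8.5.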
static def createTaskNames(Map<String, String> hadoopVersions, String suffix) {
return hadoopVersions.keySet().stream()
.map{num -> "runHadoopFormatIO$num$suffix"}
.collect(Collectors.toList())
}