/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import groovy.json.JsonOutput
import java.util.stream.Collectors
plugins { id 'org.apache.beam.module' }
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.hadoop.format',
)
provideIntegrationTestingDependencies()
enableJavaPerformanceTesting()
description = "Apache Beam :: SDKs :: Java :: IO :: Hadoop Format"
ext.summary = "IO to read data from sources and to write data to sinks that implement Hadoop MapReduce Format."
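// Hadoop versions exercised by the cross-version test tasks defined below.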
def hadoopVersions = [
"285": "2.8.5",
"292": "2.9.2",
"2102": "2.10.2",
"324": "3.2.4",
]
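// Create a dedicated dependency configuration per Hadoop version, e.g. "hadoopVersion285".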
hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
def elastic_search_version = "7.12.0"
// Ban dependencies from the test runtime classpath
configurations.testRuntimeClasspath {
// Prevent a StackOverflow because of wiring LOG4J -> SLF4J -> LOG4J
exclude group: "org.slf4j", module: "log4j-over-slf4j"
// Force the old version of JAMM that Cassandra relies on
resolutionStrategy.force 'com.github.jbellis:jamm:0.3.0'
}
dependencies {
implementation project(path: ":sdks:java:core", configuration: "shadow")
implementation library.java.vendored_guava_32_1_2_jre
implementation library.java.slf4j_api
implementation project(":sdks:java:io:hadoop-common")
implementation library.java.joda_time
provided library.java.hadoop_common
provided library.java.hadoop_hdfs
permitUnusedDeclared library.java.hadoop_hdfs
provided library.java.hadoop_hdfs_client
provided library.java.hadoop_mapreduce_client_core
testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
testImplementation project(":sdks:java:io:jdbc")
testImplementation "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version"
testImplementation library.java.testcontainers_elasticsearch
testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:$elastic_search_version"
testImplementation "org.elasticsearch:elasticsearch:$elastic_search_version"
testImplementation ("org.elasticsearch:elasticsearch-hadoop:$elastic_search_version") {
// TODO(https://issues.apache.org/jira/browse/BEAM-3715)
// These are all optional deps of elasticsearch-hadoop. Why do they have to be excluded?
exclude group: "cascading", module: "cascading-local"
exclude group: "cascading", module: "cascading-hadoop"
exclude group: "org.apache.hive", module: "hive-service"
exclude group: "org.apache.pig", module: "pig"
exclude group: "org.apache.spark", module: "spark-core_2.10"
exclude group: "org.apache.spark", module: "spark-streaming_2.10"
exclude group: "org.apache.spark", module: "spark-sql_2.10"
exclude group: "org.apache.storm", module: "storm-core"
}
testImplementation "org.apache.httpcomponents:httpclient:4.5.13"
testImplementation library.java.commons_lang3
testImplementation library.java.commons_io
testImplementation library.java.cassandra_driver_core
testImplementation library.java.cassandra_driver_mapping
// TODO(yathu) bump to cassandra-all 5.x (which uses a newer jamm) once it is released and Beam runs tests on Java 11
testImplementation "org.apache.cassandra:cassandra-all:3.11.10"
testImplementation library.java.hadoop_common
testImplementation library.java.hadoop_hdfs
testImplementation library.java.hadoop_mapreduce_client_core
testImplementation library.java.postgres
testImplementation library.java.junit
testImplementation library.java.hamcrest
testImplementation library.java.testcontainers_postgresql
testImplementation library.java.netty_all
testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
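// Add the version-specific Hadoop artifacts (and httpclient) to each per-version configuration.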
hadoopVersions.each {kv ->
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs:$kv.value"
"hadoopVersion$kv.key" "org.apache.httpcomponents:httpclient:4.5.13"
}
}
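// Pin each per-version configuration to its exact Hadoop version so transitive dependencies cannot pull in a different one.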
hadoopVersions.each {kv ->
configurations."hadoopVersion$kv.key" {
resolutionStrategy {
force "org.apache.hadoop:hadoop-common:$kv.value"
force "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
force "org.apache.hadoop:hadoop-hdfs:$kv.value"
// Without forcing httpclient, httpcore resolves below 4.4.9, which has an incompatible API
force "org.apache.httpcomponents:httpclient:4.5.13"
}
}
}
// Hadoop dependencies require an old version of Guava (BEAM-11626)
configurations.all (Configuration it) -> {
// error-prone requires a newer Guava; don't override it for annotation processing
// https://github.com/google/error-prone/issues/2745
if (it.name == "annotationProcessor" || it.name == "testAnnotationProcessor") {
return
}
resolutionStrategy {
force 'com.google.guava:guava:25.1-jre'
}
}
// The cassandra.yaml file currently assumes "target/..." exists.
// TODO: Update cassandra.yaml to inject new properties representing
// the root path. Also migrate cassandra.yaml to use an open port
// instead of a static port.
task createTargetDirectoryForCassandra() {
doLast {
if (!project.file("target").exists()) {
project.file("target").mkdirs()
}
}
}
test.dependsOn createTargetDirectoryForCassandra
task hadoopVersionsTest(group: "Verification") {
description = "Runs Hadoop format tests with different Hadoop versions"
dependsOn createTaskNames(hadoopVersions, "Test")
dependsOn createTaskNames(hadoopVersions, "IT")
dependsOn createTaskNames(hadoopVersions, "ElasticIT")
}
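// A sketch of how to invoke the aggregate task (the Gradle project path is assumed
// from the module layout, not stated in this file):
//   ./gradlew :sdks:java:io:hadoop-format:hadoopVersionsTest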
hadoopVersions.each {kv ->
task "runHadoopFormatIO${kv.key}ElasticIT"(type: Test, group: "Verification") {
description = "Runs HadoopFormatIOElasticIT with Hadoop '${kv.value}'"
outputs.upToDateWhen { false }
testClassesDirs = sourceSets.test.output.classesDirs
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
def pipelineOptions = JsonOutput.toJson(["--withTestcontainers=true"])
systemProperty "beamTestPipelineOptions", pipelineOptions
include '**/HadoopFormatIOElasticIT.class'
}
}
hadoopVersions.each {kv ->
task "runHadoopFormatIO${kv.key}IT"(type: Test, group: "Verification") {
description = "Runs HadoopFormatIOIT with Hadoop '${kv.value}'"
outputs.upToDateWhen { false }
testClassesDirs = sourceSets.test.output.classesDirs
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
def pipelineOptions = JsonOutput.toJson([
"--postgresServerName=dummy_value",
"--postgresUsername=postgres",
"--postgresDatabaseName=postgres",
"--postgresPassword=postgres",
"--numberOfRecords=1000",
"--withTestcontainers=true",
"--postgresSsl=false",
])
systemProperty "beamTestPipelineOptions", pipelineOptions
include '**/HadoopFormatIOIT.class'
}
}
hadoopVersions.each {kv ->
task "runHadoopFormatIO${kv.key}Test"(type: Test, group: "Verification") {
description = "Runs HadoopFormatIO tests with Hadoop '${kv.value}'"
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
outputs.upToDateWhen { false }
include '**/*Test.class'
// Cassandra tests cannot run in parallel
exclude '**/HadoopFormatIOCassandraTest.class'
}
}
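// Builds the per-version task names for a given suffix,
// e.g. "runHadoopFormatIO285Test" for Hadoop 2.8.5.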
static def createTaskNames(Map<String, String> hadoopVersions, String suffix) {
return hadoopVersions.keySet().stream()
.map{num -> "runHadoopFormatIO$num$suffix"}
.collect(Collectors.toList())
}