| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * License); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an AS IS BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import groovy.json.JsonOutput |
| import java.util.stream.Collectors |
| |
| // Apply the Beam module plugin. |
| plugins { |
|   id 'org.apache.beam.module' |
| } |
| |
| // The three helpers below are not defined in this file (presumably contributed by the plugin above). |
| applyJavaNature(automaticModuleName: 'org.apache.beam.sdk.io.hadoop.format') |
| provideIntegrationTestingDependencies() |
| enableJavaPerformanceTesting() |
| |
| // Human-readable project metadata. |
| description = "Apache Beam :: SDKs :: Java :: IO :: Hadoop Format" |
| ext.summary = "IO to read data from sources and to write data to sinks that implement Hadoop MapReduce Format." |
| |
| // Hadoop releases to test against, keyed by the compact suffix used in configuration and task names. |
| def hadoopVersions = [ |
|   "285" : "2.8.5", |
|   "292" : "2.9.2", |
|   "2102": "2.10.2", |
|   "324" : "3.2.4", |
| ] |
| |
| // One dedicated configuration ("hadoopVersion<key>") per Hadoop release. |
| hadoopVersions.keySet().each { versionKey -> |
|   configurations.create("hadoopVersion${versionKey}") |
| } |
| |
| // Version string interpolated into the org.elasticsearch test dependency coordinates. |
| def elastic_search_version = "7.12.0" |
| |
| // Adjust what may appear on the test runtime classpath. |
| configurations.testRuntimeClasspath { |
|   // log4j-over-slf4j would wire LOG4J -> SLF4J -> LOG4J and cause a StackOverflow |
|   exclude(group: "org.slf4j", module: "log4j-over-slf4j") |
| |
|   // Cassandra relies on the old version of JAMM, so pin it |
|   resolutionStrategy { |
|     force('com.github.jbellis:jamm:0.3.0') |
|   } |
| } |
| |
| // Dependency declarations for the main code, the tests, and the per-Hadoop-version configurations. |
| dependencies { |
| // Main dependencies. |
| implementation project(path: ":sdks:java:core", configuration: "shadow") |
| implementation library.java.vendored_guava_32_1_2_jre |
| implementation library.java.slf4j_api |
| implementation project(":sdks:java:io:hadoop-common") |
| implementation library.java.joda_time |
| // Hadoop artifacts are declared in the 'provided' configuration rather than 'implementation'. |
| provided library.java.hadoop_common |
| provided library.java.hadoop_hdfs |
| permitUnusedDeclared library.java.hadoop_hdfs |
| provided library.java.hadoop_hdfs_client |
| provided library.java.hadoop_mapreduce_client_core |
| // Test dependencies: other Beam projects first, then third-party libraries. |
| testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") |
| testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") |
| testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") |
| testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration") |
| testImplementation project(":sdks:java:io:jdbc") |
| // Elasticsearch artifacts all share elastic_search_version. |
| testImplementation "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version" |
| testImplementation library.java.testcontainers_elasticsearch |
| testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:$elastic_search_version" |
| testImplementation "org.elasticsearch:elasticsearch:$elastic_search_version" |
| testImplementation ("org.elasticsearch:elasticsearch-hadoop:$elastic_search_version") { |
| // TODO(https://issues.apache.org/jira/browse/BEAM-3715) |
| // These are all optional deps of elasticsearch-hadoop. Why do they have to be excluded? |
| exclude group: "cascading", module: "cascading-local" |
| exclude group: "cascading", module: "cascading-hadoop" |
| exclude group: "org.apache.hive", module: "hive-service" |
| exclude group: "org.apache.pig", module: "pig" |
| exclude group: "org.apache.spark", module: "spark-core_2.10" |
| exclude group: "org.apache.spark", module: "spark-streaming_2.10" |
| exclude group: "org.apache.spark", module: "spark-sql_2.10" |
| exclude group: "org.apache.storm", module: "storm-core" |
| } |
| // Same httpclient version that is added to and forced on the hadoopVersion<key> configurations. |
| testImplementation "org.apache.httpcomponents:httpclient:4.5.13" |
| testImplementation library.java.commons_lang3 |
| testImplementation library.java.commons_io |
| testImplementation library.java.cassandra_driver_core |
| testImplementation library.java.cassandra_driver_mapping |
| // TODO(yathu) bump to cassandra-5.x which uses newer jamm when released & beam runs test on Java11 |
| testImplementation "org.apache.cassandra:cassandra-all:3.11.10" |
| testImplementation library.java.hadoop_common |
| testImplementation library.java.hadoop_hdfs |
| testImplementation library.java.hadoop_mapreduce_client_core |
| testImplementation library.java.postgres |
| testImplementation library.java.junit |
| testImplementation library.java.hamcrest |
| testImplementation library.java.testcontainers_postgresql |
| testImplementation library.java.netty_all |
| testRuntimeOnly library.java.slf4j_jdk14 |
| testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") |
| |
| // Populate each "hadoopVersion<key>" configuration with that release's Hadoop artifacts plus httpclient. |
| hadoopVersions.each {kv -> |
| "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value" |
| "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value" |
| "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs:$kv.value" |
| "hadoopVersion$kv.key" "org.apache.httpcomponents:httpclient:4.5.13" |
| } |
| } |
| |
| // Pin every per-version configuration to its own Hadoop release. |
| hadoopVersions.each { versionKey, hadoopRelease -> |
|   def versionedConfiguration = configurations.getByName("hadoopVersion${versionKey}") |
|   versionedConfiguration.resolutionStrategy.force( |
|     "org.apache.hadoop:hadoop-common:${hadoopRelease}", |
|     "org.apache.hadoop:hadoop-mapreduce-client-core:${hadoopRelease}", |
|     "org.apache.hadoop:hadoop-hdfs:${hadoopRelease}", |
|     // without forcing httpclient httpcore gets below 4.4.9 which has incompatible API |
|     "org.apache.httpcomponents:httpclient:4.5.13" |
|   ) |
| } |
| |
| // Hadoop dependencies require old version of Guava (BEAM-11626) |
| configurations.all { Configuration candidate -> |
|   // error-prone requires newer guava, so the annotation processing configurations are left untouched |
|   // https://github.com/google/error-prone/issues/2745 |
|   def usedForAnnotationProcessing = candidate.name in ["annotationProcessor", "testAnnotationProcessor"] |
|   if (!usedForAnnotationProcessing) { |
|     candidate.resolutionStrategy { |
|       force 'com.google.guava:guava:25.1-jre' |
|     } |
|   } |
| } |
| |
| // The cassandra.yaml file currently assumes "target/..." exists. |
| // TODO: Update cassandra.yaml to inject new properties representing |
| // the root path. Also migrate cassandra.yaml to use any open ports |
| // instead of a static port. |
| // |
| // Creates the project's "target" directory; the default test task depends on it. |
| task createTargetDirectoryForCassandra() { |
|   doLast { |
|     // project.mkdir leaves an existing directory alone and fails the task when the |
|     // directory cannot be created, instead of ignoring the result of File.mkdirs(). |
|     project.mkdir("target") |
|   } |
| } |
| test.dependsOn createTargetDirectoryForCassandra |
| |
| // Aggregate task depending on every runHadoopFormatIO<key>Test, ...IT and ...ElasticIT task. |
| task hadoopVersionsTest(group: "Verification") { |
|   description = "Runs Hadoop format tests with different Hadoop versions" |
|   ["Test", "IT", "ElasticIT"].each { taskSuffix -> |
|     dependsOn createTaskNames(hadoopVersions, taskSuffix) |
|   } |
| } |
| |
| // Creates one HadoopFormatIOElasticIT task per Hadoop release. |
| hadoopVersions.each { versionKey, hadoopRelease -> |
|   tasks.create("runHadoopFormatIO${versionKey}ElasticIT", Test) { |
|     group = "Verification" |
|     description = "Runs HadoopFormatIOElasticIT with Hadoop '${hadoopRelease}'" |
|     // Never considered up to date, so the task runs on every invocation. |
|     outputs.upToDateWhen { false } |
|     testClassesDirs = sourceSets.test.output.classesDirs |
|     // The versioned Hadoop configuration is placed ahead of the regular test runtime classpath. |
|     classpath = configurations.getByName("hadoopVersion${versionKey}") + sourceSets.test.runtimeClasspath |
|     systemProperty "beamTestPipelineOptions", JsonOutput.toJson(["--withTestcontainers=true"]) |
|     include '**/HadoopFormatIOElasticIT.class' |
|   } |
| } |
| |
| // Creates one HadoopFormatIOIT task per Hadoop release. |
| hadoopVersions.each { versionKey, hadoopRelease -> |
|   tasks.create("runHadoopFormatIO${versionKey}IT", Test) { |
|     group = "Verification" |
|     description = "Runs HadoopFormatIOIT with Hadoop '${hadoopRelease}'" |
|     // Never considered up to date, so the task runs on every invocation. |
|     outputs.upToDateWhen { false } |
|     testClassesDirs = sourceSets.test.output.classesDirs |
|     // The versioned Hadoop configuration is placed ahead of the regular test runtime classpath. |
|     classpath = configurations.getByName("hadoopVersion${versionKey}") + sourceSets.test.runtimeClasspath |
| |
|     // Pipeline options handed to the test JVM as a JSON array. |
|     def integrationTestOptions = [ |
|       "--postgresServerName=dummy_value", |
|       "--postgresUsername=postgres", |
|       "--postgresDatabaseName=postgres", |
|       "--postgresPassword=postgres", |
|       "--numberOfRecords=1000", |
|       "--withTestcontainers=true", |
|       "--postgresSsl=false", |
|     ] |
|     systemProperty "beamTestPipelineOptions", JsonOutput.toJson(integrationTestOptions) |
|     include '**/HadoopFormatIOIT.class' |
|   } |
| } |
| |
| // Creates one unit-test task per Hadoop release, run against that release's artifacts. |
| hadoopVersions.each {kv -> |
|   task "runHadoopFormatIO${kv.key}Test"(type: Test, group: "Verification") { |
|     description = "Runs HadoopFormatIO tests with Hadoop '${kv.value}'" |
|     // Set explicitly, as the IT and ElasticIT tasks in this file do, instead of relying on |
|     // the implicit convention for custom Test tasks (deprecated in Gradle 8.x). |
|     testClassesDirs = sourceSets.test.output.classesDirs |
|     // The versioned Hadoop configuration is placed ahead of the regular test runtime classpath. |
|     classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath |
|     // Never considered up to date, so the task runs on every invocation. |
|     outputs.upToDateWhen { false } |
|     include '**/*Test.class' |
|     // Cassandra test cannot run in parallel |
|     exclude '**/HadoopFormatIOCassandraTest.class' |
|   } |
| } |
| |
| // Builds the "runHadoopFormatIO<key><suffix>" task name for every key of the given version map. |
| static def createTaskNames(Map<String, String> hadoopVersions, String suffix) { |
|   hadoopVersions.keySet().collect { versionKey -> "runHadoopFormatIO${versionKey}${suffix}" } |
| } |