| import groovy.json.JsonOutput |
| |
| import java.util.stream.Collectors |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * License); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an AS IS BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
// Applies Beam's shared Java module conventions (checkstyle, spotbugs, shading, publishing).
plugins { id 'org.apache.beam.module' }
applyJavaNature(
  automaticModuleName: 'org.apache.beam.sdk.io.iceberg',
  // iceberg ended support for Java 8 in 1.7.0
  requireJavaVersion: JavaVersion.VERSION_11,
)

description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg"
ext.summary = "Integration with Iceberg data warehouses."
| |
// Hadoop versions this module is cross-tested against. The short key (e.g. "336")
// becomes the suffix of a dedicated configuration and test task per version.
def hadoopVersions = [
    "336": "3.3.6",
    "341": "3.4.1",
]

// Create one isolated dependency configuration per tested Hadoop version.
hadoopVersions.keySet().each { key -> configurations.create("hadoopVersion${key}") }
| |
// Pinned library versions shared by the dependency declarations below.
def iceberg_version = "1.10.0"
def parquet_version = "1.16.0"
def orc_version = "1.9.6"
def hive_version = "3.1.3"
| |
dependencies {
  // --- Core compile-time dependencies ---
  implementation library.java.vendored_guava_32_1_2_jre
  implementation project(path: ":sdks:java:core", configuration: "shadow")
  implementation project(path: ":model:pipeline", configuration: "shadow")
  implementation library.java.avro
  implementation library.java.slf4j_api
  implementation library.java.joda_time
  // File-format support used by the Iceberg readers/writers.
  implementation "org.apache.parquet:parquet-column:$parquet_version"
  implementation "org.apache.parquet:parquet-hadoop:$parquet_version"
  implementation "org.apache.orc:orc-core:$orc_version"
  implementation "org.apache.iceberg:iceberg-core:$iceberg_version"
  implementation "org.apache.iceberg:iceberg-api:$iceberg_version"
  implementation "org.apache.iceberg:iceberg-parquet:$iceberg_version"
  implementation "org.apache.iceberg:iceberg-orc:$iceberg_version"
  implementation "org.apache.iceberg:iceberg-data:$iceberg_version"
  implementation library.java.hadoop_common
  // TODO(https://github.com/apache/beam/issues/21156): Determine how to build without this dependency
  provided "org.immutables:value:2.8.8"
  permitUnusedDeclared "org.immutables:value:2.8.8"
  implementation library.java.vendored_calcite_1_40_0
  // Runtime-only bits for GCS-backed Iceberg catalogs/warehouses.
  runtimeOnly "org.apache.iceberg:iceberg-gcp:$iceberg_version"
  runtimeOnly library.java.bigdataoss_gcs_connector
  runtimeOnly library.java.hadoop_client

  // --- Test dependencies ---
  testImplementation project(":sdks:java:managed")
  testImplementation library.java.bigdataoss_gcsio
  testImplementation library.java.bigdataoss_util_hadoop
  testImplementation "org.apache.parquet:parquet-avro:$parquet_version"
  testImplementation "org.apache.parquet:parquet-common:$parquet_version"
  testImplementation "org.apache.iceberg:iceberg-data:$iceberg_version"
  testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
  testImplementation project(":sdks:java:extensions:google-cloud-platform-core")
  testImplementation library.java.junit

  // Hive catalog test dependencies
  testImplementation project(path: ":sdks:java:io:iceberg:hive")
  testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
  testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
  testImplementation ("org.apache.hive:hive-metastore:$hive_version")
  testImplementation "org.assertj:assertj-core:3.11.1"
  testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
    // Excluded to avoid classpath conflicts with the Parquet/Hive versions above.
    exclude group: "org.apache.hive", module: "hive-exec"
    exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
  }

  // BigQueryMetastore catalog dep
  testImplementation project(path: ":sdks:java:io:iceberg:bqms", configuration: "shadow")
  testImplementation project(":sdks:java:io:google-cloud-platform")
  testImplementation library.java.google_api_services_bigquery

  testRuntimeOnly library.java.slf4j_jdk14
  testImplementation project(path: ":runners:direct-java", configuration: "shadow")
  testRuntimeOnly project(path: ":runners:google-cloud-dataflow-java")
  testRuntimeOnly project(path: ":sdks:java:harness")
  // Populate each per-version configuration with the matching Hadoop artifacts.
  hadoopVersions.each {kv ->
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-client:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-minicluster:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs-client:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
  }
}
| |
// Force a single Avro version across every configuration so transitive pulls
// cannot downgrade below what iceberg-core requires.
configurations.all {
  // iceberg-core needs avro:1.12.0
  resolutionStrategy.force 'org.apache.avro:avro:1.12.0'
}
| |
// Pin every relevant Hadoop artifact inside each per-version configuration so
// the whole Hadoop stack resolves to one consistent version.
hadoopVersions.each { kv ->
  def hadoopModules = [
      "hadoop-client",
      "hadoop-common",
      "hadoop-mapreduce-client-core",
      "hadoop-minicluster",
      "hadoop-hdfs",
      "hadoop-hdfs-client",
  ]
  configurations."hadoopVersion$kv.key" {
    resolutionStrategy {
      hadoopModules.each { module -> force "org.apache.hadoop:${module}:${kv.value}" }
    }
  }
}
| |
// Aggregate task: runs the Iceberg test suite once per supported Hadoop version.
task hadoopVersionsTest(group: "Verification") {
  description = "Runs Iceberg tests with different Hadoop versions"
  // Groovy's collect is the idiomatic replacement for the previous
  // Java stream().map().collect(Collectors.toList()) chain.
  dependsOn hadoopVersions.keySet().collect { num -> "hadoopVersion${num}Test" }
}
| |
// Define one Test task per supported Hadoop version, each using the
// version-pinned configuration created above plus the normal test classpath.
hadoopVersions.each { entry ->
  task "hadoopVersion${entry.key}Test"(type: Test, group: "Verification") {
    description = "Runs Iceberg tests with Hadoop version ${entry.value}"
    classpath = configurations."hadoopVersion${entry.key}" + sourceSets.test.runtimeClasspath
    include '**/*Test.class'
  }
}
| |
// Integration-test settings, overridable with -PgcpProject=... / -PgcpTempLocation=...
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://managed-iceberg-integration-tests/temp'
// True when tests are requested on Java 8, either via -PtestJavaVersion=8 or the current JVM.
def usingJava8 = (project.findProperty('testJavaVersion') == '8' || JavaVersion.current().equals(JavaVersion.VERSION_1_8))
| |
// Runs the *IT integration tests against live GCP services on the default runner.
task integrationTest(type: Test) {
  group = "Verification"

  def pipelineOptions = [
      "--project=${gcpProject}",
      "--tempLocation=${gcpTempLocation}",
  ]
  systemProperty "beamTestPipelineOptions", JsonOutput.toJson(pipelineOptions)

  // Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
  outputs.upToDateWhen { false }

  include '**/*IT.class'
  // BQ metastore catalog doesn't support java 8
  if (usingJava8) {
    exclude '**/BigQueryMetastoreCatalogIT.class'
  }

  classpath = sourceSets.test.runtimeClasspath
  testClassesDirs = sourceSets.test.output.classesDirs
  maxParallelForks 1
}
| |
// Runs a curated set of BigQueryMetastoreCatalogIT tests on Dataflow with a
// locally built SDK container image.
task dataflowIntegrationTest(type: Test) {
  group = "Verification"
  evaluationDependsOn(":runners:google-cloud-dataflow-java")
  dependsOn ":runners:google-cloud-dataflow-java:buildAndPushDockerJavaContainer"
  finalizedBy ":runners:google-cloud-dataflow-java:cleanUpDockerJavaImages"
  def dockerJavaImageName = project.project(':runners:google-cloud-dataflow-java').ext.dockerJavaImageName

  def args = [
      "--runner=DataflowRunner",
      "--region=us-central1",
      "--project=${gcpProject}",
      "--tempLocation=${gcpTempLocation}",
      "--tempRoot=${gcpTempLocation}",
      "--sdkContainerImage=${dockerJavaImageName}",
      "--experiments=use_runner_v2,use_staged_dataflow_worker_jar"
  ]
  if (project.hasProperty('enableManagedTransforms')) {
    args.add("--experiments=enable_managed_transforms")
  }
  systemProperty "beamTestPipelineOptions", JsonOutput.toJson(args)

  // Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
  outputs.upToDateWhen { false }

  filter {
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testRead'
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testReadWithFilter'
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testStreamingRead'
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testWrite'
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testWriteRead'
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testReadWriteStreaming'
    includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testStreamToPartitionedDynamicDestinations'
  }

  // BUG FIX: this guard previously lived in doLast, which only runs AFTER the
  // tests have already executed — so it never actually prevented anything.
  // doFirst throws before the test action, skipping execution on Java 8.
  doFirst {
    if (usingJava8) {
      throw new StopExecutionException("BigQueryMetastoreCatalog doesn't support Java 8");
    }
  }

  maxParallelForks 4
  classpath = sourceSets.test.runtimeClasspath
  testClassesDirs = sourceSets.test.output.classesDirs
}
| |
// Runs the *LT load tests on Dataflow against live GCP services.
task loadTest(type: Test) {
  // CONSISTENCY: every sibling test task declares group "Verification"; without
  // it this task was missing from the Verification section of `gradle tasks`.
  group = "Verification"
  description = "Runs Iceberg load tests on Dataflow"

  systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
      "--project=${gcpProject}",
      "--tempLocation=${gcpTempLocation}",
      "--testSize=large",
      "--runner=DataflowRunner",
      "--region=us-central1"
  ])

  // Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
  outputs.upToDateWhen { false }

  include '**/*LT.class'

  maxParallelForks 3
  classpath = sourceSets.test.runtimeClasspath
  testClassesDirs = sourceSets.test.output.classesDirs
}