blob: 42a624a4c5fbd0212b54dee374198c5909fb4ef9 [file] [log] [blame]
import groovy.json.JsonOutput
import java.util.stream.Collectors
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
plugins { id 'org.apache.beam.module' }
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.iceberg',
// iceberg ended support for Java 8 in 1.7.0
requireJavaVersion: JavaVersion.VERSION_11,
)
description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg"
ext.summary = "Integration with Iceberg data warehouses."
def hadoopVersions = [
"336": "3.3.6",
"341": "3.4.1",
]
hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
def iceberg_version = "1.10.0"
def parquet_version = "1.16.0"
def orc_version = "1.9.6"
def hive_version = "3.1.3"
dependencies {
implementation library.java.vendored_guava_32_1_2_jre
implementation project(path: ":sdks:java:core", configuration: "shadow")
implementation project(path: ":model:pipeline", configuration: "shadow")
implementation library.java.avro
implementation library.java.slf4j_api
implementation library.java.joda_time
implementation "org.apache.parquet:parquet-column:$parquet_version"
implementation "org.apache.parquet:parquet-hadoop:$parquet_version"
implementation "org.apache.orc:orc-core:$orc_version"
implementation "org.apache.iceberg:iceberg-core:$iceberg_version"
implementation "org.apache.iceberg:iceberg-api:$iceberg_version"
implementation "org.apache.iceberg:iceberg-parquet:$iceberg_version"
implementation "org.apache.iceberg:iceberg-orc:$iceberg_version"
implementation "org.apache.iceberg:iceberg-data:$iceberg_version"
implementation library.java.hadoop_common
// TODO(https://github.com/apache/beam/issues/21156): Determine how to build without this dependency
provided "org.immutables:value:2.8.8"
permitUnusedDeclared "org.immutables:value:2.8.8"
implementation library.java.vendored_calcite_1_40_0
runtimeOnly "org.apache.iceberg:iceberg-gcp:$iceberg_version"
runtimeOnly library.java.bigdataoss_gcs_connector
runtimeOnly library.java.hadoop_client
testImplementation project(":sdks:java:managed")
testImplementation library.java.bigdataoss_gcsio
testImplementation library.java.bigdataoss_util_hadoop
testImplementation "org.apache.parquet:parquet-avro:$parquet_version"
testImplementation "org.apache.parquet:parquet-common:$parquet_version"
testImplementation "org.apache.iceberg:iceberg-data:$iceberg_version"
testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
testImplementation project(":sdks:java:extensions:google-cloud-platform-core")
testImplementation library.java.junit
// Hive catalog test dependencies
testImplementation project(path: ":sdks:java:io:iceberg:hive")
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation ("org.apache.hive:hive-metastore:$hive_version")
testImplementation "org.assertj:assertj-core:3.11.1"
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}
// BigQueryMetastore catalog dep
testImplementation project(path: ":sdks:java:io:iceberg:bqms", configuration: "shadow")
testImplementation project(":sdks:java:io:google-cloud-platform")
testImplementation library.java.google_api_services_bigquery
testRuntimeOnly library.java.slf4j_jdk14
testImplementation project(path: ":runners:direct-java", configuration: "shadow")
testRuntimeOnly project(path: ":runners:google-cloud-dataflow-java")
testRuntimeOnly project(path: ":sdks:java:harness")
hadoopVersions.each {kv ->
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-client:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-minicluster:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs-client:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
}
}
configurations.all {
// iceberg-core needs avro:1.12.0
resolutionStrategy.force 'org.apache.avro:avro:1.12.0'
}
hadoopVersions.each {kv ->
configurations."hadoopVersion$kv.key" {
resolutionStrategy {
force "org.apache.hadoop:hadoop-client:$kv.value"
force "org.apache.hadoop:hadoop-common:$kv.value"
force "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
force "org.apache.hadoop:hadoop-minicluster:$kv.value"
force "org.apache.hadoop:hadoop-hdfs:$kv.value"
force "org.apache.hadoop:hadoop-hdfs-client:$kv.value"
}
}
}
task hadoopVersionsTest(group: "Verification") {
description = "Runs Iceberg tests with different Hadoop versions"
def taskNames = hadoopVersions.keySet().stream()
.map{num -> "hadoopVersion${num}Test"}
.collect(Collectors.toList())
dependsOn taskNames
}
hadoopVersions.each { kv ->
task "hadoopVersion${kv.key}Test"(type: Test, group: "Verification") {
description = "Runs Iceberg tests with Hadoop version $kv.value"
classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
include '**/*Test.class'
}
}
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://managed-iceberg-integration-tests/temp'
def usingJava8 = (project.findProperty('testJavaVersion') == '8' || JavaVersion.current().equals(JavaVersion.VERSION_1_8))
task integrationTest(type: Test) {
group = "Verification"
systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
"--project=${gcpProject}",
"--tempLocation=${gcpTempLocation}",
])
// Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
outputs.upToDateWhen { false }
include '**/*IT.class'
// BQ metastore catalog doesn't support java 8
if (usingJava8) {
exclude '**/BigQueryMetastoreCatalogIT.class'
}
maxParallelForks 1
classpath = sourceSets.test.runtimeClasspath
testClassesDirs = sourceSets.test.output.classesDirs
}
task dataflowIntegrationTest(type: Test) {
group = "Verification"
evaluationDependsOn(":runners:google-cloud-dataflow-java")
dependsOn ":runners:google-cloud-dataflow-java:buildAndPushDockerJavaContainer"
finalizedBy ":runners:google-cloud-dataflow-java:cleanUpDockerJavaImages"
def dockerJavaImageName = project.project(':runners:google-cloud-dataflow-java').ext.dockerJavaImageName
def args = [
"--runner=DataflowRunner",
"--region=us-central1",
"--project=${gcpProject}",
"--tempLocation=${gcpTempLocation}",
"--tempRoot=${gcpTempLocation}",
"--sdkContainerImage=${dockerJavaImageName}",
"--experiments=use_runner_v2,use_staged_dataflow_worker_jar"
]
if (project.hasProperty('enableManagedTransforms')) {
args.add("--experiments=enable_managed_transforms")
}
systemProperty "beamTestPipelineOptions", JsonOutput.toJson(args)
// Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
outputs.upToDateWhen { false }
filter {
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testRead'
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testReadWithFilter'
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testStreamingRead'
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testWrite'
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testWriteRead'
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testReadWriteStreaming'
includeTestsMatching 'org.apache.beam.sdk.io.iceberg.catalog.BigQueryMetastoreCatalogIT.testStreamToPartitionedDynamicDestinations'
}
doLast {
if (usingJava8) {
throw new StopExecutionException("BigQueryMetastoreCatalog doesn't support Java 8");
}
}
maxParallelForks 4
classpath = sourceSets.test.runtimeClasspath
testClassesDirs = sourceSets.test.output.classesDirs
}
task loadTest(type: Test) {
systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
"--project=${gcpProject}",
"--tempLocation=${gcpTempLocation}",
"--testSize=large",
"--runner=DataflowRunner",
"--region=us-central1"
])
// Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
outputs.upToDateWhen { false }
include '**/*LT.class'
maxParallelForks 3
classpath = sourceSets.test.runtimeClasspath
testClassesDirs = sourceSets.test.output.classesDirs
}