blob: da4a317d320386bca8a6fe2d380c3fd4b0d183ad [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Module setup: apply the Beam module plugin, then configure the Java nature.
plugins {
  id 'org.apache.beam.module'
}

applyJavaNature(
  archivesBaseName: 'beam-sdks-java-tpcds',
  automaticModuleName: 'org.apache.beam.sdk.tpcds',
  exportJavadoc: false,
)

description = "Apache Beam :: SDKs :: Java :: TPC-DS"
// When running via Gradle, this property can be used to pass command-line arguments
// to the TPC-DS run
def tpcdsArgsProperty = "tpcds.args"

// Name of the Gradle property that selects the runner project to put on the
// classpath when running via Gradle.
def tpcdsRunnerProperty = "tpcds.runner"

// Fall back to the direct runner when no runner was requested.
def requestedRunner = project.findProperty(tpcdsRunnerProperty)
def tpcdsRunnerDependency = requestedRunner ?: ":runners:direct-java"

// Flags describing which runner was selected.
def isSpark = tpcdsRunnerDependency.startsWith(":runners:spark:")
def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(tpcdsRunnerDependency)

// The direct runner is consumed through its "shadow" configuration; for every
// other runner no configuration name is given (null).
def runnerConfiguration = null
if (":runners:direct-java".equals(tpcdsRunnerDependency)) {
  runnerConfiguration = "shadow"
}
if (isDataflowRunner) {
  // This build script reads a property straight off the Dataflow worker project,
  // so that project has to be evaluated before this one. The ordering must be
  // declared by hand with evaluationDependsOn.
  evaluationDependsOn(":runners:google-cloud-dataflow-java:worker")
}
// gradleRun: the classpath used to start the TPC-DS launcher straight from Gradle.
// Gradle assembles the required dependencies on the classpath itself, so no fat
// jar has to be bundled.
configurations.maybeCreate("gradleRun")
// Declares the compile and test dependencies of this module, and populates the
// gradleRun configuration that the `run` task uses as its classpath.
dependencies {
implementation enforcedPlatform(library.java.google_cloud_platform_libraries_bom)
// TODO(https://github.com/apache/beam/issues/21156): Determine how to build without this dependency
provided "org.immutables:value:2.8.8"
// NOTE(review): permitUnusedDeclared presumably exempts a declared dependency from
// the unused-dependency check — confirm against the Beam build plugin.
permitUnusedDeclared "org.immutables:value:2.8.8"
implementation library.java.avro
implementation library.java.joda_time
implementation library.java.vendored_guava_32_1_2_jre
implementation library.java.vendored_calcite_1_28_0
implementation library.java.commons_csv
implementation library.java.slf4j_api
implementation "com.googlecode.json-simple:json-simple:1.1.1"
implementation library.java.jackson_databind
implementation project(":sdks:java:extensions:sql")
implementation project(":sdks:java:extensions:sql:zetasql")
implementation project(":sdks:java:io:parquet")
implementation project(":sdks:java:extensions:google-cloud-platform-core")
implementation project(":sdks:java:testing:test-utils")
permitUnusedDeclared project(":sdks:java:extensions:google-cloud-platform-core")
implementation project(":sdks:java:io:google-cloud-platform")
permitUnusedDeclared project(":sdks:java:io:google-cloud-platform")
implementation project(":runners:google-cloud-dataflow-java")
implementation project(path: ":sdks:java:core", configuration: "shadow")
testRuntimeOnly library.java.slf4j_jdk14
testImplementation project(path: ":sdks:java:io:google-cloud-platform", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
// gradleRun classpath: this project itself ...
gradleRun project(project.path)
// ... plus the runner project chosen via tpcdsRunnerDependency, taken from the
// configuration named by runnerConfiguration ("shadow" for the direct runner,
// null otherwise).
gradleRun project(path: tpcdsRunnerDependency, configuration: runnerConfiguration)
}
// When a Spark runner is selected, keep slf4j-jdk14 off the gradleRun classpath.
if (isSpark) {
  configurations.gradleRun.exclude(group: "org.slf4j", module: "slf4j-jdk14")
}
// Execute the TPC-DS queries or suites via Gradle.
//
// Parameters:
//   -Ptpcds.runner
//       Specify a runner subproject, such as ":runners:spark:3" or ":runners:flink:1.17"
//       Defaults to ":runners:direct-java"
//
//   -Ptpcds.args
//       Specify the command line for invoking org.apache.beam.sdk.tpcds.BeamTpcds.
//       The value is split on whitespace into individual arguments.
//
//   -PdataflowWorkerJar (Dataflow runner only)
//       Path of the worker jar to pass to the job. Defaults to the shadow jar
//       built by :runners:google-cloud-dataflow-java:worker.
task run(type: JavaExec) {
  // Start from the user-supplied arguments; runner-specific ones are appended below.
  def tpcdsArgsStr = project.findProperty(tpcdsArgsProperty) ?: ""
  def tpcdsArgsList = new ArrayList<String>()
  Collections.addAll(tpcdsArgsList, tpcdsArgsStr.split())

  if (isDataflowRunner) {
    dependsOn ":runners:google-cloud-dataflow-java:worker:shadowJar"

    // Use archiveFile rather than the deprecated archivePath accessor to locate
    // the worker shadow jar; both resolve to the same java.io.File.
    def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?:
        project(":runners:google-cloud-dataflow-java:worker")
            .shadowJar.archiveFile.get().asFile

    // Provide job with a customizable worker jar.
    // With legacy worker jar, containerImage is set to empty (i.e. to use the internal build).
    // More context and discussions can be found in PR#6694.
    tpcdsArgsList.add("--dataflowWorkerJar=${dataflowWorkerJar}".toString())
    tpcdsArgsList.add('--workerHarnessContainerImage=')
  }

  if (isSpark) {
    // Disable UI
    systemProperty "spark.ui.enabled", "false"
    systemProperty "spark.ui.showConsoleProgress", "false"

    // For transparency, be explicit about configuration of local Spark
    tpcdsArgsList.add("--sparkMaster=local[4]")

    // Dataset runner only
    systemProperty "spark.sql.shuffle.partitions", "4"
    systemProperty "spark.sql.adaptive.enabled", "false" // high overhead for complex queries
  }

  mainClass = "org.apache.beam.sdk.tpcds.BeamTpcds"
  // Classpath comes from the gradleRun configuration: this project plus the selected runner.
  classpath = configurations.gradleRun
  args tpcdsArgsList.toArray()
}