| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
// The Spark 3.1 / Scala 2.12 modules that receive the shared configuration applied below.
def sparkProjects = [
  ':iceberg-spark:iceberg-spark-3.1_2.12',
  ':iceberg-spark:iceberg-spark-extensions-3.1_2.12',
  ':iceberg-spark:iceberg-spark-runtime-3.1_2.12'
].collect { modulePath -> project(modulePath) }
| |
// Settings applied to every project in sparkProjects.
configure(sparkProjects) {
  // Spark release referenced as ${sparkVersion} by the dependency declarations of these modules.
  project.ext.sparkVersion = '3.1.3'

  // Force the listed Jackson artifacts to 2.11.4 in every configuration of these projects.
  configurations.all {
    resolutionStrategy {
      [
        'com.fasterxml.jackson.module:jackson-module-scala_2.12:2.11.4',
        'com.fasterxml.jackson.module:jackson-module-paranamer:2.11.4',
        'com.fasterxml.jackson.core:jackson-core:2.11.4',
        'com.fasterxml.jackson.core:jackson-databind:2.11.4'
      ].each { pinnedCoordinate -> force(pinnedCoordinate) }
    }
  }
}
| |
// Spark 3.1 integration module, built with the Scala and Scalastyle plugins.
project(':iceberg-spark:iceberg-spark-3.1_2.12') {
  apply(plugin: 'scala')
  apply(plugin: 'com.github.alisiikh.scalastyle')

  sourceSets {
    main {
      // Both source trees are handed to the Scala source set; the Java source set is left empty.
      scala.srcDirs = ['src/main/scala', 'src/main/java']
      java.srcDirs = []
    }
  }

  dependencies {
    implementation(project(path: ':iceberg-bundled-guava', configuration: 'shadow'))
    api(project(':iceberg-api'))
    [
      ':iceberg-common',
      ':iceberg-core',
      ':iceberg-data',
      ':iceberg-orc',
      ':iceberg-parquet',
      ':iceberg-arrow'
    ].each { modulePath -> implementation(project(modulePath)) }

    compileOnly("com.google.errorprone:error_prone_annotations")
    compileOnly("org.apache.avro:avro")
    // Spark is compile-only; the excluded groups are declared separately in this block or left out entirely.
    compileOnly("org.apache.spark:spark-hive_2.12:${sparkVersion}") {
      exclude(group: 'org.apache.avro', module: 'avro')
      exclude(group: 'org.apache.arrow')
      exclude(group: 'org.apache.parquet')
      exclude(group: 'org.roaringbitmap')
    }

    implementation("org.apache.parquet:parquet-column")
    implementation("org.apache.parquet:parquet-hadoop")

    implementation("org.apache.orc:orc-core::nohive") {
      exclude(group: 'org.apache.hadoop')
      exclude(group: 'commons-lang')
      // These artifacts are shaded and included in the orc-core fat jar
      exclude(group: 'com.google.protobuf', module: 'protobuf-java')
      exclude(group: 'org.apache.hive', module: 'hive-storage-api')
    }

    implementation("org.apache.arrow:arrow-vector") {
      exclude(group: 'io.netty', module: 'netty-buffer')
      exclude(group: 'io.netty', module: 'netty-common')
      exclude(group: 'com.google.code.findbugs', module: 'jsr305')
    }

    testImplementation("org.apache.hadoop:hadoop-minicluster") {
      exclude(group: 'org.apache.avro', module: 'avro')
    }
    testImplementation(project(path: ':iceberg-hive-metastore'))
    // Test fixtures exported by sibling modules through their 'testArtifacts' configuration.
    [
      ':iceberg-hive-metastore',
      ':iceberg-api',
      ':iceberg-core',
      ':iceberg-data'
    ].each { modulePath ->
      testImplementation(project(path: modulePath, configuration: 'testArtifacts'))
    }
    testImplementation("org.xerial:sqlite-jdbc")
  }

  tasks.withType(Test) {
    // Settings for vectorized reads:
    // skip the costly bounds check Arrow performs on each index access
    systemProperty("arrow.enable_unsafe_memory_access", "true")
    // skip Arrow's per-get(index) null check; Iceberg manages nullability checks itself
    systemProperty("arrow.enable_null_check_for_get", "false")

    // Vectorized reads need more memory
    maxHeapSize = '2560m'
  }
}
| |
// Spark 3.1 SQL extensions module: Scala sources plus an ANTLR 4 grammar.
project(":iceberg-spark:iceberg-spark-extensions-3.1_2.12") {
  apply plugin: 'java-library'
  apply plugin: 'scala'
  apply plugin: 'com.github.alisiikh.scalastyle'
  apply plugin: 'antlr'

  configurations {
    /*
     The Gradle Antlr plugin erroneously adds both antlr-build and runtime dependencies to the runtime path. This
     bug https://github.com/gradle/gradle/issues/820 exists because older versions of Antlr do not have separate
     runtime and implementation dependencies and they do not want to break backwards compatibility. So to only end up
     with the runtime dependency on the runtime classpath we remove the dependencies added by the plugin here. Then
     add the runtime dependency back to only the runtime configuration manually.
    */
    implementation {
      extendsFrom = extendsFrom.findAll { it != configurations.antlr }
    }
  }

  dependencies {
    compileOnly "org.scala-lang:scala-library"
    compileOnly project(path: ':iceberg-bundled-guava', configuration: 'shadow')
    compileOnly project(':iceberg-api')
    compileOnly project(':iceberg-core')
    compileOnly project(':iceberg-data')
    compileOnly project(':iceberg-orc')
    compileOnly project(':iceberg-common')
    compileOnly project(':iceberg-spark:iceberg-spark-3.1_2.12')
    compileOnly("org.apache.spark:spark-hive_2.12:${sparkVersion}") {
      exclude group: 'org.apache.avro', module: 'avro'
      exclude group: 'org.apache.arrow'
      exclude group: 'org.apache.parquet'
      exclude group: 'org.roaringbitmap'
    }

    testImplementation project(path: ':iceberg-hive-metastore')
    // Declared once: the original listed the hive-metastore testArtifacts dependency twice.
    testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')

    testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-orc', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-spark:iceberg-spark-3.1_2.12', configuration: 'testArtifacts')

    testImplementation "org.apache.avro:avro"

    // Required because the antlr plugin dependencies are removed from the implementation configuration
    // in the configurations block of this project.
    runtimeOnly "org.antlr:antlr4-runtime:4.8"
    antlr "org.antlr:antlr4:4.8"
  }

  generateGrammarSource {
    maxHeapSize = "64m"
    // Also generate visitor classes, and emit the generated sources into the given package.
    arguments += ['-visitor', '-package', 'org.apache.spark.sql.catalyst.parser.extensions']
  }
}
| |
// Spark 3.1 runtime module: produces a single shaded jar and tests it through the
// 'integration' source set. The plain jar task is disabled at the end of this block.
project(':iceberg-spark:iceberg-spark-runtime-3.1_2.12') {
  apply plugin: 'com.github.johnrengelman.shadow'

  tasks.jar.dependsOn tasks.shadowJar

  sourceSets {
    integration {
      java.srcDir "$projectDir/src/integration/java"
      resources.srcDir "$projectDir/src/integration/resources"
    }
  }

  configurations {
    implementation {
      exclude group: 'org.apache.spark'
      // included in Spark
      exclude group: 'org.slf4j'
      exclude group: 'org.apache.commons'
      exclude group: 'commons-pool'
      exclude group: 'commons-codec'
      exclude group: 'org.xerial.snappy'
      exclude group: 'javax.xml.bind'
      exclude group: 'javax.annotation'
      exclude group: 'com.github.luben'
      exclude group: 'com.ibm.icu'
      exclude group: 'org.glassfish'
      exclude group: 'org.abego.treelayout'
      exclude group: 'org.antlr'
    }
  }

  dependencies {
    api project(':iceberg-api')
    implementation project(':iceberg-spark:iceberg-spark-3.1_2.12')
    implementation project(':iceberg-spark:iceberg-spark-extensions-3.1_2.12')
    implementation project(':iceberg-aws')
    implementation(project(':iceberg-aliyun')) {
      exclude group: 'edu.umd.cs.findbugs', module: 'findbugs'
      exclude group: 'org.apache.httpcomponents', module: 'httpclient'
      exclude group: 'commons-logging', module: 'commons-logging'
    }
    implementation project(':iceberg-hive-metastore')
    implementation(project(':iceberg-nessie')) {
      exclude group: 'com.google.code.findbugs', module: 'jsr305'
    }

    integrationImplementation "org.apache.spark:spark-hive_2.12:${sparkVersion}"
    integrationImplementation 'org.junit.vintage:junit-vintage-engine'
    integrationImplementation 'org.slf4j:slf4j-simple'
    integrationImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    integrationImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    integrationImplementation project(path: ':iceberg-spark:iceberg-spark-3.1_2.12', configuration: 'testArtifacts')
    integrationImplementation project(path: ':iceberg-spark:iceberg-spark-extensions-3.1_2.12', configuration: 'testArtifacts')
    // Not allowed on our classpath, only the runtime jar is allowed
    integrationCompileOnly project(':iceberg-spark:iceberg-spark-extensions-3.1_2.12')
    integrationCompileOnly project(':iceberg-spark:iceberg-spark-3.1_2.12')
    integrationCompileOnly project(':iceberg-api')
  }

  shadowJar {
    // Bundle everything on the runtime classpath (after the exclusions configured above).
    configurations = [project.configurations.runtimeClasspath]

    zip64 true

    // include the LICENSE and NOTICE files for the shaded Jar
    from(projectDir) {
      include 'LICENSE'
      include 'NOTICE'
    }

    // Relocate dependencies to avoid conflicts
    relocate 'com.google', 'org.apache.iceberg.shaded.com.google'
    relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml'
    relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes'
    relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework'
    relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro'
    relocate 'avro.shaded', 'org.apache.iceberg.shaded.org.apache.avro.shaded'
    relocate 'com.thoughtworks.paranamer', 'org.apache.iceberg.shaded.com.thoughtworks.paranamer'
    relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet'
    relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded'
    relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc'
    relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift'
    // relocate Arrow and related deps to shade Iceberg specific version
    relocate 'io.netty.buffer', 'org.apache.iceberg.shaded.io.netty.buffer'
    relocate 'org.apache.arrow', 'org.apache.iceberg.shaded.org.apache.arrow'
    relocate 'com.carrotsearch', 'org.apache.iceberg.shaded.com.carrotsearch'
    relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra'
    relocate 'org.roaringbitmap', 'org.apache.iceberg.shaded.org.roaringbitmap'

    // Build the shaded jar without a classifier. Uses the lazy archiveClassifier property
    // instead of the deprecated `classifier` setter, which newer Gradle releases removed.
    archiveClassifier.set(null)
  }

  task integrationTest(type: Test) {
    description = "Test Spark3 Runtime Jar against Spark 3.1"
    group = "verification"
    testClassesDirs = sourceSets.integration.output.classesDirs
    // Run the integration classes against the shaded jar itself.
    classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
    // Register the shaded jar as a task input.
    inputs.file(shadowJar.archiveFile.get().asFile.path)
  }
  integrationTest.dependsOn shadowJar

  jar {
    enabled = false
  }
}
| |