| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io._ |
| import java.nio.charset.StandardCharsets.UTF_8 |
| import java.nio.file.Files |
| import java.util.Locale |
| |
| import scala.io.Source |
| import scala.util.Properties |
| import scala.collection.JavaConverters._ |
| import scala.collection.mutable.ListBuffer |
| |
| import sbt._ |
| import sbt.Classpaths.publishTask |
| import sbt.Keys._ |
| import sbt.librarymanagement.{ VersionNumber, SemanticSelector } |
| import com.etsy.sbt.checkstyle.CheckstylePlugin.autoImport._ |
| import com.simplytyped.Antlr4Plugin._ |
| import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} |
| import com.typesafe.tools.mima.plugin.MimaKeys |
| import org.scalastyle.sbt.ScalastylePlugin.autoImport._ |
| import org.scalastyle.sbt.Tasks |
| import sbtassembly.AssemblyPlugin.autoImport._ |
| |
| import spray.revolver.RevolverPlugin._ |
| |
| object BuildCommons { |
| |
| private val buildLocation = file(".").getAbsoluteFile.getParentFile |
| |
| val sqlProjects@Seq(catalyst, sql, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro) = Seq( |
| "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro" |
| ).map(ProjectRef(buildLocation, _)) |
| |
| val streamingProjects@Seq(streaming, streamingKafka010) = |
| Seq("streaming", "streaming-kafka-0-10").map(ProjectRef(buildLocation, _)) |
| |
| val allProjects@Seq( |
| core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, kvstore, _* |
| ) = Seq( |
| "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe", |
| "tags", "sketch", "kvstore" |
| ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects |
| |
| val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn, |
| sparkGangliaLgpl, streamingKinesisAsl, |
| dockerIntegrationTests, hadoopCloud, kubernetesIntegrationTests) = |
| Seq("kubernetes", "mesos", "yarn", |
| "ganglia-lgpl", "streaming-kinesis-asl", |
| "docker-integration-tests", "hadoop-cloud", "kubernetes-integration-tests").map(ProjectRef(buildLocation, _)) |
| |
| val assemblyProjects@Seq(networkYarn, streamingKafka010Assembly, streamingKinesisAslAssembly) = |
| Seq("network-yarn", "streaming-kafka-0-10-assembly", "streaming-kinesis-asl-assembly") |
| .map(ProjectRef(buildLocation, _)) |
| |
| val copyJarsProjects@Seq(assembly, examples) = Seq("assembly", "examples") |
| .map(ProjectRef(buildLocation, _)) |
| |
| val tools = ProjectRef(buildLocation, "tools") |
| // Root project. |
| val spark = ProjectRef(buildLocation, "spark") |
| val sparkHome = buildLocation |
| |
| val testTempDir = s"$sparkHome/target/tmp" |
| |
| val javaVersion = settingKey[String]("source and target JVM version for javac and scalac") |
| } |
| |
| object SparkBuild extends PomBuild { |
| |
| import BuildCommons._ |
| import sbtunidoc.GenJavadocPlugin |
| import sbtunidoc.GenJavadocPlugin.autoImport._ |
| import scala.collection.mutable.Map |
| |
| val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty |
| |
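  // Profiles mirror Maven's -P flags. For example, running
  //   SBT_MAVEN_PROFILES="-Phive,-Phive-thriftserver" build/sbt ...
  // (or setting the sbt.maven.profiles system property) enables those profiles; any leading
  // "-P" is stripped, and entries may be separated by whitespace or commas.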
| override val profiles = { |
| val profiles = Properties.envOrNone("SBT_MAVEN_PROFILES") |
| .orElse(Properties.propOrNone("sbt.maven.profiles")) match { |
| case None => Seq("sbt") |
| case Some(v) => |
| v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq |
| } |
| if (profiles.contains("jdwp-test-debug")) { |
| sys.props.put("test.jdwp.enabled", "true") |
| } |
| profiles |
| } |
| |
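  // Extra Maven properties can be passed the same way; for example,
  //   SBT_MAVEN_PROPERTIES="hadoop.version=3.3.1" build/sbt ...
  // splits the value on whitespace/commas and installs each key=value pair as a system property.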
| Properties.envOrNone("SBT_MAVEN_PROPERTIES") match { |
| case Some(v) => |
| v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.split("=")).foreach(x => System.setProperty(x(0), x(1))) |
| case _ => |
| } |
| |
| override val userPropertiesMap = System.getProperties.asScala.toMap |
| |
| lazy val MavenCompile = config("m2r") extend(Compile) |
| lazy val SbtCompile = config("sbt") extend(Compile) |
| |
| lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = GenJavadocPlugin.projectSettings ++ Seq( |
| scalacOptions ++= Seq( |
| "-P:genjavadoc:strictVisibility=true" // hide package private types |
| ) |
| ) |
| |
| lazy val scalaStyleRules = Project("scalaStyleRules", file("scalastyle")) |
| .settings( |
| libraryDependencies += "org.scalastyle" %% "scalastyle" % "1.0.0" |
| ) |
| |
| lazy val scalaStyleOnCompile = taskKey[Unit]("scalaStyleOnCompile") |
| |
| lazy val scalaStyleOnTest = taskKey[Unit]("scalaStyleOnTest") |
| |
  // We special case the 'println' lint rule to only be a warning on compile, because adding
  // printlns for debugging is a common use case and they are easy to remember to remove.
| val scalaStyleOnCompileConfig: String = { |
| val in = "scalastyle-config.xml" |
| val out = "scalastyle-on-compile.generated.xml" |
| val replacements = Map( |
| """customId="println" level="error"""" -> """customId="println" level="warn"""" |
| ) |
    // Read the original config, closing the file handle when done.
    val source = Source.fromFile(in)
    var contents = try source.getLines().mkString("\n") finally source.close()
| for ((k, v) <- replacements) { |
| require(contents.contains(k), s"Could not rewrite '$k' in original scalastyle config.") |
| contents = contents.replace(k, v) |
| } |
| new PrintWriter(out) { |
| write(contents) |
| close() |
| } |
| out |
| } |
| |
| // Return a cached scalastyle task for a given configuration (usually Compile or Test) |
| private def cachedScalaStyle(config: Configuration) = Def.task { |
| val logger = streams.value.log |
| // We need a different cache dir per Configuration, otherwise they collide |
| val cacheDir = target.value / s"scalastyle-cache-${config.name}" |
| val cachedFun = FileFunction.cached(cacheDir, FilesInfo.lastModified, FilesInfo.exists) { |
| (inFiles: Set[File]) => { |
| val args: Seq[String] = Seq.empty |
| val scalaSourceV = Seq(file((config / scalaSource).value.getAbsolutePath)) |
| val configV = (ThisBuild / baseDirectory).value / scalaStyleOnCompileConfig |
| val configUrlV = (config / scalastyleConfigUrl).value |
| val streamsV = ((config / streams).value: @sbtUnchecked) |
| val failOnErrorV = true |
| val failOnWarningV = false |
| val scalastyleTargetV = (config / scalastyleTarget).value |
| val configRefreshHoursV = (config / scalastyleConfigRefreshHours).value |
| val targetV = (config / target).value |
| val configCacheFileV = (config / scalastyleConfigUrlCacheFile).value |
| |
| logger.info(s"Running scalastyle on ${name.value} in ${config.name}") |
| Tasks.doScalastyle(args, configV, configUrlV, failOnErrorV, failOnWarningV, scalaSourceV, |
| scalastyleTargetV, streamsV, configRefreshHoursV, targetV, configCacheFileV) |
| |
| Set.empty |
| } |
| } |
| |
| cachedFun(findFiles((config / scalaSource).value)) |
| } |
| |
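  // Recursively collect a directory and everything under it, so the cached task above can
  // detect added, removed, or modified sources via FilesInfo.lastModified/FilesInfo.exists.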
| private def findFiles(file: File): Set[File] = if (file.isDirectory) { |
| file.listFiles().toSet.flatMap(findFiles) + file |
| } else { |
| Set(file) |
| } |
| |
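  // Hook the scalastyle tasks into compilation: each compile first runs the style check for the
  // corresponding configuration. Set NOLINT_ON_COMPILE (see sharedSettings) to skip this.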
| def enableScalaStyle: Seq[sbt.Def.Setting[_]] = Seq( |
| scalaStyleOnCompile := cachedScalaStyle(Compile).value, |
| scalaStyleOnTest := cachedScalaStyle(Test).value, |
| (scalaStyleOnCompile / logLevel) := Level.Warn, |
| (scalaStyleOnTest / logLevel) := Level.Warn, |
| (Compile / compile) := { |
| scalaStyleOnCompile.value |
| (Compile / compile).value |
| }, |
| (Test / compile) := { |
| scalaStyleOnTest.value |
| (Test / compile).value |
| } |
| ) |
| |
  // Silencer: Scala compiler plugin for warning suppression
  // Aim: enable fatal warnings, but suppress warnings related to the use of deprecated APIs.
  // The mechanism depends on the Scala version:
  //   <2.13.2 - silencer 1.7.5 plus compiler settings to enable fatal warnings
  //   2.13.2+ - no silencer; -Wconf is configured to achieve the same effect
| lazy val compilerWarningSettings: Seq[sbt.Def.Setting[_]] = Seq( |
| libraryDependencies ++= { |
| if (VersionNumber(scalaVersion.value).matchesSemVer(SemanticSelector("<2.13.2"))) { |
| val silencerVersion = "1.7.5" |
| Seq( |
| "org.scala-lang.modules" %% "scala-collection-compat" % "2.2.0", |
| compilerPlugin("com.github.ghik" % "silencer-plugin" % silencerVersion cross CrossVersion.full), |
| "com.github.ghik" % "silencer-lib" % silencerVersion % Provided cross CrossVersion.full |
| ) |
| } else { |
| Seq.empty |
| } |
| }, |
| (Compile / scalacOptions) ++= { |
| if (VersionNumber(scalaVersion.value).matchesSemVer(SemanticSelector("<2.13.2"))) { |
| Seq( |
| "-Xfatal-warnings", |
| "-deprecation", |
| "-Ywarn-unused-import", |
| "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and suppress them |
| ) |
| } else { |
| Seq( |
| // replace -Xfatal-warnings with fine-grained configuration, since 2.13.2 |
| // verbose warning on deprecation, error on all others |
| // see `scalac -Wconf:help` for details |
| "-Wconf:cat=deprecation:wv,any:e", |
| // 2.13-specific warning hits to be muted (as narrowly as possible) and addressed separately |
| // TODO(SPARK-33499): Enable this option when Scala 2.12 is no longer supported. |
| // "-Wunused:imports", |
| "-Wconf:cat=lint-multiarg-infix:wv", |
| "-Wconf:cat=other-nullary-override:wv", |
| "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", |
| "-Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv", |
| "-Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite.<local OutputCommitCoordinatorSuite>.futureAction:wv", |
| "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv", |
| // SPARK-33775 Suppress compilation warnings that contain the following contents. |
        // TODO(SPARK-33805): Undo the corresponding deprecated usage suppression rule once
        // it is fixed.
| "-Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:s", |
| "-Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s", |
| "-Wconf:msg=Auto-application to \\`\\(\\)\\` is deprecated:s", |
| "-Wconf:msg=method with a single empty parameter list overrides method without any parameter list:s", |
| "-Wconf:msg=method without a parameter list overrides a method with a single empty one:s", |
| // SPARK-35574 Prevent the recurrence of compilation warnings related to `procedure syntax is deprecated` |
| "-Wconf:cat=deprecation&msg=procedure syntax is deprecated:e" |
| ) |
| } |
| } |
| ) |
| |
| lazy val sharedSettings = sparkGenjavadocSettings ++ |
| compilerWarningSettings ++ |
| (if (sys.env.contains("NOLINT_ON_COMPILE")) Nil else enableScalaStyle) ++ Seq( |
| (Compile / exportJars) := true, |
| (Test / exportJars) := false, |
| javaHome := sys.env.get("JAVA_HOME") |
| .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() }) |
| .map(file), |
| publishMavenStyle := true, |
| unidocGenjavadocVersion := "0.18", |
| |
| // Override SBT's default resolvers: |
| resolvers := Seq( |
| // Google Mirror of Maven Central, placed first so that it's used instead of flaky Maven Central. |
| // See https://storage-download.googleapis.com/maven-central/index.html for more info. |
| "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/", |
| DefaultMavenRepository, |
| Resolver.mavenLocal, |
| Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) |
| ), |
| externalResolvers := resolvers.value, |
| otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, |
| (MavenCompile / publishLocalConfiguration) := PublishConfiguration() |
| .withResolverName("dotM2") |
| .withArtifacts(packagedArtifacts.value.toVector) |
| .withLogging(ivyLoggingLevel.value), |
| (SbtCompile / publishLocalConfiguration) := PublishConfiguration() |
| .withResolverName("ivyLocal") |
| .withArtifacts(packagedArtifacts.value.toVector) |
| .withLogging(ivyLoggingLevel.value), |
| (MavenCompile / publishMavenStyle) := true, |
| (SbtCompile / publishMavenStyle) := false, |
| (MavenCompile / publishLocal) := publishTask((MavenCompile / publishLocalConfiguration)).value, |
| (SbtCompile / publishLocal) := publishTask((SbtCompile / publishLocalConfiguration)).value, |
| publishLocal := Seq((MavenCompile / publishLocal), (SbtCompile / publishLocal)).dependOn.value, |
| |
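    // java.version looks like "1.8.0_292" on JDK 8 or "17.0.1" on newer JDKs; splitting on
    // [+.-] yields the major version first (the legacy "1." prefix is handled where needed).
    // On JDK 16+ the incubating vector/foreign-memory modules must be added explicitly.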
    javaOptions ++= {
      val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3)
      val major = versionParts(0).toInt
      if (major >= 16) {
        Seq("--add-modules=jdk.incubator.vector,jdk.incubator.foreign", "-Dforeign.restricted=warn")
      } else {
        Seq.empty
      }
    },
| |
| (Compile / doc / javacOptions) ++= { |
| val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) |
| var major = versionParts(0).toInt |
| if (major == 1) major = versionParts(1).toInt |
| if (major >= 8) Seq("-Xdoclint:all", "-Xdoclint:-missing") else Seq.empty |
| }, |
| |
| javaVersion := SbtPomKeys.effectivePom.value.getProperties.get("java.version").asInstanceOf[String], |
| |
| (Compile / javacOptions) ++= Seq( |
| "-encoding", UTF_8.name(), |
| "-source", javaVersion.value |
| ), |
    // The -target and -Xlint:unchecked options cannot be set in the Compile configuration scope since
| // `javadoc` doesn't play nicely with them; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629 |
| // for additional discussion and explanation. |
| (Compile / compile / javacOptions) ++= Seq( |
| "-target", javaVersion.value, |
| "-Xlint:unchecked" |
| ), |
| |
| (Compile / scalacOptions) ++= Seq( |
| s"-target:jvm-${javaVersion.value}", |
| "-sourcepath", (ThisBuild / baseDirectory).value.getAbsolutePath // Required for relative source links in scaladoc |
| ), |
| |
| SbtPomKeys.profiles := profiles, |
| |
| // Remove certain packages from Scaladoc |
| (Compile / doc / scalacOptions) := Seq( |
| "-groups", |
| "-skip-packages", Seq( |
| "org.apache.spark.api.python", |
| "org.apache.spark.network", |
| "org.apache.spark.deploy", |
| "org.apache.spark.util.collection" |
| ).mkString(":"), |
| "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc" |
| ) ++ { |
| // Do not attempt to scaladoc javadoc comments under 2.12 since it can't handle inner classes |
| if (scalaBinaryVersion.value == "2.12") Seq("-no-java-comments") else Seq.empty |
| }, |
| |
| // disable Mima check for all modules, |
| // to be enabled in specific ones that have previous artifacts |
| MimaKeys.mimaFailOnNoPrevious := false, |
| |
| // To prevent intermittent compilation failures, see also SPARK-33297 |
| // Apparently we can remove this when we use JDK 11. |
| Test / classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat |
| ) |
| |
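  // Accumulate extra settings for a project; everything registered here is applied when sbt
  // constructs the projects in projectDefinitions() below.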
| def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { |
| val existingSettings = projectsMap.getOrElse(projectRef.project, Seq[Setting[_]]()) |
| projectsMap += (projectRef.project -> (existingSettings ++ settings)) |
| } |
| |
  // Note: the ordering of these settings matters.
| /* Enable shared settings on all projects */ |
| (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools)) |
| .foreach(enable(sharedSettings ++ DependencyOverrides.settings ++ |
| ExcludedDependencies.settings ++ Checkstyle.settings)) |
| |
| /* Enable tests settings for all projects except examples, assembly and tools */ |
| (allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings)) |
| |
| val mimaProjects = allProjects.filterNot { x => |
| Seq( |
| spark, hive, hiveThriftServer, catalyst, repl, networkCommon, networkShuffle, networkYarn, |
| unsafe, tags, tokenProviderKafka010, sqlKafka010, kvstore, avro |
| ).contains(x) |
| } |
| |
| mimaProjects.foreach { x => |
| enable(MimaBuild.mimaSettings(sparkHome, x))(x) |
| } |
| |
| /* Generate and pick the spark build info from extra-resources */ |
| enable(Core.settings)(core) |
| |
| /* Unsafe settings */ |
| enable(Unsafe.settings)(unsafe) |
| |
| /* |
   * Set up tasks to copy dependencies during packaging. This step can be disabled on the command
   * line, so that dev/mima can run without trying to copy these files again and potentially
| * causing issues. |
| */ |
| if (!"false".equals(System.getProperty("copyDependencies"))) { |
| copyJarsProjects.foreach(enable(CopyDependencies.settings)) |
| } |
| |
| /* Enable Assembly for all assembly projects */ |
| assemblyProjects.foreach(enable(Assembly.settings)) |
| |
| /* Package pyspark artifacts in a separate zip file for YARN. */ |
| enable(PySparkAssembly.settings)(assembly) |
| |
| /* Enable unidoc only for the root spark project */ |
| enable(Unidoc.settings)(spark) |
| |
| /* Catalyst ANTLR generation settings */ |
| enable(Catalyst.settings)(catalyst) |
| |
| /* Spark SQL Core console settings */ |
| enable(SQL.settings)(sql) |
| |
| /* Hive console settings */ |
| enable(Hive.settings)(hive) |
| |
| // SPARK-14738 - Remove docker tests from main Spark build |
| // enable(DockerIntegrationTests.settings)(dockerIntegrationTests) |
| |
| enable(KubernetesIntegrationTests.settings)(kubernetesIntegrationTests) |
| |
| enable(YARN.settings)(yarn) |
| |
| if (profiles.contains("sparkr")) { |
| enable(SparkR.settings)(core) |
| } |
| |
| /** |
| * Adds the ability to run the spark shell directly from SBT without building an assembly |
| * jar. |
| * |
| * Usage: `build/sbt sparkShell` |
| */ |
| val sparkShell = taskKey[Unit]("start a spark-shell.") |
| val sparkPackage = inputKey[Unit]( |
| s""" |
| |Download and run a spark package. |
       |Usage: `build/sbt "sparkPackage <group:artifact:version> <MainClass> [args]"`
| """.stripMargin) |
  val sparkSql = taskKey[Unit]("start the Spark SQL CLI.")
| |
| enable(Seq( |
| (run / connectInput) := true, |
| fork := true, |
    (run / outputStrategy) := Some(StdoutOutput),
| |
| javaOptions += "-Xmx2g", |
| |
| sparkShell := { |
| (Compile / runMain).toTask(" org.apache.spark.repl.Main -usejavacp").value |
| }, |
| |
| sparkPackage := { |
| import complete.DefaultParsers._ |
| val packages :: className :: otherArgs = spaceDelimited("<group:artifact:version> <MainClass> [args]").parsed.toList |
| val scalaRun = (run / runner).value |
| val classpath = (Runtime / fullClasspath).value |
| val args = Seq("--packages", packages, "--class", className, (LocalProject("core") / Compile / Keys.`package`) |
| .value.getCanonicalPath) ++ otherArgs |
| println(args) |
| scalaRun.run("org.apache.spark.deploy.SparkSubmit", classpath.map(_.data), args, streams.value.log) |
| }, |
| |
| (Compile / javaOptions) += "-Dspark.master=local", |
| |
| sparkSql := { |
| (Compile / runMain).toTask(" org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver").value |
| } |
| ))(assembly) |
| |
| enable(Seq(sparkShell := (LocalProject("assembly") / sparkShell).value))(spark) |
| |
| // TODO: move this to its upstream project. |
| override def projectDefinitions(baseDirectory: File): Seq[Project] = { |
| super.projectDefinitions(baseDirectory).map { x => |
| if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*) |
| else x.settings(Seq[Setting[_]](): _*) |
| } ++ Seq[Project](OldDeps.project) |
| } |
| |
| if (!sys.env.contains("SERIAL_SBT_TESTS")) { |
| allProjects.foreach(enable(SparkParallelTestGrouping.settings)) |
| } |
| } |
| |
| object SparkParallelTestGrouping { |
  // Settings for parallelizing tests. The basic strategy here is to run the slowest suites (or
  // collections of suites) in their own forked JVMs, allowing us to gain parallelism within an
  // SBT project. Here, we take an opt-in approach where the default behavior is to run all
  // tests sequentially in a single JVM, requiring us to manually opt in to the extra parallelism.
  //
  // There are a few reasons why such an opt-in approach is good:
| // |
| // 1. Launching one JVM per suite adds significant overhead for short-running suites. In |
| // addition to JVM startup time and JIT warmup, it appears that initialization of Derby |
| // metastores can be very slow so creating a fresh warehouse per suite is inefficient. |
| // |
| // 2. When parallelizing within a project we need to give each forked JVM a different tmpdir |
| // so that the metastore warehouses do not collide. Unfortunately, it seems that there are |
| // some tests which have an overly tight dependency on the default tmpdir, so those fragile |
| // tests need to continue re-running in the default configuration (or need to be rewritten). |
| // Fixing that problem would be a huge amount of work for limited payoff in most cases |
| // because most test suites are short-running. |
| // |
| |
| private val testsWhichShouldRunInTheirOwnDedicatedJvm = Set( |
| "org.apache.spark.DistributedSuite", |
| "org.apache.spark.sql.catalyst.expressions.DateExpressionsSuite", |
| "org.apache.spark.sql.catalyst.expressions.HashExpressionsSuite", |
| "org.apache.spark.sql.catalyst.expressions.CastSuite", |
| "org.apache.spark.sql.catalyst.expressions.MathExpressionsSuite", |
| "org.apache.spark.sql.hive.HiveExternalCatalogSuite", |
| "org.apache.spark.sql.hive.StatisticsSuite", |
| "org.apache.spark.sql.hive.client.VersionsSuite", |
| "org.apache.spark.sql.hive.client.HiveClientVersions", |
| "org.apache.spark.sql.hive.HiveExternalCatalogVersionsSuite", |
| "org.apache.spark.ml.classification.LogisticRegressionSuite", |
| "org.apache.spark.ml.classification.LinearSVCSuite", |
| "org.apache.spark.sql.SQLQueryTestSuite", |
| "org.apache.spark.sql.hive.client.HadoopVersionInfoSuite", |
| "org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperationSuite", |
| "org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite", |
| "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite", |
| "org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite", |
| "org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2ListenerSuite", |
| "org.apache.spark.sql.kafka010.KafkaDelegationTokenSuite", |
| "org.apache.spark.shuffle.KubernetesLocalDiskShuffleDataIOSuite" |
| ) |
| |
| private val DEFAULT_TEST_GROUP = "default_test_group" |
| private val HIVE_EXECUTION_TEST_GROUP = "hive_execution_test_group" |
| |
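  // For example, "org.apache.spark.DistributedSuite" maps to a group of its own (and hence a
  // dedicated JVM), any suite under org.apache.spark.sql.hive.execution maps to
  // HIVE_EXECUTION_TEST_GROUP, and everything else falls into DEFAULT_TEST_GROUP.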
| private def testNameToTestGroup(name: String): String = name match { |
| case _ if testsWhichShouldRunInTheirOwnDedicatedJvm.contains(name) => name |
    // Unlike the cases in testsWhichShouldRunInTheirOwnDedicatedJvm, here we group all
    // suites under `org.apache.spark.sql.hive.execution.*` into a single group, instead of
| // launching one JVM per suite. |
| case _ if name.contains("org.apache.spark.sql.hive.execution") => HIVE_EXECUTION_TEST_GROUP |
| case _ => DEFAULT_TEST_GROUP |
| } |
| |
| lazy val settings = Seq( |
| (Test / testGrouping) := { |
| val tests: Seq[TestDefinition] = (Test / definedTests).value |
| val defaultForkOptions = ForkOptions( |
| javaHome = javaHome.value, |
| outputStrategy = outputStrategy.value, |
| bootJars = Vector.empty[java.io.File], |
| workingDirectory = Some(baseDirectory.value), |
| runJVMOptions = (Test / javaOptions).value.toVector, |
| connectInput = connectInput.value, |
| envVars = (Test / envVars).value |
| ) |
| tests.groupBy(test => testNameToTestGroup(test.name)).map { case (groupName, groupTests) => |
| val forkOptions = { |
| if (groupName == DEFAULT_TEST_GROUP) { |
| defaultForkOptions |
| } else { |
| defaultForkOptions.withRunJVMOptions(defaultForkOptions.runJVMOptions ++ |
| Seq(s"-Djava.io.tmpdir=${baseDirectory.value}/target/tmp/$groupName")) |
| } |
| } |
| new Tests.Group( |
| name = groupName, |
| tests = groupTests, |
| runPolicy = Tests.SubProcess(forkOptions)) |
| } |
| }.toSeq |
| ) |
| } |
| |
| object Core { |
| import scala.sys.process.Process |
| lazy val settings = Seq( |
| (Compile / resourceGenerators) += Def.task { |
| val buildScript = baseDirectory.value + "/../build/spark-build-info" |
| val targetDir = baseDirectory.value + "/target/extra-resources/" |
| val command = Seq("bash", buildScript, targetDir, version.value) |
| Process(command).!! |
| val propsFile = baseDirectory.value / "target" / "extra-resources" / "spark-version-info.properties" |
| Seq(propsFile) |
| }.taskValue |
| ) |
| } |
| |
| object Unsafe { |
| lazy val settings = Seq( |
| // This option is needed to suppress warnings from sun.misc.Unsafe usage |
| (Compile / javacOptions) += "-XDignore.symbol.file" |
| ) |
| } |
| |
| |
| object DockerIntegrationTests { |
  // This overrides the guava version pinned in DependencyOverrides:
| lazy val settings = Seq( |
| dependencyOverrides += "com.google.guava" % "guava" % "18.0", |
| resolvers += "DB2" at "https://app.camunda.com/nexus/content/repositories/public/", |
| libraryDependencies += "com.oracle" % "ojdbc6" % "11.2.0.1.0" from "https://app.camunda.com/nexus/content/repositories/public/com/oracle/ojdbc6/11.2.0.1.0/ojdbc6-11.2.0.1.0.jar" // scalastyle:ignore |
| ) |
| } |
| |
| /** |
| * These settings run a hardcoded configuration of the Kubernetes integration tests using |
| * minikube. Docker images will have the "dev" tag, and will be overwritten every time the |
| * integration tests are run. The integration tests are actually bound to the "test" phase, |
| * so running "test" on this module will run the integration tests. |
| * |
| * There are two ways to run the tests: |
| * - the "tests" task builds docker images and runs the test, so it's a little slow. |
| * - the "run-its" task just runs the tests on a pre-built set of images. |
| * |
| * Note that this does not use the shell scripts that the maven build uses, which are more |
| * configurable. This is meant as a quick way for developers to run these tests against their |
| * local changes. |
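 *
 * For example, assuming the corresponding profiles are enabled:
 *   build/sbt -Pkubernetes -Pkubernetes-integration-tests "kubernetes-integration-tests/test"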
| */ |
| object KubernetesIntegrationTests { |
| import BuildCommons._ |
| import scala.sys.process.Process |
| |
| val dockerBuild = TaskKey[Unit]("docker-imgs", "Build the docker images for ITs.") |
| val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") |
| val imageTag = settingKey[String]("Tag to use for images built during the test.") |
| val namespace = settingKey[String]("Namespace where to run pods.") |
| |
| // Hack: this variable is used to control whether to build docker images. It's updated by |
| // the tasks below in a non-obvious way, so that you get the functionality described in |
| // the scaladoc above. |
| private var shouldBuildImage = true |
| |
| lazy val settings = Seq( |
| imageTag := "dev", |
| namespace := "default", |
| dockerBuild := { |
| if (shouldBuildImage) { |
| val dockerTool = s"$sparkHome/bin/docker-image-tool.sh" |
| val bindingsDir = s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings" |
| val cmd = Seq(dockerTool, "-m", |
| "-t", imageTag.value, |
| "-p", s"$bindingsDir/python/Dockerfile", |
| "-R", s"$bindingsDir/R/Dockerfile", |
| "build" |
| ) |
| val ec = Process(cmd).! |
| if (ec != 0) { |
| throw new IllegalStateException(s"Process '${cmd.mkString(" ")}' exited with $ec.") |
| } |
| } |
| shouldBuildImage = true |
| }, |
| runITs := Def.taskDyn { |
| shouldBuildImage = false |
| Def.task { |
| (Test / test).value |
| } |
| }.value, |
| (Test / test) := (Test / test).dependsOn(dockerBuild).value, |
| (Test / javaOptions) ++= Seq( |
| "-Dspark.kubernetes.test.deployMode=minikube", |
| s"-Dspark.kubernetes.test.imageTag=${imageTag.value}", |
| s"-Dspark.kubernetes.test.namespace=${namespace.value}", |
| s"-Dspark.kubernetes.test.unpackSparkDir=$sparkHome" |
| ), |
| // Force packaging before building images, so that the latest code is tested. |
| dockerBuild := dockerBuild |
| .dependsOn(assembly / Compile / packageBin) |
| .dependsOn(examples / Compile / packageBin) |
| .value |
| ) |
| } |
| |
| /** |
| * Overrides to work around sbt's dependency resolution being different from Maven's. |
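 * The guava pin below can be changed from the command line, e.g. via -Dguava.version=<version>.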
| */ |
| object DependencyOverrides { |
| lazy val guavaVersion = sys.props.get("guava.version").getOrElse("14.0.1") |
| lazy val settings = Seq( |
| dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, |
| dependencyOverrides += "xerces" % "xercesImpl" % "2.12.0", |
| dependencyOverrides += "jline" % "jline" % "2.14.6", |
| dependencyOverrides += "org.apache.avro" % "avro" % "1.10.2") |
| } |
| |
| /** |
 * This excludes library dependencies in sbt, which are specified in maven but are
 * not needed by the sbt build.
| */ |
| object ExcludedDependencies { |
| lazy val settings = Seq( |
| libraryDependencies ~= { libs => libs.filterNot(_.name == "groovy-all") }, |
    // SPARK-33705: Due to sbt compiler issues, sbt brings exclusions defined in the maven pom
    // back onto the classpath directly and assembles test-scope artifacts into
    // assembly/target/scala-xx/jars, which are also added to the classpath of some unit tests
    // that launch a subprocess to run `spark-submit`, e.g. HiveThriftServer2Test.
    //
    // These artifacts are for the jersey-1 API while Spark uses jersey-2, so they cause test
    // flakiness through jar conflicts.
    //
    // Also, jersey-1 is only used by the yarn module (see resource-managers/yarn/pom.xml) for
    // testing purposes. Here we exclude them from the whole project scope and add them back for
    // yarn only.
| excludeDependencies ++= Seq( |
| ExclusionRule(organization = "com.sun.jersey"), |
| ExclusionRule("javax.servlet", "javax.servlet-api"), |
| ExclusionRule("javax.ws.rs", "jsr311-api"), |
| ExclusionRule("io.netty", "netty-handler"), |
| ExclusionRule("io.netty", "netty-transport-native-epoll")) |
| ) |
| } |
| |
| /** |
| * Project to pull previous artifacts of Spark for generating Mima excludes. |
| */ |
| object OldDeps { |
| |
| lazy val project = Project("oldDeps", file("dev")) |
| .settings(oldDepsSettings) |
| .disablePlugins(com.typesafe.sbt.pom.PomReaderPlugin) |
| |
| lazy val allPreviousArtifactKeys = Def.settingDyn[Seq[Set[ModuleID]]] { |
| SparkBuild.mimaProjects |
| .map { project => (project / MimaKeys.mimaPreviousArtifacts) } |
| .map(k => Def.setting(k.value)) |
| .join |
| } |
| |
| def oldDepsSettings() = Defaults.coreDefaultSettings ++ Seq( |
| name := "old-deps", |
| libraryDependencies := allPreviousArtifactKeys.value.flatten |
| ) |
| } |
| |
| object Catalyst { |
| import com.simplytyped.Antlr4Plugin |
| import com.simplytyped.Antlr4Plugin.autoImport._ |
| |
| lazy val settings = Antlr4Plugin.projectSettings ++ Seq( |
| (Antlr4 / antlr4Version) := SbtPomKeys.effectivePom.value.getProperties.get("antlr4.version").asInstanceOf[String], |
| (Antlr4 / antlr4PackageName) := Some("org.apache.spark.sql.catalyst.parser"), |
| (Antlr4 / antlr4GenListener) := true, |
| (Antlr4 / antlr4GenVisitor) := true, |
| (Antlr4 / antlr4TreatWarningsAsErrors) := true |
| ) |
| } |
| |
| object SQL { |
| lazy val settings = Seq( |
| (console / initialCommands) := |
| """ |
| |import org.apache.spark.SparkContext |
| |import org.apache.spark.sql.SQLContext |
| |import org.apache.spark.sql.catalyst.analysis._ |
| |import org.apache.spark.sql.catalyst.dsl._ |
| |import org.apache.spark.sql.catalyst.errors._ |
| |import org.apache.spark.sql.catalyst.expressions._ |
| |import org.apache.spark.sql.catalyst.plans.logical._ |
| |import org.apache.spark.sql.catalyst.rules._ |
| |import org.apache.spark.sql.catalyst.util._ |
| |import org.apache.spark.sql.execution |
| |import org.apache.spark.sql.functions._ |
| |import org.apache.spark.sql.types._ |
| | |
| |val sc = new SparkContext("local[*]", "dev-shell") |
| |val sqlContext = new SQLContext(sc) |
| |import sqlContext.implicits._ |
| |import sqlContext._ |
| """.stripMargin, |
| (console / cleanupCommands) := "sc.stop()" |
| ) |
| } |
| |
| object Hive { |
| |
| lazy val settings = Seq( |
    // Specifically disable assertions, since some Hive tests fail them
| (Test / javaOptions) := (Test / javaOptions).value.filterNot(_ == "-ea"), |
| // Hive tests need higher metaspace size |
| (Test / javaOptions) := (Test / javaOptions).value.filterNot(_.contains("MaxMetaspaceSize")), |
| (Test / javaOptions) += "-XX:MaxMetaspaceSize=2g", |
| // Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings |
| // only for this subproject. |
| scalacOptions := (scalacOptions map { currentOpts: Seq[String] => |
| currentOpts.filterNot(_ == "-deprecation") |
| }).value, |
| (console / initialCommands) := |
| """ |
| |import org.apache.spark.SparkContext |
| |import org.apache.spark.sql.catalyst.analysis._ |
| |import org.apache.spark.sql.catalyst.dsl._ |
| |import org.apache.spark.sql.catalyst.errors._ |
| |import org.apache.spark.sql.catalyst.expressions._ |
| |import org.apache.spark.sql.catalyst.plans.logical._ |
| |import org.apache.spark.sql.catalyst.rules._ |
| |import org.apache.spark.sql.catalyst.util._ |
| |import org.apache.spark.sql.execution |
| |import org.apache.spark.sql.functions._ |
| |import org.apache.spark.sql.hive._ |
| |import org.apache.spark.sql.hive.test.TestHive._ |
| |import org.apache.spark.sql.hive.test.TestHive.implicits._ |
| |import org.apache.spark.sql.types._""".stripMargin, |
| (console / cleanupCommands) := "sparkContext.stop()", |
| // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce |
    // in order to generate golden files. This is only required for developers who are adding
    // new query tests.
| (Test / fullClasspath) := (Test / fullClasspath).value.filterNot { f => f.toString.contains("jcl-over") } |
| ) |
| } |
| |
| object YARN { |
| val genConfigProperties = TaskKey[Unit]("gen-config-properties", |
| "Generate config.properties which contains a setting whether Hadoop is provided or not") |
| val propFileName = "config.properties" |
| val hadoopProvidedProp = "spark.yarn.isHadoopProvided" |
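  // The generated file holds a single line such as "spark.yarn.isHadoopProvided = true",
  // with the value taken from the effective Maven pom below.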
| |
| lazy val settings = Seq( |
| excludeDependencies --= Seq( |
| ExclusionRule(organization = "com.sun.jersey"), |
| ExclusionRule("javax.servlet", "javax.servlet-api"), |
| ExclusionRule("javax.ws.rs", "jsr311-api")), |
| Compile / unmanagedResources := |
| (Compile / unmanagedResources).value.filter(!_.getName.endsWith(s"$propFileName")), |
| genConfigProperties := { |
| val file = (Compile / classDirectory).value / s"org/apache/spark/deploy/yarn/$propFileName" |
| val isHadoopProvided = SbtPomKeys.effectivePom.value.getProperties.get(hadoopProvidedProp) |
| IO.write(file, s"$hadoopProvidedProp = $isHadoopProvided") |
| }, |
| Compile / copyResources := (Def.taskDyn { |
| val c = (Compile / copyResources).value |
| Def.task { |
| (Compile / genConfigProperties).value |
| c |
| } |
| }).value |
| ) |
| } |
| |
| object Assembly { |
| import sbtassembly.AssemblyUtils._ |
| import sbtassembly.AssemblyPlugin.autoImport._ |
| |
| val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.") |
| |
| lazy val settings = baseAssemblySettings ++ Seq( |
| (assembly / test) := {}, |
| hadoopVersion := { |
| sys.props.get("hadoop.version") |
| .getOrElse(SbtPomKeys.effectivePom.value.getProperties.get("hadoop.version").asInstanceOf[String]) |
| }, |
| (assembly / assemblyJarName) := { |
| lazy val hadoopVersionValue = hadoopVersion.value |
| if (moduleName.value.contains("streaming-kafka-0-10-assembly") |
| || moduleName.value.contains("streaming-kinesis-asl-assembly")) { |
| s"${moduleName.value}-${version.value}.jar" |
| } else { |
| s"${moduleName.value}-${version.value}-hadoop${hadoopVersionValue}.jar" |
| } |
| }, |
| (Test / assembly / assemblyJarName) := s"${moduleName.value}-test-${version.value}.jar", |
| (assembly / assemblyMergeStrategy) := { |
| case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") |
| => MergeStrategy.discard |
| case m if m.toLowerCase(Locale.ROOT).matches("meta-inf.*\\.sf$") |
| => MergeStrategy.discard |
| case "log4j.properties" => MergeStrategy.discard |
| case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/services/") |
| => MergeStrategy.filterDistinctLines |
| case "reference.conf" => MergeStrategy.concat |
| case _ => MergeStrategy.first |
| } |
| ) |
| } |
| |
| object PySparkAssembly { |
| import sbtassembly.AssemblyPlugin.autoImport._ |
| import java.util.zip.{ZipOutputStream, ZipEntry} |
| |
| lazy val settings = Seq( |
| // Use a resource generator to copy all .py files from python/pyspark into a managed directory |
| // to be included in the assembly. We can't just add "python/" to the assembly's resource dir |
| // list since that will copy unneeded / unwanted files. |
| (Compile / resourceGenerators) += Def.macroValueI((Compile / resourceManaged) map { outDir: File => |
| val src = new File(BuildCommons.sparkHome, "python/pyspark") |
| val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") |
| zipFile.delete() |
| zipRecursive(src, zipFile) |
| Seq.empty[File] |
| }).value |
| ) |
| |
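  // Zip the pyspark source tree into python/lib/pyspark.zip, preserving the directory layout
  // relative to the given source root.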
| private def zipRecursive(source: File, destZipFile: File) = { |
| val destOutput = new ZipOutputStream(new FileOutputStream(destZipFile)) |
| addFilesToZipStream("", source, destOutput) |
| destOutput.flush() |
| destOutput.close() |
| } |
| |
| private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = { |
| if (source.isDirectory()) { |
| output.putNextEntry(new ZipEntry(parent + source.getName())) |
| for (file <- source.listFiles()) { |
| addFilesToZipStream(parent + source.getName() + File.separator, file, output) |
| } |
| } else { |
| val in = new FileInputStream(source) |
| output.putNextEntry(new ZipEntry(parent + source.getName())) |
| val buf = new Array[Byte](8192) |
| var n = 0 |
| while (n != -1) { |
| n = in.read(buf) |
| if (n != -1) { |
| output.write(buf, 0, n) |
| } |
| } |
| output.closeEntry() |
| in.close() |
| } |
| } |
| |
| } |
| |
| object SparkR { |
| import scala.sys.process.Process |
| |
| val buildRPackage = taskKey[Unit]("Build the R package") |
| lazy val settings = Seq( |
| buildRPackage := { |
| val command = baseDirectory.value / ".." / "R" / "install-dev.sh" |
| Process(command.toString).!! |
| }, |
| (Compile / compile) := (Def.taskDyn { |
| val c = (Compile / compile).value |
| Def.task { |
| (Compile / buildRPackage).value |
| c |
| } |
| }).value |
| ) |
| } |
| |
| object Unidoc { |
| |
| import BuildCommons._ |
| import sbtunidoc.BaseUnidocPlugin |
| import sbtunidoc.JavaUnidocPlugin |
| import sbtunidoc.ScalaUnidocPlugin |
| import sbtunidoc.BaseUnidocPlugin.autoImport._ |
| import sbtunidoc.GenJavadocPlugin.autoImport._ |
| import sbtunidoc.JavaUnidocPlugin.autoImport._ |
| import sbtunidoc.ScalaUnidocPlugin.autoImport._ |
| |
| private def ignoreUndocumentedPackages(packages: Seq[Seq[File]]): Seq[Seq[File]] = { |
| packages |
| .map(_.filterNot(_.getName.contains("$"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/deploy"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/examples"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/internal"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/memory"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/network"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/rpc"))) |
| .map(_.filterNot(f => |
| f.getCanonicalPath.contains("org/apache/spark/shuffle") && |
| !f.getCanonicalPath.contains("org/apache/spark/shuffle/api"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/executor"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/ExecutorAllocationClient"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend"))) |
| .map(_.filterNot(f => |
| f.getCanonicalPath.contains("org/apache/spark/unsafe") && |
| !f.getCanonicalPath.contains("org/apache/spark/unsafe/types/CalendarInterval"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("python"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/collection"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/kvstore"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalyst"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/execution"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/internal"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/hive"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalog/v2/utils"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hive"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/v2/avro"))) |
| .map(_.filterNot(_.getCanonicalPath.contains("SSLOptions"))) |
| } |
| |
| private def ignoreClasspaths(classpaths: Seq[Classpath]): Seq[Classpath] = { |
| classpaths |
| .map(_.filterNot(_.data.getCanonicalPath.matches(""".*kafka-clients-0\.10.*"""))) |
| .map(_.filterNot(_.data.getCanonicalPath.matches(""".*kafka_2\..*-0\.10.*"""))) |
| } |
| |
| val unidocSourceBase = settingKey[String]("Base URL of source links in Scaladoc.") |
| |
| lazy val settings = BaseUnidocPlugin.projectSettings ++ |
| ScalaUnidocPlugin.projectSettings ++ |
| JavaUnidocPlugin.projectSettings ++ |
| Seq ( |
| publish := {}, |
| |
| (ScalaUnidoc / unidoc / unidocProjectFilter) := |
| inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, |
| yarn, tags, streamingKafka010, sqlKafka010), |
| (JavaUnidoc / unidoc / unidocProjectFilter) := |
| inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, |
| yarn, tags, streamingKafka010, sqlKafka010), |
| |
| (ScalaUnidoc / unidoc / unidocAllClasspaths) := { |
| ignoreClasspaths((ScalaUnidoc / unidoc / unidocAllClasspaths).value) |
| }, |
| |
| (JavaUnidoc / unidoc / unidocAllClasspaths) := { |
| ignoreClasspaths((JavaUnidoc / unidoc / unidocAllClasspaths).value) |
| }, |
| |
| // Skip actual catalyst, but include the subproject. |
| // Catalyst is not public API and contains quasiquotes which break scaladoc. |
| (ScalaUnidoc / unidoc / unidocAllSources) := { |
| ignoreUndocumentedPackages((ScalaUnidoc / unidoc / unidocAllSources).value) |
| }, |
| |
| // Skip class names containing $ and some internal packages in Javadocs |
| (JavaUnidoc / unidoc / unidocAllSources) := { |
| ignoreUndocumentedPackages((JavaUnidoc / unidoc / unidocAllSources).value) |
| .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hadoop"))) |
| }, |
| |
| (JavaUnidoc / unidoc / javacOptions) := { |
| val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) |
| var major = versionParts(0).toInt |
| if (major == 1) major = versionParts(1).toInt |
| |
| Seq( |
| "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", |
| "-public", |
| "-noqualifier", "java.lang", |
| "-tag", """example:a:Example\:""", |
| "-tag", """note:a:Note\:""", |
| "-tag", "group:X", |
| "-tag", "tparam:X", |
| "-tag", "constructor:X", |
| "-tag", "todo:X", |
| "-tag", "groupname:X", |
| ) ++ { if (major >= 9) Seq("--ignore-source-errors", "-notree") else Seq.empty } |
| }, |
| |
| // Use GitHub repository for Scaladoc source links |
| unidocSourceBase := s"https://github.com/apache/spark/tree/v${version.value}", |
| |
| (ScalaUnidoc / unidoc / scalacOptions) ++= Seq( |
| "-groups", // Group similar methods together based on the @group annotation. |
| "-skip-packages", "org.apache.hadoop", |
| "-sourcepath", (ThisBuild / baseDirectory).value.getAbsolutePath |
| ) ++ ( |
| // Add links to sources when generating Scaladoc for a non-snapshot release |
| if (!isSnapshot.value) { |
| Opts.doc.sourceUrl(unidocSourceBase.value + "€{FILE_PATH}.scala") |
| } else { |
| Seq() |
| } |
| ) |
| ) |
| } |
| |
| object Checkstyle { |
| lazy val settings = Seq( |
| checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Error), |
| (Compile / checkstyle / javaSource) := baseDirectory.value / "src/main/java", |
| (Test / checkstyle / javaSource) := baseDirectory.value / "src/test/java", |
| checkstyleConfigLocation := CheckstyleConfigLocation.File("dev/checkstyle.xml"), |
| checkstyleOutputFile := baseDirectory.value / "target/checkstyle-output.xml", |
| (Test / checkstyleOutputFile) := baseDirectory.value / "target/checkstyle-output.xml" |
| ) |
| } |
| |
| object CopyDependencies { |
| |
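  // copyDeps is wired into packageBin below; the whole step can be skipped by running sbt with
  // -DcopyDependencies=false (checked in SparkBuild), e.g. so dev/mima does not re-copy jars.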
| val copyDeps = TaskKey[Unit]("copyDeps", "Copies needed dependencies to the build directory.") |
  val destPath = (Compile / crossTarget) { _ / "jars" }
| |
| lazy val settings = Seq( |
| copyDeps := { |
| val dest = destPath.value |
| if (!dest.isDirectory() && !dest.mkdirs()) { |
| throw new IOException("Failed to create jars directory.") |
| } |
| |
| (Compile / dependencyClasspath).value.map(_.data) |
| .filter { jar => jar.isFile() } |
| .foreach { jar => |
| val destJar = new File(dest, jar.getName()) |
| if (destJar.isFile()) { |
| destJar.delete() |
| } |
| Files.copy(jar.toPath(), destJar.toPath()) |
| } |
| }, |
| (Compile / packageBin / crossTarget) := destPath.value, |
| (Compile / packageBin) := (Compile / packageBin).dependsOn(copyDeps).value |
| ) |
| |
| } |
| |
| object TestSettings { |
| import BuildCommons._ |
| private val defaultExcludedTags = Seq("org.apache.spark.tags.ChromeUITest") |
| |
| lazy val settings = Seq ( |
| // Fork new JVMs for tests and set Java options for those |
| fork := true, |
| // Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes |
| // launched by the tests have access to the correct test-time classpath. |
| (Test / envVars) ++= Map( |
| "SPARK_DIST_CLASSPATH" -> |
| (Test / fullClasspath).value.files.map(_.getAbsolutePath) |
| .mkString(File.pathSeparator).stripSuffix(File.pathSeparator), |
| "SPARK_PREPEND_CLASSES" -> "1", |
| "SPARK_SCALA_VERSION" -> scalaBinaryVersion.value, |
| "SPARK_TESTING" -> "1", |
| "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), |
| (Test / javaOptions) += s"-Djava.io.tmpdir=$testTempDir", |
| (Test / javaOptions) += "-Dspark.test.home=" + sparkHome, |
| (Test / javaOptions) += "-Dspark.testing=1", |
| (Test / javaOptions) += "-Dspark.port.maxRetries=100", |
| (Test / javaOptions) += "-Dspark.master.rest.enabled=false", |
| (Test / javaOptions) += "-Dspark.memory.debugFill=true", |
| (Test / javaOptions) += "-Dspark.ui.enabled=false", |
| (Test / javaOptions) += "-Dspark.ui.showConsoleProgress=false", |
| (Test / javaOptions) += "-Dspark.unsafe.exceptionOnMemoryLeak=true", |
| (Test / javaOptions) += "-Dspark.hadoop.hadoop.security.key.provider.path=test:///", |
| (Test / javaOptions) += "-Dsun.io.serialization.extendedDebugInfo=false", |
| (Test / javaOptions) += "-Dderby.system.durability=test", |
| (Test / javaOptions) += "-Dio.netty.tryReflectionSetAccessible=true", |
| (Test / javaOptions) ++= System.getProperties.asScala.filter(_._1.startsWith("spark")) |
| .map { case (k,v) => s"-D$k=$v" }.toSeq, |
| (Test / javaOptions) += "-ea", |
| // SPARK-29282 This is for consistency between JDK8 and JDK11. |
| (Test / javaOptions) ++= { |
| val metaspaceSize = sys.env.get("METASPACE_SIZE").getOrElse("1300m") |
| s"-Xmx3200m -Xss4m -XX:MaxMetaspaceSize=$metaspaceSize -XX:+UseParallelGC -XX:-UseDynamicNumberOfGCThreads -XX:ReservedCodeCacheSize=128m" |
| .split(" ").toSeq |
| }, |
| javaOptions ++= { |
| val metaspaceSize = sys.env.get("METASPACE_SIZE").getOrElse("1300m") |
| s"-Xmx3200m -XX:MaxMetaspaceSize=$metaspaceSize".split(" ").toSeq |
| }, |
| (Test / javaOptions) ++= { |
| val jdwpEnabled = sys.props.getOrElse("test.jdwp.enabled", "false").toBoolean |
| |
| if (jdwpEnabled) { |
| val jdwpAddr = sys.props.getOrElse("test.jdwp.address", "localhost:0") |
| val jdwpServer = sys.props.getOrElse("test.jdwp.server", "y") |
| val jdwpSuspend = sys.props.getOrElse("test.jdwp.suspend", "y") |
| ("-agentlib:jdwp=transport=dt_socket," + |
| s"suspend=$jdwpSuspend,server=$jdwpServer,address=$jdwpAddr").split(" ").toSeq |
| } else { |
| Seq.empty |
| } |
| }, |
| // Exclude tags defined in a system property |
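    // (for example: build/sbt "core/test" -Dtest.exclude.tags=org.apache.spark.tags.ChromeUITest)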
| (Test / testOptions) += Tests.Argument(TestFrameworks.ScalaTest, |
| sys.props.get("test.exclude.tags").map { tags => |
| tags.split(",").flatMap { tag => Seq("-l", tag) }.toSeq |
| }.getOrElse(Nil): _*), |
| (Test / testOptions) += Tests.Argument(TestFrameworks.ScalaTest, |
| sys.props.get("test.default.exclude.tags").map(tags => tags.split(",").toSeq) |
| .map(tags => tags.filter(!_.trim.isEmpty)).getOrElse(defaultExcludedTags) |
| .flatMap(tag => Seq("-l", tag)): _*), |
| (Test / testOptions) += Tests.Argument(TestFrameworks.JUnit, |
| sys.props.get("test.exclude.tags").map { tags => |
| Seq("--exclude-categories=" + tags) |
| }.getOrElse(Nil): _*), |
| // Include tags defined in a system property |
| (Test / testOptions) += Tests.Argument(TestFrameworks.ScalaTest, |
| sys.props.get("test.include.tags").map { tags => |
| tags.split(",").flatMap { tag => Seq("-n", tag) }.toSeq |
| }.getOrElse(Nil): _*), |
| (Test / testOptions) += Tests.Argument(TestFrameworks.JUnit, |
| sys.props.get("test.include.tags").map { tags => |
| Seq("--include-categories=" + tags) |
| }.getOrElse(Nil): _*), |
| // Show full stack trace and duration in test cases. |
| (Test / testOptions) += Tests.Argument("-oDF"), |
    // Slowpoke notifications: receive a notification every 5 minutes for tests that have been
    // running longer than two minutes.
| (Test / testOptions) += Tests.Argument(TestFrameworks.ScalaTest, "-W", "120", "300"), |
| (Test / testOptions) += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), |
| // Enable Junit testing. |
| libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % "test", |
| // `parallelExecutionInTest` controls whether test suites belonging to the same SBT project |
| // can run in parallel with one another. It does NOT control whether tests execute in parallel |
| // within the same JVM (which is controlled by `testForkedParallel`) or whether test cases |
    // within the same suite can run in parallel (a ScalaTest runner option that is passed to
    // the underlying runner but is not an SBT-level configuration). This needs to be `true` in
| // order for the extra parallelism enabled by `SparkParallelTestGrouping` to take effect. |
| // The `SERIAL_SBT_TESTS` check is here so the extra parallelism can be feature-flagged. |
    (Test / parallelExecution) := !sys.env.contains("SERIAL_SBT_TESTS"),
| // Make sure the test temp directory exists. |
| (Test / resourceGenerators) += Def.macroValueI((Test / resourceManaged) map { outDir: File => |
| var dir = new File(testTempDir) |
| if (!dir.isDirectory()) { |
| // Because File.mkdirs() can fail if multiple callers are trying to create the same |
| // parent directory, this code tries to create parents one at a time, and avoids |
| // failures when the directories have been created by somebody else. |
| val stack = new ListBuffer[File]() |
| while (!dir.isDirectory()) { |
| stack.prepend(dir) |
| dir = dir.getParentFile() |
| } |
| |
| while (stack.nonEmpty) { |
| val d = stack.remove(0) |
| require(d.mkdir() || d.isDirectory(), s"Failed to create directory $d") |
| } |
| } |
| Seq.empty[File] |
| }).value, |
| (Global / concurrentRestrictions) := { |
| // The number of concurrent test groups is empirically chosen based on experience |
| // with Jenkins flakiness. |
| if (sys.env.contains("SERIAL_SBT_TESTS")) (Global / concurrentRestrictions).value |
| else Seq(Tags.limit(Tags.ForkedTestGroup, 4)) |
| } |
| ) |
| |
| } |