# CI workflow for branch-3.0: the jobs below run on every push to that branch
# and on every pull request that targets it.
name: Build and test

on:
  push:
    branches:
      - branch-3.0
  pull_request:
    branches:
      - branch-3.0

jobs:
| # Build: build Spark and run the tests for specified modules. |
| build: |
| name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" |
| runs-on: ubuntu-latest |
| strategy: |
| fail-fast: false |
| matrix: |
| java: |
| - 1.8 |
| hadoop: |
| - hadoop2.7 |
| hive: |
| - hive2.3 |
| # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. |
| # Kinesis tests depends on external Amazon kinesis service. |
| # Note that the modules below are from sparktestsupport/modules.py. |
| modules: |
| - >- |
| core, unsafe, kvstore, avro, |
| network-common, network-shuffle, repl, launcher, |
| examples, sketch, graphx |
| - >- |
| catalyst, hive-thriftserver |
| - >- |
| streaming, sql-kafka-0-10, streaming-kafka-0-10, |
| mllib-local, mllib, |
| yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl |
| - >- |
| pyspark-sql, pyspark-mllib |
| - >- |
| pyspark-core, pyspark-streaming, pyspark-ml |
| - >- |
| sparkr |
| # Here, we split Hive and SQL tests into some of slow ones and the rest of them. |
| included-tags: [""] |
| # Some tests are disabled in GitHun Actions. Ideally, we should remove this tag |
| # and run all tests. |
| excluded-tags: ["org.apache.spark.tags.GitHubActionsUnstableTest"] |
| comment: [""] |
| include: |
| # Hive tests |
| - modules: hive |
| java: 1.8 |
| hadoop: hadoop2.7 |
| hive: hive2.3 |
| included-tags: org.apache.spark.tags.SlowHiveTest |
| comment: "- slow tests" |
| - modules: hive |
| java: 1.8 |
| hadoop: hadoop2.7 |
| hive: hive2.3 |
| excluded-tags: org.apache.spark.tags.SlowHiveTest,org.apache.spark.tags.GitHubActionsUnstableTest |
| comment: "- other tests" |
| # SQL tests |
| - modules: sql |
| java: 1.8 |
| hadoop: hadoop2.7 |
| hive: hive2.3 |
| included-tags: org.apache.spark.tags.ExtendedSQLTest |
| comment: "- slow tests" |
| - modules: sql |
| java: 1.8 |
| hadoop: hadoop2.7 |
| hive: hive2.3 |
| excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.GitHubActionsUnstableTest |
| comment: "- other tests" |
| env: |
| MODULES_TO_TEST: ${{ matrix.modules }} |
| EXCLUDED_TAGS: ${{ matrix.excluded-tags }} |
| INCLUDED_TAGS: ${{ matrix.included-tags }} |
| HADOOP_PROFILE: ${{ matrix.hadoop }} |
| HIVE_PROFILE: ${{ matrix.hive }} |
| # GitHub Actions' default miniconda to use in pip packaging test. |
| CONDA_PREFIX: /usr/share/miniconda |
| GITHUB_PREV_SHA: ${{ github.event.before }} |
| steps: |
| - name: Checkout Spark repository |
| uses: actions/checkout@v2 |
| # In order to fetch changed files |
| with: |
| fetch-depth: 0 |
| # Cache local repositories. Note that GitHub Actions cache has a 2G limit. |
| - name: Cache Scala, SBT, Maven and Zinc |
| uses: actions/cache@v2 |
| with: |
| path: | |
| build/apache-maven-* |
| build/zinc-* |
| build/scala-* |
| build/*.jar |
| key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} |
| restore-keys: | |
| build- |
| - name: Cache Maven local repository |
| uses: actions/cache@v2 |
| with: |
| path: ~/.m2/repository |
| key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} |
| restore-keys: | |
| ${{ matrix.java }}-${{ matrix.hadoop }}-maven- |
| - name: Cache Ivy local repository |
| uses: actions/cache@v2 |
| with: |
| path: ~/.ivy2/cache |
| key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} |
| restore-keys: | |
| ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- |
| - name: Install JDK ${{ matrix.java }} |
| uses: actions/setup-java@v1 |
| with: |
| java-version: ${{ matrix.java }} |
| # PySpark |
| - name: Install PyPy3 |
| # Note that order of Python installations here matters because default python3 is |
| # overridden by pypy3. |
| uses: actions/setup-python@v2 |
| if: contains(matrix.modules, 'pyspark') |
| with: |
| python-version: pypy3 |
| architecture: x64 |
| - name: Install Python 2.7 |
| uses: actions/setup-python@v2 |
| if: contains(matrix.modules, 'pyspark') |
| with: |
| python-version: 2.7 |
| architecture: x64 |
| - name: Install Python 3.8 |
| uses: actions/setup-python@v2 |
| # We should install one Python that is higher then 3+ for SQL and Yarn because: |
| # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. |
| # - Yarn has a Python specific test too, for example, YarnClusterSuite. |
| if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) |
| with: |
| python-version: 3.8 |
| architecture: x64 |
| - name: Install Python packages (Python 2.7 and PyPy3) |
| if: contains(matrix.modules, 'pyspark') |
| # PyArrow is not supported in PyPy yet, see ARROW-2651. |
| # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. |
| run: | |
| python2.7 -m pip install numpy pyarrow pandas scipy xmlrunner |
| python2.7 -m pip list |
| # PyPy does not have xmlrunner |
| pypy3 -m pip install numpy pandas |
| pypy3 -m pip list |
| - name: Install Python packages (Python 3.8) |
| if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) |
| run: | |
| python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner |
| python3.8 -m pip list |
| # SparkR |
| - name: Install R 4.0 |
| if: contains(matrix.modules, 'sparkr') |
| run: | |
| sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" |
| curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add |
| sudo apt-get update |
| sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev |
| - name: Install R packages |
| if: contains(matrix.modules, 'sparkr') |
| run: | |
| # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497. |
| sudo apt-get install -y libcurl4-openssl-dev qpdf |
| sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" |
| # Show installed packages in R. |
| sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' |
| # Run the tests. |
| - name: Run tests |
| run: | |
| # Hive tests become flaky when running in parallel as it's too intensive. |
| if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi |
| mkdir -p ~/.m2 |
| ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" |
| rm -rf ~/.m2/repository/org/apache/spark |
| - name: Upload test results to report |
| if: always() |
| uses: actions/upload-artifact@v2 |
| with: |
| name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} |
| path: "**/target/test-reports/*.xml" |
| - name: Upload unit tests log files |
| if: failure() |
| uses: actions/upload-artifact@v2 |
| with: |
| name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} |
| path: "**/target/unit-tests.log" |
| |
| # Static analysis, and documentation build |
| lint: |
| name: Linters, licenses, dependencies and documentation generation |
| runs-on: ubuntu-latest |
| steps: |
| - name: Checkout Spark repository |
| uses: actions/checkout@v2 |
| - name: Cache Maven local repository |
| uses: actions/cache@v2 |
| with: |
| path: ~/.m2/repository |
| key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} |
| restore-keys: | |
| docs-maven- |
| - name: Install JDK 1.8 |
| uses: actions/setup-java@v1 |
| with: |
| java-version: 1.8 |
| - name: Install Python 3.6 |
| uses: actions/setup-python@v2 |
| with: |
| python-version: 3.6 |
| architecture: x64 |
| - name: Install Python linter dependencies |
| run: | |
| pip3 install flake8 sphinx numpy |
| - name: Install R 4.0 |
| run: | |
| sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" |
| curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add |
| sudo apt-get update |
| sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev |
| - name: Install R linter dependencies and SparkR |
| run: | |
| sudo apt-get install -y libcurl4-openssl-dev |
| sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" |
| sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" |
| ./R/install-dev.sh |
| - name: Install Ruby 2.7 for documentation generation |
| uses: actions/setup-ruby@v1 |
| with: |
| ruby-version: 2.7 |
| - name: Install dependencies for documentation generation |
| run: | |
| sudo apt-get install -y libcurl4-openssl-dev pandoc |
| pip install sphinx mkdocs numpy |
| gem install jekyll jekyll-redirect-from rouge |
| sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" |
| - name: Scala linter |
| run: ./dev/lint-scala |
| - name: Java linter |
| run: ./dev/lint-java |
| - name: Python linter |
| run: ./dev/lint-python |
| - name: R linter |
| run: ./dev/lint-r |
| - name: License test |
| run: ./dev/check-license |
| - name: Dependencies test |
| run: ./dev/test-dependencies.sh |
| - name: Run documentation build |
| run: | |
| cd docs |
| jekyll build |
| |
| java11: |
| name: Java 11 build |
| runs-on: ubuntu-latest |
| steps: |
| - name: Checkout Spark repository |
| uses: actions/checkout@v2 |
| - name: Cache Maven local repository |
| uses: actions/cache@v2 |
| with: |
| path: ~/.m2/repository |
| key: java11-maven-${{ hashFiles('**/pom.xml') }} |
| restore-keys: | |
| java11-maven- |
| - name: Install Java 11 |
| uses: actions/setup-java@v1 |
| with: |
| java-version: 11 |
| - name: Build with Maven |
| run: | |
| export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" |
| export MAVEN_CLI_OPTS="--no-transfer-progress" |
| mkdir -p ~/.m2 |
| ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install |
| rm -rf ~/.m2/repository/org/apache/spark |