name: Build and test

# Triggers: pushes to branch-2.4 and pull requests that target branch-2.4.
on:
  push:
    branches: [branch-2.4]
  pull_request:
    branches: [branch-2.4]

jobs:
  # Build: build Spark and run the tests for specified modules.
  # One job is started per entry of the matrix below; each job tests the
  # comma-separated module group passed to ./dev/run-tests.
  build:
    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }})"
    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
    runs-on: ubuntu-20.04
    strategy:
      # Keep the other module groups running when one of them fails.
      fail-fast: false
      matrix:
        java:
          - 8
        hadoop:
          - hadoop2.6
        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
        # Kinesis tests depend on the external Amazon Kinesis service.
        # Note that the modules below are from sparktestsupport/modules.py.
        modules:
          - >-
            core, unsafe, kvstore, avro,
            network-common, network-shuffle, repl, launcher,
            examples, sketch, graphx
          - >-
            catalyst, hive-thriftserver
          - >-
            streaming, sql-kafka-0-10, streaming-kafka-0-10,
            mllib-local, mllib,
            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
            streaming-flume, streaming-flume-sink, streaming-kafka-0-8
          - >-
            pyspark-sql, pyspark-mllib
          - >-
            pyspark-core, pyspark-streaming, pyspark-ml
          - >-
            sparkr
          - >-
            sql
        # The tag filters and the job-name comment are empty for the module
        # groups above. The Hive tests are split into the slow ones and the
        # rest of them by the `include` entries below.
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
          # Hive tests
          - modules: hive
            java: 8
            hadoop: hadoop2.6
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: hive
            java: 8
            hadoop: hadoop2.6
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
      INCLUDED_TAGS: ${{ matrix.included-tags }}
      HADOOP_PROFILE: ${{ matrix.hadoop }}
      # GitHub Actions' default miniconda to use in pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      # Environment values are strings; quoted to make that explicit.
      ARROW_PRE_0_15_IPC_FORMAT: "1"
    steps:
      - name: Checkout Spark repository
        uses: actions/checkout@v2
        # In order to fetch changed files
        with:
          fetch-depth: 0
      # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
      - name: Cache Scala, SBT, Maven and Zinc
        uses: actions/cache@v2
        with:
          path: |
            build/apache-maven-*
            build/zinc-*
            build/scala-*
            build/*.jar
            ~/.sbt
          key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
          restore-keys: |
            build-
      - name: Cache Ivy local repository
        uses: actions/cache@v2
        with:
          path: ~/.ivy2/cache
          key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
          restore-keys: |
            ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
      - name: Install Java ${{ matrix.java }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}
      # PySpark
      - name: Install PyPy3
        # Note that order of Python installations here matters because default python is
        # overridden.
        uses: actions/setup-python@v2
        if: contains(matrix.modules, 'pyspark')
        with:
          python-version: pypy3
          architecture: x64
      - name: Install Python 3.6
        uses: actions/setup-python@v2
        if: contains(matrix.modules, 'pyspark')
        with:
          # Version numbers are quoted so that YAML reads them as strings, not floats.
          python-version: '3.6'
          architecture: x64
      - name: Install Python 2.7
        uses: actions/setup-python@v2
        # Yarn has a Python specific test too, for example, YarnClusterSuite.
        # This step has no `if:` condition, so it runs for every module group.
        with:
          python-version: '2.7'
          architecture: x64
      - name: Install Python packages (Python 3.6 and PyPy3)
        if: contains(matrix.modules, 'pyspark')
        # PyArrow is not supported in PyPy yet, see ARROW-2651.
        run: |
          python3.6 -m pip install numpy 'pyarrow<0.12.0' 'pandas<0.24.0' scipy xmlrunner
          python3.6 -m pip list
          # PyPy does not have xmlrunner, and pandas<0.24.0 installation fails in PyPy3, just skipping.
          pypy3 -m pip install numpy scipy
          pypy3 -m pip list
      - name: Install Python packages (Python 2.7)
        # Runs for the PySpark groups and for the plain 'sql' group; the 'sql-'
        # check leaves out the group that only matches through 'sql-kafka-0-10'.
        if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
        run: |
          # Some tests do not pass in PySpark with PyArrow, for example, pyspark.sql.tests.ArrowTests.
          python2.7 -m pip install numpy 'pandas<0.24.0' scipy xmlrunner
          python2.7 -m pip list
      # SparkR
      - name: Install R 4.0
        uses: r-lib/actions/setup-r@v1
        if: contains(matrix.modules, 'sparkr')
        with:
          # Quoted: an unquoted 4.0 is a YAML float rather than the string "4.0".
          r-version: '4.0'
      - name: Install R packages
        if: contains(matrix.modules, 'sparkr')
        run: |
          # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
          sudo apt-get install -y libcurl4-openssl-dev qpdf
          sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
          # Show installed packages in R.
          sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
      # Run the tests.
      - name: Run tests
        run: |
          # Hive tests become flaky when running in parallel as it's too intensive.
          if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
          ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
      - name: Upload test results to report
        # Upload the XML reports whether or not the tests passed.
        if: always()
        uses: actions/upload-artifact@v2
        with:
          name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}
          path: "**/target/test-reports/*.xml"
      - name: Upload unit tests log files
        # The logs are only uploaded when an earlier step failed.
        if: failure()
        uses: actions/upload-artifact@v2
        with:
          # Built from the same matrix keys as the test-results artifact above;
          # this matrix defines no `hive` key, so it is not part of the name.
          name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}
          path: "**/target/unit-tests.log"

  # Static analysis, and documentation build
  lint:
    name: Linters, licenses, dependencies and documentation generation
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout Spark repository
        uses: actions/checkout@v2
      # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
      - name: Cache Scala, SBT, Maven and Zinc
        uses: actions/cache@v2
        with:
          path: |
            build/apache-maven-*
            build/zinc-*
            build/scala-*
            build/*.jar
            ~/.sbt
          key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
          restore-keys: |
            build-
      - name: Cache Ivy local repository
        uses: actions/cache@v2
        with:
          path: ~/.ivy2/cache
          key: docs-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
          restore-keys: |
            docs-ivy-
      - name: Cache Maven local repository
        uses: actions/cache@v2
        with:
          path: ~/.m2/repository
          key: docs-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            docs-maven-
      - name: Install Java 8
        uses: actions/setup-java@v1
        with:
          java-version: 8
      - name: Install Python 3.6
        uses: actions/setup-python@v2
        with:
          # Version numbers are quoted so that YAML reads them as strings, not floats.
          python-version: '3.6'
          architecture: x64
      - name: Install Python linter dependencies
        run: |
          pip3 install flake8 sphinx numpy
      - name: Install R 4.0
        uses: r-lib/actions/setup-r@v1
        with:
          # Quoted: an unquoted 4.0 is a YAML float rather than the string "4.0".
          r-version: '4.0'
      - name: Install R linter dependencies and SparkR
        run: |
          sudo apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
          sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
          sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.1')"
          ./R/install-dev.sh
      - name: Install Ruby 2.7 for documentation generation
        uses: actions/setup-ruby@v1
        with:
          ruby-version: '2.7'
      - name: Install dependencies for documentation generation
        run: |
          sudo apt-get install -y libcurl4-openssl-dev pandoc
          pip install sphinx mkdocs numpy
          gem install jekyll jekyll-redirect-from pygments.rb
          sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
      - name: Scala linter
        run: ./dev/lint-scala
      - name: Java linter
        run: ./dev/lint-java
      - name: Python linter
        run: ./dev/lint-python
      - name: R linter
        run: ./dev/lint-r
      - name: License test
        run: ./dev/check-license
      - name: Dependencies test
        run: ./dev/test-dependencies.sh
      - name: Run documentation build
        run: |
          cd docs
          jekyll build