---
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Display name of this workflow.
name: master pull request ci

# Triggers: pushes to master, and pull requests targeting master when they are
# opened, updated with new commits, or reopened.
on:
  push:
    branches:
      - master
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    branches:
      - master

# Runs are grouped per git ref; starting a new run in a group cancels the run
# of that group which is still in progress.
concurrency:
  group: master-build-${{ github.ref }}
  cancel-in-progress: true
# Java Version Strategy:
# - Requires Java 17+ to build, test, and run (Hadoop 3.5+ client + JUnit 6).
# - Default bytecode: javac.version=17 (see default.properties).
# - CI exercises Eclipse Temurin JDK 17 and JDK 21 on Ubuntu (and tests on macOS).
jobs:
# Runs the Ant "javadoc" target on every JDK listed in the matrix.
javadoc:
  strategy:
    matrix:
      java:
        - '17'
        - '21'
      os:
        - ubuntu-latest
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v5

    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ matrix.java }}

    # The cache key is derived from the Ivy descriptors; restore-keys allows
    # falling back to any earlier Ivy cache for the same runner OS.
    - name: Cache Ivy dependencies
      uses: actions/cache@v4
      with:
        path: ~/.ivy2/cache
        key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
        restore-keys: |
          ${{ runner.os }}-ivy-

    - name: Javadoc
      run: ant clean javadoc -buildfile build.xml
# Runs the Ant "releaseaudit" target (Apache Rat) and fails the job unless the
# report states that there are no unknown licenses.
rat:
  strategy:
    matrix:
      java: ['17', '21']
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v5

    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v5
      with:
        java-version: ${{ matrix.java }}
        distribution: 'temurin'

    # The cache key is derived from the Ivy descriptors; restore-keys allows
    # falling back to any earlier Ivy cache for the same runner OS.
    - name: Cache Ivy dependencies
      uses: actions/cache@v4
      with:
        path: ~/.ivy2/cache
        key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
        restore-keys: |
          ${{ runner.os }}-ivy-

    - name: Run Apache Rat
      run: ant clean releaseaudit -buildfile build.xml

    # Line 18 of the Rat report is expected to carry the "<n> Unknown Licenses"
    # summary. The report is located via GITHUB_WORKSPACE rather than a
    # hard-coded /home/runner/work/<repo>/<repo> path, so the step also works
    # in renamed forks and on runners with a different work directory.
    - name: Record unknown licenses
      run: |
        echo "UNKNOWN_LICENSES=$(sed -n 18p "${GITHUB_WORKSPACE}/build/apache-rat-report.txt")" >> "$GITHUB_ENV"

    # Print the extracted summary line so it is visible in the job log.
    - name: Print unknown licenses
      run: |
        echo $UNKNOWN_LICENSES

    # Any value other than the exact "0 Unknown Licenses" line (including an
    # empty value when the report is missing) fails the job.
    - name: Fail if any unknown licenses
      if: ${{ env.UNKNOWN_LICENSES != '0 Unknown Licenses' }}
      run: exit 1
# Lints openapi.yaml with the IBM OpenAPI Validator. The Node.js steps only run
# when the specification or the Spectral ruleset was touched.
openapi-lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    # Sets outputs.openapi to 'true' when one of the listed files changed.
    - id: filter
      uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36
      with:
        filters: |
          openapi:
            - 'openapi.yaml'
            - '.spectral.yaml'

    # NOTE(review): Node.js 18 is past its upstream end-of-life date; consider
    # moving to a maintained LTS once the validator is confirmed to support it.
    - name: Set up Node.js
      if: ${{ steps.filter.outputs.openapi == 'true' }}
      uses: actions/setup-node@v4
      with:
        node-version: '18'

    # Installed into the workspace without modifying any package.json.
    - name: Install IBM OpenAPI Validator
      if: ${{ steps.filter.outputs.openapi == 'true' }}
      run: npm install --no-save ibm-openapi-validator

    - name: Validate OpenAPI specification
      if: ${{ steps.filter.outputs.openapi == 'true' }}
      run: ./node_modules/.bin/lint-openapi openapi.yaml
# Build verification on JDK 17 and 21 (bytecode target Java 17).
# After the Ant build, one compiled class is inspected with javap to confirm
# that its class-file major version is the expected one.
build:
  strategy:
    fail-fast: false
    matrix:
      java:
        - '17'
        - '21'
      os:
        - ubuntu-latest
  runs-on: ${{ matrix.os }}
  name: build (jdk ${{ matrix.java }}, javac.version=17)
  steps:
    - uses: actions/checkout@v5

    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ matrix.java }}

    # The cache key is derived from the Ivy descriptors; restore-keys allows
    # falling back to any earlier Ivy cache for the same runner OS.
    - name: Cache Ivy dependencies
      uses: actions/cache@v4
      with:
        path: ~/.ivy2/cache
        key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
        restore-keys: |
          ${{ runner.os }}-ivy-

    - name: Build with javac.version=17
      run: ant clean runtime -Djavac.version=17 -buildfile build.xml

    - name: Verify bytecode version
      run: |
        # Class-file major version 61 corresponds to Java 17.
        expected_major=61
        echo "Expected major version: $expected_major (Java 17 bytecode)"

        # Pick one real class file; package-info.class is skipped because it
        # may have been compiled with a different version.
        cd build/classes
        sample_class=$(find . -name "*.class" ! -name "package-info.class" | head -1)
        if [ -z "$sample_class" ]; then
          echo "ERROR: No class files found"
          exit 1
        fi

        echo "Checking: $sample_class"
        # The last field of javap's "major version" line is the number itself.
        actual_major=$(javap -verbose "$sample_class" 2>/dev/null | grep "major version" | awk '{print $NF}')
        echo "Actual major version: $actual_major"
        if [ "$actual_major" != "$expected_major" ]; then
          echo "ERROR: Bytecode version mismatch!"
          exit 1
        fi
        echo "Bytecode version verified successfully"
# Smoke-test runtime on the same JDK used to build (17 and 21).
# Starts only after the "build" job has succeeded, then rebuilds the runtime
# and invokes bin/nutch once to check that the launcher works on that JDK.
runtime-smoke:
  needs: build
  strategy:
    fail-fast: false
    matrix:
      java: ['17', '21']
  runs-on: ubuntu-latest
  name: runtime-smoke (jdk ${{ matrix.java }})
  steps:
    - uses: actions/checkout@v5

    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v5
      with:
        java-version: ${{ matrix.java }}
        distribution: 'temurin'

    # The cache key is derived from the Ivy descriptors; restore-keys allows
    # falling back to any earlier Ivy cache for the same runner OS.
    - name: Cache Ivy dependencies
      uses: actions/cache@v4
      with:
        path: ~/.ivy2/cache
        key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
        restore-keys: |
          ${{ runner.os }}-ivy-

    - name: Build with javac.version=17
      run: ant clean runtime -Djavac.version=17 -buildfile build.xml

    - name: Verify runtime on JDK ${{ matrix.java }}
      run: |
        echo "Verifying Nutch on JDK ${{ matrix.java }}..."
        java -version
        cd runtime/local
        # Capture the output before truncating it. Piping bin/nutch straight
        # into head makes the step report head's exit status (the default
        # shell runs without pipefail), which would hide a failing launcher.
        bin/nutch showproperties > "$RUNNER_TEMP/showproperties.txt"
        head -20 "$RUNNER_TEMP/showproperties.txt"
        echo "Runtime smoke test complete"
# Runs the Ant test targets on each JDK/OS combination. Which targets run is
# decided from the set of changed paths; test reports and coverage data are
# uploaded from the Ubuntu runners only.
tests:
  strategy:
    fail-fast: false
    matrix:
      java:
        - '17'
        - '21'
      os:
        - ubuntu-latest
        - macos-latest
  runs-on: ${{ matrix.os }}
  name: tests (jdk ${{ matrix.java }}, ${{ matrix.os }})
  timeout-minutes: 45
  steps:
    - uses: actions/checkout@v5

    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ matrix.java }}

    # The cache key is derived from the Ivy descriptors; restore-keys allows
    # falling back to any earlier Ivy cache for the same runner OS.
    - name: Cache Ivy dependencies
      uses: actions/cache@v4
      with:
        path: ~/.ivy2/cache
        key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
        restore-keys: |
          ${{ runner.os }}-ivy-

    # Classifies the changed files; each filter name becomes a 'true'/'false'
    # output that the test steps below consult.
    - id: filter
      uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36
      with:
        filters: |
          core:
            - 'src/java/**'
            - 'src/test/**'
            - 'src/testresources/**'
          plugins:
            - 'src/plugin/**'
          indexer_plugins:
            - 'src/plugin/indexer-*/**'
          protocol_plugins:
            - 'src/plugin/protocol-*/**'
          buildconf:
            - 'build.xml'
            - 'ivy/ivy.xml'
            - '.github/workflows/*'

    # Full test run: the build configuration changed, or both core and plugin
    # files changed.
    - name: test all
      if: >-
        steps.filter.outputs.buildconf == 'true' ||
        ( steps.filter.outputs.core == 'true' && steps.filter.outputs.plugins == 'true' )
      run: ant clean test -buildfile build.xml

    # Core tests only: core files changed, plugins and build configuration
    # did not.
    - name: test core
      if: >-
        steps.filter.outputs.core == 'true' &&
        steps.filter.outputs.plugins == 'false' &&
        steps.filter.outputs.buildconf == 'false'
      run: ant clean test-core -buildfile build.xml

    # Plugin tests only: plugin files changed, core and build configuration
    # did not.
    - name: test plugins
      if: >-
        steps.filter.outputs.plugins == 'true' &&
        steps.filter.outputs.core == 'false' &&
        steps.filter.outputs.buildconf == 'false'
      run: ant clean test-plugins -buildfile build.xml

    # Indexer integration tests: run when indexer plugin files change
    # (Docker required, ubuntu-latest only).
    - name: test indexer integration
      if: steps.filter.outputs.indexer_plugins == 'true' && matrix.os == 'ubuntu-latest'
      run: ant clean test-indexer-integration -buildfile build.xml

    # Protocol integration tests: run when protocol plugin files change
    # (Docker required, ubuntu-latest only).
    - name: test protocol integration
      if: steps.filter.outputs.protocol_plugins == 'true' && matrix.os == 'ubuntu-latest'
      run: ant clean test-protocol-integration -buildfile build.xml

    # Publishes has_results=true|false depending on whether any JUnit XML
    # report exists, so that the upload below can be skipped when there is
    # nothing to upload. Runs even if an earlier step failed.
    - name: Check for test results
      id: check_tests
      if: ${{ always() && matrix.os == 'ubuntu-latest' }}
      run: |
        # nullglob: unmatched patterns expand to nothing; globstar enables **.
        shopt -s globstar nullglob
        reports=(./build/test/TEST-*.xml ./build/**/test/TEST-*.xml)
        found=false
        if (( ${#reports[@]} > 0 )); then
          found=true
        fi
        echo "has_results=$found" >> $GITHUB_OUTPUT

    # JUnit reports are uploaded from the Ubuntu / JDK 17 combination only.
    - name: Upload Test Report
      if: >-
        ${{ always() && matrix.os == 'ubuntu-latest' && matrix.java == '17'
        && steps.check_tests.outputs.has_results == 'true' }}
      uses: actions/upload-artifact@v4
      with:
        name: junit-test-results-${{ matrix.os }}-jdk${{ matrix.java }}
        path: |
          ./build/test/TEST-*.xml
          ./build/**/test/TEST-*.xml
        retention-days: 1

    # Coverage execution data is uploaded per JDK from Ubuntu; a run that
    # produced no .exec files is not treated as an error.
    - name: Upload Coverage Data
      if: ${{ always() && matrix.os == 'ubuntu-latest' }}
      uses: actions/upload-artifact@v4
      with:
        name: coverage-data-ubuntu-jdk${{ matrix.java }}
        path: ./build/coverage/*.exec
        retention-days: 1
        if-no-files-found: ignore