RATIS-1961. Create workflow for running tests repeatedly (#985)

diff --git a/.github/workflows/repeat-test.yml b/.github/workflows/repeat-test.yml
new file mode 100644
index 0000000..86150d2
--- /dev/null
+++ b/.github/workflows/repeat-test.yml
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: repeat-test
+run-name: ${{ github.event_name == 'workflow_dispatch' && format('{0}#{1}[{2}]-{3}x{4}', inputs.test-class, inputs.test-method, inputs.ref, inputs.splits, inputs.iterations) || '' }}
+on:
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: 'Git Ref (branch, tag or commit SHA)'
+        required: true
+        default: 'master'
+      test-class:
+        description: 'Test Class'
+        required: true
+      test-method:
+        description: 'Test Method'
+        required: false
+        default: ''
+      splits:
+        description: 'Number of splits'
+        required: true
+        default: 10
+      iterations:
+        description: 'Number of iterations per split'
+        required: true
+        default: 10
+      fail-fast:
+        description: 'Stop after first failure'
+        required: true
+        default: false
+# Workflow-wide environment: the dispatch inputs are exported so shell steps can read them as variables.
+env:
+  MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
+  TEST_CLASS: ${{ github.event.inputs.test-class }}
+  TEST_METHOD: ${{ github.event.inputs.test-method }}
+  ITERATIONS: ${{ github.event.inputs.iterations }}
+  FAIL_FAST: ${{ github.event.inputs.fail-fast }}
+jobs:
+  # Computes the split matrix and the test selector that the test job consumes.
+  prepare:
+    runs-on: ubuntu-20.04
+    outputs:
+      matrix: ${{ steps.generate.outputs.matrix }}
+      test-spec: ${{ steps.test-spec.outputs.test-spec }}
+    steps:
+      - id: generate
+        name: Generate test matrix
+        env:
+          # Hand the user-supplied value to the script through the environment:
+          # expanding the expression inside the script text allows shell injection.
+          SPLITS: ${{ github.event.inputs.splits }}
+        run: |
+          # Only a positive integer may reach seq and the JSON array below.
+          [[ "$SPLITS" =~ ^[1-9][0-9]*$ ]] || { echo "Invalid number of splits: $SPLITS" >&2; exit 1; }
+          # seq -s, prints "1,2,...,N"; the brackets turn it into a JSON array.
+          echo "matrix=[$(seq -s, 1 "$SPLITS")]" >> "$GITHUB_OUTPUT"
+      - name: Define test spec
+        id: test-spec
+        run: |
+          # The spec is later passed to Maven as -Dtest: "Class", or
+          # "Class#method" when a test method was given.
+          test_spec="${TEST_CLASS}${TEST_METHOD:+#${TEST_METHOD}}"
+          echo "Test to be run: $test_spec"
+          echo "test-spec=$test_spec" >> "$GITHUB_OUTPUT"
+  # Runs the test spec once per matrix split; unit.sh repeats it up to ITERATIONS times.
+  test:
+    needs: [prepare]
+    if: ${{ always() }}
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: ${{ fromJson(github.event.inputs.fail-fast) }}
+      matrix:
+        split: ${{ fromJson(needs.prepare.outputs.matrix) }}
+    env:
+      TEST_SPEC: ${{ needs.prepare.outputs.test-spec }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.inputs.ref }}
+      - name: Cache for maven dependencies
+        uses: actions/cache@v3
+        with:
+          key: maven-repo-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            maven-repo-
+          path: |
+            ~/.m2/repository
+            !~/.m2/repository/org/apache/ratis
+      - name: Setup java
+        uses: actions/setup-java@v3
+        with:
+          java-version: 8
+          distribution: temurin
+      - name: Validate test spec
+        run: '[[ -n "$TEST_SPEC" ]] || exit 1'
+      - name: Execute ${{ env.TEST_SPEC }}
+        continue-on-error: true  # the summary step below decides whether the job fails
+        run: dev-support/checks/unit.sh -Dtest="$TEST_SPEC"
+      - name: Summary of failures
+        if: ${{ !cancelled() }}
+        run: dev-support/checks/_summary.sh target/unit/summary.txt
+      - name: Archive build results
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: result-${{ env.TEST_CLASS }}-split-${{ matrix.split }}
+          path: target/unit
+  count-failures:  # fails the run when any downloaded summary records a failure
+    if: ${{ always() }}
+    needs: test
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Download build results
+        uses: actions/download-artifact@v3
+      - name: Count failures
+        run: |
+          failures=$(find . -name 'summary.txt' | grep -v 'iteration' | xargs grep -v 'exit code: 0' | wc -l)
+          echo "Total failures: $failures"
+          if [[ $failures -gt 0 ]]; then
+            printf '\nFailed runs:\n'
+            # -H keeps the file name even with a single split (the split number is parsed from it); [^0] lists every non-zero code.
+            grep -H 'exit code: [^0]' */summary.txt | grep -o 'split.*teration [0-9]*' | sed -e 's/.summary.txt:/ /' -e 's/-/ /' | sort -g -k2 -k4
+            echo ""
+            exit 1
+          fi
diff --git a/dev-support/checks/_summary.sh b/dev-support/checks/_summary.sh
new file mode 100755
index 0000000..f8c7bb9
--- /dev/null
+++ b/dev-support/checks/_summary.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Prints the report of a check and maps its content to this script's exit code.
+# Usage: _summary.sh <report-file>
+report="$1"
+
+: ${ITERATIONS:="1"}
+declare -i ITERATIONS
+
+# Without a report there is nothing to summarize; signal that with 255.
+if [[ ! -e "${report}" ]]; then
+  echo "Report file missing, check logs for details"
+  exit 255
+fi
+
+if [[ ${ITERATIONS} -gt 1 ]]; then
+  # Repeated runs: fail when any "exit code:" entry in the report is non-zero.
+  cat "${report}"
+  grep -q 'exit code: [^0]' "${report}" && exit 1
+elif [[ -s "${report}" ]]; then
+  # Single run: a non-empty report is treated as a failure.
+  cat "${report}"
+  exit 1
+fi
+
+# Nothing in the report indicated a failure.
+exit 0
diff --git a/dev-support/checks/unit.sh b/dev-support/checks/unit.sh
index 0f16f55..0d0c122 100755
--- a/dev-support/checks/unit.sh
+++ b/dev-support/checks/unit.sh
@@ -21,6 +21,7 @@
 
 source "${DIR}/../find_maven.sh"
 
+: ${FAIL_FAST:="false"}
 : ${ITERATIONS:="1"}
 
 declare -i ITERATIONS
@@ -32,6 +33,13 @@
 mkdir -p "$REPORT_DIR"
 
 export MAVEN_OPTS="-Xmx4096m"
+MAVEN_OPTIONS='-B --no-transfer-progress'
+
+if [[ "${FAIL_FAST}" == "true" ]]; then
+  MAVEN_OPTIONS="${MAVEN_OPTIONS} --fail-fast -Dsurefire.skipAfterFailureCount=1"
+else
+  MAVEN_OPTIONS="${MAVEN_OPTIONS} --fail-at-end"
+fi
 
 rc=0
 for i in $(seq 1 ${ITERATIONS}); do
@@ -41,7 +49,7 @@
     mkdir -p "${REPORT_DIR}"
   fi
 
-  ${MVN} -B -fae --no-transfer-progress test "$@" \
+  ${MVN} ${MAVEN_OPTIONS} test "$@" \
     | tee "${REPORT_DIR}/output.log"
   irc=$?
 
@@ -52,6 +60,10 @@
   fi
 
   if [[ ${ITERATIONS} -gt 1 ]]; then
+    if [[ ${irc} == 0 ]]; then
+      rm -fr "${REPORT_DIR}"
+    fi
+
     REPORT_DIR="${original_report_dir}"
     echo "Iteration ${i} exit code: ${irc}" | tee -a "${REPORT_DIR}/summary.txt"
   fi
@@ -59,6 +71,10 @@
   if [[ ${rc} == 0 ]]; then
     rc=${irc}
   fi
+
+  if [[ ${rc} != 0 ]] && [[ "${FAIL_FAST}" == "true" ]]; then
+    break
+  fi
 done
 
 exit ${rc}