#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
| |
# Strict mode: stop on an unhandled command failure, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
| |
| |
#######################################
# Redraws a one-line text progress bar in place.
# Adapted from https://stackoverflow.com/a/20311674
# Arguments:
#   $1 - barsize: width of the bar in characters
#   $2 - base:    counter value that corresponds to an empty bar
#   $3 - current: present counter value
#   $4 - top:     counter value that corresponds to a full bar
# Outputs:
#   Writes "[===>   ] current / top" followed by a carriage return (and no
#   newline) to stdout, so the next call overwrites the same terminal line.
#######################################
function redraw_progress_bar { # int barsize, int base, int current, int top
  local barsize=$1
  local base=$2
  local current=$3
  local top=$4
  local span=$((top - base))
  local progress
  local j

  if ((span > 0)); then
    progress=$(((barsize * (current - base)) / span))
  else
    # top == base (e.g. a run over a single item) would divide by zero and
    # abort the script; there is nothing left to wait for, so draw a full bar.
    progress=$barsize
  fi

  printf '['
  for ((j = 0; j < progress; j++)); do printf '='; done
  printf '=>'
  for ((j = progress; j < barsize; j++)); do printf ' '; done
  # Two trailing blanks before the carriage return, as in the original output.
  printf '] %s / %s  \r' "$current" "$top"
}
| |
#######################################
# Collects the external links that point at a given host.
# A link is recorded when its host component equals the given host name and
# its first path segment is not "releases".
# Globals:
#   external_links (read) - array of absolute URLs to inspect
# Arguments:
#   $1 - env: label used in the heading (e.g. "production")
#   $2 - exception_link: host name to look for
#   $3 - name of the caller's array; matching links are appended to it
# Outputs:
#   Prints a heading and every matching link to stdout.
#######################################
function check_absolute_links {
  local env="$1"
  local exception_link="$2"
  local -n env_exception_links="$3"
  local external_link
  local -a url_parts

  echo "Checking $env links."
  for external_link in "${external_links[@]}"; do
    # Split "scheme://host/segment/..." on "/": index 2 holds the host and
    # index 3 the first path segment (either may be missing).
    IFS=/ read -r -a url_parts <<<"$external_link"
    if [[ "${url_parts[2]-}" != "$exception_link" ]]; then
      continue
    fi
    if [[ "${url_parts[3]-}" != "releases" ]]; then
      env_exception_links+=("${external_link}")
      echo "${external_link}"
    fi
  done
}
| |
#######################################
# Prints a summary of the absolute links collected for one environment.
# Arguments:
#   $1 - name of the array holding the collected links
#   $2 - env: label used in the messages (e.g. "production")
# Outputs:
#   Either a heading followed by one link per line, or a single line stating
#   that no absolute links were found; both go to stdout.
#######################################
function report_absolute_links {
  local -n env_exception_links="$1"
  local env="$2"

  if [[ ${#env_exception_links[@]} -eq 0 ]]; then
    echo "No absolute $env links"
    return
  fi

  echo "Found ${#env_exception_links[@]} link(s) leading to $env site. Recommended to use relative links to Apache Beam website. Absolute links to Apache Beam $env website:"
  # Quoted expansion: each link is printed verbatim on its own line, without
  # word splitting or pathname expansion of characters such as "?" or "*".
  printf '%s\n' "${env_exception_links[@]}"
}
| |
# The link extraction further down shells out to lynx, so stop early with a
# hint when it is not installed. The lookup result itself is discarded so the
# resolved path does not show up in the script's output.
if ! command -v lynx >/dev/null; then
  echo "This script requires lynx to work properly."
  echo
  echo "For more information, look at: https://lynx.browser.org/"
  exit 1
fi
| |
# Run from the directory that contains this script, so the default relative
# dist path resolves the same way no matter where the script is invoked from.
MY_DIR="$(cd "$(dirname "$0")" && pwd)"
pushd "${MY_DIR}" &>/dev/null || exit 1

echo "Working directory: ${MY_DIR}"

# The first positional argument overrides the location of the built website.
DIST_DIR=${1:-"./dist"}
echo "Dist directory: ${DIST_DIR}"

echo ""

if [[ ! -f "${DIST_DIR}/index.html" ]]; then
  echo "You should build website first."
  exit 1
fi

# Effectively a no-op here: the index.html check above already implies that
# the directory exists.
mkdir -pv "${DIST_DIR}"

# NUL-delimited so that page paths containing whitespace survive intact.
readarray -d '' -t pages < <(find "${DIST_DIR}" -name '*.html' -print0)
echo "Found ${#pages[@]} HTML files."

echo "Searching links."
# One lynx invocation per page dumps that page's link targets. The paths are
# handed to xargs NUL-delimited as well; newline-delimited input would be
# re-split on blanks and have quotes/backslashes interpreted by xargs.
# Dumped entries that contain a space are dropped.
mapfile -t links < <(
  printf '%s\0' "${pages[@]}" \
    | xargs -0 -n 1 lynx -listonly -nonumbers -dump -display_charset=iso-8859-1 \
    | grep -v " "
)
# Keep http(s) links only, drop the excluded hosts and the "edit this page"
# links, and de-duplicate.
mapfile -t external_links < <(
  printf '%s\n' "${links[@]}" \
    | grep "^https\?://" \
    | grep -v "http://localhost" \
    | grep -v "http://link/" \
    | grep -v "http://docker.local" \
    | grep -v "https://github.com/apache/beam/edit/master/website/www/site/content/" \
    | sort \
    | uniq
)
echo "Found ${#links[@]} links including ${#external_links[@]} unique external links."
| |
echo "Checking links."
# One result array per environment; check_absolute_links appends to the array
# whose name it is given, and the arrays are reported on later in the script.
declare -a prod_exception_links=()
declare -a staging_exception_links=()
check_absolute_links "production" "beam.apache.org" prod_exception_links
check_absolute_links "staging" "apache-beam-website-pull-requests.storage.googleapis.com" staging_exception_links
| |
echo "Checking working links."
# Comma-separated list of links that handle_urls does not report even when
# probing them fails. Built one URL per line for readability; the resulting
# value is a single comma-joined string.
verified_list="https://reporter.apache.org/addrelease.html?beam"
verified_list+=",https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork"
verified_list+=",https://help.github.com/articles/securing-your-account-with-two-factor-authentication-2fa/"
verified_list+=",https://qwiklabs.com/focuses/608?locale=en&parent=catalog"
verified_list+=",https://www.artstation.com/jbruno"
verified_list+=",https://www.qwiklabs.com/focuses/1098?parent=catalog"
verified_list+=",https://www.infoworld.com/article/3336072/infoworlds-2019-technology-of-the-year-award-winners.html"
verified_list+=",https://be.linkedin.com/in/mattcasters"
verified_list+=",https://www.linkedin.com/company/beam-summit/"
verified_list+=",https://www.linkedin.com/company/beam-summit/?viewAsMember=true"
verified_list+=",https://www.ricardo.ch/"
verified_list+=",https://www.linkedin.com/company/apache-beam/"
verified_list+=",https://repository.apache.org/content/repositories/orgapachebeam-NNNN/)"
verified_list+=",https://www.meetup.com/Apache-Beam-Stockholm/?_cookie-check=v_YHSSjYcT9rpm61"
verified_list+=",https://www.meetup.com/Apache-Beam-Stockholm/events/260634514"
# Filled by handle_urls with one entry per broken link.
invalid_links=()
| |
| function handle_urls { |
| i=1 |
| for external_link in "${external_links[@]}" |
| do |
| redraw_progress_bar 50 1 $i ${#external_links[@]} |
| |
| curl_result=$(curl -sSfL --max-filesize 1000000 --max-time 10 --connect-timeout 10 --retry 2 -4 "$external_link" 2>&1 > /dev/null) && status=$? || status=$? |
| if [ $status -ne 0 ] ; then |
| if [[ $curl_result =~ (error: )([0-9]{3}) ]]; then |
| error_code=${BASH_REMATCH[0]} |
| |
| # Check if link is in verified_list |
| if [[ $verified_list =~ "$external_link" ]]; then |
| continue |
| fi |
| |
| invalid_links+=("${error_code} ${external_link}") |
| echo "${external_link}" |
| fi |
| fi |
| i=$((i+1)) |
| done |
| # Clear line - hide progress bar |
| echo -n -e "\033[2K" |
| } |
| |
handle_urls

report_absolute_links prod_exception_links "production"
report_absolute_links staging_exception_links "staging"

# Sort invalid links by error status. Every entry starts with the matched
# error text, so a plain sort groups the links by status.
# mapfile reads the sorted lines verbatim: no pathname expansion of "?" or
# "*" inside URLs, and no need to modify the global IFS.
sorted_invalid_links=()
if [[ ${#invalid_links[@]} -ne 0 ]]; then
  mapfile -t sorted_invalid_links < <(printf '%s\n' "${invalid_links[@]}" | sort)
fi

if [[ ${#sorted_invalid_links[@]} -ne 0 ]]; then
  echo "Found ${#sorted_invalid_links[@]} invalid links: "
  printf '%s\n' "${sorted_invalid_links[@]}"
else
  echo "All links work"
fi

# Return to the directory the script was started from.
popd &>/dev/null || exit 1