ARROW-12236: [R][CI] Add check that all docs pages are listed in _pkgdown.yml

Added a bash script to the CI workflow that checks that every .Rd file in r/man/ that is not marked as internal is listed in the reference sections of r/_pkgdown.yml.

Closes #9946 from thisisnic/ARROW-12236-pkgdown-ci

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 0bdecac..7851b6b 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -72,6 +72,8 @@
           path: .docker
           key: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/**') }}
           restore-keys: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
+      - name: Check pkgdown reference sections
+        run: ci/scripts/r_pkgdown_check.sh
       - name: Setup Python
         uses: actions/setup-python@v1
         with:
diff --git a/ci/scripts/r_pkgdown_check.sh b/ci/scripts/r_pkgdown_check.sh
new file mode 100755
index 0000000..327480a
--- /dev/null
+++ b/ci/scripts/r_pkgdown_check.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Fail when a non-internal .Rd page in ./r/man is not listed under "reference:" in ./r/_pkgdown.yml
+
+# Names (no directory, no .Rd suffix) of the .Rd files directly inside ./r/man, sorted
+all_rd_files=$(find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's|^\./r/man/||' -e 's|\.Rd$||' | sort)
+
+# Names of the pages tagged \keyword{internal}; -F matches that macro literally, not as a regex
+exclusions=$(grep -rlF --include='*.Rd' -- '\keyword{internal}' ./r/man | sed -e 's|^\./r/man/||' -e 's|\.Rd$||' | sort)
+
+# Pages that must be listed: everything in all_rd_files that is not an exclusion
+rd_files=$(comm -23 <(printf '%s\n' "$all_rd_files") <(printf '%s\n' "$exclusions"))
+
+# Whitespace-separated tokens awk prints for the top-level "reference:" key, one per line, sorted
+pkgdown_sections=$(awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | tr -s ' \t' '\n' | sort -u)
+
+# Required pages absent from the pkgdown tokens; comm -23 keeps lines found only in its first input
+pkgdown_missing=$(comm -23 <(printf '%s\n' "$rd_files") <(printf '%s\n' "$pkgdown_sections"))
+
+# A non-empty result means at least one page is unlisted: report the names on stderr and fail
+if [[ -n "$pkgdown_missing" ]]; then
+  echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml" >&2
+  exit 1
+fi