#!/bin/bash
#
# Copyright 2013 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: anupama@google.com (Anupama Dutta)
#
# This script can be used to verify whether downstream caching
# (https://developers.google.com/speed/pagespeed/module/downstream-caching)
# has been configured correctly on your system by doing wgets with a sample URL
# and looking at the response headers.
#
# This script currently verifies the working of a mod_pagespeed enabled Apache
# server with an Nginx proxy_cache server acting as a downstream caching layer.
#
# Example invocation for this script. The here-document is fed to the no-op
# ':' builtin, so nothing in it is executed; it only illustrates the flags.
: <<'SCRIPT_EXAMPLE'
./verify_downstream_caching_configuration.sh \
-u "http://localhost:8051/mod_pagespeed_test/cachable_rewritten_html/downstream_caching.html" \
-h "proxy_cache.example.com" \
-s "localhost:8080" \
-m "GET" \
-p "http://localhost:8051/purge" \
-c "/usr/local/apache2/pagespeed_cache"
SCRIPT_EXAMPLE

# Command-line option values. Each one starts out empty; the letter in the
# trailing comment is the flag that sets it.
SAMPLE_URL_TO_WGET=''             # -u
WGET_HOST_HEADER=''               # -h
APACHE_MOD_PAGESPEED_SERVER=''    # -s
PURGE_METHOD=''                   # -m
PURGE_LOCATION_PREFIX=''          # -p
MOD_PAGESPEED_FILE_CACHE_PATH=''  # -c
# Prints the list of supported options to stderr and terminates the script
# with exit status 1.
usage() {
  {
    printf 'usage: %s\n' "$0"
    printf '%s\n' \
      '-u sample_url' \
      '-h host_header' \
      '-s apache_server_host_port' \
      '-m purge_http_method' \
      '-p purge_location_prefix' \
      '-c mod_pagespeed_file_cache_path'
  } >&2
  exit 1
}
# Fetches a URL with wget, storing the response body and the wget log under
# $TMPDIR.
# Globals:
#   WGET_HOST_HEADER (read) - complete header line handed to wget --header.
#   TMPDIR (read)           - directory that receives the output files.
# Arguments:
#   $1 - URL to fetch.
#   $2 - optional identifier appended to the output file names: the body is
#        written to $TMPDIR/tmp<id>.gz and the log to $TMPDIR/log<id>.
# Returns:
#   The exit status of wget.
wget_url() {
  local url="$1"
  local suffix="${2:-}"
  # All expansions are quoted so that a URL, identifier or directory that
  # contains whitespace or glob characters is passed to wget as one word.
  # -S asks wget to print the server response headers; together with -o they
  # end up in the log file, which is what the callers grep.
  wget "$url" --header="$WGET_HOST_HEADER" -O "$TMPDIR/tmp${suffix}.gz" \
    -o "$TMPDIR/log${suffix}" -S --user-agent="Chrome/26.0" \
    --header="Accept-Encoding: gzip"
}
# Fetches a URL via wget_url and reports whether the response was a cache MISS.
# Globals:
#   TMPDIR (read) - directory holding the wget log written by wget_url.
# Arguments:
#   $1 - URL to fetch.
# Returns:
#   0 (success) if the wget log contains "X-Cache: MISS", non-zero otherwise,
#   so callers can write `if ! wget_url_and_test_for_miss URL; then ...`.
wget_url_and_test_for_miss() {
  # Quote the URL so that it reaches wget_url as exactly one argument even if
  # it contains whitespace or glob characters such as '?' or '*'.
  wget_url "$1"
  grep -q "X-Cache: MISS" "$TMPDIR/log"
}
# Invalidates the mod_pagespeed cache by recursively deleting the directory
# named by $MOD_PAGESPEED_FILE_CACHE_PATH.
# Globals:
#   MOD_PAGESPEED_FILE_CACHE_PATH (read) - path that is removed.
# Returns:
#   The exit status of rm.
invalidate_mod_pagespeed_cache() {
  # '--' ends option parsing, so a path that begins with '-' is treated as a
  # file name rather than as rm options.
  rm -rf -- "$MOD_PAGESPEED_FILE_CACHE_PATH"
}
# Removes a specific URL from proxy_cache by fetching it from the Apache
# server, which is expected to trigger a PURGE in the background.
# Globals:
#   APACHE_URL (read) - used as the URL when no argument is given.
# Arguments:
#   $1 - URL on the Apache server to fetch; defaults to $APACHE_URL.
# Returns:
#   The exit status of the final sleep.
cleanup_url_from_proxy_cache() {
  # Honor the URL handed in by the caller; fall back to the global so that
  # argument-less calls keep working.
  local apache_url="${1:-$APACHE_URL}"
  # Invalidate the mod_pagespeed cache, so that rewriting can start from
  # scratch for the next request.
  invalidate_mod_pagespeed_cache
  # Fetch directly from the Apache server; this should indirectly trigger a
  # PURGE that cleans up the proxy_cache.
  wget_url "$apache_url"
  # Sleep for 2 seconds waiting for the PURGE to happen.
  sleep 2
}
# Last-resort diagnostic for failures the other checks cannot explain: prints
# a hint about live traffic and the support mailing list to stdout, then
# terminates the script with exit status 1.
print_last_resort_error_message_and_exit() {
  printf '%s\n' \
    'Looks like this got into the cache while we were not looking! Check if' \
    'you are getting live traffic for this URL. If yes, try running this' \
    'script with a not-so-popular URL. If everything else looks correct,' \
    'mail mod-pagespeed-discuss@googlegroups.com!'
  exit 1
}
# Validates that the given URL starts with http://.
# Arguments:
#   $1 - URL to validate.
# On a mismatch, an error message is written to stderr and the script exits
# with status 1; otherwise the function returns 0.
check_and_exit_on_invalid_urls() {
  if [[ "$1" != http://* ]]; then
    echo "$1 is not a valid URL. Valid URLs must begin with http://" >&2
    exit 1
  fi
}
# Parse command-line arguments. Every option takes a value (note the ':' after
# each letter in the option string). Anything getopts does not recognise falls
# through to the default arm, which prints the usage text and exits.
while getopts u:h:s:m:p:c: opt; do
  case "$opt" in
    u) SAMPLE_URL_TO_WGET="$OPTARG" ;;
    h) WGET_HOST_HEADER="$OPTARG" ;;
    s) APACHE_MOD_PAGESPEED_SERVER="$OPTARG" ;;
    m) PURGE_METHOD="$OPTARG" ;;
    p) PURGE_LOCATION_PREFIX="$OPTARG" ;;
    c) MOD_PAGESPEED_FILE_CACHE_PATH="$OPTARG" ;;
    *) usage ;;
  esac
done
# Drop the options that were consumed. Shell arithmetic replaces the former
# backtick/expr call, avoiding an external process for a simple subtraction.
shift $((OPTIND - 1))
# All six options are mandatory: if any of them is still empty, report the
# problem and show the usage text (usage terminates the script).
if [[ -z "$SAMPLE_URL_TO_WGET" \
  || -z "$WGET_HOST_HEADER" \
  || -z "$APACHE_MOD_PAGESPEED_SERVER" \
  || -z "$PURGE_METHOD" \
  || -z "$PURGE_LOCATION_PREFIX" \
  || -z "$MOD_PAGESPEED_FILE_CACHE_PATH" ]]; then
  echo "Error in usage!"
  usage
fi
# The two URL-valued options must both start with http://.
check_and_exit_on_invalid_urls "$SAMPLE_URL_TO_WGET"
check_and_exit_on_invalid_urls "$PURGE_LOCATION_PREFIX"
# From here on WGET_HOST_HEADER holds the complete header line rather than
# just the host name.
WGET_HOST_HEADER="Host: ${WGET_HOST_HEADER}"
# Create a private scratch directory for wget/curl output. mktemp picks an
# unpredictable name and creates the directory atomically, so the script no
# longer has to "rm -rf" a guessable path in the world-writable /tmp first.
TMPDIR=$(mktemp -d "/tmp/verify-downstream-caching-configuration-XXXXXX") || {
  echo "Could not create a temporary directory under /tmp" >&2
  exit 1
}
# Ask for confirmation before touching the mod_pagespeed cache: the script
# deletes it here and again at several later points while verifying the setup.
# Only an answer of exactly "Y" continues; anything else ends the script with
# exit status 0.
printf '%s\n' \
  "---------------------------------------------------------------" \
  "This script needs to delete the mod_pagespeed cache located at" \
  "$MOD_PAGESPEED_FILE_CACHE_PATH."
printf '%s' "Enter Y to proceed or anything else to abort:"
read agreement
[[ "$agreement" == "Y" ]] || exit 0
invalidate_mod_pagespeed_cache
# Derive the path portion of the sample URL by dropping the "http://" scheme
# and then everything up to and including the first '/'. A URL without any
# path (e.g. "http://host:port") yields an empty path instead of being left
# untouched, which previously produced URLs like "http://apache/http://host".
URL_PATH=${SAMPLE_URL_TO_WGET#http://}
case "$URL_PATH" in
  */*) URL_PATH=${URL_PATH#*/} ;;
  *) URL_PATH="" ;;
esac
# Same path, but addressed directly to the mod_pagespeed-enabled Apache server.
APACHE_URL="http://$APACHE_MOD_PAGESPEED_SERVER/$URL_PATH"
# Use the PURGE_LOCATION_PREFIX to get a Purge URL for the caching layer.
PURGE_URL="$PURGE_LOCATION_PREFIX/$URL_PATH"
# TODO(anupama): More checks to add here: Check that purge location prefix is
# a substring of the sample url to wget. Else something is wrong.
# Check if APACHE_MOD_PAGESPEED_SERVER is running by fetching the sample page
# directly from it. wget_url is called without an identifier, so the response
# headers end up in $TMPDIR/log.
if ! wget_url "$APACHE_URL"; then
  echo "---------------------------------------------------------------"
  echo "Wget didn't succeed. Check if your mod_pagespeed enabled apache"
  # The server name is kept inside the quotes so that it is printed verbatim
  # instead of being word-split or glob-expanded.
  echo "server is running correctly at $APACHE_MOD_PAGESPEED_SERVER"
  exit 1
fi
# Check if APACHE_MOD_PAGESPEED_SERVER is running mod_pagespeed: its response
# headers must mention X-Mod-Pagespeed.
if ! grep -q "X-Mod-Pagespeed" "$TMPDIR/log"; then
  echo "---------------------------------------------------------------"
  echo "Your apache server at $APACHE_MOD_PAGESPEED_SERVER does not"
  echo "seem to be running mod_pagespeed. Please recheck your configuration."
  exit 1
fi
# Record whether the Apache response carried a no-cache Cache-Control header.
# IS_APACHE_SERVING_NO_CACHE_HEADERS holds grep's exit status, i.e. 0 means
# "no-cache was found"; the value is consulted again by a later check.
grep -q "Cache-Control: .*no-cache" "$TMPDIR/log"
IS_APACHE_SERVING_NO_CACHE_HEADERS=$?
if [[ "$IS_APACHE_SERVING_NO_CACHE_HEADERS" == "0" ]]; then
  # Not fatal: explain the situation and wait for Enter before going on.
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "You don't seem to be using \"ModPagespeedModifyCachingHeaders off\"" \
    "to allow non-zero max-age values to be propagated to and respected" \
    "by your proxy_cache servers. Press Enter if you want to continue with" \
    "this and fix things in your proxy_cache layer. Otherwise press"
  printf '%s' "Ctrl+C and fix your mod_pagespeed-enabled apache server:"
  read ignored
fi
# Fetch the sample URL twice through the proxy_cache layer. The identifiers
# 1 and 2 keep the two results apart ($TMPDIR/tmp1.gz + log1, tmp2.gz + log2).
# The mod_pagespeed cache was already primed by the direct fetch from the
# Apache server, so the first response ought to be fully rewritten and cached,
# letting the second fetch be served as a HIT.
wget_url "$SAMPLE_URL_TO_WGET" 1
wget_url "$SAMPLE_URL_TO_WGET" 2
# An empty or missing body for the first fetch means the backend that
# proxy_pass points to could not be reached.
if [[ ! -s "$TMPDIR/tmp1.gz" ]]; then
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "Wget didn't succeed. Check if proxy_pass directive in your proxy_cache" \
    "config is set correctly to point to your mod_pagespeed-enabled Apache" \
    "server."
  exit 1
fi
# The backend behind proxy_pass must have mod_pagespeed enabled as well.
if ! grep -q "X-Mod-Pagespeed" "$TMPDIR/log1"; then
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "The backend server referenced in proxy_pass in your proxy_cache" \
    "config seems to be not running mod_pagespeed. Please recheck to see" \
    "if it is referencing $APACHE_MOD_PAGESPEED_SERVER"
  exit 1
fi
# The proxy_cache server should hand HTML to clients as no-cache whether or
# not the response is cached in proxy_cache; ETag, Last-Modified and the
# upstream Cache-Control header should be stripped from the client response.
# A missing no-cache header is not fatal: print the suggested configuration
# and wait for Enter.
if ! grep -q "Cache-Control: .*no-cache" "$TMPDIR/log1"; then
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "You should set your outgoing headers from proxy_cache to be " \
    "no-cache for HTML responses. Please do this by adding the following" \
    "lines to your http block:" \
    ' map $upstream_http_content_type $new_cache_control_header_val {' \
    ' default $upstream_http_cache_control;' \
    ' "~*text/html" "no-cache, max-age=0";' \
    " }" \
    "and adding the following lines to your 'location /' block:" \
    " proxy_hide_header Last-Modified;" \
    " proxy_hide_header ETag;" \
    " proxy_hide_header Cache-Control;" \
    ' add_header Cache-Control $new_cache_control_header_val;' \
    "Press Enter if you want to skip this. Otherwise press"
  printf '%s' "Ctrl+C and fix your proxy_cache config:"
  read ignored
fi
# The remaining checks rely on an X-Cache status header in the proxy_cache
# responses, so its absence is fatal.
if ! grep -q "X-Cache: " "$TMPDIR/log1"; then
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "This script relies on the cache status header for many of its " \
    "checks. Please add the following line to your 'location /' block:" \
    ' add_header X-Cache $upstream_cache_status;'
  exit 1
fi
# A BYPASS status on the first fetch through the proxy_cache layer means the
# cache was skipped for this request.
if grep -q "X-Cache: BYPASS" "$TMPDIR/log1"; then
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "Cache is being bypassed for this request. Please check for the" \
    "following line in the 'location /' block of your proxy_cache_config:" \
    " proxy_cache_bypass <SHOULD_CACHE_BE_BYPASSED>;" \
    "and confirm that <SHOULD_CACHE_BE_BYPASSED> is defined correctly" \
    "as per the documentation. Alternately, verify that the URL you have" \
    "provided for testing is not meant to be bypassed by the cache."
  exit 1
fi
# The first fetch through the caching layer is expected to be a MISS.
if ! grep -q "X-Cache: MISS" "$TMPDIR/log1"; then
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "Looks like your cache already has the response (HIT) or the response" \
    "has EXPIRED. Please restart your proxy_cache servers (so that the" \
    "downstream cache is cleared for this URL)."
  exit 1
fi
# The second fetch through the caching layer is expected to be a HIT. If it
# is not, choose the explanation based on whether the Apache server was seen
# serving no-cache headers earlier, then abort.
if ! grep -q "X-Cache: HIT" "$TMPDIR/log2"; then
  if [[ "$IS_APACHE_SERVING_NO_CACHE_HEADERS" == "0" ]]; then
    # mod_pagespeed is serving no-cache. Caching can then only work if
    # proxy_cache_valid and proxy_ignore_headers are both defined.
    printf '%s\n' \
      "---------------------------------------------------------------" \
      "If you want to cache HTML that is originally marked with no-cache in" \
      "proxy_cache, you must have the following line in your 'location /'" \
      "block of your proxy_cache config:" \
      " proxy_ignore_headers Cache-Control;" \
      "You must also add a proxy_cache_valid line to this block to indicate" \
      "how long you want to cache the content:" \
      " proxy_cache_valid <TIME>;"
  else
    # The second fetch (effectively the third, since the earlier direct fetch
    # from the Apache server would have started background rewriting) should
    # have been a HIT. No HIT at all means something is very wrong.
    printf '%s\n' \
      "---------------------------------------------------------------" \
      "Looks like your earlier response did not get cached, or the response" \
      "has EXPIRED due to very small caching durations, or your cache" \
      "somehow got purged very quickly. Check the size of your cache. If" \
      "everything else looks correct, contact" \
      "mod-pagespeed-discuss@googlegroups.com!"
  fi
  exit 1
fi
# Check if the purge location prefix and purge method have been specified
# correctly by executing curl with a purge URL that directly goes to the caching
# layer. Note: wget does not allow http request methods to be specified, so we
# use curl here.
# The Host header is the one supplied via -h (WGET_HOST_HEADER already holds
# the full "Host: ..." line), so the purge is sent for the same host as the
# wget fetches rather than for a hard-coded example host. Both stdout and
# stderr of curl are captured in $TMPDIR/trace.
curl -X "$PURGE_METHOD" "$PURGE_URL" -H "User-Agent: Chrome/26.0" \
  -H "$WGET_HOST_HEADER" > "$TMPDIR/trace" 2>&1
if ! grep -q "Successful purge" "$TMPDIR/trace"; then
  # Unsuccessful purges could result from incorrect values for
  # purge_location_prefix or purge_method, ACLs for purge block/command being
  # incorrect, zone named for purging being incorrect or cache key used for
  # purging not matching with the proxy_cache_key directive.
  echo "---------------------------------------------------------------"
  echo "Check your location block for handling purges as follows:"
  echo ""
  echo "1) The purge_location_prefix (-p) specified for this script should match"
  echo " the regexp for the block handling purges."
  echo ""
  echo "2) The purge_http_method (-m) specified for this script should be correct."
  echo ""
  echo "3) The ACL on the purge-handling block should be correct. Example ACLs:"
  echo " allow localhost;"
  echo " allow 127.0.0.1;"
  echo " allow <YOUR-SERVER-IP>;"
  echo " deny all;"
  echo ""
  echo "4) Check that the zone named for purging and the cache key match the"
  echo " variables used in the 'location /' block:"
  echo " proxy_cache_purge <ZONE_NAME_FOR_CACHING> " \
    ' <PS_CAPABILITY_LIST>$1$is_args$args;'
  echo ""
  echo " Ideally, the 'location /' block will be using these variables as"
  echo " follows:"
  echo " proxy_cache <ZONE_NAME_FOR_CACHING>;"
  echo ' proxy_cache_key <PS_CAPABILITY_LIST>$uri$is_args$args;'
  echo " Note that <PS_CAPABILITY_LIST> should be defined as suggested in the"
  echo " documentation."
  exit 1
fi
# Verify that PURGEs issued via mod_pagespeed are received and processed by
# the proxy_cache server.
# The preceding direct purge succeeded, so a fetch through the caching layer
# has to come back as a MISS.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  echo "---------------------------------------------------------------"
  print_last_resort_error_message_and_exit
}
# Now, purge the URL from proxy cache by way of the Apache server.
cleanup_url_from_proxy_cache "$APACHE_URL"
# The background PURGE that should have been triggered must make the next
# fetch through the caching layer a MISS again.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "Looks like the purge from your Apache server was not executed on your" \
    "proxy_cache server." \
    "1) Check if the mod_pagespeed-enabled Apache server IPs are part of the" \
    " ACLS you specify for your purge-handling block in proxy_cache" \
    " config." \
    "2) Check your ModPagespeedDownstreamCache* directives and make sure" \
    " that they match your proxy_cache config." \
    "If both of the above things are correct, then ..."
  print_last_resort_error_message_and_exit
}
# Verify that requests are sent upstream with all of the required info, so
# that subsequent PURGEs can be successful.
cleanup_url_from_proxy_cache "$APACHE_URL"
# Invalidate the mod_pagespeed cache so that rewriting starts from scratch
# for this URL.
invalidate_mod_pagespeed_cache
# The fetch through the caching layer must be a MISS before the final fetch
# is attempted.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  echo "---------------------------------------------------------------"
  print_last_resort_error_message_and_exit
}
# Sleep for 2 seconds waiting for the PURGE to happen.
sleep 2
# The PURGE that ought to have been triggered must turn this fetch into a
# MISS as well.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  printf '%s\n' \
    "---------------------------------------------------------------" \
    "Looks like the purge didn't succeed. Check the 'location /' block to" \
    "see if the following directives are defined correctly:" \
    ' proxy_set_header Host $host;' \
    " proxy_set_header PS-CapabilityList <PS_CAPABILITY_LIST>;" \
    "If all of this is correct, then ..."
  print_last_resort_error_message_and_exit
}
# Every check passed: report success.
echo "---------------------------------------------------------------"
echo "Your downstream caching setup looks good!"
# Cleanup. TMPDIR is also the conventional name of the system temp-directory
# variable, so only remove it when it is clearly this script's own scratch
# directory; the expansion is quoted and '--' ends option parsing.
case "$TMPDIR" in
  /tmp/verify-downstream-caching-configuration-*) rm -rf -- "$TMPDIR" ;;
esac