|  | #!/bin/bash | 
|  | # | 
|  | # Copyright 2013 Google Inc. | 
|  | # | 
|  | # Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | # you may not use this file except in compliance with the License. | 
|  | # You may obtain a copy of the License at | 
|  | # | 
|  | #      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | # | 
|  | # Unless required by applicable law or agreed to in writing, software | 
|  | # distributed under the License is distributed on an "AS IS" BASIS, | 
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | # See the License for the specific language governing permissions and | 
|  | # limitations under the License. | 
|  | # | 
|  | # Author: anupama@google.com (Anupama Dutta) | 
|  | # | 
|  | # This script can be used to verify whether downstream caching | 
|  | # (https://modpagespeed.com/doc/downstream-caching) | 
|  | # has been configured correctly on your system by doing wgets with a sample URL | 
|  | # and looking at the response headers. | 
|  | # | 
|  | # This script currently verifies the working of a mod_pagespeed enabled Apache | 
|  | # server with an Nginx proxy_cache server acting as a downstream caching layer. | 
|  | # | 
|  | # Example invocation for this script: | 
|  | <<SCRIPT_EXAMPLE | 
|  | ./verify_downstream_caching_configuration.sh \ | 
|  | -u "http://localhost:8051/mod_pagespeed_test/cachable_rewritten_html/downstream_caching.html" \ | 
|  | -h "proxy_cache.example.com" \ | 
|  | -s "localhost:8080" \ | 
|  | -m "GET" \ | 
|  | -p "http://localhost:8051/purge" \ | 
|  | -c "/usr/local/apache2/pagespeed_cache" | 
|  | SCRIPT_EXAMPLE | 
|  |  | 
# Settings filled in from the command-line options; each starts out empty
# and all six must be supplied by the caller.
SAMPLE_URL_TO_WGET=              # -u
WGET_HOST_HEADER=                # -h
APACHE_MOD_PAGESPEED_SERVER=     # -s
PURGE_METHOD=                    # -m
PURGE_LOCATION_PREFIX=           # -p
MOD_PAGESPEED_FILE_CACHE_PATH=   # -c
|  |  | 
# Writes the option summary to stderr and terminates the script with
# exit status 1.
usage() {
  {
    printf 'usage: %s\n' "$0"
    printf '%s\n' \
      '-u sample_url' \
      '-h host_header' \
      '-s apache_server_host_port' \
      '-m purge_http_method' \
      '-p purge_location_prefix' \
      '-c mod_pagespeed_file_cache_path'
  } >&2
  exit 1
}
|  |  | 
# Helper method for issuing a wget for the specified URL.
# Globals:
#   WGET_HOST_HEADER (read) - complete "Host: ..." header line to send.
#   TMPDIR (read)           - directory receiving the body and the log.
# Arguments:
#   $1 - the URL to be fetched.
#   $2 - optional identifier appended to the generated file names; the body
#        goes to $TMPDIR/tmp<id>.gz and the wget log (including the server
#        response headers, because of -S) goes to $TMPDIR/log<id>.
# Returns:
#   The exit status of wget.
wget_url() {
  local url=$1
  local suffix=${2:-}
  # Paths are quoted so that a TMPDIR or suffix containing whitespace or glob
  # characters still yields exactly one argument each.
  wget "$url" --header="$WGET_HOST_HEADER" -O "$TMPDIR/tmp${suffix}.gz" \
    -o "$TMPDIR/log${suffix}" -S --user-agent="Chrome/26.0" \
    --header="Accept-Encoding: gzip"
}
|  |  | 
# Helper method for issuing a wget for the specified URL and checking the
# resulting log for a cache MISS.
# Arguments:
#   $1 - the URL to be fetched.
# Returns:
#   0 if the wget log contains "X-Cache: MISS", non-zero otherwise (this is
#   the exit status of grep, so callers can use it directly in `if`).
wget_url_and_test_for_miss() {
  # Quote the URL: unquoted, a URL containing spaces or glob characters such
  # as '?' or '*' could be split or expanded against local file names.
  wget_url "$1"
  grep -q "X-Cache: MISS" "$TMPDIR/log"
}
|  |  | 
# Helper method for invalidating the mod_pagespeed cache by recursively
# deleting the $MOD_PAGESPEED_FILE_CACHE_PATH directory.
# Globals:
#   MOD_PAGESPEED_FILE_CACHE_PATH (read) - directory to delete.
# Returns:
#   The exit status of rm.
invalidate_mod_pagespeed_cache() {
  # "--" ends option parsing so a path that begins with "-" is treated as a
  # path and not as an rm option.
  rm -rf -- "$MOD_PAGESPEED_FILE_CACHE_PATH"
}
|  |  | 
# Helper method for removing a specific URL from proxy_cache (using purges
# issued from the Apache server).
# Globals:
#   APACHE_URL (read) - used when no argument is given.
# Arguments:
#   $1 - optional URL on the Apache server to fetch; defaults to $APACHE_URL.
#        Callers already pass the URL explicitly, so it is honored here
#        instead of being silently ignored.
cleanup_url_from_proxy_cache() {
  local apache_url=${1:-$APACHE_URL}
  # Invalidate the mod_pagespeed cache, so that rewriting can start from
  # scratch for the next request.
  invalidate_mod_pagespeed_cache
  # Do a wget on the Apache server, which should indirectly trigger a PURGE in
  # the background. This cleans up the proxy_cache.
  wget_url "$apache_url"
  # Sleep for 2 seconds waiting for the PURGE to happen.
  sleep 2
}
|  |  | 
# Last-resort diagnostic for failures we cannot explain: prints a generic
# hint to stdout and terminates the script with exit status 1.
print_last_resort_error_message_and_exit() {
  cat <<'EOF'
Looks like this got into the cache while we were not looking! Check if
you are getting live traffic for this URL. If yes, try running this
script with a not-so-popular URL. If everything else looks correct,
mail mod-pagespeed-discuss@googlegroups.com!
EOF
  exit 1
}
|  |  | 
# Accepts a URL only if it begins with "http://".
# Arguments:
#   $1 - the URL to validate.
# On a valid URL the function returns 0 silently; otherwise it reports the
# offending value on stderr and terminates the script with exit status 1.
check_and_exit_on_invalid_urls() {
  if [[ "$1" == http://* ]]; then
    return 0
  fi
  echo "$1 is not a valid URL. Valid URLs must begin with http://" >&2
  exit 1
}
|  |  | 
# Parse commandline arguments.
# Every option takes exactly one argument, which is stored in the matching
# global. An unknown option or a missing option argument prints the usage
# text and exits (via usage).
# Globals:
#   SAMPLE_URL_TO_WGET, WGET_HOST_HEADER, APACHE_MOD_PAGESPEED_SERVER,
#   PURGE_METHOD, PURGE_LOCATION_PREFIX, MOD_PAGESPEED_FILE_CACHE_PATH
#   (written); OPTIND (reset, then left pointing past the parsed options).
# Arguments:
#   The script's positional parameters.
parse_command_line_arguments() {
  local opt
  OPTIND=1
  while getopts u:h:s:m:p:c: opt; do
    case "$opt" in
      u) SAMPLE_URL_TO_WGET="$OPTARG" ;;
      h) WGET_HOST_HEADER="$OPTARG" ;;
      s) APACHE_MOD_PAGESPEED_SERVER="$OPTARG" ;;
      m) PURGE_METHOD="$OPTARG" ;;
      p) PURGE_LOCATION_PREFIX="$OPTARG" ;;
      c) MOD_PAGESPEED_FILE_CACHE_PATH="$OPTARG" ;;
      *) usage ;;
    esac
  done
}

parse_command_line_arguments "$@"
# Drop the options consumed above; arithmetic expansion replaces the external
# `expr` call.
shift $((OPTIND - 1))
|  |  | 
# Every option is mandatory: if any of the six values is still empty, report
# the problem and show the usage text (usage terminates the script).
if [[ -z "$SAMPLE_URL_TO_WGET" \
    || -z "$WGET_HOST_HEADER" \
    || -z "$APACHE_MOD_PAGESPEED_SERVER" \
    || -z "$PURGE_METHOD" \
    || -z "$PURGE_LOCATION_PREFIX" \
    || -z "$MOD_PAGESPEED_FILE_CACHE_PATH" ]]; then
  echo "Error in usage!"
  usage
fi
|  |  | 
# Validate URL arguments: both the sample URL and the purge location prefix
# must be http:// URLs (the check exits the script on the first bad one).
for url_to_validate in "$SAMPLE_URL_TO_WGET" "$PURGE_LOCATION_PREFIX"; do
  check_and_exit_on_invalid_urls "$url_to_validate"
done

# From here on WGET_HOST_HEADER holds the complete request header line.
WGET_HOST_HEADER="Host: ${WGET_HOST_HEADER}"
|  |  | 
# Scratch directory for downloaded bodies and wget/curl logs.
# mktemp creates a fresh, unpredictably named directory, which avoids the
# race of deleting and re-creating a guessable /tmp path. The EXIT trap
# removes it on every exit path, including the early "exit 1" failures.
TMPDIR=$(mktemp -d "/tmp/verify-downstream-caching-configuration-XXXXXX") || {
  echo "Unable to create a temporary directory under /tmp" >&2
  exit 1
}
trap 'rm -rf -- "$TMPDIR"' EXIT
|  |  | 
# Prompt about invalidating mod_pagespeed cache. This invalidation needs to be
# done at multiple points in the script to aid in verifying the setup.
# Only an exact "Y" answer proceeds; anything else (including end of input)
# ends the script with status 0 without deleting anything.
echo "---------------------------------------------------------------"
echo "This script needs to delete the mod_pagespeed cache located at"
echo "$MOD_PAGESPEED_FILE_CACHE_PATH."
printf '%s' "Enter Y to proceed or anything else to abort:"
# -r keeps backslashes literal so the answer is compared exactly as typed.
read -r agreement
if [[ "$agreement" != "Y" ]]; then
  exit 0
fi
invalidate_mod_pagespeed_cache
|  |  | 
# Prints the part of an http:// URL that follows the first "/" after the
# host[:port] piece, or an empty line when the URL has no such "/"
# (e.g. "http://host:80"), so that a path-less URL yields an empty path
# instead of the whole URL.
# Arguments:
#   $1 - a URL beginning with http://.
url_path_after_host() {
  local remainder=${1#http://}
  if [[ "$remainder" == */* ]]; then
    printf '%s\n' "${remainder#*/}"
  else
    printf '\n'
  fi
}

# Replace the http://host:port piece of the sample URL with
# http://APACHE_MOD_PAGESPEED_SERVER to get a sample Apache server URL.
URL_PATH=$(url_path_after_host "$SAMPLE_URL_TO_WGET")
APACHE_URL="http://$APACHE_MOD_PAGESPEED_SERVER/$URL_PATH"

# Use the PURGE_LOCATION_PREFIX to get a Purge URL for the caching layer.
PURGE_URL="$PURGE_LOCATION_PREFIX/$URL_PATH"

# TODO(anupama): More checks to add here: Check that purge location prefix is
# a substring of the sample url to wget. Else something is wrong.
|  |  | 
# Check if APACHE_MOD_PAGESPEED_SERVER is running by fetching the sample page
# directly from it. The response headers land in $TMPDIR/log.
if ! wget_url "$APACHE_URL"; then
  echo "---------------------------------------------------------------"
  echo "Wget didn't succeed. Check if your mod_pagespeed enabled apache"
  # The server name is kept inside the quotes so it is printed verbatim.
  echo "server is running correctly at $APACHE_MOD_PAGESPEED_SERVER"
  exit 1
fi

# Check if APACHE_MOD_PAGESPEED_SERVER is running mod_pagespeed.
if ! grep -q "X-Mod-Pagespeed" "$TMPDIR/log"; then
  echo "---------------------------------------------------------------"
  echo "Your apache server at $APACHE_MOD_PAGESPEED_SERVER does not"
  echo "seem to be running mod_pagespeed. Please recheck your configuration."
  exit 1
fi
|  |  | 
# Check if APACHE_MOD_PAGESPEED_SERVER has been configured to output non-zero
# max-age values in its Cache-Control headers (via ModifyCachingHeaders off).
# IS_APACHE_SERVING_NO_CACHE_HEADERS keeps grep's exit status (0 means a
# no-cache Cache-Control header was found) and is consulted again later when
# diagnosing a missing cache HIT.
grep -q "Cache-Control: .*no-cache" "$TMPDIR/log"
IS_APACHE_SERVING_NO_CACHE_HEADERS=$?
if [[ "$IS_APACHE_SERVING_NO_CACHE_HEADERS" == 0 ]]; then
  echo "---------------------------------------------------------------"
  echo "You don't seem to be using \"ModPagespeedModifyCachingHeaders off\""
  echo "to allow non-zero max-age values to be propagated to and respected"
  echo "by your proxy_cache servers. Press Enter if you want to continue with"
  echo "this and fix things in your proxy_cache layer. Otherwise press"
  printf '%s' "Ctrl+C and fix your mod_pagespeed-enabled apache server:"
  # The typed text is irrelevant; this only waits for Enter.
  read -r ignored
fi
|  |  | 
# Do wgets to verify that proxy_pass directive in proxy_cache configuration
# is pointing to a valid Apache backend with mod_pagespeed enabled on it.
# Note that since we already primed the mod_pagespeed cache with a direct wget
# to APACHE_MOD_PAGESPEED_SERVER, this response ought to be rewritten fully,
# and hence cached for the next wget to use as a HIT.
# The two fetches write tmp1.gz/log1 and tmp2.gz/log2 in $TMPDIR.
for i in 1 2; do
  wget_url "$SAMPLE_URL_TO_WGET" "$i"
done

# Check if the backend that proxy_pass is pointing to is running: the first
# fetch must have produced a non-empty body.
if [[ ! -s "$TMPDIR/tmp1.gz" ]]; then
  echo "---------------------------------------------------------------"
  echo "Wget didn't succeed. Check if proxy_pass directive in your proxy_cache"
  echo "config is set correctly to point to your mod_pagespeed-enabled Apache"
  echo "server."
  exit 1
fi

# Check if the backend that proxy_pass is pointing to has mod_pagespeed enabled.
if ! grep -q "X-Mod-Pagespeed" "$TMPDIR/log1"; then
  echo "---------------------------------------------------------------"
  echo "The backend server referenced in proxy_pass in your proxy_cache"
  echo "config seems to be not running mod_pagespeed. Please recheck to see"
  echo "if it is referencing $APACHE_MOD_PAGESPEED_SERVER"
  exit 1
fi
|  |  | 
# Confirm that outgoing headers from the proxy_cache server are set to no-cache
# for HTML response irrespective of whether they are cached in proxy_cache or
# not. Etag, Last-Modified and upstream Cache-Control headers should be removed
# from the client response for correct operation.
# This is advisory only: the user may press Enter to carry on.
if ! grep -q "Cache-Control: .*no-cache" "$TMPDIR/log1"; then
  echo "---------------------------------------------------------------"
  echo "You should set your outgoing headers from proxy_cache to be "
  echo "no-cache for HTML responses. Please do this by adding the following"
  echo "lines to your http block:"
  # Single quotes keep the nginx variables ($upstream_...) literal.
  echo '  map $upstream_http_content_type $new_cache_control_header_val {'
  echo '       default $upstream_http_cache_control;'
  echo '       "~*text/html" "no-cache, max-age=0";'
  echo "  }"
  echo "and adding the following lines to your 'location /' block:"
  echo "  proxy_hide_header Last-Modified;"
  echo "  proxy_hide_header ETag;"
  echo "  proxy_hide_header Cache-Control;"
  echo '  add_header Cache-Control $new_cache_control_header_val;'
  echo "Press Enter if you want to skip this. Otherwise press"
  printf '%s' "Ctrl+C and fix your proxy_cache config:"
  # The typed text is irrelevant; this only waits for Enter.
  read -r ignored
fi
|  |  | 
# Check if X-Cache status headers are present. This script needs these
# headers for verification purposes.
if ! grep -q "X-Cache: " "$TMPDIR/log1"; then
  echo "---------------------------------------------------------------"
  echo "This script relies on the cache status header for many of its "
  echo "checks. Please add the following line to your 'location /' block:"
  # Single quotes keep the nginx variable literal.
  echo '  add_header X-Cache $upstream_cache_status;'
  exit 1
fi

# Check if we incorrectly got a BYPASS X-Cache status for the first wget through
# the proxy_cache layer.
if grep -q "X-Cache: BYPASS" "$TMPDIR/log1"; then
  echo "---------------------------------------------------------------"
  echo "Cache is being bypassed for this request. Please check for the"
  echo "following line in the 'location /' block of your proxy_cache_config:"
  echo "  proxy_cache_bypass <SHOULD_CACHE_BE_BYPASSED>;"
  echo "and confirm that <SHOULD_CACHE_BE_BYPASSED> is defined correctly"
  echo "as per the documentation. Alternately, verify that the URL you have"
  echo "provided for testing is not meant to be bypassed by the cache."
  exit 1
fi

# Check if the first wget through the caching layer resulted in a MISS.
if ! grep -q "X-Cache: MISS" "$TMPDIR/log1"; then
  echo "---------------------------------------------------------------"
  echo "Looks like your cache already has the response (HIT) or the response"
  echo "has EXPIRED. Please restart your proxy_cache servers (so that the"
  echo "downstream cache is cleared for this URL)."
  exit 1
fi
|  |  | 
# Check if the second wget through the caching layer resulted in a HIT.
# The advice printed depends on whether the Apache server was earlier seen
# serving no-cache headers (IS_APACHE_SERVING_NO_CACHE_HEADERS is 0 then).
if ! grep -q "X-Cache: HIT" "$TMPDIR/log2"; then
  if [[ "$IS_APACHE_SERVING_NO_CACHE_HEADERS" == 0 ]]; then
    # MPS is serving no-cache. This will work only if proxy_cache_valid
    # and proxy_ignore_headers are both defined.
    echo "---------------------------------------------------------------"
    echo "If you want to cache HTML that is originally marked with no-cache in"
    echo "proxy_cache, you must have the following line in your 'location /'"
    echo "block of your proxy_cache config:"
    echo "  proxy_ignore_headers Cache-Control;"
    echo "You must also add a proxy_cache_valid line to this block to indicate"
    echo "how long you want to cache the content:"
    echo "  proxy_cache_valid <TIME>;"
  else
    # The second wget (effectively third because the first direct wget on the
    # Apache server would have caused background rewriting to happen) should
    # have a HIT. If there are no HITs, there is something very wrong!
    echo "---------------------------------------------------------------"
    echo "Looks like your earlier response did not get cached, or the response"
    echo "has EXPIRED due to very small caching durations, or your cache"
    echo "somehow got purged very quickly. Check the size of your cache. If"
    echo "everything else looks correct, contact"
    echo "mod-pagespeed-discuss@googlegroups.com!"
  fi
  exit 1
fi
|  |  | 
# Check if the purge location prefix and purge method have been specified
# correctly by executing curl with a purge URL that directly goes to the caching
# layer. Note: wget does not allow http request methods to be specified, so we
# use curl here.
# The Host header is the one supplied with -h (WGET_HOST_HEADER already holds
# the full "Host: ..." line), so the purge targets the same virtual host as
# the fetches instead of a hard-coded example host. Both stdout and stderr of
# curl are captured in $TMPDIR/trace.
curl -X "$PURGE_METHOD" "$PURGE_URL" -H "User-Agent: Chrome/26.0" \
  -H "$WGET_HOST_HEADER" > "$TMPDIR/trace" 2>&1
if ! grep -q "Successful purge" "$TMPDIR/trace"; then
  # Unsuccessful purges could result from incorrect values for
  # purge_location_prefix or purge_method, ACLs for purge block/command being
  # incorrect, zone named for purging being incorrect or cache key used for
  # purging not matching with the proxy_cache_key directive.
  echo "---------------------------------------------------------------"
  echo "Check your location block for handling purges as follows:"
  echo ""
  echo "1) The purge_location_prefix (-p) specified for this script should match"
  echo "   the regexp for the block handling purges."
  echo ""
  echo "2) The purge_http_method (-m) specified for this script should be correct."
  echo ""
  echo "3) The ACL on the purge-handling block should be correct. Example ACLs:"
  echo "     allow localhost;"
  echo "     allow 127.0.0.1;"
  echo "     allow <YOUR-SERVER-IP>;"
  echo "     deny all;"
  echo ""
  echo "4) Check that the zone named for purging and the cache key match the"
  echo "   variables used in the 'location /' block:"
  # Single quotes keep the nginx variables ($1, $is_args, $args) literal.
  echo "     proxy_cache_purge <ZONE_NAME_FOR_CACHING> " \
    '       <PS_CAPABILITY_LIST>$1$is_args$args;'
  echo ""
  echo "   Ideally, the 'location /' block will be using these variables as"
  echo "   follows:"
  echo "     proxy_cache <ZONE_NAME_FOR_CACHING>;"
  echo '     proxy_cache_key <PS_CAPABILITY_LIST>$uri$is_args$args;'
  echo "   Note that <PS_CAPABILITY_LIST> should be defined as suggested in the"
  echo "   documentation."
  exit 1
fi
|  |  | 
# Verify that PURGE requests issued by mod_pagespeed itself are received and
# processed correctly by the proxy_cache server.
# The preceding request was a successful purge, so a fetch through the
# caching layer is expected to be a MISS.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  echo "---------------------------------------------------------------"
  print_last_resort_error_message_and_exit
}

# Now, purge the URL from proxy cache.
cleanup_url_from_proxy_cache "$APACHE_URL"

# A fetch through the caching layer must again be a MISS, because the step
# above should have triggered a PURGE in the background.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  cat <<'EOF'
---------------------------------------------------------------
Looks like the purge from your Apache server was not executed on your
proxy_cache server.
1) Check if the mod_pagespeed-enabled Apache server IPs are part of the
   ACLS you specify for your purge-handling block in proxy_cache
   config.
2) Check your ModPagespeedDownstreamCache* directives and make sure
   that they match your proxy_cache config.
If both of the above things are correct, then ...
EOF
  print_last_resort_error_message_and_exit
}
|  |  | 
# Verify that requests are sent upstream with all of the required info, so
# that subsequent PURGEs can be successful.
cleanup_url_from_proxy_cache "$APACHE_URL"
# Invalidate mod_pagespeed cache to allow rewriting to start from scratch for
# this URL.
invalidate_mod_pagespeed_cache

# The fetch through the caching layer must be a MISS before moving on to the
# final fetch.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  echo "---------------------------------------------------------------"
  print_last_resort_error_message_and_exit
}

# Sleep for 2 seconds waiting for the PURGE to happen.
sleep 2

# The final fetch through the caching layer must be a MISS too, because of
# the PURGE that the previous fetch ought to have triggered.
wget_url_and_test_for_miss "$SAMPLE_URL_TO_WGET" || {
  cat <<'EOF'
---------------------------------------------------------------
Looks like the purge didn't succeed. Check the 'location /' block to
see if the following directives are defined correctly:
  proxy_set_header Host $host;
  proxy_set_header PS-CapabilityList <PS_CAPABILITY_LIST>;
If all of this is correct, then ...
EOF
  print_last_resort_error_message_and_exit
}
|  |  | 
# All checks passed.
echo "---------------------------------------------------------------"
echo "Your downstream caching setup looks good!"

# Cleanup: remove the scratch directory. The path is quoted and preceded by
# "--" so it is always passed to rm as a single path operand.
rm -rf -- "$TMPDIR"