#!/bin/bash
#
# Copyright 2012 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: jefftk@google.com (Jeff Kaufman)
#
# Set up variables and functions for use by various system tests.
#
# Scripts using this file (callers) should 'source' or '.' it so that an error
# detected in a function here can exit the caller. Callers should preface tests
# with:
#   start_test <test name>
# A test should use check, check_from, fetch_until, and other functions defined
# below, as appropriate. A test should not directly call exit on failure.
#
# Callers should leave argument parsing to this script.
#
# Callers should invoke check_failures_and_exit after no more tests are left
# so that expected failures can be logged.
#
# If the command-line args are wrong, exit with status code 2.
# If no tests fail, exit the shell script with status 0.
# If a test fails:
#   - If it's listed in PAGESPEED_EXPECTED_FAILURES or CONTINUE_AFTER_FAILURE
#     is "true", log the name of the failing test to display when
#     check_failures_and_exit is called, at which point exit with status code 3
#     if the failure was expected or 1 otherwise.
#   - Otherwise, exit immediately with status code 1.
# TODO(jefftk): After all tests are converted to use run_test, rework expected
# failures so that it applies to run_test names and not start_test
# names.
#
# The format of PAGESPEED_EXPECTED_FAILURES is '~'-separated test names.
# For example:
#   PAGESPEED_EXPECTED_FAILURES="convert_meta_tags~extend_cache"
# or:
#   PAGESPEED_EXPECTED_FAILURES="
#     ~compression is enabled for rewritten JS.~
#     ~convert_meta_tags~
#     ~regression test with same filtered input twice in combination"
#
# Callers need to set SERVER_NAME, and not run this more than once
# simultaneously with the same SERVER_NAME value.
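#
# A minimal caller might look like this (hypothetical test body; the caller
# itself is invoked with HOSTNAME [HTTPS_HOST [PROXY_HOST]], which this script
# parses):
#   SERVER_NAME=apache
#   source "$(dirname "$0")/system_test_helpers.sh"
#   start_test example page is served
#   OUT=$($WGET_DUMP "$EXAMPLE_ROOT/index.html")
#   check_from "$OUT" fgrep -qi "<html"
#   check_failures_and_exit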
set -u # Disallow referencing undefined variables.
# Catch potential misuse of this script.
if [ "$(basename $0)" == "system_test_helpers.sh" ] ; then
echo "ERROR: This file must be loaded with source."
exit 2
fi
if [ $# -lt 1 -o $# -gt 3 ]; then
# Note: HOSTNAME and HTTPS_HOST should generally be localhost (when using
# the default port) or localhost:PORT (when not). Specifically, by default
# /mod_pagespeed_statistics is only accessible when accessed as localhost.
echo Usage: $(basename $0) HOSTNAME [HTTPS_HOST [PROXY_HOST]]
exit 2
fi;
if [ -z "${TEMPDIR:-}" ]; then
TEMPDIR="/tmp/mod_pagespeed_test.$USER/$SERVER_NAME"
# If someone else is supplying a TEMPDIR then it's their responsibility to
# make sure it's clean, but if we're using the default one then we need to
# clean it up on start so settings from previous tests don't affect this one.
# Cleaning up on exit doesn't work because if there's a test failure we want
# to leave things as they are to help with debugging.
#
# Because TEMPDIR includes SERVER_NAME this still allows, for example,
# parallel Apache and Nginx test execution.
rm -rf "$TEMPDIR"
mkdir -p "$TEMPDIR"
fi
# EXPECTED_FAILURES acts on "start_test" tests, while UNEXPECTED_FAILURES acts
# on "run_test" tests.
# TODO(jefftk): after we've converted everything to use run_test, including
# nginx_system_test.sh, switch EXPECTED_FAILURES to work on run_test instead.
EXPECTED_FAILURES="${TEMPDIR}/expected_failures"
UNEXPECTED_FAILURES="${TEMPDIR}/unexpected_failures"
# Make this easier to process so we're always looking for '~target~'.
PAGESPEED_EXPECTED_FAILURES="~${PAGESPEED_EXPECTED_FAILURES=}~"
# If the user has specified an alternate WGET as an environment variable, then
# use that, otherwise use the one in the path.
# Note: ${WGET:-} syntax is used to avoid breaking "set -u".
if [ "${WGET:-}" == "" ]; then
WGET=wget
else
echo WGET = $WGET
fi
# Require wget 1.12 or newer; also accept 1.2x and 2.x releases.
if ! $WGET --version | head -1 | grep -qE "1\.(1[2-9]|[2-9][0-9])|[2-9]\.[0-9]"; then
echo "You have the wrong version of wget. >= 1.12 is required."
exit 1
fi
# Ditto for curl.
if [ "${CURL:-}" == "" ]; then
CURL=curl
else
echo CURL = $CURL
fi
# Note that 'curl --version' exits with status 2 on CentOS even when
# curl is installed.
if ! which $CURL > /dev/null 2>&1; then
echo "curl ($CURL) is not installed."
exit 1
fi
# We need to set a wgetrc file because of the stupid way that bash deals
# with strings and variable expansion.
mkdir -p $TEMPDIR || exit 1
export WGETRC=$TEMPDIR/wgetrc
# Use a Chrome User-Agent, so that we get real responses (including
# compression).
cat > $WGETRC <<EOF
user_agent = Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.0 (KHTML, like Gecko) Chrome/6.0.408.1 Safari/534.0
EOF
# You can pass in TEST_TO_RUN=test-name to run only a specific test. This is
# intended for debugging, where you want to iterate on a single failing test.
# It only works with tests that are set up to use run_test, which is currently
# only the ones in automatic/. Tests that haven't been converted to use
# run_test currently always run.
# TODO(jefftk): convert all system tests to use run_test and separate files.
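# For example (hypothetical caller and test name):
#   TEST_TO_RUN=convert_meta_tags ./apache_system_test.sh localhost:8080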
TEST_TO_RUN="${TEST_TO_RUN:-}"
# Individual tests should use $TESTTMP if they need to store something
# temporarily. Infrastructure can use $ORIGINAL_TEMPDIR if it can tolerate
# parallel use.
TESTTMP="$TEMPDIR"
ORIGINAL_TEMPDIR="$TEMPDIR"
unset TEMPDIR
HELPERS_LOADED=1
HOSTNAME=$1
PRIMARY_SERVER=http://$HOSTNAME
EXAMPLE_ROOT=$PRIMARY_SERVER/mod_pagespeed_example
# TODO(sligocki): Should we be rewriting the statistics page by default?
# Currently we are, so disable that so that it doesn't spoil our stats.
DEFAULT_STATISTICS_URL=$PRIMARY_SERVER/mod_pagespeed_statistics?PageSpeed=off
STATISTICS_URL=${STATISTICS_URL:-$DEFAULT_STATISTICS_URL}
DEFAULT_GLOBAL_STATISTICS_URL="$PRIMARY_SERVER/pagespeed_global_admin/statistics?PageSpeed=off"
GLOBAL_STATISTICS_URL=${GLOBAL_STATISTICS_URL:-$DEFAULT_GLOBAL_STATISTICS_URL}
BAD_RESOURCE_URL=$PRIMARY_SERVER/mod_pagespeed/W.bad.pagespeed.cf.hash.css
MESSAGE_URL=$PRIMARY_SERVER/pagespeed_admin/message_history
CONSOLE_URL=$PRIMARY_SERVER/pagespeed_admin/console
# In some servers (Nginx) PageSpeed processes HTML after headers are finalized,
# while in others (Apache) it runs before that and has to treat them as
# tentative.
HEADERS_FINALIZED=${HEADERS_FINALIZED:-true}
# The following shake-and-bake ensures that we set REWRITTEN_TEST_ROOT based on
# the TEST_ROOT in effect when we start up, if any, but if it was not set before
# invocation it is set to the newly-chosen TEST_ROOT. This permits us to call
# this from other test scripts that use different host prefixes for rewritten
# content.
REWRITTEN_TEST_ROOT=${TEST_ROOT:-}
TEST_ROOT=$PRIMARY_SERVER/mod_pagespeed_test
REWRITTEN_TEST_ROOT=${REWRITTEN_TEST_ROOT:-$TEST_ROOT}
# This sets up similar naming for https requests.
HTTPS_HOST=${2:-}
HTTPS_EXAMPLE_ROOT=https://$HTTPS_HOST/mod_pagespeed_example
# Determines whether a variable is defined, even with set -u
# http://stackoverflow.com/questions/228544/
# how-to-tell-if-a-string-is-not-defined-in-a-bash-shell-script
# albeit there are zero votes for that answer.
function var_defined() {
local var_name=$1
set | grep "^${var_name}=" 1>/dev/null
return $?
}
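# For example:
#   if var_defined SECONDARY_HOSTNAME; then
#     echo "SECONDARY_HOSTNAME is set"
#   fi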
# These are the root URLs for rewritten resources; by default, no change.
REWRITTEN_ROOT=${REWRITTEN_ROOT:-$EXAMPLE_ROOT}
if ! var_defined PROXY_DOMAIN; then
PROXY_DOMAIN="$HOSTNAME"
fi
# Set up wget proxy information.
if [ -n "${3:-}" ]; then
export http_proxy=http://$3
else
export http_proxy=""
fi
export https_proxy=${http_proxy}
export ftp_proxy=${http_proxy}
export no_proxy=""
# Version timestamped with nanoseconds, making it extremely unlikely to collide
# with a real resource.
BAD_RND_RESOURCE_URL="$PRIMARY_SERVER/mod_pagespeed/bad$(date +%N).\
pagespeed.cf.hash.css"
combine_css_filename=\
styles/yellow.css+blue.css+big.css+bold.css.pagespeed.cc.xo4He3_gYf.css
OUTDIR=$TESTTMP/fetched_directory
rm -rf $OUTDIR
mkdir -p $OUTDIR
# Run a single test, and exit if it fails. Use this for tests that are fast and
# should never flake, in order to make sure the system is up before we start
# continuing after failures.
function run_critical_test() {
run_test_helper "$@" || exit 1
}
function run_test() {
if ! run_test_helper "$@"; then
if "${CONTINUE_AFTER_FAILURE:-false}"; then
echo "$@" >> "$UNEXPECTED_FAILURES"
else
exit 1
fi
fi
}
# Individual tests are in separate files under system_tests/ and may be run
# individually or reordered. If one test must be run after another, put them in
# the same file.
SYSTEM_TEST_DIR="DEFINE_THIS_BEFORE_USING_RUN_TEST"
function run_test_helper() {
local test_name=$1
if [ -n "$TEST_TO_RUN" ] && [ "$TEST_TO_RUN" != "$test_name" ]; then
return # By default TEST_TO_RUN="" so normally we don't skip tests here.
fi
# Use a subshell to keep modifications tests make to the test environment
# from interfering with each other.
previous_time_ms=0
if ! (source "$SYSTEM_TEST_DIR/${test_name}.sh"); then
return 1
fi
update_elapsed_time
}
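# For example, a caller whose tests live next to it in system_tests/ might do
# (hypothetical test names):
#   SYSTEM_TEST_DIR="$(dirname "$0")/system_tests"
#   run_critical_test server_is_up
#   run_test convert_meta_tags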
# This function expects to be run in the background and then killed when we know
# how the test finished.
function tail_while_waiting() {
local test_name="$1"
local test_log="$2"
# Wait a moment before printing; if the test is already done or nearly done
# we'll be killed before producing any output.
sleep 1
echo "Still waiting for $test_name"
echo "tail -f $test_log"
tail -f "$test_log"
}
# Returns the unix system time in milliseconds.
function now_ms() {
# Note: the '%N' probably won't work on FreeBSD, and another solution will be
# needed to get the current time in milliseconds there.
date +%s%N | cut -b1-13
}
# Prints the elapsed time since the last time update_elapsed_time was called.
previous_time_ms=0
function update_elapsed_time() {
current_time_ms=$(now_ms)
if [ "$previous_time_ms" != 0 ]; then
echo 'ELAPSED TIME:' $((current_time_ms - previous_time_ms))"ms"
fi
previous_time_ms="$current_time_ms"
}
CURRENT_TEST="pre tests"
function start_test() {
update_elapsed_time
WGET_ARGS=""
CURRENT_TEST="$@"
echo "TEST: $CURRENT_TEST"
}
# Wget is used three different ways. The first way is nonrecursive and dumps a
# single page (with headers) to standard out. This is useful for grepping for a
# single expected string that's the result of a first-pass rewrite:
#   wget -q --save-headers -O - $URL | grep -q foo
# "-q" quells wget's noisy output; "-O -" dumps to stdout; grep's -q quells
# its output and uses the return value to indicate whether the string was
# found. Note that exiting with a nonzero value will immediately kill
# the make run.
#
# Sometimes we want to check for a condition that's not true on the first dump
# of a page, but becomes true after a few seconds as the server's asynchronous
# fetches complete. For this we use the fetch_until() function:
#   fetch_until $URL 'grep -c delayed_foo' 1
# In this case we will continuously fetch $URL and pipe the output to
# grep -c (which prints the count of matches); we repeat until the number is 1.
#
# The final way we use wget is in a recursive mode to download all prerequisites
# of a page. This fetches all resources associated with the page, and thereby
# validates the resources generated by mod_pagespeed:
#   wget -H -p -S -o $WGET_OUTPUT -nd -P $WGET_DIR $EXAMPLE_ROOT/$FILE
# Here -H allows wget to cross hosts (e.g. in the case of a sharded domain); -p
# means to fetch all prerequisites; "-S -o $WGET_OUTPUT" saves wget output
# (including server headers) for later analysis; -nd puts all results in one
# directory; -P specifies that directory. We can then run commands on
# $WGET_DIR/$FILE and nuke $WGET_DIR when we're done.
# TODO(abliss): some of these will fail on windows where wget escapes saved
# filenames differently.
# TODO(morlovich): This isn't actually true, since we never pass in -r,
# so this fetch isn't recursive. Clean this up.
function define_fetch_variables {
WGET_OUTPUT=$OUTDIR/wget_output.txt
# We use a separate directory so that it can be rm'd without disturbing other
# data in $OUTDIR.
WGET_DIR=$OUTDIR/wget
WGET_DUMP="$WGET -q -O - --save-headers"
WGET_DUMP_HTTPS="$WGET -q -O - --save-headers --no-check-certificate"
PREREQ_ARGS="-H -p -S -o $WGET_OUTPUT -nd -P $WGET_DIR/ -e robots=off"
WGET_PREREQ="$WGET $PREREQ_ARGS"
WGET_ARGS=""
}
define_fetch_variables
function run_wget_with_args() {
echo $WGET_PREREQ $WGET_ARGS "$@"
$WGET_PREREQ $WGET_ARGS "$@"
}
# Should be called at the end of any system test using this script. While most
# errors will be reported immediately and will make us exit with status 1, tests
# listed in PAGESPEED_EXPECTED_FAILURES will let us continue. This prints out
# failure information for these tests, if appropriate.
#
# This function always exits the script:
# Status 0: pass
# Status 1: fail
# Status 3: only expected failures
function check_failures_and_exit() {
update_elapsed_time
if [ -e "$UNEXPECTED_FAILURES" ] ; then
echo "Failing Tests:"
sed 's/^/ /' "$UNEXPECTED_FAILURES"
echo "FAIL."
exit 1
elif [ -e "$EXPECTED_FAILURES" ] ; then
echo Expected Failing Tests:
sed 's/^/ /' "$EXPECTED_FAILURES"
echo "MOSTLY PASS. Expected failures only."
exit 3
fi
echo "PASS."
exit 0
}
# Did we expect the current test, as set by start_test, to fail?
function is_expected_failure() {
# Does PAGESPEED_EXPECTED_FAILURES contain CURRENT_TEST?
test "$PAGESPEED_EXPECTED_FAILURES" != \
"${PAGESPEED_EXPECTED_FAILURES/~"${CURRENT_TEST}"~/}"
}
# By default, print a message like:
# failure at line 374
# FAIL
# and then exit with return value 1. If we expected this test to fail, log to
# $EXPECTED_FAILURES and return without exiting.
#
# If the shell does not support the 'caller' builtin, skip the line number info.
#
# Assumes it's being called from a failure-reporting function and that the
# actual failure the user is interested in is our caller's caller. If it
# weren't for this, fail and handle_failure could be the same.
function handle_failure() {
if [ $# -eq 1 ]; then
echo FAILed Input: "$1"
fi
# From http://stackoverflow.com/questions/685435/bash-stacktrace
# To avoid printing 'handle_failure' itself we start at 1, skipping this
# function in the stack.
local i
local stack_size=${#FUNCNAME[@]}
for (( i=1; i<$stack_size ; i++ )); do
local func="${FUNCNAME[$i]}"
[ -z "$func" ] && func=MAIN
local line_number="${BASH_LINENO[(( i - 1 ))]}"
local src="${BASH_SOURCE[$i]}"
[ -z "$src" ] && src=non_file_source
local canonical_dir=$(cd $(dirname "$src") && pwd)
local short_dir=${canonical_dir#*/net/instaweb/}
local leaf=$(basename "$src")
echo "${short_dir}/${leaf}:${line_number}: $func"
done
# Note: we print line number after "failed input" so that it doesn't get
# knocked out of the terminal buffer.
if type caller > /dev/null 2>&1 ; then
# "caller 1" is our caller's caller.
echo " failure at line $(caller 1 | sed 's/ .*//')" 1>&2
fi
echo "in '$CURRENT_TEST'"
if is_expected_failure ; then
echo "$CURRENT_TEST" >> "$EXPECTED_FAILURES"
echo "Continuing after expected failure..."
else
echo FAIL.
exit 1;
fi
}
# Call with a command and its args. Echoes the command, then tries to eval it.
# If it returns false, fail the tests.
function check() {
echo " check" "$@"
"$@" || handle_failure
}
# Like check, but the first argument is text to pipe into the command given in
# the remaining arguments.
function check_from() {
local quiet=0
if [ "$1" = "-q" ]; then
quiet=1
shift
fi
local text="$1"
local msg="$text"
shift
if [ "$quiet" -ne 0 ];then
msg="(check_from -q $@): $text"
else
echo " check_from" "$@"
fi
echo "$text" | "$@" || handle_failure "$msg"
}
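# For example, to assert that a page was served with PageSpeed headers
# (hypothetical header check):
#   OUT=$($WGET_DUMP $EXAMPLE_ROOT/index.html)
#   check_from "$OUT" egrep -qi "^X-(Mod-Pagespeed|Page-Speed):"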
# Same as check(), but expects command to fail.
function check_not() {
echo " check_not" "$@"
if "$@"; then
handle_failure
fi
}
# Runs a command and verifies that it exits with an expected error code.
function check_error_code() {
local expected_error_code="$1"
shift
echo " check_error_code $expected_error_code $@"
local error_code=0
"$@" || error_code="$?"
check [ "$error_code" = "$expected_error_code" ]
}
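# For example: wget exits with code 8 on a server error response, so a 404 can
# be asserted with (hypothetical URL):
#   check_error_code 8 $WGET -q -O /dev/null $TEST_ROOT/no_such_page.html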
# Like check_not, but the first argument is text to pipe into the
# command given in the remaining arguments.
function check_not_from() {
local text="$1"
shift
echo " check_not_from" "$@"
if echo "$text" | "$@"; then
handle_failure "$text"
fi
}
function check_200_http_response() {
check_from "$(head -1 <<< $1)" egrep -q '[ ]*HTTP/1[.]. 200 OK'
}
function check_200_http_response_file() {
check_200_http_response "$(< $1)"
}
# Check for the existence of a single file matching the pattern
# in $1. If it does not exist, print an error. If it does exist,
# check that its size meets the constraint identified by $2 $3, e.g.
#   check_file_size "$WGET_DIR/xPuzzle*" -le 60000
function check_file_size() {
local filename_pattern="$1"
local op="$2"
local expected_value="$3"
local SIZE=$(stat -c %s $filename_pattern) || handle_failure \
"$filename_pattern not found"
[ "$SIZE" "$op" "$expected_value" ] || handle_failure \
"$filename_pattern : $SIZE $op $expected_value"
}
# In a pipeline a failed check or check_not will not halt the script on error.
# Instead of:
#   echo foo | check grep foo
# You need:
#   echo foo | grep foo || fail
# If you can legibly rewrite the code not to need a pipeline at all, however,
# check_from is better because it can print the problem test and the failing
# input on failure:
#   check_from "foo" grep foo
function fail() {
handle_failure
}
function get_stat() {
grep -w "$1" | awk '{print $2}' | tr -d ' '
}
function check_stat() {
check_stat_op $1 $2 $3 $4 =
}
function check_stat_op() {
if [ "${statistics_enabled:-1}" -eq "0" ]; then
return
fi
local OLD_STATS_FILE=$1
local NEW_STATS_FILE=$2
local COUNTER_NAME=$3
local EXPECTED_DIFF=$4
local OP=$5
local OLD_VAL=$(get_stat ${COUNTER_NAME} <${OLD_STATS_FILE})
local NEW_VAL=$(get_stat ${COUNTER_NAME} <${NEW_STATS_FILE})
# This extra check is necessary because the syntax error in the second if
# does not cause bash to fail :/
if [ "${NEW_VAL}" != "" -a "${OLD_VAL}" != "" ]; then
if [ $((${NEW_VAL} - ${OLD_VAL})) $OP ${EXPECTED_DIFF} ]; then
return;
fi
fi
# Failure
local EXPECTED_VAL=$((${OLD_VAL} + ${EXPECTED_DIFF}))
echo -n "Mismatched counter value : ${COUNTER_NAME} : "
echo "Expected(${EXPECTED_VAL}) $OP Actual(${NEW_VAL})"
echo "Compare stat files ${OLD_STATS_FILE} and ${NEW_STATS_FILE}"
handle_failure
}
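# Typical usage snapshots the statistics page before and after some activity
# and asserts on the delta (hypothetical counter and value):
#   $WGET_DUMP $STATISTICS_URL > $TESTTMP/stats.0
#   ... fetch some pages ...
#   $WGET_DUMP $STATISTICS_URL > $TESTTMP/stats.1
#   check_stat $TESTTMP/stats.0 $TESTTMP/stats.1 cache_hits 2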
# Continuously fetches URL and pipes the output to COMMAND. Loops until COMMAND
# outputs RESULT, in which case we return 0, or until TIMEOUT seconds have
# passed, in which case we return 1.
#
# Usage:
#   fetch_until [-save] [-gzip] [-recursive] [-expect_time_out] \
#     REQUESTURL COMMAND RESULT [WGET_ARGS] [OP]
# The flags must be given in the order shown.
#
# If "-save" is specified as the first argument, then the fetched output is
# retained in $FETCH_UNTIL_OUTFILE.
#
# If "-gzip" is specified, the fetch sends "Accept-Encoding: gzip".
#
# If "-recursive" is specified, then the resources referenced from the HTML
# file are loaded into $WGET_DIR as a result of this command.
#
# If "-expect_time_out" is specified, the fetch is expected to time out, so
# only a short timeout is used.
function fetch_until() {
FETCH_UNTIL_OUTFILE="$WGET_DIR/fetch_until_output.$$"
local save=0
if [ "$1" = "-save" ]; then
save=1
shift
fi
local gzip=""
if [ "$1" = "-gzip" ]; then
gzip="--header=Accept-Encoding:gzip"
shift
fi
local recursive=0
if [ "$1" = "-recursive" ]; then
recursive=1
shift
fi
local expect_time_out=0
if [ "$1" = "-expect_time_out" ]; then
expect_time_out=1
shift
fi
REQUESTURL=$1
COMMAND=$2
EXPECTED_RESULT=$3
local wget_arg="${4:-}"
if [[ "$wget_arg" == --user-agent=webp* ]]; then
wget_arg="$wget_arg --header=Accept:image/webp"
shift
fi
FETCH_UNTIL_WGET_ARGS="$gzip $WGET_ARGS $wget_arg"
OP=${5:-=} # Default to =
if [ $recursive -eq 1 ]; then
FETCH_FILE="$WGET_DIR/$(basename $REQUESTURL)"
FETCH_UNTIL_WGET_ARGS="$FETCH_UNTIL_WGET_ARGS $PREREQ_ARGS"
else
FETCH_FILE="$FETCH_UNTIL_OUTFILE"
FETCH_UNTIL_WGET_ARGS="$FETCH_UNTIL_WGET_ARGS -o $WGET_OUTPUT \
-O $FETCH_FILE"
fi
# TIMEOUT is how long to keep trying, in seconds.
if is_expected_failure ; then
# For tests that we expect to fail, don't wait long hoping for the right
# result.
TIMEOUT=10
elif [ $expect_time_out -eq 1 ]; then
# So far, all images tested in this mode are completed in 200 milliseconds
# in non-valgrind mode. To make the test robust, we set the threshold to 5x,
# and then another 5x for valgrind mode.
if [ "${USE_VALGRIND:-}" = true ]; then
TIMEOUT=5
else
TIMEOUT=1
fi
else
# Foreground tests shouldn't wait as long as background tests can, but still
# longer than you'd think we'd need, because of Valgrind.
TIMEOUT=100
fi
START=$(date +%s)
STOP=$((START+$TIMEOUT))
WGET_HERE="$WGET -q $FETCH_UNTIL_WGET_ARGS"
echo -n " Fetching $REQUESTURL $FETCH_UNTIL_WGET_ARGS"
echo " until \$($COMMAND) $OP $EXPECTED_RESULT"
while true; do
# Clean out WGET_DIR so that wget doesn't create .1 files.
rm -rf $WGET_DIR
mkdir -p $WGET_DIR
$WGET_HERE $REQUESTURL || true
ACTUAL_RESULT=$($COMMAND < "$FETCH_FILE" || true)
if [ "$ACTUAL_RESULT" "$OP" "$EXPECTED_RESULT" ]; then
echo "."
if [ $save -eq 0 ]; then
if [ $recursive -eq 1 ]; then
rm -rf $WGET_DIR
else
rm -f "$FETCH_FILE"
fi
fi
return;
fi
if [ $(date +%s) -gt $STOP ]; then
echo ""
if [ $expect_time_out -eq 1 ]; then
echo "TIMEOUT: expected"
else
local file_size=$(wc -c < "$FETCH_FILE")
local file_mime=$(file -ib "$FETCH_FILE")
if echo "$file_mime" | grep -q "^text/"; then
# Dump the beginning of the file, if it's text.
echo "Fetched file: $file_size bytes ("
head -n 100 "$FETCH_FILE"
echo ")"
else
# Otherwise dump the beginning of the file as hex.
echo "Fetched file: $file_size bytes, $file_mime begins ("
xxd -l 256 "$FETCH_FILE"
echo ")"
fi
echo "TIMEOUT: $WGET_HERE $REQUESTURL output in $FETCH_FILE"
handle_failure
fi
return
fi
echo -n "."
sleep 0.1
done;
}
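# For example, to keep fetching until two images on the page have been
# rewritten (hypothetical URL and count):
#   fetch_until $EXAMPLE_ROOT/rewrite_images.html 'grep -c .pagespeed.ic' 2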
# Helper to set up most filter tests. Alternate between using:
# 1) query-params vs request-headers
# 2) ModPagespeed... vs PageSpeed...
# to enable the filter so we know all combinations work.
filter_spec_method="query_params"
function test_filter() {
rm -rf $OUTDIR
mkdir -p $OUTDIR
FILTER_NAME=$1;
shift;
FILTER_DESCRIPTION=$@
start_test $FILTER_NAME $FILTER_DESCRIPTION
# Filename is the name of the first filter only.
FILE=${FILTER_NAME%%,*}.html
if [ $filter_spec_method = "query_params" ]; then
WGET_ARGS=""
FILE="$FILE?ModPagespeedFilters=$FILTER_NAME"
filter_spec_method="query_params_pagespeed"
elif [ $filter_spec_method = "query_params_pagespeed" ]; then
WGET_ARGS=""
FILE="$FILE?PageSpeedFilters=$FILTER_NAME"
filter_spec_method="headers"
elif [ $filter_spec_method = "headers" ]; then
WGET_ARGS="--header=ModPagespeedFilters:$FILTER_NAME"
filter_spec_method="headers_pagespeed"
else
WGET_ARGS="--header=ModPagespeedFilters:$FILTER_NAME"
filter_spec_method="query_params"
fi
URL=$EXAMPLE_ROOT/$FILE
FETCHED=$WGET_DIR/$FILE
}
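# For example (hypothetical filter and description):
#   test_filter extend_cache rewrites a css file
#   fetch_until $URL 'grep -c .pagespeed.ce.' 1
#   check run_wget_with_args $URL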
# Helper to test whether we mess up extensions on requests to a broken URL.
function test_resource_ext_corruption() {
URL=$1
RESOURCE=$2
# Make sure the resource is actually there, so that the test isn't broken.
echo checking that wgetting $URL finds $RESOURCE ...
OUT=$($WGET_DUMP $WGET_ARGS $URL)
check_from "$OUT" fgrep -qi $RESOURCE
# Now fetch the broken version. This should succeed anyway, as we now
# ignore the noise.
check $WGET_PREREQ $WGET_ARGS "${EXAMPLE_ROOT}/${RESOURCE}broken"
# Fetch the normal version again; ensure the rewritten URL for RESOURCE doesn't
# contain 'broken'.
OUT=$($WGET_DUMP $WGET_ARGS $URL)
check_not_from "$OUT" fgrep "broken"
}
function scrape_pipe_stat {
egrep "^$1:? " | awk '{print $2}'
}
# Scrapes the specified statistic, returning the statistic value.
function scrape_stat {
$WGET_DUMP $STATISTICS_URL | scrape_pipe_stat "$1"
}
function scrape_header {
# Extracts the value from wget's emitted headers. We use " " as a delimiter
# here to avoid a leading space on the returned string. Note also that wget
# always generates "name: value\r", never "name:value\r".
tr -s '\r\n' '\n'| egrep -ia "^.?$1:" | rev | cut -d' ' -f 1 | rev
}
# Scrapes HTTP headers from stdin for Content-Length and returns the value.
function scrape_content_length {
scrape_header 'Content-Length'
}
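# For example (hypothetical statistic and URL):
#   HITS=$(scrape_stat cache_hits)
#   LENGTH=$($WGET_DUMP $EXAMPLE_ROOT/index.html | scrape_content_length)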
# Pulls the headers out of a 'wget --save-headers' dump.
function extract_headers {
local carriage_return=$(printf "\r")
local last_line_number=$(
grep --text -n \^${carriage_return}\$ $1 | cut -f1 -d:)
head --lines=$last_line_number "$1" | sed -e "s/$carriage_return//"
}
# Extracts the cookies from a 'wget --save-headers' dump.
function extract_cookies {
grep "Set-Cookie" | \
sed -e 's/;.*//' -e 's/^.*Set-Cookie: */ --header=Cookie:/'
}
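# For example, to replay cookies from one response on a later request
# (hypothetical URL):
#   $WGET_DUMP $TEST_ROOT/set_cookie.html > $TESTTMP/headers.$$
#   COOKIE=$(extract_headers $TESTTMP/headers.$$ | extract_cookies)
#   OUT=$($WGET_DUMP $COOKIE $TEST_ROOT/set_cookie.html)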
# Returns the "URL" suitable for either Apache or Nginx
function generate_url {
DOMAIN="$1" # Must not have leading 'http://'
PATH="$2" # Must have leading '/'.
if [ -z "${STATIC_DOMAIN:-}" ]; then
RESULT="http://$DOMAIN$PATH"
else
RESULT="--header X-Google-Pagespeed-Config-Domain:$DOMAIN"
RESULT+=" http://$STATIC_DOMAIN$PATH"
fi
echo $RESULT
}
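# For example (hypothetical domain and path):
#   URL=$(generate_url cache_a.example.com /mod_pagespeed_test/foo.html)
#   OUT=$($WGET_DUMP $URL)
# Note that the result may expand to extra wget arguments plus a URL, so $URL
# is deliberately left unquoted at the call site.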
# Performs timed reads on a chunked HTTP response fetched from the vhost named
# by $1. The stream is interpreted as chunked http encoding. Each chunk is
# allowed at most threshold_sec ($2) seconds to be read or the function will
# fail. When the stream is fully read, the function compares the total number
# of http chunks read with expect_chunk_count ($3) and fails if fewer were
# read.
# Usage:
#   check_flushing HOSTNAME 1 5
# This requires at least five chunks, each read within one second or less.
function check_flushing() {
local hostname="$1"
local threshold_sec="$2"
local expect_chunk_count="$3"
local output=""
local start=$(date +%s%N)
local chunk_count=0
local base_url="http://$hostname.example.com/mod_pagespeed_test"
local command="$CURL -f -N --raw -sS --proxy $SECONDARY_HOSTNAME"
if [ "${USE_VALGRIND:-}" = true ]; then
# We can't say much about correctness of timings under valgrind, so relax
# the test for that.
threshold_sec=$(echo "scale=2; $threshold_sec*10" | bc)
fi
# First make sure php is working and we can actually fetch this page.
check $command "$base_url/php_withoutflush.php" -o /dev/null
while true; do
start=$(date +%s%N)
# Read the http chunk size from the stream. This is also the read which
# checks timings.
check read -t $threshold_sec line
echo "Chunk number [$chunk_count] has size: $line"
line=$(echo $line | tr -d '\n' | tr -d '\r')
# If we read 0 that means we have finished reading the stream.
if [ $((16#$line)) -eq "0" ] ; then
check [ $expect_chunk_count -le $chunk_count ]
return
fi
let chunk_count=chunk_count+1
# read the actual data from the stream, using the amount indicated in
# the previous read. This read should be fast.
# Note that we need to clear IFS for read since otherwise it can get
# confused by whitespace-only chunks.
IFS= check read -N $((16#$line)) line
echo "Chunk data: $line"
# Read the trailing \r\n - should be fast.
check read -N 2 line
done < <($command "$base_url/slow_flushing_html_response.php")
# Only reached if we finish the stream without a chunk of 0, which is an HTTP
# protocol violation.
fail
}
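# For example, to require at least two chunks, each arriving within two
# seconds (hypothetical vhost; ".example.com" is appended automatically):
#   check_flushing slow-flushing-host 2 2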
# Given the output of a page with ?PageSpeedFilters=+debug, print the section of
# the page where it lists what filters are enabled.
function extract_filters_from_debug_html() {
local debug_output="$1"
# Pull out the non-blank lines between "Filters:" and "Options:". First
# convert newlines to % so sed can operate on the whole file, then put them
# back again.
check_from -q "$debug_output" grep -q "^Filters:$"
check_from -q "$debug_output" grep -q "^Options:$"
echo "$debug_output" | tr '\n' '%' | sed 's~.*%Filters:%~~' \
| sed "s~%Options:.*~~" | tr '%' '\n'
}
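# For example (hypothetical URL; "Add Head" is one enabled filter):
#   OUT=$($WGET_DUMP "$EXAMPLE_ROOT/?PageSpeedFilters=+debug")
#   FILTERS=$(extract_filters_from_debug_html "$OUT")
#   check_from "$FILTERS" grep -q "Add Head"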
# The prioritize_critical_css test is split into two functions so
# nginx_system_test.sh can verify that beacon data is preserved across restarts
# via shm-cache checkpointing. Specifically, the nginx system test first does a
# run of test_prioritize_critical_css, restarts nginx, and then runs
# test_prioritize_critical_css_final. Because beacon responses are saved in the
# metadata cache this can only pass if the metadata cache is being persisted
# across restarts.
#
# That means this test is run twice when testing, both here and then again later
# on either side of a restart, but it's pretty fast so that's not a problem.
function test_prioritize_critical_css() {
if [ "$SECONDARY_HOSTNAME" != "" ]; then
# Test critical CSS beacon injection, beacon return, and computation. This
# requires UseBeaconResultsInFilters() to be true in rewrite_driver_factory.
# NOTE: must occur after cache flush, which is why it's in this embedded
# block. The flush removes pre-existing beacon results from the pcache.
test_filter prioritize_critical_css
fetch_until -save $URL 'fgrep -c pagespeed.criticalCssBeaconInit' 1
check [ $(fgrep -o ".very_large_class_name_" $FETCH_FILE | wc -l) -eq 36 ]
CALL_PAT=".*criticalCssBeaconInit("
SKIP_ARG="[^,]*,"
CAPTURE_ARG="'\([^']*\)'.*"
BEACON_PATH=$(sed -n "s/${CALL_PAT}${CAPTURE_ARG}/\1/p" $FETCH_FILE)
ESCAPED_URL=$(sed -n \
"s/${CALL_PAT}${SKIP_ARG}${CAPTURE_ARG}/\1/p" $FETCH_FILE)
OPTIONS_HASH=$(sed -n \
"s/${CALL_PAT}${SKIP_ARG}${SKIP_ARG}${CAPTURE_ARG}/\1/p" $FETCH_FILE)
NONCE=$(sed -n \
"s/${CALL_PAT}${SKIP_ARG}${SKIP_ARG}${SKIP_ARG}${CAPTURE_ARG}/\1/p" \
$FETCH_FILE)
BEACON_URL="http://${HOSTNAME}${BEACON_PATH}?url=${ESCAPED_URL}"
BEACON_DATA="oh=${OPTIONS_HASH}&n=${NONCE}&cs=.big,.blue,.bold,.foo"
OUT=$($CURL -sSi -d "$BEACON_DATA" "$BEACON_URL")
check_from "$OUT" grep '^HTTP/1.1 204'
test_prioritize_critical_css_final
fi
}
function test_prioritize_critical_css_final() {
if [ "$SECONDARY_HOSTNAME" != "" ]; then
# Now make sure we see the correct critical css rules.
fetch_until $URL \
'grep -c <style>[.]blue{[^}]*}</style>' 1
fetch_until $URL \
'grep -c <style>[.]big{[^}]*}</style>' 1
fetch_until $URL \
'grep -c <style>[.]blue{[^}]*}[.]bold{[^}]*}</style>' 1
fetch_until -save $URL \
'grep -c <style>[.]foo{[^}]*}</style>' 1
# The last one should also have the other 3, too.
check [ `grep -c '<style>[.]blue{[^}]*}</style>' $FETCH_UNTIL_OUTFILE` = 1 ]
check [ `grep -c '<style>[.]big{[^}]*}</style>' $FETCH_UNTIL_OUTFILE` = 1 ]
check [ `grep -c '<style>[.]blue{[^}]*}[.]bold{[^}]*}</style>' \
$FETCH_UNTIL_OUTFILE` = 1 ]
fi
}
function cache_purge_test() {
# Tests individual-URL and global cache purging, via both
# "GET pagespeed_admin/cache?purge=URL" and the "PURGE URL" HTTP method.
PURGE_ROOT="$1"
PURGE_STATS_URL="$PURGE_ROOT/pagespeed_admin/statistics"
function cache_purge() {
local purge_method="$1"
local purge_path="$2"
if [ "$purge_method" = "GET" ]; then
echo http_proxy=$SECONDARY_HOSTNAME $WGET -q -O - \
"$PURGE_ROOT/pagespeed_admin/cache?purge=$purge_path"
http_proxy=$SECONDARY_HOSTNAME $WGET -q -O - \
"$PURGE_ROOT/pagespeed_admin/cache?purge=$purge_path"
else
PURGE_URL="$PURGE_ROOT/$purge_path"
echo $CURL --request PURGE --proxy $SECONDARY_HOSTNAME "$PURGE_URL"
check $CURL --request PURGE --proxy $SECONDARY_HOSTNAME "$PURGE_URL"
fi
echo ""
if [ $statistics_enabled -eq "0" ]; then
# Without statistics, we have no mechanism to transmit state-changes
# from one Apache child process to another, and so each process must
# independently poll the cache.purge file, which happens every 5 seconds.
echo sleep 6
sleep 6
fi
}
# Checks to see whether a .pagespeed URL is present in the metadata cache.
# A response including "cache_ok:true" or "cache_ok:false" is sent to stdout.
function read_metadata_cache() {
path="$PURGE_ROOT/$1"
http_proxy=$SECONDARY_HOSTNAME $WGET -q -O - \
"$PURGE_ROOT/pagespeed_admin/cache?url=$path"
}
# Find the full .pagespeed. URL of yellow.css
PURGE_COMBINE_CSS="$PURGE_ROOT/combine_css.html"
http_proxy=$SECONDARY_HOSTNAME fetch_until -save "$PURGE_COMBINE_CSS" \
"grep -c pagespeed.cf" 4
yellow_css=$(grep yellow.css $FETCH_UNTIL_OUTFILE | cut -d\" -f6)
blue_css=$(grep blue.css $FETCH_UNTIL_OUTFILE | cut -d\" -f6)
purple_path="styles/$$"
purple_url="$PURGE_ROOT/$purple_path/purple.css"
purple_dir="$APACHE_DOC_ROOT/purge/$purple_path"
ls -ld $APACHE_DOC_ROOT $APACHE_DOC_ROOT/purge
echo $SUDO mkdir -p "$purple_dir"
$SUDO mkdir -p "$purple_dir"
purple_file="$purple_dir/purple.css"
for method in $CACHE_PURGE_METHODS; do
echo Individual URL Cache Purging with $method
check_from "$(read_metadata_cache $yellow_css)" fgrep -q cache_ok:true
check_from "$(read_metadata_cache $blue_css)" fgrep -q cache_ok:true
echo 'body { background: MediumPurple; }' > "/tmp/purple.$$"
$SUDO cp "/tmp/purple.$$" "$purple_file"
http_proxy=$SECONDARY_HOSTNAME fetch_until "$purple_url" 'fgrep -c 9370db' 1
echo 'body { background: black; }' > "/tmp/purple.$$"
$SUDO cp "/tmp/purple.$$" "$purple_file"
cache_purge $method "*"
check_from "$(read_metadata_cache $yellow_css)" fgrep -q cache_ok:false
check_from "$(read_metadata_cache $blue_css)" fgrep -q cache_ok:false
http_proxy=$SECONDARY_HOSTNAME fetch_until "$purple_url" 'fgrep -c #000' 1
cache_purge "$method" "$purple_path/purple.css"
sleep 1
STATS=$OUTDIR/purge.stats
http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP $PURGE_STATS_URL > $STATS.0
http_proxy=$SECONDARY_HOSTNAME fetch_until "$PURGE_COMBINE_CSS" \
"grep -c pagespeed.cf" 4
http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP $PURGE_STATS_URL > $STATS.1
# Having rewritten 4 CSS files, we will have done 4 resource fetches.
check_stat $STATS.0 $STATS.1 num_resource_fetch_successes 4
# Sanity check: rewriting the same CSS file results in no new fetches.
http_proxy=$SECONDARY_HOSTNAME fetch_until "$PURGE_COMBINE_CSS" \
"grep -c pagespeed.cf" 4
http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP $PURGE_STATS_URL > $STATS.2
check_stat $STATS.1 $STATS.2 num_resource_fetch_successes 0
# Now flush one of the files, and it should be the only one that
# needs to be refetched after we get the combine_css file again.
check_from "$(read_metadata_cache $yellow_css)" fgrep -q cache_ok:true
check_from "$(read_metadata_cache $blue_css)" fgrep -q cache_ok:true
cache_purge $method styles/yellow.css
check_from "$(read_metadata_cache $yellow_css)" fgrep -q cache_ok:false
check_from "$(read_metadata_cache $blue_css)" fgrep -q cache_ok:true
sleep 1
http_proxy=$SECONDARY_HOSTNAME fetch_until "$PURGE_COMBINE_CSS" \
"grep -c pagespeed.cf" 4
http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP $PURGE_STATS_URL > $STATS.3
check_stat $STATS.2 $STATS.3 num_resource_fetch_successes 1
done
$SUDO rm -rf "$purple_dir" "/tmp/purple.$$"
}