#!/bin/bash
# Copyright 2010 Google Inc. All Rights Reserved.
# Author: abliss@google.com (Adam Bliss)
#
# Usage: ./system_test.sh HOSTNAME
# Tests a mod_pagespeed installation by fetching and verifying all the examples.
# Exits with status 0 if all tests pass. Exits 1 immediately if any test fails.
if [ $# != 1 ]; then
  echo Usage: ./system_test.sh HOSTNAME
  exit 2
fi;

# If the user has specified an alternate WGET as an environment variable, then
# use that, otherwise use the one in the path.
if [ "$WGET" == "" ]; then
WGET=wget
else
echo WGET = $WGET
fi
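
# For example (illustrative), to run against an alternate wget binary:
#   WGET=/usr/local/bin/wget ./system_test.sh localhost:8080
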
$WGET --version | head -1 | grep -q '1\.12'
if [ $? != 0 ]; then
  echo You have the wrong version of wget. 1.12 is required.
  exit 1
fi

HOSTNAME=$1
PORT=${HOSTNAME/*:/};
if [ "$PORT" = "$HOSTNAME" ]; then
  PORT=80
fi;
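
# For example (illustrative): HOSTNAME=localhost:8080 yields PORT=8080, while a
# HOSTNAME with no port (e.g. localhost) is left unchanged by the substitution,
# so PORT falls back to 80 above.
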
EXAMPLE_ROOT=http://$HOSTNAME/mod_pagespeed_example
STATISTICS_URL=http://localhost:$PORT/mod_pagespeed_statistics
BAD_RESOURCE_URL=http://$HOSTNAME/mod_pagespeed/ic.a.bad.css
OUTDIR=/tmp/mod_pagespeed_test.$USER/fetched_directory
rm -rf $OUTDIR

# Wget is used three different ways. The first way is nonrecursive and dumps a
# single page (with headers) to standard out. This is useful for grepping for a
# single expected string that's the result of a first-pass rewrite:
#   wget -q --save-headers -O - $URL | grep -q foo
# "-q" quells wget's noisy output; "-O -" dumps to stdout; grep's -q quells
# its output and uses the return value to indicate whether the string was
# found. Note that exiting with a nonzero value will immediately kill
# the make run.
#
# Sometimes we want to check for a condition that's not true on the first dump
# of a page, but becomes true after a few seconds as the server's asynchronous
# fetches complete. For this we use the fetch_until() function:
#   fetch_until $URL 'grep -c delayed_foo' 1
# In this case we will continuously fetch $URL and pipe the output to
# grep -c (which prints the count of matches); we repeat until the number is 1.
#
# The final way we use wget is in a recursive mode to download all prerequisites
# of a page. This fetches all resources associated with the page, and thereby
# validates the resources generated by mod_pagespeed:
#   wget -H -p -S -o $WGET_OUTPUT -nd -P $OUTDIR $EXAMPLE_ROOT/$FILE
# Here -H allows wget to cross hosts (e.g. in the case of a sharded domain); -p
# means to fetch all prerequisites; "-S -o $WGET_OUTPUT" saves wget output
# (including server headers) for later analysis; -nd puts all results in one
# directory; -P specifies that directory. We can then run commands on
# $OUTDIR/$FILE and nuke $OUTDIR when we're done.
# TODO(abliss): some of these will fail on windows where wget escapes saved
# filenames differently.
WGET_OUTPUT=$OUTDIR/wget_output.txt
WGET_DUMP="$WGET -q -O - --save-headers"
WGET_PREREQ="$WGET -H -p -S -o $WGET_OUTPUT -nd -P $OUTDIR"
# Call with a command and its args. Echoes the command, then tries to eval it.
# If it returns false, fail the tests.
function check() {
  echo " " $@
  if eval "$@"; then
    return;
  else
    echo FAIL.
    exit 1;
  fi;
}
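
# For example (illustrative), a trivially passing check echoes the command and
# continues:
#   check [ 1 = 1 ]
# whereas a failing command prints FAIL. and exits 1.
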
# Continuously fetches URL and pipes the output to COMMAND. Loops until
# COMMAND outputs RESULT, in which case we return 0, or until 10 seconds have
# passed, in which case we return 1.
function fetch_until() {
  URL=$1
  COMMAND=$2
  RESULT=$3
  TIMEOUT=10
  START=`date +%s`
  STOP=$((START+$TIMEOUT))
  echo " " Fetching $URL until '`'$COMMAND'`' = $RESULT
  while true; do
    if [ "`$WGET -q -O - $URL 2>&1 | $COMMAND`" = "$RESULT" ]; then
      /bin/echo "."
      return;
    fi;
    if [ `date +%s` -gt $STOP ]; then
      /bin/echo "FAIL."
      exit 1;
    fi;
    /bin/echo -n "."
    sleep 0.1
  done;
}
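
# For example (illustrative), this polls until the page contains exactly one
# text/css reference, as the combine_css test below does:
#   fetch_until $EXAMPLE_ROOT/combine_css.html 'grep -c text/css' 1
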
# Helper to set up most filter tests.
function test_filter() {
  rm -rf $OUTDIR
  mkdir -p $OUTDIR
  FILTER_NAME=$1;
  shift;
  FILTER_DESCRIPTION=$@
  echo TEST: $FILTER_NAME $FILTER_DESCRIPTION
  FILE=$FILTER_NAME.html?ModPagespeedFilters=$FILTER_NAME
  URL=$EXAMPLE_ROOT/$FILE
  FETCHED=$OUTDIR/$FILE
}
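
# For example (illustrative), `test_filter combine_css combines 4 CSS files
# into 1.` sets FILE=combine_css.html?ModPagespeedFilters=combine_css, URL to
# that file under $EXAMPLE_ROOT, and FETCHED to its path under $OUTDIR.
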
# General system tests
echo TEST: mod_pagespeed is running in Apache and writes the expected header.
check "$WGET_DUMP $EXAMPLE_ROOT/combine_css.html | grep -q X-Mod-Pagespeed"
echo TEST: 404s are served and properly recorded.
NUM_404=$($WGET_DUMP $STATISTICS_URL | grep resource_404_count | cut -d: -f2)
NUM_404=$(($NUM_404+1))
check "$WGET -O /dev/null $BAD_RESOURCE_URL 2>&1| grep -q '404 Not Found'"
check "$WGET_DUMP $STATISTICS_URL | grep -q 'resource_404_count: $NUM_404'"
echo TEST: directory is mapped to index.html.
rm -rf $OUTDIR
mkdir -p $OUTDIR
check "$WGET_PREREQ $EXAMPLE_ROOT"
check "$WGET_PREREQ $EXAMPLE_ROOT/index.html"
check diff $OUTDIR/index.html $OUTDIR/mod_pagespeed_example
echo TEST: compression is enabled for HTML.
check "$WGET -O /dev/null -q -S --header='Accept-Encoding: gzip' \
$EXAMPLE_ROOT/ 2>&1 | grep -qi 'Content-Encoding: gzip'"
# Individual filter tests, in alphabetical order
test_filter add_instrumentation adds 2 script tags.
check $WGET_PREREQ $URL
check [ `cat $FETCHED | sed 's/>/>\n/g' | grep -c '<script'` = 2 ]
test_filter collapse_whitespace removes whitespace, but not from pre tags.
check $WGET_PREREQ $URL
check [ `egrep -c '^ +<' $FETCHED` = 1 ]
test_filter combine_css combines 4 CSS files into 1.
fetch_until $URL 'grep -c text/css' 1
check $WGET_PREREQ $URL
test_filter combine_heads combines 2 heads into 1.
check $WGET_PREREQ $URL
check [ `grep -ce '<head>' $FETCHED` = 1 ]
test_filter elide_attributes removes boolean and default attributes.
check $WGET_PREREQ $URL
grep "disabled=" $FETCHED # boolean, should not find
check [ $? != 0 ]
grep "type=" $FETCHED # default, should not find
check [ $? != 0 ]
test_filter extend_cache rewrites an image tag.
fetch_until $URL 'grep -c src.*40265e' 1
check $WGET_PREREQ $URL
test_filter move_css_to_head does what it says on the tin.
check $WGET_PREREQ $URL
check grep -q "'<head><link'" $FETCHED # link moved to head
test_filter inline_css converts a link tag to a style tag.
fetch_until $URL 'grep -c style' 2
test_filter inline_javascript inlines a small JS file.
fetch_until $URL 'grep -c document.write' 1
test_filter outline_css outlines large styles, but not small ones.
check $WGET_PREREQ $URL
check egrep -q "'<link.*text/css.*large'" $FETCHED # outlined
check egrep -q "'<style.*small'" $FETCHED # not outlined
test_filter outline_javascript outlines large scripts, but not small ones.
check $WGET_PREREQ $URL
check egrep -q "'<script.*src=.*large'" $FETCHED # outlined
check egrep -q "'<script.*small.*var hello'" $FETCHED # not outlined
echo TEST: compression is enabled for rewritten JS.
JS_URL=$(egrep -o 'http://.*\.js' $FETCHED)
check "$WGET -O /dev/null -q -S --header='Accept-Encoding: gzip' \
  $JS_URL 2>&1 | grep -qi 'Content-Encoding: gzip'"
test_filter remove_comments removes comments but not IE directives.
check $WGET_PREREQ $URL
grep "removed" $FETCHED # comment, should not find
check [ $? != 0 ]
check grep -q preserved $FETCHED # preserves IE directives
test_filter remove_quotes does what it says on the tin.
check $WGET_PREREQ $URL
check [ `sed 's/ /\n/g' $FETCHED | grep -c '"' ` = 2 ] # 2 quoted attrs
check [ `grep -c "'" $FETCHED` = 0 ] # no apostrophes
test_filter rewrite_css removes comments and saves a bunch of bytes.
check $WGET_PREREQ $URL
grep "comment" $FETCHED # comment, should not find
check [ $? != 0 ]
check [ `stat -c %s $FETCHED` -lt 315 ] # down from 472
test_filter rewrite_images inlines, compresses, and resizes.
fetch_until $URL 'grep -c image/png' 1 # inlined
check $WGET_PREREQ $URL
check [ `stat -c %s $OUTDIR/*1023x766*Puzzle*` -lt 241260 ] # compressed
check [ `stat -c %s $OUTDIR/*256x192*Puzzle*` -lt 24126 ] # resized
echo TEST: compression is not enabled for rewritten images.
IMG_URL=$(egrep -o 'http://.*\.jpg' $FETCHED | head -n1)
IMG_HEADERS=$($WGET -O /dev/null -q -S --header='Accept-Encoding: gzip' \
  $IMG_URL 2>&1)
# Make sure we have some valid headers.
echo \"$IMG_HEADERS\" | grep -qi 'Content-Type: image/jpeg'
check [ $? = 0 ]
# Make sure the response was not gzipped.
echo "$IMG_HEADERS" | grep -qi 'Content-Encoding: gzip'
check [ $? != 0 ]
test_filter rewrite_javascript removes comments and saves a bunch of bytes.
fetch_until $URL 'grep -c src.*9257c' 2 # external scripts rewritten
check $WGET_PREREQ $URL
grep -R "removed" $OUTDIR # comments, should not find any
check [ $? != 0 ]
check [ `stat -c %s $FETCHED` -lt 1560 ] # net savings
check grep -q preserved $FETCHED # preserves certain comments
# rewritten JS is cache-extended
check grep -q "'Cache-control: public, max-age=31536000'" $WGET_OUTPUT
rm -rf $OUTDIR
echo "PASS."