#!/bin/bash
#
# Copyright 2012 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script reads a list of URLs from the provided file, and
# fetches them in parallel from a local slurping proxy in a randomized
# order. Loading times and statuses for the fetches are then written to
# /tmp/latency-(encoding of settings).txt
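#
# Example invocation (values are illustrative):
#   PAR=20 RUNS=5 PROXY_HOST=127.0.0.1 PROXY_PORT=8080 \
#     devel/trace_stress_test.sh urls1.txt urls2.txt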
# number of fetches to do in parallel
if [ -z "$PAR" ]; then
PAR=10
fi
# number of times to run
if [ -z "$RUNS" ]; then
RUNS=3
fi
# How many times to repeat each trace without restarting the workers
if [ -z "$EXP" ]; then
EXP=3
fi
# Proxy machine. If you specify this, make sure to give an IP address,
# as doing DNS lookups for it can slow things down a lot
if [ -z "$PROXY_HOST" ]; then
PROXY_HOST=127.0.0.1
fi
# .. and port
if [ -z "$PROXY_PORT" ]; then
PROXY_PORT=8080
fi
# Extra flags to pass to fetch_all.py
FLAGS=${FLAGS:-}
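# ${USER_AGENT:+--user_agent} expands to "--user_agent" only when USER_AGENT
# is set, so the user-agent arguments passed to fetch_all.py below are either
# empty or a complete flag/value pair.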
USER_AGENT_FLAG=${USER_AGENT:+--user_agent}
if [ $# -lt 1 ]; then
echo "Usage: devel/trace_stress_test.sh urls_file ..."
echo "Shuffles each urls_file in turn, runs through shuffled file using"
echo "$PAR parallel wget jobs. Repeats this process $RUN times."
exit 2
fi
OUR_PATH=`dirname $0`
STAMP=`date +%Y%m%d-%H%M`
LATENCY_REPORT=/tmp/latency-$PROXY_HOST-R$RUNS-P$PAR-E$EXP-$STAMP.txt
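# e.g. /tmp/latency-127.0.0.1-R3-P10-E3-20120501-1430.txt (illustrative values)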
TAIL_HEAD_TEMP=/tmp/tail_head.$$
echo "time status url" > $LATENCY_REPORT
# Copies $3 lines from file $1, starting at line $2, into file $4.
function tail_head {
input_file=$1
start_pos=$2
num_lines=$3
outfile=$4
# We make a temp file because otherwise we (at least Josh) get a lot of
# "tail: write error" printed out.
tail -n +$start_pos "$input_file" > $TAIL_HEAD_TEMP
head -n $num_lines $TAIL_HEAD_TEMP > "$outfile"
}
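# For example, "tail_head urls.txt 11 10 /tmp/piece" copies lines 11-20 of
# urls.txt into /tmp/piece (file names here are hypothetical).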
function single_run {
FILE=$1
# Concatenate $EXP shuffled copies of the urls file, then split the result
# into pieces
SHUF_FILE=`mktemp`
for I in `seq 1 $EXP`; do
shuf $FILE >> $SHUF_FILE
done
LINES=`wc -l < $SHUF_FILE`
# Setting chunk size slightly too large balances load a little better, most
# obvious when $LINES < $PAR.
CHUNK=`expr 1 + $LINES / $PAR`
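# Worked example (illustrative): 95 lines across PAR=10 workers gives
# CHUNK = 1 + 95 / 10 = 10; the first 9 workers get 10 lines each and the
# last gets the remaining 5 (EXTRA = 95 - 100 = -5 below).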
# feed each chunk to a separate wget
PIECES=
LOGS=
# Start at line 1: tail -n +N is 1-based (starting at 0 double-counts one URL).
POS=1
for I in `seq 1 $PAR`; do
CUR_CHUNK=$CHUNK
if [ $I -eq $PAR ]; then
# make sure we also include the remainder
EXTRA=`expr $LINES - $PAR \* $CHUNK`
CUR_CHUNK=`expr $CUR_CHUNK + $EXTRA`
fi
PIECE=`mktemp`
LOG=`mktemp`
PIECES="$PIECES $PIECE"
LOGS="$LOGS $LOG"
tail_head $SHUF_FILE $POS $CUR_CHUNK $PIECE
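# Each background worker fetches its slice through the proxy and logs one
# "time status url" line per fetch; the logs are merged into the report
# after the wait below.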
$OUR_PATH/fetch_all.py $FLAGS $USER_AGENT_FLAG ${USER_AGENT:+"$USER_AGENT"} \
--proxy_host $PROXY_HOST --proxy_port $PROXY_PORT \
--urls_file $PIECE &> $LOG &
POS=`expr $POS + $CHUNK`
done
# Wait for all to finish
wait
# Collect each worker's output into the latency report
cat $LOGS >> $LATENCY_REPORT
# clean up
rm $PIECES
rm $LOGS
rm $SHUF_FILE
}
START=$SECONDS
for RUN in `seq 1 $RUNS`; do
echo "Run $RUN"
for FILE in "$@"; do
echo "File $FILE"
single_run "$FILE"
done
echo "----------------------------------------------------------------------"
done
STOP=$SECONDS
LINES=`tail -n +2 $LATENCY_REPORT | wc -l`
ELAPSED=`expr $STOP - $START`
# Avoid dividing by zero when the whole run takes under a second.
if [ $ELAPSED -eq 0 ]; then ELAPSED=1; fi
QPS=`expr $LINES / $ELAPSED`
echo "QPS estimate (inaccurate for short runs):" $QPS "requests/sec"
echo
$OUR_PATH/trace_stress_test_percentiles.sh $LATENCY_REPORT | cut -c 1-80
echo
echo "10 worst latencies:"
head -n 10 ${LATENCY_REPORT%%.txt}-sorted.txt
echo
echo "Status statistics:"
tail -n +2 $LATENCY_REPORT | cut -d ' ' -f 2 | sort | uniq -c
echo "Full latency report in:" $LATENCY_REPORT
rm -f $TAIL_HEAD_TEMP