#!/bin/bash
#
# Copyright 2012 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script reads a list of URLs from the provided file, and
# fetches them in parallel from a local slurping proxy in a randomized
# order. Loading times and statuses for them are then output to
# /tmp/latency-(encoding of settings).txt

# Tunables. Each one may be preset in the environment; an unset or empty
# value falls back to the default given here.

# Number of fetches to do in parallel.
: "${PAR:=10}"

# Number of times to run.
: "${RUNS:=3}"

# How many times to repeat each trace without restarting the workers.
: "${EXP:=3}"

# Proxy machine. If you specify this, make sure to give an IP address,
# as doing DNS lookups for it can slow things down a lot.
: "${PROXY_HOST:=127.0.0.1}"

# ... and its port.
: "${PROXY_PORT:=8080}"

# Extra flags to pass to fetch_all.py.
FLAGS=${FLAGS:-}

# Expands to "--user_agent" only when USER_AGENT is non-empty, so the flag
# is never emitted without a value to go with it.
USER_AGENT_FLAG=${USER_AGENT:+--user_agent}

# At least one URL list is required. Without one, explain the invocation
# (including the effective parallelism and run count) and stop with status 2.
if [[ $# -lt 1 ]]; then
  echo "Usage: devel/trace_stress_test.sh urls_file ..."
  echo "Shuffles each urls_file in turn, runs through shuffled file using"
  # The run count lives in RUNS; RUN is only the loop counter used later on.
  echo "$PAR parallel wget jobs. Repeats this process $RUNS times."
  exit 2
fi

# Directory containing this script; helper programs are launched from it.
OUR_PATH=$(dirname -- "$0")
# Minute-resolution timestamp used to make the report name unique per launch.
STAMP=$(date +%Y%m%d-%H%M)
# The report name encodes the settings the measurements were taken with.
LATENCY_REPORT="/tmp/latency-$PROXY_HOST-R$RUNS-P$PAR-E$EXP-$STAMP.txt"
# Scratch file used by tail_head. Created with mktemp rather than a
# PID-derived name so the path cannot be guessed in advance.
TAIL_HEAD_TEMP=$(mktemp /tmp/tail_head.XXXXXX) || {
  echo "Could not create a scratch file under /tmp" >&2
  exit 1
}

# Start the report with its column header line.
echo "time status url" > "$LATENCY_REPORT"

# Copies a window of lines from one file into another.
# Globals:
#   TAIL_HEAD_TEMP (read) - path of a scratch file; overwritten on each call.
# Arguments:
#   $1 - input file
#   $2 - line to start at, handed to `tail -n +N` (so 1 means the first line)
#   $3 - number of lines to keep, handed to `head -n`
#   $4 - output file (overwritten)
# Returns:
#   Non-zero if tail or head fails.
function tail_head {
  local input_file=$1
  local start_pos=$2
  local num_lines=$3
  local outfile=$4

  # Go through a scratch file instead of piping tail into head: with a pipe
  # we (at least Josh) get a lot of "tail: write error" printed out.
  tail -n "+$start_pos" -- "$input_file" > "$TAIL_HEAD_TEMP" || return
  head -n "$num_lines" -- "$TAIL_HEAD_TEMP" > "$outfile"
}

# Runs one pass over a URL list: shuffles it, deals the lines out to PAR
# concurrent fetch_all.py workers, and appends their output to the report.
# Globals (all read):
#   EXP, PAR, OUR_PATH, FLAGS, USER_AGENT_FLAG, USER_AGENT,
#   PROXY_HOST, PROXY_PORT, LATENCY_REPORT
# Arguments:
#   $1 - file listing the URLs to fetch; it is shuffled and split by line
# Returns:
#   1 if the URL file is unreadable or PAR is not a positive number.
function single_run {
  local urls_file=$1
  local shuffled piece log
  local -a pieces=() logs=()
  # The line count is kept in "total" rather than LINES, which bash itself
  # maintains as the terminal height.
  local i total chunk pos

  if [[ ! -r "$urls_file" ]]; then
    echo "single_run: cannot read URL file: $urls_file" >&2
    return 1
  fi
  if (( PAR < 1 )); then
    echo "single_run: PAR must be a positive number, got: $PAR" >&2
    return 1
  fi

  # Build the work list: EXP independently shuffled copies, back to back.
  shuffled=$(mktemp) || return
  for ((i = 0; i < EXP; i++)); do
    shuf -- "$urls_file" >> "$shuffled"
  done
  total=$(wc -l < "$shuffled")

  # Setting chunk size slightly too large balances load a little better, most
  # obvious when total < PAR. It also guarantees PAR * chunk > total, so PAR
  # chunks always cover the whole list and the final one simply comes up
  # short; no separate remainder handling is needed.
  chunk=$(( total / PAR + 1 ))

  # Feed each chunk to a separate worker. tail_head starts at a 1-based line
  # number, so the first chunk begins at line 1 (starting from 0 would make
  # the second chunk repeat the last line of the first).
  pos=1
  for ((i = 0; i < PAR; i++)); do
    piece=$(mktemp)
    log=$(mktemp)
    pieces+=("$piece")
    logs+=("$log")
    tail_head "$shuffled" "$pos" "$chunk" "$piece"
    # FLAGS is left unquoted on purpose so it can carry several flags. The
    # user agent is passed as a single argument even if it contains spaces.
    # shellcheck disable=SC2086
    "$OUR_PATH/fetch_all.py" $FLAGS \
      ${USER_AGENT_FLAG:+"$USER_AGENT_FLAG"} ${USER_AGENT:+"$USER_AGENT"} \
      --proxy_host "$PROXY_HOST" --proxy_port "$PROXY_PORT" \
      --urls_file "$piece" &> "$log" &
    pos=$(( pos + chunk ))
  done

  # Wait for all workers to finish.
  wait

  # Append every worker's output to the report, in worker order.
  cat -- "${logs[@]}" >> "$LATENCY_REPORT"

  # Clean up.
  rm -f -- "${pieces[@]}" "${logs[@]}" "$shuffled"
}

# Drive the test: make RUNS passes over every URL file named on the command
# line, then summarise what was recorded in the latency report.
START=$SECONDS

for ((RUN = 1; RUN <= RUNS; RUN++)); do
  echo "Run $RUN"
  for FILE in "$@"; do
    echo "File $FILE"
    single_run "$FILE"
  done
  echo "----------------------------------------------------------------------"
done

STOP=$SECONDS
# Count recorded requests, skipping the header line. The count is not kept
# in LINES because bash maintains that variable itself (terminal height).
REQUESTS=$(tail -n +2 -- "$LATENCY_REPORT" | wc -l)
ELAPSED=$(( STOP - START ))
# SECONDS has one-second resolution; treat a sub-second run as one second so
# the estimate below never divides by zero.
if (( ELAPSED < 1 )); then
  ELAPSED=1
fi
QPS=$(( REQUESTS / ELAPSED ))
echo "QPS estimate (inaccurate for short runs):" "$QPS" "requests/sec"
echo
"$OUR_PATH/trace_stress_test_percentiles.sh" "$LATENCY_REPORT" | cut -c 1-80
echo
echo "10 worst latencies:"
# NOTE(review): the -sorted.txt file is presumably written by
# trace_stress_test_percentiles.sh above -- confirm against that script.
head -n 10 -- "${LATENCY_REPORT%%.txt}-sorted.txt"
echo
echo "Status statistics:"
# The status is the second space-separated column of each report line.
tail -n +2 -- "$LATENCY_REPORT" | cut -d ' ' -f 2 | sort | uniq -c
echo "Full latency report in:" "$LATENCY_REPORT"

rm -f -- "$TAIL_HEAD_TEMP"
