| #!/bin/bash |
| |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
# Abort on the first failing command (errexit) and echo every command
# to stderr before it runs (xtrace).
set -o errexit -o xtrace
| |
#######################################
# Print the complete help text for this script on stdout.
# Globals:   none
# Arguments: none ($0 is read only to display the script's own name)
# Outputs:   a synopsis followed by the long-form description
#######################################
function usage {
  local prog
  prog=$(basename "$0")

  # The synopsis needs the program name and literal trailing backslashes,
  # so it goes through printf; the rest is static text in a quoted here-doc.
  printf '%s\n' \
    "${prog} --help" \
    "${prog} [--min] [--out-dir D] [--iters N] [--skip-one]\\" \
    " [--only-combine] [--perf-args STRING] [-Dkey=value]* \\" \
    " [--] [-Dkey=value]* branch_1[:name_1] .. [-Dkey=value]* branch_n[:name_n]" \
    ""

  cat <<'EOF'
Run a set of Perf.java trials and compare their results.
A 'trial' is N runs of Perf.java against the code as it
exists on a branch in git. By comparing Perf.java output
generated by different branches in git, we can understand
the relative performance of those branches.

This script must be run in the lang/java/ipc directory of
the Avro source code, on a computer where Maven is installed
and the other build-prerequisites of Avro are in place. This
script will do a 'mvn clean install' of Avro from within
the lang/java directory, before running tests.

The way Perf.java works is that it times an 'inner loop' that
reads or writes a large number of records (the exact number can
be controlled by a system property as described below). This
inner loop is called a 'cycle.' Perf.java runs a medium number of
these cycles, and outputs either the average or the minimum
of their running times. This script runs Perf.java a small number
of times (controllable by the --iters flag), and takes either the
average or minimum of those. The result of all this is the results
of a single 'trial.'

The basic model is that there is a 'baseline' trial plus any
number of 'treatment' trials. The goal is to compare the
performance of each treatment against the baseline. The main
output is written to the file 'summary.csv'. This file contains
one line per performance test run by Perf.java. Each row contains
a 'results' column for each trial, followed by a 'comparison' column
for each treatment trial. The results column contains the average
(or minimum) of the runtimes of all cycles over all iterations of
the trial. The comparison columns contain the difference between
the performance of the treatment and the baseline, as a percent
of the baseline. Specifically, they contain
 100*(baseline-treatment)/baseline, i.e., positive numbers mean
we've seen a speedup.

By default, the running times of cycles are averaged together.
The --min flag changes that to taking the minimum.

By default, output is written to the current working directory.
However, lots of intermediate files are generated, so it's recommended
that the --out-dir argument is used to redirect the output to
a different working directory.

By default, the number of iterations in a trial is 4, but this can
be changed with the --iters flag.

Perf.java takes a number of command-line arguments, and can be
influenced by system properties. Command-line arguments can be
passed using the --perf-args flag. When using this switch, pass
your Perf.java command-line arguments in a single string, even if
there are more than one of them. You can set system properties
using -Dkey=value switch, just as you would with Maven. System
properties that come before the '--' switch and the first
branch are passed to all trials. System properties that come after
the '--' switch and/or first branch are passed to the branch that
follows them. Commonly used system properties include:

 org.apache.avro.io.perf.count -- the number of elements
generated for the inner-most loop of the performance test. Defaults
to 250K. Must be a multiple of 4.

 org.apache.avro.io.perf.cycles -- the number of times the inner-
most loop is called within an invocation of Perf.java. Defaults
to 800.

 org.apache.avro.io.perf.use-direct -- use DirectBinaryEncoder instead
of BufferedBinaryEncoder for write tests. It is slower, but performance-wise
it can be more consistent, which helps when trying to detect small performance
improvements.

 org.apache.avro.specific.use_custom_coders -- flag that turns on
the use of the custom-coder optimization in the SpecificRecord tests.
Defaults to 'false;' set to 'true' to turn them on.

Trials, as indicated, are branches in git. The branch_i arguments
indicate which branches make up a trial. The first of these
(branch_1) is considered the "baseline" trial: it's the trial
that all the others are compared against. (However, if the --skip-one
is provided, the result from the first trial is ignored and the second
becomes the baseline.)

Each trial has a name as well as a branch. By default, the name of
the branch is the name of the trial, but an explicit name can be given
by suffixing the branch name with a trial name (e.g., 'foo:bar' will
use the branch 'foo' for a trial, but the trial will be named 'bar').
Trials must have unique names, so when multiple trials are run off the
same branch, explicit trial names must be used.

In addition to writing 'summary.csv', this script outputs other files,
allowing you to analyze the granular results of a test run. The file
results.csv contains a row per test in Perf.java. Each column
contains the result of a single run of Perf.java. If N is the
number of iterations in a trial, then the first N columns are the
results from the individual iterations of the first trial, the
next N are the results from the second trial, and so forth. In
addition, for each branch B being tested, there are multiple
files 'B_i.csv' for each iteration i in the trial. These per-trial
files have two columns, the first being the name of the test, the
second being the result of that test. Thus, 'results.csv' is the
result of joining these per-trial files on the trial-name, and
summary.csv averages (or takes the minimum) of these per-trial
results, and adds the comparison column.

If the --only-combine flag is given, then the script will assume
that the B_i files have been generated, and will simply join them
to compute results.csv and summary.csv. This allows you to debug
the code that combines these files without having to wait around
for Perf.java to be run a bunch of times.
EOF
}
| |
# Honor --help before any environment checks so that help is always
# available, whatever the current directory is.
case "$1" in
  --help)
    usage
    exit 0
    ;;
esac

# Refuse to run unless the working directory path mentions java/ipc.
if ! [[ $(pwd) =~ java/ipc ]]; then
  echo "Must be run from lang/java/ipc"
  echo "Type $(basename $0) --help for help"
  exit 1
fi

# Defaults; the command-line parser further down may override them.
ITERS=4
OUT="."
SKIP_ONE="false"
TEST="-c nt"
EXTRA_CLI=""
STATIC_SYSPROPS=()

# DBG=echo
| |
#######################################
# Run org.apache.avro.io.Perf once through `mvn exec:java`.
# Globals:
#   DBG             (read) when non-empty, used as a command prefix (for
#                   example "echo") so the Maven command is shown, not run
#   STATIC_SYSPROPS (read) array of extra Maven arguments
#   TEST            (read) leading Perf.java command-line arguments
#   EXTRA_CLI       (read) trailing Perf.java command-line arguments
# Arguments:
#   $1   - output file handed to Perf.java with -o
#   $2.. - additional Maven arguments, passed through verbatim
# Returns:
#   exit status of the command that was run
#######################################
function Perf_java {
  local fname=$1
  shift

  # Build the command once as an array so that arguments containing
  # whitespace survive intact and both modes run exactly the same thing.
  local -a cmd=(
    mvn exec:java
    -Dexec.classpathScope=test
    -Dexec.mainClass=org.apache.avro.io.Perf
    "${STATIC_SYSPROPS[@]}"
    "-Dexec.args=${TEST} -o ${fname} ${EXTRA_CLI}"
    "$@"
  )

  if [[ -n "$DBG" ]]; then
    # $DBG is left unquoted on purpose: it is a command prefix.
    $DBG MAVEN_OPTS=-server "${cmd[@]}"
  else
    "${cmd[@]}"
  fi
}
| |
#######################################
# Run all iterations of one trial, rebuilding first if the branch changed.
# Globals:
#   DBG   (read) optional command prefix (for example "echo") for dry runs
#   ITERS (read) number of Perf_java invocations to perform
#   OUT   (read) directory that receives <trialname><i>.csv
# Arguments:
#   $1   - branch used by the previous trial ("" if none)
#   $2   - branch to test
#   $3   - trial name, used as the prefix of the output files
#   $4.. - extra arguments forwarded to Perf_java
# Returns:
#   non-zero if the checkout, the build or a Perf_java run fails
#######################################
function run_trial {
  local lastbranch=$1
  local thisbranch=$2
  local thistrialname=$3
  shift 3

  # Only switch branches and rebuild when the code under test changes.
  if [[ "$thisbranch" != "$lastbranch" ]]; then
    # $DBG is intentionally unquoted: it is an optional command prefix.
    $DBG git checkout "$thisbranch" || return
    (
      # Build from the parent directory; never build in the wrong place.
      cd .. || exit 1
      $DBG mvn clean \
        && $DBG mvn -pl "avro,compiler,maven-plugin,ipc" install -DskipTests
    ) || return
  fi

  local i
  for (( i = 1; i <= ITERS; i++ )); do
    Perf_java "${OUT}/${thistrialname}${i}.csv" "$@" || return
  done
}
| |
#######################################
# Run every trial described by the argument list.
#
# Argument grammar:
#   [prop]... [ -- [-Dprop]... branch trialname ]...
# Everything before the first "--" is passed to every trial. After that,
# -D arguments accumulate and are passed only to the next branch/trialname
# pair; "--" separators are skipped.
#
# Arguments: see the grammar above
# Outputs:   whatever git rev-parse and run_trial print
# Returns:   non-zero if a branch lacks a trial name, cannot be resolved
#            by git, or its trial fails
#######################################
function run_trials {
  local -a allprops=( )

  # Phase 1: collect the arguments shared by all trials.
  while (( $# )); do
    if [[ "$1" == "--" ]]; then
      break
    fi
    allprops+=( "$1" )
    shift
  done

  local -a thisprops=( )
  local lastbranch=""
  local thisbranch
  local thistrialname

  # Phase 2: per-trial properties followed by branch/trialname pairs.
  while (( $# )); do
    case "$1" in
      --) # Ignore these
        shift
        ;;
      -D*)
        thisprops+=( "$1" )
        shift
        ;;
      *)
        # Without this guard "shift 2" would fail without shifting and
        # the loop would never terminate when errexit is off.
        if (( $# < 2 )); then
          echo "run_trials: branch '$1' is missing its trial name" >&2
          return 1
        fi
        thisbranch=$1
        thistrialname=$2
        # Fail early if the branch does not resolve to a commit.
        git rev-parse --verify "$thisbranch" || return
        run_trial "$lastbranch" "$thisbranch" "$thistrialname" \
          "${allprops[@]}" "${thisprops[@]}" || return
        lastbranch=$thisbranch
        thisprops=( )
        shift 2
        ;;
    esac
  done
}
| |
#######################################
# Join the per-iteration files <trial><i>.csv into results.csv.
#
# results.csv gets one row per test name (column 1 of the inputs),
# followed by column 2 of every input file, in trial order and then
# iteration order. Only columns 1 and 2 of each input are used.
#
# Globals:
#   OUT      (read) directory holding the inputs and receiving results.csv
#   ITERS    (read) number of iteration files per trial
#   SKIP_ONE (read) when "true", the first trial name is left out
# Arguments:
#   $@ - trial names, baseline first
# Returns:
#   non-zero if OUT cannot be entered, no trials remain, or an input
#   file is missing
#######################################
function join_results {
  if [[ "$SKIP_ONE" == "true" ]] && (( $# > 0 )); then
    shift
  fi
  if (( $# == 0 )); then
    echo "join_results: no trials to combine" >&2
    return 1
  fi

  # The directory stack listing printed by pushd/popd is only noise.
  pushd "${OUT}" >/dev/null || return 1

  local trial i src
  local seeded="false"
  local status=0
  for trial in "$@"; do
    for (( i = 1; i <= ITERS; i++ )); do
      src="${trial}${i}.csv"
      if [[ ! -r "$src" ]]; then
        echo "join_results: missing result file ${OUT}/${src}" >&2
        status=1
        break 2
      fi
      # Sort on the join field only, so the order is the one join expects.
      if [[ "$seeded" == "false" ]]; then
        # Truncate rather than append: a results.csv left over from an
        # earlier run must not leak into this one.
        cut -d , -f 1,2 "$src" | sort -t , -k 1,1 > results.csv
        seeded="true"
      else
        cut -d , -f 1,2 "$src" | sort -t , -k 1,1 \
          | join -t , results.csv - > tmp.csv
        mv tmp.csv results.csv
      fi
    done
  done

  popd >/dev/null
  return "$status"
}
| |
# awk programs used by print_line. AVG and MIN reduce a line of
# whitespace-separated numbers to one number; PERCENT takes
# "baseline treatment" and prints the change as a percentage of baseline.
AVG='{ for (i = 1; i <= NF; i++) { s += $i; n += 1; } } END { printf "%f", (n ? s/n : 0); }'
# Seeded from the first value instead of a fixed sentinel, so results of
# any magnitude are handled.
MIN='{ for (i = 1; i <= NF; i++) { v = $i + 0; if (n++ == 0 || v < m) m = v; } } END { printf "%f", m; }'
PERCENT='{ printf "%f", 100*($1-$2)/$1; }'

#######################################
# Turn one row of results.csv into one row of summary.csv.
#
# Output format: name, one aggregated result per trial, then one
# comparison column per non-baseline trial.
#
# Globals:
#   TEST     (read) "-c nt" selects averaging; anything else the minimum
#   ITERS    (read) number of result columns per trial
#   SKIP_ONE (read) when "true", the first entry of `trials` has no columns
#   trials   (read) array of trial names
#   AVG, MIN, PERCENT (read) awk programs
# Arguments:
#   $1   - test name
#   $2.. - result columns, ITERS per trial, in trial order
# Outputs:
#   the summary row on stdout
#######################################
function print_line {
  local line=$1
  shift

  local awks
  if [[ "$TEST" == "-c nt" ]]; then awks="$AVG"; else awks="$MIN"; fi

  # join_results leaves the first trial out of results.csv when SKIP_ONE
  # is "true", so there is one fewer group of columns to consume.
  local ntrials=${#trials[@]}
  if [[ "$SKIP_ONE" == "true" ]] && (( ntrials > 0 )); then
    ntrials=$(( ntrials - 1 ))
  fi

  local IFS=' '   # makes "${vals[*]}" a space-separated list
  local -a results=( )
  local -a vals
  local result t i
  for (( t = 0; t < ntrials; t++ )); do
    vals=( )
    for (( i = 0; i < ITERS; i++ )); do
      if (( $# )); then
        vals+=( "$1" )
        shift
      fi
    done
    result=$(printf '%s\n' "${vals[*]}" | awk "$awks")
    results+=( "$result" )
    line="${line},${result}"
  done

  # The first aggregated result is the baseline; compare the rest to it.
  local baseline=0
  for (( i = baseline + 1; i < ${#results[@]}; i++ )); do
    result=$(printf '%s %s\n' "${results[baseline]}" "${results[i]}" \
      | awk "$PERCENT")
    line="${line},${result}"
  done
  printf '%s\n' "$line"
}
| |
| |
| |
| ### |
| ### ACTUAL SCRIPT STARTS HERE |
| ### |
| |
declare command="$0 $*"
declare onlycombine="false"
declare -a run_trials_args=( )
declare -a trials=( )

# Succeed when $1 is exactly equal to a name already stored in `trials`.
function trial_name_taken {
  local candidate=$1
  local existing
  for existing in "${trials[@]}"; do
    if [[ "$existing" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}

# Print message $1 plus a pointer to --help, then exit with status 1.
function usage_error {
  echo "$1"
  echo "Type $(basename "$0") --help for help"
  exit 1
}

# Walk the command line. Switches update the global settings; -D
# properties, "--" separators and branch[:name] arguments are appended
# to run_trials_args in the order given.
while (( $# )); do
  case "$1" in
    --help)
      usage
      exit
      ;;
    --min)
      TEST="-c nm"
      shift
      ;;
    --out-dir)
      if [[ "$OUT" != "." ]]; then
        usage_error "Cannot use --out-dir twice."
      fi
      if (( $# < 2 )); then
        usage_error "Missing value for $1"
      fi
      OUT=$2
      mkdir -p "$OUT"
      shift 2
      ;;
    --iters)
      if (( $# < 2 )); then
        usage_error "Missing value for $1"
      fi
      # Later stages use ITERS in arithmetic, so insist on a positive integer.
      if [[ ! "$2" =~ ^[1-9][0-9]*$ ]]; then
        usage_error "Bad iteration count: $2"
      fi
      ITERS=$2
      shift 2
      ;;
    --only-combine)
      onlycombine="true"
      shift
      ;;
    --skip-one)
      SKIP_ONE="true"
      shift
      ;;
    --perf-args)
      if (( $# < 2 )); then
        usage_error "Missing value for $1"
      fi
      EXTRA_CLI=$2
      shift 2
      ;;
    -D*)
      if [[ ! $1 =~ ^-D[^\ =]+= ]]; then
        usage_error "Bad system property: $1"
      fi
      run_trials_args+=( "$1" )
      shift
      ;;
    --)
      run_trials_args+=( "$1" )
      shift
      ;;
    --*)
      usage_error "Unknown switch: $1"
      ;;
    *)
      # "branch:name" names the trial explicitly; otherwise the branch
      # name doubles as the trial name.
      if [[ "$1" =~ ^([^:]*):(.*) ]]; then
        thisbranch=${BASH_REMATCH[1]}
        thistrialname=${BASH_REMATCH[2]}
      else
        thisbranch=$1
        thistrialname=$1
      fi
      if [[ "$thisbranch" == "" || "$thistrialname" == "" ]]; then
        usage_error "Neither branch ($thisbranch) nor trial ($thistrialname) names may be empty"
      fi
      # Exact comparison: a regex/substring match would wrongly reject
      # "foo" when a trial called "foobar" already exists.
      if trial_name_taken "$thistrialname"; then
        usage_error "Trial named '$thistrialname' is not unique"
      fi
      trials+=( "$thistrialname" )
      run_trials_args+=( "--" "$thisbranch" "$thistrialname" )
      shift
      ;;
  esac
done
| |
# Document how the outputs were generated
printf '%s\n' "$command" > "${OUT}/command.txt"

# Unless only re-combining existing per-iteration files, run the trials.
if [[ "${onlycombine}" == "false" ]]; then
  run_trials "${run_trials_args[@]}"
fi

join_results "${trials[@]}"

# Convert every row of results.csv into a summary.csv row. Splitting on
# commas with read avoids the glob expansion and backslash mangling of an
# unquoted command substitution; the "||" clause keeps a final row that
# lacks a trailing newline.
while IFS=, read -r -a fields || (( ${#fields[@]} )); do
  print_line "${fields[@]}"
done < "${OUT}/results.csv" > "${OUT}/summary.csv"