| #!/usr/bin/env bash |
| # REQUIRE: db_bench binary exists in the current directory |
| # |
| # This should be used with the LevelDB fork listed here to use additional test options. |
| # For more details on the changes see the blog post listed below. |
| # https://github.com/mdcallag/leveldb-1 |
| # http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html |
| |
| if [ $# -ne 1 ]; then |
| echo -n "./benchmark.sh [fillseq/overwrite/readrandom/readwhilewriting]" |
| exit 0 |
| fi |
| |
| # size constants |
| K=1024 |
| M=$((1024 * K)) |
| G=$((1024 * M)) |
| |
| if [ -z $DB_DIR ]; then |
| echo "DB_DIR is not defined" |
| exit 0 |
| fi |
| |
| output_dir=${OUTPUT_DIR:-/tmp/} |
| if [ ! -d $output_dir ]; then |
| mkdir -p $output_dir |
| fi |
| |
| # all multithreaded tests run with sync=1 unless |
| # $DB_BENCH_NO_SYNC is defined |
| syncval="1" |
| if [ ! -z $DB_BENCH_NO_SYNC ]; then |
| echo "Turning sync off for all multithreaded tests" |
| syncval="0"; |
| fi |
| |
| num_threads=${NUM_THREADS:-16} |
| # Only for *whilewriting, *whilemerging |
| writes_per_second=${WRITES_PER_SECOND:-$((10 * K))} |
| cache_size=${CACHE_SIZE:-$((1 * G))} |
| |
| num_keys=${NUM_KEYS:-$((1 * G))} |
| key_size=20 |
| value_size=${VALUE_SIZE:-400} |
| block_size=${BLOCK_SIZE:-4096} |
| |
| const_params=" |
| --db=$DB_DIR \ |
| \ |
| --num=$num_keys \ |
| --value_size=$value_size \ |
| --cache_size=$cache_size \ |
| --compression_ratio=0.5 \ |
| \ |
| --write_buffer_size=$((2 * M)) \ |
| \ |
| --histogram=1 \ |
| \ |
| --bloom_bits=10 \ |
| --open_files=$((20 * K))" |
| |
| params_w="$const_params " |
| |
| function summarize_result { |
| test_out=$1 |
| test_name=$2 |
| bench_name=$3 |
| nthr=$4 |
| |
| usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' ) |
| mb_sec=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $5 }' ) |
| ops=$( grep "^Count:" $test_out | awk '{ print $2 }' ) |
| ops_sec=$( echo "scale=0; (1000000.0 * $nthr) / $usecs_op" | bc ) |
| avg=$( grep "^Count:" $test_out | awk '{ printf "%.1f", $4 }' ) |
| p50=$( grep "^Min:" $test_out | awk '{ printf "%.1f", $4 }' ) |
| echo -e "$ops_sec\t$mb_sec\t$usecs_op\t$avg\t$p50\t$test_name" \ |
| >> $output_dir/report.txt |
| } |
| |
| function run_fillseq { |
| # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the |
| # client can discover where to restart a load after a crash. I think this is a good way to load. |
| echo "Loading $num_keys keys sequentially" |
| cmd="./db_bench --benchmarks=fillseq \ |
| --use_existing_db=0 \ |
| --sync=0 \ |
| $params_w \ |
| --threads=1 \ |
| --seed=$( date +%s ) \ |
| 2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log" |
| echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log |
| eval $cmd |
| summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq 1 |
| } |
| |
| function run_change { |
| operation=$1 |
| echo "Do $num_keys random $operation" |
| out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log" |
| cmd="./db_bench --benchmarks=$operation \ |
| --use_existing_db=1 \ |
| --sync=$syncval \ |
| $params_w \ |
| --threads=$num_threads \ |
| --seed=$( date +%s ) \ |
| 2>&1 | tee -a $output_dir/${out_name}" |
| echo $cmd | tee $output_dir/${out_name} |
| eval $cmd |
| summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation $num_threads |
| } |
| |
| function run_readrandom { |
| echo "Reading $num_keys random keys" |
| out_name="benchmark_readrandom.t${num_threads}.log" |
| cmd="./db_bench --benchmarks=readrandom \ |
| --use_existing_db=1 \ |
| $params_w \ |
| --threads=$num_threads \ |
| --seed=$( date +%s ) \ |
| 2>&1 | tee -a $output_dir/${out_name}" |
| echo $cmd | tee $output_dir/${out_name} |
| eval $cmd |
| summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom $num_threads |
| } |
| |
| function run_readwhile { |
| operation=$1 |
| echo "Reading $num_keys random keys while $operation" |
| out_name="benchmark_readwhile${operation}.t${num_threads}.log" |
| cmd="./db_bench --benchmarks=readwhile${operation} \ |
| --use_existing_db=1 \ |
| --sync=$syncval \ |
| $params_w \ |
| --threads=$num_threads \ |
| --writes_per_second=$writes_per_second \ |
| --seed=$( date +%s ) \ |
| 2>&1 | tee -a $output_dir/${out_name}" |
| echo $cmd | tee $output_dir/${out_name} |
| eval $cmd |
| summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} $num_threads |
| } |
| |
| function now() { |
| echo `date +"%s"` |
| } |
| |
| report="$output_dir/report.txt" |
| schedule="$output_dir/schedule.txt" |
| |
| echo "===== Benchmark =====" |
| |
| # Run!!! |
| IFS=',' read -a jobs <<< $1 |
| for job in ${jobs[@]}; do |
| |
| if [ $job != debug ]; then |
| echo "Start $job at `date`" | tee -a $schedule |
| fi |
| |
| start=$(now) |
| if [ $job = fillseq ]; then |
| run_fillseq |
| elif [ $job = overwrite ]; then |
| run_change overwrite |
| elif [ $job = readrandom ]; then |
| run_readrandom |
| elif [ $job = readwhilewriting ]; then |
| run_readwhile writing |
| elif [ $job = debug ]; then |
| num_keys=1000; # debug |
| echo "Setting num_keys to $num_keys" |
| else |
| echo "unknown job $job" |
| exit |
| fi |
| end=$(now) |
| |
| if [ $job != debug ]; then |
| echo "Complete $job in $((end-start)) seconds" | tee -a $schedule |
| fi |
| |
| echo -e "ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest" |
| tail -1 $output_dir/report.txt |
| |
| done |