# blob: a119b9e18872d8158657a5d6982626c9479de3d8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
library(ggplot2)
library(reshape)
source("multiplot.R")
source("si_vec.R")
# Append `insert_rate` and `scan_rate` columns to one system's samples.
#
# Each rate is the change in the `inserted` / `scanned` column divided by the
# change in `time` between consecutive rows. The first row has no predecessor,
# so its rate is set to 0.
#
# `samples` must contain rows for a single system only: differencing across
# two systems' rows would produce a meaningless rate at the seam.
add_rates <- function(samples) {
  elapsed <- diff(samples$time)
  samples$insert_rate <- c(0, diff(samples$inserted) / elapsed)
  samples$scan_rate <- c(0, diff(samples$scanned) / elapsed)
  samples
}

# Load each system's samples, tag them with the system name and drop the
# system-specific column so that both frames share the same columns.
d.kudu <- read.table(file = "/tmp/kudu.tsv", header = TRUE)
d.kudu$system <- as.factor("kudu")
d.kudu <- subset(d.kudu, select = -c(num_layers))

d.hbase <- read.table(file = "/tmp/hbase.tsv", header = TRUE)
d.hbase$system <- as.factor("hbase")
d.hbase <- subset(d.hbase, select = -c(num_storefiles))

# Rates are derived per system *before* stacking the frames; computing them on
# the stacked frame would difference the last kudu row against the first hbase
# row and give the first hbase sample a bogus rate.
d <- rbind(add_rates(d.kudu), add_rates(d.hbase))

# The raw `scanned` column is not plotted; only the derived scan_rate is.
d <- subset(d, select = -c(scanned))

# Long format: one row per (time, system, variable) with the value in `value`.
d.melted <- melt(d, id.vars = c("time", "system"))
# Insert count at which each system gets a vertical marker on the time axis.
marker.threshold <- 200 * 1000 * 1000

# Build the marker for one system.
#
# Returns a one-element list holding a vertical line at the `time` of the first
# row of `samples` whose `inserted` value is >= marker.threshold, drawn in
# `colour`. Returns an empty list when no row reaches the threshold, so that no
# line with a missing x-intercept is added to the plots.
threshold_marker <- function(samples, colour) {
  reached <- which(samples$inserted >= marker.threshold)
  if (length(reached) == 0) {
    return(list())
  }
  list(geom_vline(xintercept = samples$time[reached[1]], colour = colour))
}

# List of marker layers that is added to both time-series plots.
# NOTE(review): the colours are fixed here, whereas the plot lines take their
# colours from the default discrete scale for `system`; confirm that blue/red
# actually correspond to the kudu/hbase line colours.
vlines <- c(
  threshold_marker(d.kudu, "blue"),
  threshold_marker(d.hbase, "red")
)

# Smoothing span handed to stat_smooth() for the insert-rate plot: the
# reciprocal of the largest sample time.
smooth.span <- 1.0 / max(d$time)
# Scan rate over time, one line per system, on a log10 y axis, with the
# insert-threshold markers from `vlines` overlaid.
p.scan_rate <- ggplot(
    subset(d.melted, variable == "scan_rate"),
    aes(x = time, y = value, colour = system)
  ) +
  geom_line() +
  scale_y_log10(labels = si_vec) +
  ggtitle("Scan rate during insert workload\n(log scale)") +
  xlab(NULL) +
  ylab("Rows/sec") +
  vlines
# Insert rate over time per system: a smoothed trend (span = smooth.span)
# drawn first, then the faint raw series on top, plus the insert-threshold
# markers from `vlines`.
p.insert_rate <- ggplot(
    subset(d.melted, variable == "insert_rate"),
    aes(x = time, y = value, colour = system)
  ) +
  stat_smooth(span = smooth.span) +
  geom_line(alpha = 0.4) +
  scale_y_continuous(labels = si_vec) +
  ggtitle("Insert rate during insert workload") +
  xlab("Time (s)") +
  ylab("Rows/sec") +
  vlines
# Density of the observed scan rates, one translucent fill per system, with a
# log10 x axis.
scan_rate_histo <- ggplot(d) +
  aes(x = scan_rate, fill = system) +
  geom_density(alpha = 0.5) +
  scale_x_log10(labels = si_vec) +
  xlab("Scan rate (rows/sec)")
# Density of the observed insert rates, one translucent fill per system, on a
# linear x axis.
insert_rate_histo <- ggplot(d) +
  aes(x = insert_rate, fill = system) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(labels = si_vec) +
  xlab("Insert rate (rows/sec)")
# Close the current graphics device if one is open. dev.cur() reports 1 when
# only the null device is active, in which case dev.off() would raise an
# error; checking first avoids having to swallow every error blindly.
if (dev.cur() > 1L) {
  invisible(dev.off())
}

# First figure: the two time-series plots.
multiplot(p.scan_rate, p.insert_rate)

# Second figure: the two density plots. A new device is opened first,
# presumably so that this figure does not replace the first one.
dev.new()
multiplot(scan_rate_histo, insert_rate_histo)