# blob: 38e856fdd9f9ab1e42b24ac3a1e75247a12f2d28 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Hashcode for 50m records is 85b9cec947fc5d849f0a778801696d2b
# Script to load data using YCSB on Cassandra multi node cluster.
#!/bin/bash
# NOTE(review): the shebang above is not the first line of this file, so it is
# only a comment to the kernel; invoke the script explicitly with bash.
# Abort on the first command that fails.
set -e
# Number of records to load. Defaults to 50000000 (the hashcode noted in the
# header applies to that default); pass a different count as the first
# argument to load as per requirement, e.g. `bash <script> 1000`.
recordcount="${1:-50000000}"
# Reject anything that is not a plain non-negative integer before any cluster
# resource gets created.
case "$recordcount" in
  '' | *[!0-9]*)
    echo "recordcount must be a non-negative integer, got '$recordcount'" >&2
    exit 1
    ;;
esac
# Removes the temporary cassandra service. Used both for cleaning up a
# leftover service and as the exit handler, i.e. after failures as well as
# after successful runs.
# Side effect: changes the working directory to ../LargeITCluster, which is
# where cassandra-svc-temp.yaml is looked up.
delete_service() {
  cd ../LargeITCluster
  kubectl delete -f cassandra-svc-temp.yaml
}
# Whatever way the script ends, remove the temporary cassandra service again
trap delete_service EXIT

# A cassandra-temp service left over from an earlier run is deleted first
if kubectl get svc -o=name | grep cassandra-temp > /dev/null; then
  echo "Service cassandra-temp already exists"
  echo "Deleting service cassandra-temp "
  delete_service
fi

# Bring up the temporary single node cassandra set up; it is only used to
# invoke cqlsh against the actual cluster remotely
kubectl create -f cassandra-svc-temp.yaml
# Blocks until the first container of one pod reports ready.
# Arguments:
#   $1 - label selector handed to `kubectl get pods -l`
#   $2 - zero-based position of the pod within the selected pod list
# Outputs: prints "." after every 10 second wait.
# Only the literal status "true" counts as ready. An empty status (no
# container status reported yet) or a failing kubectl query (for instance
# when no pod exists at that position yet) is treated as "not ready" rather
# than being executed as a command or aborting the script through `set -e`.
function wait_for_pod_ready {
  local selector="$1"
  local index="$2"
  local ready
  while true; do
    ready="$(kubectl get pods -l "$selector" \
      -o jsonpath="{.items[$index].status.containerStatuses[0].ready}")" || ready=""
    if [ "$ready" = "true" ]; then
      return 0
    fi
    sleep 10s
    echo "."
  done
}

num_of_replicas="$(kubectl get statefulset cassandra --output=jsonpath='{.spec.replicas}')"
# Without a numeric replica count the wait loop below would be skipped
# silently, so fail loudly instead.
case "$num_of_replicas" in
  '' | *[!0-9]*)
    echo "Could not determine the replica count of statefulset cassandra" >&2
    exit 1
    ;;
esac
echo "Script to load data on $num_of_replicas replicas"
echo "Waiting for Cassandra pods to be in ready state"
# Wait until all the pods configured as per number of replicas are ready
for ((i = 0; i < num_of_replicas; i++)); do
  wait_for_pod_ready app=cassandra "$i"
  ready_pod="$(kubectl get pods -l app=cassandra -o jsonpath="{.items[$i].metadata.name}")"
  echo "$ready_pod is ready"
done
echo "Waiting for temporary pod to be in ready state"
wait_for_pod_ready name=cassandra-temp 0
# Name of the temporary pod; cqlsh is run inside it later on
temp_running_seed="$(kubectl get pods -l name=cassandra-temp -o jsonpath="{.items[0].metadata.name}")"
# The external IP of the service is not available right after the service is
# started; it can take a couple of minutes. Keep polling the load balancer
# ingress IP of cassandra-external until it is non-empty.
ip_jsonpath='{.status.loadBalancer.ingress[0].ip}'
external_ip="$(kubectl get svc cassandra-external -o jsonpath="$ip_jsonpath")"
echo "Waiting for the Cassandra service to come up ........"
until [ -n "$external_ip" ]; do
  sleep 10s
  external_ip="$(kubectl get svc cassandra-external -o jsonpath="$ip_jsonpath")"
  echo "."
done
echo "External IP - $external_ip"
echo "Loading data"
# Runs the CQL text given as $1 with cqlsh inside the temporary pod, pointing
# cqlsh at the external IP of the cluster.
# Globals read: temp_running_seed (pod name), external_ip (cqlsh host).
# Both are quoted so an unexpected value can never be word-split or globbed
# into extra kubectl/cqlsh arguments.
function run_cql {
  kubectl exec -ti "$temp_running_seed" -- cqlsh "$external_ip" -e "$1"
}

# Create keyspace (an existing ycsb keyspace is dropped first)
keyspace_creation_command="drop keyspace if exists ycsb;create keyspace ycsb WITH REPLICATION = {\
'class' : 'SimpleStrategy', 'replication_factor': 3 };"
run_cql "$keyspace_creation_command"
echo "Keyspace creation............"
echo "-----------------------------"
echo "$keyspace_creation_command"
echo
# Create table (an existing usertable is dropped first)
table_creation_command="use ycsb;drop table if exists usertable;create table usertable (\
y_id varchar primary key,field0 varchar,field1 varchar,field2 varchar,field3 varchar,\
field4 varchar,field5 varchar,field6 varchar,field7 varchar,field8 varchar,field9 varchar);"
run_cql "$table_creation_command"
echo "Table creation .............."
echo "-----------------------------"
echo "$table_creation_command"
# Create index on field0
index_creation_command="CREATE INDEX IF NOT EXISTS field0_index ON ycsb.usertable (field0);"
run_cql "$index_creation_command"
cd ../ycsb-0.12.0
echo "Starting to load data on ${external_ip}"
echo "-----------------------------"
# Arguments are kept in an array so every value is passed as exactly one word
# and no line continuation has to split a word in the middle.
ycsb_load_args=(
  load cassandra-cql
  -p "hosts=${external_ip}"
  # dataintegrity flag is set to true to load deterministic data
  -p dataintegrity=true
  -p "recordcount=${recordcount}"
  -p insertorder=ordered
  -p fieldlength=20
  -threads 200
  -P workloads/workloadd
  -s
)
# Standard output of the load run is captured in workloada_load_res.txt
./bin/ycsb "${ycsb_load_args[@]}" > workloada_load_res.txt