blob: dcdddeb95a896ca8470d0b994f5460531e34d113 [file] [log] [blame]
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script means to create many testing objects (database, tables,
# partitions and a wide table with many partitions). The way to run it:
# !/usr/bin/env bash
# export HS2="HOSTNAME"
# export REALM="REALM.NAME"
# bash /root/tests/create-many-dbs-tables.sh &
# bash /root/tests/create-many-dbs-tables.sh &
if [[ ${HS2} == "" ]]; then
echo "error: need to export HS2=hostname"
exit 1
fi
if [[ ${REALM} == "" ]]; then
echo "error: need to export REALM"
exit 1
fi
# Define default test scale
NUM_OF_DATABASES=60
NUM_OF_TABLES_PER_DATABASE=20
NUM_OF_ROLES_FOR_DATABASES=60 # <= NUM_OF_DATABASES
NUM_OF_ROLES_FOR_TABLES_PER_DATABASE=5 # <= NUM_OF_TABLES_PER_DATABASE
NUM_OF_GROUPS=60 # >= NUM_OF_DATABASES
# Number of partitions varies between max and min
MAX_NUM_OF_PARTITIONS_PER_TABLE=10
MIN_NUM_OF_PARTITIONS_PER_TABLE=2
BASE_EXTERNAL_DIR="/data"
LOCAL_OUTPUT_DIR="/tmp"
BL="beeline -n hive -p hive --silent=true -u 'jdbc:hive2://${HS2}:10000/default;principal=hive/_HOST@${REALM}'"
# Number of external partitions wide tables have
declare -a NUM_OF_WIDE_TABLE_PARTITIONS=(10 100 1000)
wLen=${#NUM_OF_WIDE_TABLE_PARTITIONS[@]}
process_id=$$
while getopts "d:t:g:b:l" OPTION
do case "${OPTION}" in
b) BASE_EXTERNAL_DIR="$OPTARG";;
d) NUM_OF_DATABASES="$OPTARG";;
l) LOCAL_OUTPUT_DIR="$OPTARG";;
t) NUM_OF_TABLES_PER_DATABASE="$OPTARG";;
g) NUM_OF_GROUPS="$OPTARG";;
[?]) print >&2 "Usage: $0 [-b BASE_EXTERNAL_DIR] [-d NUM_OF_DATABASES] [-l LOCAL_OUTPUT_DIR] [-t NUM_OF_TABLES_PER_DATABASE] [-g NUM_OF_GROUPS]"
exit 1;;
esac
done
NUM_OF_PERMISSIONS=$(( NUM_OF_ROLES_FOR_DATABASES + NUM_OF_ROLES_FOR_TABLES_PER_DATABASE * NUM_OF_DATABASES))
AVG_NUM_OF_PARTITIONS_PER_TABLE=$((( MAX_NUM_OF_PARTITIONS_PER_TABLE + MIN_NUM_OF_PARTITIONS_PER_TABLE) / 2 ))
echo "[${process_id}] Scale numbers:"
echo "[${process_id}] number of databases: ${NUM_OF_DATABASES}"
echo "[${process_id}] number of tables: $((NUM_OF_TABLES_PER_DATABASE * NUM_OF_DATABASES))"
echo "[${process_id}] number of wide tables: ${wLen}"
echo "[${process_id}] number of partitions per table: ${AVG_NUM_OF_PARTITIONS_PER_TABLE}"
echo "[${process_id}] number of min partitions per wide table: ${NUM_OF_WIDE_TABLE_PARTITIONS[0]}"
echo "[${process_id}] number of max partitions per wide table: ${NUM_OF_WIDE_TABLE_PARTITIONS[${wLen}-1]}"
echo "[${process_id}] number of permissions: ${NUM_OF_PERMISSIONS}"
echo "[${process_id}] number of groups: ${NUM_OF_GROUPS}"
# Random string as prefix for test databases and tables
prefix_string=$(cat /dev/urandom | tr -dc 'a-z' | fold -w 4 | head -n 1)
prefix_string=${prefix_string}$(date +%s | cut -c1-4)
DB_NAME=${prefix_string}_db
function validate_ret () {
ret=$1
if [[ ${ret} != "" && ${ret} -ne 0 ]]; then
echo "ERROR!! when running query in bulk mode"
exit $ret
fi
}
function get_group () {
count=$1
group_name=group_$((count % NUM_OF_GROUPS))
echo "$group_name"
}
# Create groups
function create_groups () {
for g in $(seq ${NUM_OF_GROUPS}); do
group_name=$(get_group $g)
getent passwd ${group_name} | grep "${group_name}" 1>&2>/dev/null
if [[ $? -ne 0 ]]; then
sudo groupadd ${group_name}
sudo useradd -g ${group_name} ${group_name}
fi
done
}
# Convenience function to create one table with many external partitons
function create_wide_table () {
db_name=$1
tbl_name=$2
num_of_pars=$3
file_name=$4
dir_file_name=$5
echo "-- [${process_id}] Create ${tbl_name} in ${db_name} with ${num_of_pars} external partitions; " >> ${file_name}
echo "CREATE DATABASE IF NOT EXISTS ${db_name}; " >> ${file_name}
echo "USE ${db_name};" >> ${file_name}
table_dir=${BASE_EXTERNAL_DIR}/${db_name}/${tbl_name}
echo "sudo -u hdfs hdfs dfs -rm -R -skipTrash ${table_dir} 2>/dev/null" >> ${dir_file_name}
echo "DROP TABLE IF EXISTS ${tbl_name}; " >> ${file_name}
echo "CREATE TABLE ${tbl_name} (s STRING, i INT) PARTITIONED BY (par INT);" >> ${file_name}
echo "-- create ${num_of_pars} partitions on table ${tbl_name}" >> ${file_name}
for p in $(seq ${num_of_pars}); do
dir=${table_dir}/$p
echo "sudo -u hdfs hdfs dfs -mkdir -p ${dir}" >> ${dir_file_name}
echo "ALTER TABLE ${tbl_name} ADD PARTITION (par=$p) LOCATION '${dir}';" >> ${file_name}
done
}
# Convenience function to create wide tables with many external partitions
function create_external_par_dirs_bulk_file () {
file_name=$1
dir_file_name=$2
echo "-- [${process_id}] Start bulk process to create wide tables" > ${file_name}
echo "# [${process_id}] Start to create external dirs for partitions" > ${dir_file_name}
db_id=$(awk -v n="${NUM_OF_DATABASES}" 'BEGIN{srand();print int(rand()*n+1)}')
db_name=${DB_NAME}_${db_id}
for p in "${!NUM_OF_WIDE_TABLE_PARTITIONS[@]}"; do
tbl_name=${db_name}_wide_tbl_$p
create_wide_table ${db_name} ${tbl_name} ${NUM_OF_WIDE_TABLE_PARTITIONS[p]} ${file_name} ${dir_file_name}
done
chmod a+x ${file_name}
chmod a+x ${dir_file_name}
}
# Create internal databases and their tables in one bulk file
function create_dbs_tbls_bulk_file () {
file_name=$1
echo "-- [${process_id}] start bulk load " > ${file_name}
for d in $(seq ${NUM_OF_DATABASES}); do
db_name=${DB_NAME}_${d}
echo "drop database if exists ${db_name}; " >> ${file_name}
echo "create database ${db_name}; " >> ${file_name}
echo "use ${db_name};" >> ${file_name}
NUM_OF_COLS=$(awk -v mn="${MIN_NUM_OF_PARTITIONS_PER_TABLE}" -v mx="${MAX_NUM_OF_PARTITIONS_PER_TABLE}" 'BEGIN{srand();print int(rand()*(mx-mn)+1)}')
NUM_OF_PARS=$(awk -v mn="${MIN_NUM_OF_PARTITIONS_PER_TABLE}" -v mx="${MAX_NUM_OF_PARTITIONS_PER_TABLE}" 'BEGIN{srand();print int(rand()*(mx-mn)+1)}')
for t in $(seq ${NUM_OF_TABLES_PER_DATABASE}); do
tbl_name=${db_name}_tbl_${t}
# create table
echo "create table ${tbl_name} (col_start INT, " >> ${file_name}
for c in $(seq ${NUM_OF_COLS}); do
echo "col_${c} STRING, " >> ${file_name}
done
echo "col_end INT) partitioned by (par_start STRING, " >> ${file_name}
# create many partitions
for p in $(seq ${NUM_OF_PARS}); do
echo "par_${p} INT, " >> ${file_name}
done
echo "par_end STRING); " >> ${file_name}
done
done
chmod a+x ${file_name}
}
# Create database roles
function create_dbs_roles () {
db_file_name=$1
total_db_permissions=0
echo "-- [${process_id}] Start to create database roles" > ${db_file_name}
for d in $(seq ${NUM_OF_ROLES_FOR_DATABASES}); do
db_name=${DB_NAME}_${d}
role_name=${db_name}_db_role_${d}
group_name=$(get_group $d)
echo "create role ${role_name}; " >> ${db_file_name}
echo "grant all on database ${db_name} to role ${role_name}; " >> ${db_file_name}
echo "grant ${role_name} to group ${group_name};" >> ${db_file_name}
done
chmod a+x ${db_file_name}
}
# Create table roles
function create_tbls_roles () {
tbl_file_name=$1
echo "-- [${process_id}] Start to create table roles;" > ${tbl_file_name}
# create table roles
for d in $(seq ${NUM_OF_DATABASES}); do
db_name=${DB_NAME}_${d}
echo "USE ${db_name};" >> ${tbl_file_name}
for t in $(seq ${NUM_OF_ROLES_FOR_TABLES_PER_DATABASE}); do
tbl_name=${db_name}_tbl_${t}
role_name=${tbl_name}_role_${t}
echo "CREATE ROLE ${role_name};" >> ${tbl_file_name}
rand_number=$(awk 'BEGIN{srand();print int(rand()*3)}')
case "$((rand_number % 3))" in
0) echo "grant all on table ${tbl_name} to role ${role_name}; " >> ${tbl_file_name}
;;
1) echo "grant insert on table ${tbl_name} to role ${role_name}; " >> ${tbl_file_name}
;;
*) echo "grant select on table ${tbl_name} to role ${role_name}; " >> ${tbl_file_name}
;;
esac
group_name=$(get_group $d)
echo "grant role ${role_name} to group ${group_name}; " >> ${tbl_file_name}
done
done
chmod a+x ${tbl_file_name}
}
###########################
# Start from here!
###########################
create_groups
echo "# [${process_id}] Created ${NUM_OF_GROUPS} groups"
# Use Hive to create the partitions because it supports bulk adding of partitions.
# Hive doesn't allow fully qualified table names in ALTER statements, so start with a
# USE <db>.
create_tables_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_tables.q
create_dbs_tbls_bulk_file ${create_tables_file_name}
echo "# [${process_id}] Created ${create_tables_file_name} to create databases and tables in bulk mode"
create_wide_tables_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_wide_tables.q
create_wide_tables_dir_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_wide_tables_dirs.sh
create_external_par_dirs_bulk_file ${create_wide_tables_file_name} ${create_wide_tables_dir_file_name}
echo "# [${process_id}] Created ${create_wide_tables_file_name} to create wide tables with external partitions in bulk mode"
echo "# [${process_id}] Created ${create_wide_tables_dir_file_name} to create external dirs for external partitions in bulk mode"
create_db_role_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_db_roles.q
create_dbs_roles ${create_db_role_file_name}
echo "# [${process_id}] Created ${create_db_role_file_name} to create database roles"
create_tbl_role_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_tbl_roles.q
create_tbls_roles ${create_tbl_role_file_name}
echo "# [${process_id}] Created ${create_tbl_role_file_name} to create table roles"
sudo -u hive hive -S -f ${create_tables_file_name}
validate_ret $?
echo "# [${process_id}] Succeessfully ran bulk file ${create_tables_file_name} to create databases and tables"
. ${create_wide_tables_dir_file_name}
echo "# [${process_id}] Successfully ran ${create_wide_tables_dir_file_name} to create dirs for external partitions"
sudo -u hive hive -S -f ${create_wide_tables_file_name}
validate_ret $?
echo "# [${process_id}] Successfully ran bulk file ${create_wide_tables_file_name} to create wide tables with external partitions"
sudo -u hive ${BL} -f ${create_db_role_file_name} 1>/dev/null # to remove white lines after execution
validate_ret $?
echo "# [${process_id}] Successfully created database level roles and privileges"
sudo -u hive ${BL} -f ${create_tbl_role_file_name} 1>/dev/null # to remove white lines after execution
validate_ret $?
echo "# [${process_id}] Successfully created table level roles and privileges"
res_file=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}.res
echo "-- [${process_id}] List all databases and roles in ${res_file}" > ${res_file}
sudo -u hive ${BL} -e "show databases" 2>/dev/null 1>>${res_file}
sudo -u hive ${BL} -e "show roles" 2>/dev/null 1>>${res_file}
echo "[${process_id}] Successfully listed all databases and roles in ${res_file}"