#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
function log() {
    printf "hdfs_setup.sh:\n %s\n" "$1"
}
function safe_mkdir() {
    # takes the hdfs command (possibly multi-word, e.g. "sudo -u hdfs hdfs")
    # and a directory; checks for the directory before trying to create it,
    # which keeps the script from exiting on already existing folders
    local hdfs_cmd=$1
    local dir=$2
    if ${hdfs_cmd} dfs -test -d "${dir}"; then
        log "${dir} already exists"
    else
        log "running mkdir on ${dir}"
        ${hdfs_cmd} dfs -mkdir "${dir}"
    fi
}
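# Example call (illustrative path): safe_mkdir "sudo -u hdfs hdfs" /user/spot
# The command string is quoted at each call site so it arrives as a single argument.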
SPOTCONF="/etc/spot.conf"
DSOURCES=('flow' 'dns' 'proxy')
DFOLDERS=('binary'
    'stage'
    'hive'
    'hive/oa'
    'hive/oa/chords'
    'hive/oa/edge'
    'hive/oa/summary'
    'hive/oa/suspicious'
    'hive/oa/storyboard'
    'hive/oa/threat_investigation'
    'hive/oa/timeline'
    'hive/oa/dendro'
    'hive/oa/threat_dendro'
)
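# Combined with DSOURCES, these yield HDFS paths of the form
# ${HUSER}/<source>/<folder>, e.g. ${HUSER}/flow/hive/oa/summary.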
# input options
for arg in "$@"; do
    case $arg in
        "--no-sudo")
            log "not using sudo"
            no_sudo=true
            shift
            ;;
        "-c")
            shift
            SPOTCONF=$1
            log "Spot Configuration file: ${SPOTCONF}"
            shift
            ;;
        "-d")
            shift
            db_override=$1
            shift
            ;;
    esac
done
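# Example invocation (illustrative values):
#   ./hdfs_setup.sh --no-sudo -c /etc/spot.conf -d impala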
# Sourcing spot configuration variables
log "Sourcing ${SPOTCONF}"
source "${SPOTCONF}"
if [[ ${no_sudo} == "true" ]]; then
    hdfs_cmd="hdfs"
    if [[ -n "${HADOOP_USER_NAME}" ]]; then
        log "HADOOP_USER_NAME: ${HADOOP_USER_NAME}"
    else
        log "setting HADOOP_USER_NAME to hdfs"
        # exported so child hdfs processes actually pick it up
        export HADOOP_USER_NAME=hdfs
    fi
else
    hdfs_cmd="sudo -u hdfs hdfs"
fi
if [[ -z "${db_override}" ]]; then
    DBENGINE=$(echo "${DBENGINE}" | tr '[:upper:]' '[:lower:]')
    log "setting database engine to ${DBENGINE}"
else
    DBENGINE=$(echo "${db_override}" | tr '[:upper:]' '[:lower:]')
    log "setting database engine to ${db_override}"
fi
case ${DBENGINE} in
    impala)
        db_shell="impala-shell -i ${IMPALA_DEM}"
        if [[ ${KERBEROS} == "true" ]]; then
            db_shell="${db_shell} -k"
        fi
        db_query="${db_shell} -q"
        db_script="${db_shell} --var=huser=${HUSER} --var=dbname=${DBNAME} -c -f"
        ;;
    hive)
        db_shell="hive"
        db_query="${db_shell} -e"
        db_script="${db_shell} -hiveconf huser=${HUSER} -hiveconf dbname=${DBNAME} -f"
        ;;
    beeline)
        db_shell="beeline -u jdbc:${JDBC_URL}"
        db_query="${db_shell} -e"
        db_script="${db_shell} --hivevar huser=${HUSER} --hivevar dbname=${DBNAME} -f"
        ;;
    *)
        log "DBENGINE not compatible or not set in spot.conf: DBENGINE--> ${DBENGINE:-empty}"
        exit 1
        ;;
esac
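# For DBENGINE=impala, for instance, the variables above resolve to:
#   db_query  -> "impala-shell -i ${IMPALA_DEM} -q"            (plus " -k" with Kerberos)
#   db_script -> "impala-shell -i ${IMPALA_DEM} --var=huser=${HUSER} --var=dbname=${DBNAME} -c -f"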
# Creating HDFS user's folder
safe_mkdir "${hdfs_cmd}" "${HUSER}"
${hdfs_cmd} dfs -chown ${USER}:supergroup "${HUSER}"
${hdfs_cmd} dfs -chmod 775 "${HUSER}"
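# ${HUSER} ends up owned by the invoking user with group supergroup;
# mode 775 also gives that group write access under the Spot root.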
# Creating HDFS paths for each use case
for d in "${DSOURCES[@]}"
do
    log "creating ${HUSER}/$d"
    safe_mkdir "${hdfs_cmd}" "${HUSER}/$d"
    for f in "${DFOLDERS[@]}"
    do
        log "creating ${HUSER}/$d/$f"
        safe_mkdir "${hdfs_cmd}" "${HUSER}/$d/$f"
    done
    # Modifying permission on HDFS folders to allow Impala to read/write
    ${hdfs_cmd} dfs -chmod -R 775 ${HUSER}/$d
    # grant the database engine user rwx via ACLs; fall back to DBENGINE
    # when no -d override was given, so the ACL entry is never left empty
    ${hdfs_cmd} dfs -setfacl -R -m user:${db_override:-${DBENGINE}}:rwx ${HUSER}/$d
    ${hdfs_cmd} dfs -setfacl -R -m user:${USER}:rwx ${HUSER}/$d
done
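# The resulting permissions/ACLs can be inspected afterwards with, e.g.:
#   hdfs dfs -getfacl -R ${HUSER}/flow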
# Creating Spot Database
${db_query} "CREATE DATABASE IF NOT EXISTS ${DBNAME}"
# Creating tables
for d in "${DSOURCES[@]}"
do
    ${db_script} "./${DBENGINE}/create_${d}_parquet.hql"
done
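# The DDL scripts are expected relative to the working directory, one per
# data source, e.g. ./impala/create_flow_parquet.hql when DBENGINE=impala.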