#!/bin/bash
# @@@ START COPYRIGHT @@@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# @@@ END COPYRIGHT @@@
#
# This script will configure HBase with HBase-trx
# and co-processors needed for Trafodion. It uses
# Cloudera Manager's REST API to do this.
#
# NOTE: Only for Cloudera installations
TRAF_CONFIG=/etc/trafodion/trafodion_config
source $TRAF_CONFIG
export PDSH="pdsh -R exec"
export PDSH_SSH_CMD="ssh -q -n %h"
export PDCP="pdcp -R ssh"
export PDSH_HADOOP_NODES="$PDSH $MY_HBASE_NODES $PDSH_SSH_CMD"
export PDCP_HADOOP_NODES="$PDCP $MY_HBASE_NODES"
export HDFS_NODE=$(echo $HDFS_NODES | head -n1 | awk '{print $1;}')
export HBASE_NODE=$(echo $HBASE_NODES | head -n1 | awk '{print $1;}')
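# For illustration only (assuming MY_HBASE_NODES holds a pdsh node list such as
# "-w node1,node2", as set in $TRAF_CONFIG), the wrappers above expand to commands like:
#   pdsh -R exec -w node1,node2 ssh -q -n %h <command>   # run <command> on every HBase node
#   pdcp -R ssh -w node1,node2 <src> <dest>              # copy <src> to every HBase node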
#=====================================
# obtain and cache a Kerberos ticket for the hdfs user if secure Hadoop is enabled
if [[ $SECURE_HADOOP == "Y" ]]; then
REALM=${ADMIN_PRINCIPAL#*"@"}
HDFS_PRINCIPAL=$(sudo klist -kte $HDFS_KEYTAB |grep $REALM | grep 'hdfs/' | awk '{print $4}' | uniq)
MESSAGE="***INFO: obtain and cache the Kerberos ticket-granting ticket for hdfs user"
MESSAGE=$MESSAGE" using keytab $HDFS_KEYTAB and principal $HDFS_PRINCIPAL"
echo $MESSAGE
sudo -u $HDFS_USER kinit -kt $HDFS_KEYTAB $HDFS_PRINCIPAL
if [ $? != 0 ]; then
echo "***ERROR: failed to obtain and cache the Kerberos ticket-granting ticket for hdfs user"
exit -1
fi
fi
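# Optional manual check (not executed by this script): the cached ticket for the
# hdfs user can be inspected with
#   sudo -u $HDFS_USER klist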
#=====================================
# copy Trafodion trx jar to Cloudera's plugins directory on all nodes
cd $UNTAR_DIR
if [[ $CDH_5_3_HDP_2_2_SUPPORT == "N" ]]; then
if [[ $cdhVersion == "5.4" ]]; then
hbase_trx_jar="hbase-trx-cdh5_4-*.jar"
elif [[ $cdhVersion == "5.5" ]]; then
hbase_trx_jar="hbase-trx-cdh5_5-*.jar"
elif [[ $cdhVersion == "5.7" ]]; then
hbase_trx_jar="hbase-trx-cdh5_7-*.jar"
fi
else
hbase_trx_jar="hbase-trx-cdh5_3-*.jar"
fi
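# Note: the patterns above are globs; for example, on CDH 5.4 $hbase_trx_jar would match
# a file named like hbase-trx-cdh5_4-<version>.jar under $UNTAR_DIR/export/lib
# (the exact version string depends on the Trafodion build).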
traf_util_jar="trafodion-utility-*.jar"
# The permissions the Trafodion build process creates on the hbase-trx jar
# files do not work well with the installation process, so we change them
sudo chmod -R 777 $UNTAR_DIR/export/lib
if [ ! -f $UNTAR_DIR/export/lib/$hbase_trx_jar ]; then
echo "***ERROR: unable to find $UNTAR_DIR/export/lib/$hbase_trx_jar"
exit -1
fi
# if more than one node then copy to all nodes
echo "***INFO: copying $hbase_trx_jar to all nodes"
if [ $node_count -ne 1 ]; then
$PDSH_HADOOP_NODES sudo rm -rf $HADOOP_PATH/hbase-trx* 2>/dev/null
$PDSH_HADOOP_NODES sudo rm -rf /usr/lib/hbase/lib/hbase-trx* 2>/dev/null
$PDSH_HADOOP_NODES sudo rm -rf /usr/share/cmf/lib/plugins/hbase-trx* 2>/dev/null
$PDSH_HADOOP_NODES sudo rm -rf /usr/lib/hbase/lib/trafodion* 2>/dev/null
$PDSH_HADOOP_NODES sudo rm -rf /usr/share/cmf/lib/plugins/trafodion* 2>/dev/null
$TRAF_PDSH mkdir -p $LOCAL_WORKDIR 2>/dev/null
$PDSH_HADOOP_NODES mkdir -p $LOCAL_WORKDIR 2>/dev/null
cp $UNTAR_DIR/export/lib/$hbase_trx_jar $LOCAL_WORKDIR
cp $UNTAR_DIR/export/lib/$traf_util_jar $LOCAL_WORKDIR
$PDCP_HADOOP_NODES $LOCAL_WORKDIR/$hbase_trx_jar $LOCAL_WORKDIR
$PDCP_HADOOP_NODES $LOCAL_WORKDIR/$traf_util_jar $LOCAL_WORKDIR
$PDSH_HADOOP_NODES sudo cp $LOCAL_WORKDIR/$hbase_trx_jar $HADOOP_PATH
$PDSH_HADOOP_NODES sudo cp $LOCAL_WORKDIR/$traf_util_jar $HADOOP_PATH
$PDSH_HADOOP_NODES sudo chmod 644 $HADOOP_PATH/$hbase_trx_jar
$PDSH_HADOOP_NODES sudo chmod 644 $HADOOP_PATH/$traf_util_jar
$PDSH_HADOOP_NODES rm $LOCAL_WORKDIR/$hbase_trx_jar 2>/dev/null
$PDSH_HADOOP_NODES rm $LOCAL_WORKDIR/$traf_util_jar 2>/dev/null
else
for node in $HBASE_NODES
do
ssh -q -n $node sudo rm -rf /usr/lib/hbase/lib/hbase-trx* 2>/dev/null
ssh -q -n $node sudo rm -rf /usr/share/cmf/lib/plugins/hbase-trx* 2>/dev/null
ssh -q -n $node sudo rm -rf /usr/lib/hbase/lib/trafodion* 2>/dev/null
ssh -q -n $node sudo rm -rf /usr/share/cmf/lib/plugins/trafodion* 2>/dev/null
ssh -q -n $node sudo rm -rf $HADOOP_PATH/hbase-trx* 2>/dev/null
ssh -q -n $node sudo mkdir -p $TRAF_WORKDIR 2>/dev/null
ssh -q -n $node sudo chmod 777 $TRAF_WORKDIR
scp -q $UNTAR_DIR/export/lib/$hbase_trx_jar $(whoami)@$node:$TRAF_WORKDIR
scp -q $UNTAR_DIR/export/lib/$traf_util_jar $(whoami)@$node:$TRAF_WORKDIR
ssh -q -n $node sudo cp $TRAF_WORKDIR/$hbase_trx_jar $HADOOP_PATH
ssh -q -n $node sudo cp $TRAF_WORKDIR/$traf_util_jar $HADOOP_PATH
ssh -q -n $node sudo chmod 644 $HADOOP_PATH/$hbase_trx_jar
ssh -q -n $node sudo chmod 644 $HADOOP_PATH/$traf_util_jar
done
fi
#====================================
# Make sure the hbase-trx jar was copied to every node
for node in $HBASE_NODES
do
copiedOver=$(ssh -q -n $node sudo ls $HADOOP_PATH/hbase-trx* | wc -l)
if [[ $copiedOver -ne "1" ]]; then
echo "***ERROR: $hbase_trx_jar was not copied on $node"
echo "***ERROR: Please investigate why this happened"
echo "***ERROR: Trafodion can not start without this. EXITING..."
exit -1
fi
done
echo "***INFO: $hbase_trx_jar copied correctly! Huzzah."
#=====================================
# create new directories for bulkload and lobs if not already there
rm $LOCAL_WORKDIR/traf_temp_output 2>/dev/null
# Copy the hbase-site.xml file
ssh -q -n $HBASE_NODE sudo cp /etc/hbase/conf/hbase-site.xml $HOME
ssh -q -n $HBASE_NODE sudo chown "$(whoami)"."$(whoami)" "$HOME"/hbase-site.xml
ssh -q -n $HBASE_NODE sudo chmod 777 $HOME/hbase-site.xml
scp -q $(whoami)@$HBASE_NODE:$HOME/hbase-site.xml $HOME
if [[ $? -gt 1 ]]; then
echo "***ERROR: Unable to find /etc/hbase/conf/hbase-site.xml file on $HBASE_NODE or unable to copy."
exit -1
fi
sudo cp $HOME/hbase-site.xml $TRAF_WORKDIR
sudo chown trafodion.trafodion $TRAF_WORKDIR/hbase-site.xml
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hdfs dfs -mkdir -p /user/'"$TRAF_USER"'"'
if [ $? != 0 ]; then
echo "***ERROR: '$HADOOP_BIN_PATH/hdfs dfs -mkdir -p /user/$TRAF_USER' command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hdfs dfs -chown -R '"$TRAF_USER"' /user/'"$TRAF_USER"'"'
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hadoop fs -mkdir -p /hbase-staging"'
if [ $? != 0 ]; then
echo "***ERROR: '$HADOOP_BIN_PATH/hadoop fs -mkdir -p /hbase-staging' command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hadoop fs -chown -R '"$HBASE_USER"':'"$HBASE_GROUP"' /hbase-staging"'
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hadoop fs -mkdir -p /bulkload"'
if [ $? != 0 ]; then
echo "***ERROR: '$HADOOP_BIN_PATH/hadoop fs -mkdir -p /bulkload' command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hadoop fs -chown -R '"$TRAF_USER"':trafodion /bulkload"'
# Create lobs directory
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "'"$HADOOP_BIN_PATH"'/hadoop fs -mkdir -p /lobs"'
if [ $? != 0 ]; then
echo "***ERROR: '$HADOOP_BIN_PATH/hadoop fs -mkdir -p /lobs' command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "'"$HADOOP_BIN_PATH"'/hadoop fs -chown -R '"$TRAF_USER"':trafodion /lobs"'
# Create backup directory
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hadoop fs -mkdir -p /trafodion_backups"'
if [ $? != 0 ]; then
echo "***ERROR: '$HADOOP_BIN_PATH/hadoop fs -mkdir -p /trafodion_backups' command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "'"$HADOOP_BIN_PATH"'/hadoop fs -chown -R '"$TRAF_USER"':trafodion /trafodion_backups"'
ssh -q -n $HDFS_NODE 'sudo su' "$HDFS_USER" '--command "' "$HADOOP_BIN_PATH"'/hadoop fs -chmod 777 /trafodion_backups"'
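# Optional manual check (not executed by this script), run on $HDFS_NODE as the hdfs user:
#   sudo -u $HDFS_USER $HADOOP_BIN_PATH/hdfs dfs -ls /
# /user/$TRAF_USER, /hbase-staging, /bulkload, /lobs and /trafodion_backups should show
# the ownership set above.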
#=====================================
# Modify hadoop settings as needed by Trafodion
rm $HOME/traf_hdfs1_config_temp 2> /dev/null
rm $HOME/traf_hbase_config_temp 2> /dev/null
# change the hdfs configuration using Cloudera's REST API
curl -k -X PUT -H 'Content-Type:application/json' -u $ADMIN:$PASSWORD --data \
'{ "items": [ {
"name":"dfs_namenode_acls_enabled",
"value":"true"
}, {
"name":"dfs_umaskmode",
"value":"0002"
} ]
}' \
$URL/api/v1/clusters/$CLUSTER_NAME/services/$HDFS/config > $LOCAL_WORKDIR/traf_hdfs_config_temp
if [ $? != 0 ]; then
echo "***ERROR: Unable to modify HDFS configuration through Cloudera's REST API."
echo "***ERROR: Check if Cloudera URL is correct, may need to enter external IP address."
echo "***ERROR: Check if iptables/firewall is configured correctly and ports a
re enabled."
echo "***ERROR: Check that HDFS is running without error."
exit -1
fi
# in most cases curl does not return an error
# so curl's actual output needs to be checked, too
curl_error=$(grep TITLE $LOCAL_WORKDIR/traf_hdfs_config_temp | grep Error | wc -l)
if [ $curl_error -ne 0 ]; then
echo "***ERROR: Unable to modify hdfs configuration through Cloudera's REST API."
echo "***ERROR: Check if Cloudera URL is correct, may need to enter external IP address
."
echo "***ERROR: Check if iptables/firewall is configured correctly and ports a
re enabled." 2>&1
echo "***ERROR: Check that HDFS is running without error."
exit -1
fi
rm $LOCAL_WORKDIR/traf_hdfs_config_temp 2> /dev/null
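# Optional manual check (not executed by this script): the HDFS service config can be read
# back through the same API to confirm the change, e.g.
#   curl -k -u $ADMIN:$PASSWORD $URL/api/v1/clusters/$CLUSTER_NAME/services/$HDFS/config
# dfs_namenode_acls_enabled and dfs_umaskmode should appear with the values set above.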
# change the hbase configuration using Cloudera Manager's REST API
# NOTE: hbase.regionserver.lease.period is used as it is equivalent to
# hbase.client.scanner.timeout.period and Cloudera only allows
# hbase.regionserver.lease.period to be set through the REST API.
curl -k -X PUT -H 'Content-Type:application/json' -u $ADMIN:$PASSWORD --data \
'{ "roleTypeConfigs" : [ {
"roleType" : "MASTER",
"items" : [ {
"name" : "hbase_master_config_safety_valve",
"value" : "<property>\r\n <name>hbase.snapshot.master.timeoutMillis</name>\r\n <value>600000</value>\r\n</property>\r\n"
} ]
}, {
"roleType" : "REGIONSERVER",
"items" : [ {
"name" : "hbase_coprocessor_region_classes",
"value" : "org.apache.hadoop.hbase.coprocessor.transactional.TrxRegionObserver,org.apache.hadoop.hbase.coprocessor.transactional.TrxRegionEndpoint,org.apache.hadoop.hbase.coprocessor.AggregateImplementation"
}, {
"name" : "hbase_regionserver_lease_period",
"value" : "3600000"
}, {
"name" : "hbase_hregion_memstore_flush_size",
"value" : "536870912"
}, {
"name" : "hbase_hregion_memstore_block_multiplier",
"value" : "7"
}, {
"name" : "hbase_hstore_blockingStoreFiles",
"value" : "200"
}, {
"name" : "hbase_regionserver_config_safety_valve",
"value" : "<property>\r\n <name>hbase.hregion.impl</name>\r\n <value>org.apache.hadoop.hbase.regionserver.transactional.TransactionalRegion</value>\r\n</property>\r\n <property>\r\n <name>hbase.regionserver.region.split.policy</name>\r\n <value>org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy</value>\r\n</property>\r\n <property>\r\n <name>hbase.snapshot.enabled</name>\r\n <value>true</value>\r\n</property>\r\n <property>\r\n <name>hbase.snapshot.region.timeout</name>\r\n <value>600000</value>\r\n</property>\r\n "
} ]
} ]
}' \
$URL/api/v1/clusters/$CLUSTER_NAME/services/$HBASE/config > $LOCAL_WORKDIR/traf_hbase_config_temp
if [ $? != 0 ]; then
echo "***ERROR: Unable to modify HBase configuration through Cloudera's REST API."
echo "***ERROR: Check that HBase is running without error."
exit -1
fi
# in most cases curl does not return an error
# so curl's actual output needs to be checked, too
curl_error=$(grep TITLE $LOCAL_WORKDIR/traf_hbase_config_temp | grep Error | wc -l)
if [ $curl_error -ne 0 ]; then
echo "***ERROR: Unable to modify HBase configuration through Cloudera's REST API."
echo "***ERROR: Check that HBase is running without error."
cat $LOCAL_WORKDIR/traf_hbase_config_temp
exit -1
fi
curl_error=$(grep message $LOCAL_WORKDIR/traf_hbase_config_temp | wc -l)
if [ $curl_error -ne 0 ]; then
echo "***ERROR: Unable to modify HBase configuration through Cloudera's REST API."
echo "***ERROR: Check that HBase is running without error."
cat $LOCAL_WORKDIR/traf_hbase_config_temp
exit -1
fi
rm $LOCAL_WORKDIR/traf_hbase_config_temp 2> /dev/null
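# Optional manual check (not executed by this script): the HBase service config can be read
# back the same way, e.g.
#   curl -k -u $ADMIN:$PASSWORD $URL/api/v1/clusters/$CLUSTER_NAME/services/$HBASE/config
# The coprocessor list and safety-valve entries set above should be present in the output.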
# Change zookeeper config using Cloudera REST API
curl -k -X PUT -H 'Content-Type:application/json' -u $ADMIN:$PASSWORD --data \
'{ "roleTypeConfigs" : [ {
"roleType" : "SERVER",
"items": [ {
"name" : "maxClientCnxns",
"value" : "0"
} ]
} ]
}' \
$URL/api/v1/clusters/$CLUSTER_NAME/services/$ZOOKEEPER/config > $LOCAL_WORKDIR/traf_zookeeper_config_temp
# in most cases curl does not return an error
# so curl's actual output needs to be checked, too
curl_error=$(grep TITLE $LOCAL_WORKDIR/traf_zookeeper_config_temp | grep Error | wc -l)
if [ $curl_error -ne 0 ]; then
echo "***ERROR: Unable to modify Zookeeper configuration through Cloudera's REST API."
echo "***ERROR: Check that Zookeeper is running without error."
cat $LOCAL_WORKDIR/traf_zookeeper_config_temp
exit -1
fi
curl_error=$(grep message $LOCAL_WORKDIR/traf_zookeeper_config_temp | wc -l)
if [ $curl_error -ne 0 ]; then
echo "***ERROR: Unable to modify Zookeeper configuration through Cloudera's REST API."
echo "***ERROR: Check that Zookeeper is running without error."
cat $LOCAL_WORKDIR/traf_zookeeper_config_temp
exit -1
fi
rm $LOCAL_WORKDIR/traf_zookeeper_config_temp 2> /dev/null
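# Optional manual check (not executed by this script; assumes the default client port 2181
# and that nc is installed): the effective setting can be read from a ZooKeeper server
# after the restart below with
#   echo conf | nc <zookeeper-host> 2181 | grep maxClientCnxns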
#=====================================
# restart Cloudera to pick up all the changes just made
poll_time=30
echo "***INFO: restarting Hadoop to pickup Trafodion transaction jar"
echo "***INFO: ...polling every $poll_time seconds until restart is completed."
restart_info=$(curl -k -X POST -u $ADMIN:$PASSWORD \
$URL/api/v1/clusters/$CLUSTER_NAME/commands/restart)
echo $restart_info
command_id=$(echo $restart_info | grep id | awk '{print $4}' | sed -e 's@,@@' )
# poll until restart is completed as a restart can take a while
active=1
while [ $active -ne 0 ]; do
sleep $poll_time
curl -k -u $ADMIN:$PASSWORD \
$URL/api/v1/commands/$command_id \
> $LOCAL_WORKDIR/hbase_restart_status_temp
cat $LOCAL_WORKDIR/hbase_restart_status_temp
echo "***INFO: ...polling every $poll_time seconds until restart is completed."
# if restart command is still active, then active will not equal 0
active=$(cat $LOCAL_WORKDIR/hbase_restart_status_temp | grep '"active" : true' | wc -l)
done
# make sure restart completed successfully
failures=$(cat $LOCAL_WORKDIR/hbase_restart_status_temp | grep '"success" : false' | wc -l)
if [ $failures -ne 0 ]; then
echo "***ERROR: Unable to restart Hadoop."
exit -1
fi
echo "***INFO: Hadoop restart completed successfully"
# wait to make sure HDFS is fully restarted and out of safemode
echo "***INFO: waiting for HDFS to exit safemode"
ssh -q -n $HDFS_NODE 'sudo su hdfs --command "' "$HADOOP_BIN_PATH"'/hdfs dfsadmin -safemode wait"'
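# Optional manual check (not executed by this script), run on $HDFS_NODE:
#   sudo -u hdfs $HADOOP_BIN_PATH/hdfs dfsadmin -safemode get
# should report "Safe mode is OFF" once HDFS is fully back up.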
#====================================================
# NOTE: These commands must be run AFTER ACLs are
# enabled and HDFS has been restarted
echo "***INFO: Setting HDFS ACLs for snapshot scan support"
ssh -q -n $HDFS_NODE 'sudo su hdfs --command "' "$HADOOP_BIN_PATH"'/hdfs dfs -mkdir -p /hbase/archive"'
errorFound=$?
if [ $errorFound != 0 ]; then
echo "ERROR Code: $errorFound"
echo "***ERROR: ($HADOOP_BIN_PATH/hdfs dfs -mkdir -p /hbase/archive) command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su hdfs --command "' "$HADOOP_BIN_PATH"'/hdfs dfs -chown hbase:hbase /hbase/archive"'
errorFound=$?
if [ $errorFound != 0 ]; then
echo "ERROR Code: $errorFound"
echo "***ERROR: ($HADOOP_BIN_PATH/hdfs dfs -chown hbase:hbase /hbase/archive) command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su hdfs --command "' "$HADOOP_BIN_PATH"'/hdfs dfs -setfacl -R -m user:'"$TRAF_USER"':rwx /hbase/archive"'
errorFound=$?
if [ $errorFound != 0 ]; then
echo "ERROR Code: $errorFound"
echo "***ERROR: ($HADOOP_BIN_PATH/hdfs dfs -setfacl -R -m user:$TRAF_USER:rwx /hbase/archive) command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su hdfs --command "' "$HADOOP_BIN_PATH"'/hdfs dfs -setfacl -R -m default:user:'"$TRAF_USER"':rwx /hbase/archive"'
errorFound=$?
if [ $errorFound != 0 ]; then
echo "ERROR Code: $errorFound"
echo "***ERROR: ($HADOOP_BIN_PATH/hdfs dfs -setfacl -R -m default:user:$TRAF_USER:rwx /hbase/archive) command failed"
exit -1
fi
ssh -q -n $HDFS_NODE 'sudo su hdfs --command "' "$HADOOP_BIN_PATH"'/hdfs dfs -setfacl -R -m mask::rwx /hbase/archive"'
errorFound=$?
if [ $errorFound != 0 ]; then
echo "ERROR Code: $errorFound"
echo "***ERROR: ($HADOOP_BIN_PATH/hdfs dfs -setfacl -R -m mask::rwx /hbase/archive) command failed"
exit -1
fi
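# Optional manual check (not executed by this script), run on $HDFS_NODE:
#   sudo -u hdfs $HADOOP_BIN_PATH/hdfs dfs -getfacl /hbase/archive
# The user:$TRAF_USER:rwx entry, the matching default: entries, and the rwx mask
# set above should be listed.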
MODS_COMPLETE="Y"
sudo chmod 777 $TRAF_CONFIG
sed -i '/MODS_COMPLETE\=/d' $TRAF_CONFIG
echo "export MODS_COMPLETE=\"$MODS_COMPLETE\"" >> $TRAF_CONFIG
sudo chmod 777 $TRAF_CONFIG
source $TRAF_CONFIG
TRAF_CONFIG_FILE="trafodion_config"
TRAF_CONFIG_DIR="/etc/trafodion"
if [ $node_count -ne 1 ]; then
cp $TRAF_CONFIG $LOCAL_WORKDIR
$TRAF_PDCP $LOCAL_WORKDIR/$TRAF_CONFIG_FILE $HOME
$TRAF_PDSH sudo mkdir -p $TRAF_CONFIG_DIR
$TRAF_PDSH sudo cp -rf $HOME/$TRAF_CONFIG_FILE $TRAF_CONFIG_DIR
$TRAF_PDSH sudo chmod 777 $TRAF_CONFIG
fi