#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# This script bootstraps a system for Impala development from almost nothing; it is known
# to work on Ubuntu 16.04. It clobbers some local environment and system
# configurations, so it is best to run this in a fresh install. It also sets up the
# ~/.bashrc for the calling user and impala-config-local.sh with some environment
# variables to make Impala compile and run after this script is complete.
# When IMPALA_HOME is set, the script will bootstrap Impala development in the
# location specified.
#
# The intended user is a person who wants to start contributing code to Impala. This
# script serves as an executable reference point for how to get started.
#
# To run this in a Docker container:
#
#   1. Run with --privileged
#   2. Give the container a non-root sudoer with NOPASSWD:
#      apt-get update
#      apt-get install sudo
#      adduser --disabled-password --gecos '' impdev
#      echo 'impdev ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
#   3. Run this script as that user: su - impdev -c /bootstrap_development.sh

set -eu -o pipefail

# Ask for explicit confirmation when stdout is a terminal, because the rest of the
# script rewrites system-wide configuration. A case-insensitive "yes" continues, a
# case-insensitive "no" exits with status 1, and anything else prompts again. When
# stdout is not a terminal the prompt is skipped and DEBIAN_FRONTEND is exported as
# noninteractive instead.
if [[ -t 1 ]] # if on an interactive terminal
then
  echo "This script will clobber some system settings. Are you sure you want to"
  echo -n "continue? "
  while true
  do
    # -r keeps any backslash in the reply literal.
    read -r -p "[yes/no] " ANSWER
    # Upper-case with parameter expansion rather than piping through tr with
    # sed-style slashes, which are not part of tr's syntax.
    ANSWER=${ANSWER^^}
    if [[ $ANSWER = YES ]]
    then
      break
    elif [[ $ANSWER = NO ]]
    then
      echo "OK, Bye!"
      exit 1
    fi
  done
else
  export DEBIAN_FRONTEND=noninteractive
fi

set -x

# /etc/lsb-release is expected to define DISTRIB_ID and DISTRIB_RELEASE, which are
# checked below.
. /etc/lsb-release

# Refuse to run on anything other than Ubuntu 16.04.
if [[ $DISTRIB_ID != Ubuntu ]]; then
  echo "This script only supports Ubuntu" >&2
  exit 1
fi

if [[ $DISTRIB_RELEASE != 16.04 ]]; then
  echo "This script only supports 16.04" >&2
  exit 1
fi

# Path of the real apt-get binary. "type -P" searches PATH only, so it can never
# resolve to the wrapper function of the same name defined below.
REAL_APT_GET=$(type -P apt-get)

#######################################
# Retrying wrapper that shadows apt-get for the rest of the script.
# Runs the real apt-get through "sudo -E" up to 20 times, sleeping N seconds after
# the Nth failed attempt (no sleep after the last one).
# Globals:   REAL_APT_GET (read)
# Arguments: passed through unchanged to the real apt-get
# Outputs:   "ATTEMPT: N" before every try, "NO MORE RETRIES" on final failure
# Returns:   0 as soon as one attempt succeeds, 1 if every attempt failed
#######################################
function apt-get {
  local -r max_attempts=20
  # Keep the counter local so calling the wrapper does not leak a global variable.
  local attempt
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    echo "ATTEMPT: ${attempt}"
    if sudo -E "${REAL_APT_GET}" "$@"
    then
      return 0
    fi
    # Back off before the next try; waiting after the final attempt is pointless.
    if ((attempt < max_attempts))
    then
      sleep "${attempt}"
    fi
  done
  echo "NO MORE RETRIES"
  return 1
}

printf '%s\n' ">>> Installing packages"

# Refresh the package index, then install the base packages one invocation at a time,
# apt-utils first.
apt-get update
for PACKAGE in apt-utils git
do
  apt-get --yes install "$PACKAGE"
done

echo ">>> Checking out Impala"

# Default the checkout location to ~/Impala unless the caller already provided a
# non-empty IMPALA_HOME. A plain assignment expands the tilde without leaving an
# unquoted expansion on a command line.
if [[ -z "${IMPALA_HOME:-}" ]]
then
  IMPALA_HOME=~/Impala
fi

# If there is no Impala git repo, get one now
if ! [[ -d "$IMPALA_HOME" ]]
then
  time -p git clone https://git-wip-us.apache.org/repos/asf/impala.git "$IMPALA_HOME"
fi
cd "$IMPALA_HOME"

# Record the resolved directory for future shells (~/.bashrc) and export it for the rest
# of this script. %q shell-quotes the path so the generated line is still parsed as a
# single assignment when the path contains spaces or other special characters.
printf -v SET_IMPALA_HOME 'export IMPALA_HOME=%q' "$(pwd)"
echo "$SET_IMPALA_HOME" >> ~/.bashrc
eval "$SET_IMPALA_HOME"

echo ">>> Installing build tools"
# ant is listed explicitly because this script later runs "ant package" to build
# hadoop-lzo; do not rely on it arriving as a dependency of another package.
apt-get --yes install ant ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
        krb5-admin-server krb5-kdc krb5-user libsasl2-dev libsasl2-modules \
        libsasl2-modules-gssapi-mit libssl-dev make maven ninja-build ntp \
        ntpdate python-dev python-setuptools postgresql ssh wget vim-common psmisc \
        lsof openjdk-8-jdk openjdk-8-source openjdk-8-dbg

# Start the ssh service unless "service --status-all" already lists it with a "[ + ]"
# marker.
if ! { service --status-all | grep -E '^ \[ \+ \]  ssh$'; }
then
  sudo service ssh start
fi

# TODO: config ccache to give it plenty of space
# TODO: check that there is enough space on disk to do a build and data load
# TODO: make this work with non-bash shells

# Point JAVA_HOME at the OpenJDK 8 directory: the export line is appended to
# impala-config-local.sh and then evaluated so it also applies to this shell.
SET_JAVA_HOME='export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64'
printf '%s\n' "$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
eval "$SET_JAVA_HOME"

printf '%s\n' ">>> Configuring system"

# Stop ntpd, then set the clock once with ntpdate.
sudo service ntp stop
sudo ntpdate us.pool.ntp.org

# If on EC2, use Amazon's ntp servers. EC2 is detected by looking for "amazon" in the
# BIOS version reported by dmidecode, when dmidecode is available.
if which dmidecode
then
  if sudo dmidecode -s bios-version | grep amazon
  then
    sudo sed -i 's/ubuntu\.pool/amazon\.pool/' /etc/ntp.conf
    # Print the amazon and ubuntu lines now present in the file.
    grep amazon /etc/ntp.conf
    grep ubuntu /etc/ntp.conf
  fi
fi

# While it is nice to have ntpd running to keep the clock in sync, that does not work in a
# --privileged docker container, and a non-privileged container cannot run ntpdate, which
# is strictly needed by Kudu.
# TODO: Make privileged docker start ntpd
# A failed start is tolerated only when /proc/1/cgroup mentions docker.
if ! sudo service ntp start
then
  grep docker /proc/1/cgroup
fi

# IMPALA-3932, IMPALA-3926
# On 16.04, put /usr/lib/x86_64-linux-gnu/ in front of any existing LD_LIBRARY_PATH,
# both in impala-config-local.sh and in this shell. The export line is recorded and
# evaluated inside the conditional so that SET_LD_LIBRARY_PATH is never read while
# unset, which would abort the script under "set -u".
if [[ $DISTRIB_RELEASE = 16.04 ]]
then
  # Single quotes are deliberate: ${LD_LIBRARY_PATH...} must be expanded when the line
  # is evaluated, not when it is written.
  SET_LD_LIBRARY_PATH='export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}'
  echo "$SET_LD_LIBRARY_PATH" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
  eval "$SET_LD_LIBRARY_PATH"
fi

# Rewrite every "local all all peer" rule to "local all all trust" in the pg_hba.conf
# of each PostgreSQL version directory under /etc/postgresql, then restart/reload the
# service.
# TODO: What are the security implications of this?
for PG_AUTH_FILE in /etc/postgresql/*/main/pg_hba.conf
do
  # Quoted so the path is always passed to sed as a single argument.
  sudo sed -ri 's/local +all +all +peer/local all all trust/g' "$PG_AUTH_FILE"
done
sudo service postgresql restart
sudo /etc/init.d/postgresql reload
sudo service postgresql restart

# Set up postgres for HMS: create the "hiveuser" login role unless pg_roles already
# reports exactly one such role, then grant it CREATEDB and print the resulting row.
if [[ 1 != $(sudo -u postgres psql -At \
    -c "SELECT count(*) FROM pg_roles WHERE rolname = 'hiveuser';") ]]
then
  sudo -u postgres psql -c "CREATE ROLE hiveuser LOGIN PASSWORD 'password';"
fi
sudo -u postgres psql -c "ALTER ROLE hiveuser WITH CREATEDB;"
sudo -u postgres psql -c "SELECT * FROM pg_roles WHERE rolname = 'hiveuser';"

# Setup ssh to ssh to localhost
mkdir -p ~/.ssh
chmod go-rwx ~/.ssh
# Generate a passphrase-less RSA key only if there is none yet.
if ! [[ -f ~/.ssh/id_rsa ]]
then
  ssh-keygen -t rsa -N '' -q -f ~/.ssh/id_rsa
fi
# Authorize the key and disable host authentication for localhost. Each line is only
# appended when it is not already present, so re-running the script does not pile up
# duplicate entries. grep's stderr is silenced because the target file may not exist yet.
if ! grep -qxFf ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys 2>/dev/null
then
  cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
fi
if ! grep -qxF "NoHostAuthenticationForLocalhost yes" ~/.ssh/config 2>/dev/null
then
  echo "NoHostAuthenticationForLocalhost yes" >> ~/.ssh/config
fi
# Smoke test: this must succeed without prompting.
ssh localhost whoami

# Workarounds for HDFS networking issues
# Map the short and full hostnames to 127.0.0.1.
echo "127.0.0.1 $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
# In Docker, one can change /etc/hosts as above but not with sed -i. The error message is
# "sed: cannot rename /etc/sedc3gPj8: Device or resource busy". The following lines are
# basically sed -i but with cp instead of mv for the -i part.
NEW_HOSTS=$(mktemp)
# The dots are escaped so that only the literal address 127.0.1.1 is rewritten; an
# unescaped "." would match any character.
sed 's/127\.0\.1\.1/127.0.0.1/g' /etc/hosts > "${NEW_HOSTS}"
# diff exits non-zero when the files differ; that is expected here, not an error.
diff -u /etc/hosts "${NEW_HOSTS}" || true
sudo cp "${NEW_HOSTS}" /etc/hosts
rm "${NEW_HOSTS}"

# Create /var/lib/hadoop-hdfs and make the current user its owner.
sudo mkdir -p /var/lib/hadoop-hdfs
sudo chown "$(whoami)" /var/lib/hadoop-hdfs/

# Append a nofile limit of 1048576 for all users ("*") to limits.conf.
# TODO: restrict this to only the users it is needed for
sudo tee -a /etc/security/limits.conf <<< "* - nofile 1048576"

# LZO is not needed to compile or run Impala, but it is needed for the data load
printf '%s\n' ">>> Checking out Impala-lzo"
# Both LZO checkouts default to sibling directories of IMPALA_HOME and are cloned only
# when the target directory does not exist yet.
: "${IMPALA_LZO_HOME:=${IMPALA_HOME}/../Impala-lzo}"
[[ -d "$IMPALA_LZO_HOME" ]] \
  || git clone https://github.com/cloudera/impala-lzo.git "$IMPALA_LZO_HOME"

printf '%s\n' ">>> Checking out and building hadoop-lzo"

: "${HADOOP_LZO_HOME:=${IMPALA_HOME}/../hadoop-lzo}"
[[ -d "$HADOOP_LZO_HOME" ]] \
  || git clone https://github.com/cloudera/hadoop-lzo.git "$HADOOP_LZO_HOME"

# Build hadoop-lzo in its own directory, then return to the Impala checkout.
cd "$HADOOP_LZO_HOME"
time -p ant package
cd "$IMPALA_HOME"
