| #!/usr/bin/env bash |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| set -e |
| function usage { |
| echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable" |
| echo "" |
| echo " --zookeeper-data /path/to/use Where the embedded zookeeper instance should write its data." |
| echo " defaults to 'zk-data' in the working-dir." |
| echo " --working-dir /path/to/use Path for writing configs and logs. must exist." |
| echo " defaults to making a directory via mktemp." |
| echo " --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar classpath for hadoop jars." |
| echo " defaults to 'hadoop classpath'" |
| echo " --hbase-client-install /path/to/unpacked/client/tarball if given we'll look here for hbase client jars instead of the bin-install" |
| echo " --force-data-clean Delete all data in HDFS and ZK prior to starting up hbase" |
| echo " --single-process Run as single process instead of pseudo-distributed" |
| echo "" |
| exit 1 |
| } |
# if fewer than the five required args are given, show usage
| if [ $# -lt 5 ]; then |
| usage |
| fi |
| |
| # Get arguments |
| declare component_install |
| declare hadoop_exec |
| declare working_dir |
| declare zk_data_dir |
| declare clean |
| declare distributed="true" |
| declare hadoop_jars |
| declare hbase_client |
| while [ $# -gt 0 ] |
| do |
| case "$1" in |
| --working-dir) shift; working_dir=$1; shift;; |
| --force-data-clean) shift; clean="true";; |
| --zookeeper-data) shift; zk_data_dir=$1; shift;; |
| --single-process) shift; distributed="false";; |
| --hadoop-client-classpath) shift; hadoop_jars="$1"; shift;; |
| --hbase-client-install) shift; hbase_client="$1"; shift;; |
| --) shift; break;; |
| -*) usage ;; |
| *) break;; # terminate while loop |
| esac |
| done |
| |
# after option parsing we should still have the five required positional args.
| if [ $# -lt 5 ]; then |
| usage |
| fi |
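# Normalize the positional args to absolute paths; the cd/dirname + basename
# combination resolves each (possibly relative) path against the current directory.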
| component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")" |
| hadoop_exec="$(cd "$(dirname "$2")"; pwd)/$(basename "$2")" |
| yarn_server_tests_test_jar="$(cd "$(dirname "$3")"; pwd)/$(basename "$3")" |
| mapred_jobclient_test_jar="$(cd "$(dirname "$4")"; pwd)/$(basename "$4")" |
| mapred_exec="$(cd "$(dirname "$5")"; pwd)/$(basename "$5")" |
| |
| if [ ! -x "${hadoop_exec}" ]; then |
| echo "hadoop cli does not appear to be executable." >&2 |
| exit 1 |
| fi |
| |
| if [ ! -x "${mapred_exec}" ]; then |
| echo "mapred cli does not appear to be executable." >&2 |
| exit 1 |
| fi |
| |
| if [ ! -d "${component_install}" ]; then |
| echo "Path to HBase binary install should be a directory." >&2 |
| exit 1 |
| fi |
| |
| if [ ! -f "${yarn_server_tests_test_jar}" ]; then |
| echo "Specified YARN server tests test jar is not a file." >&2 |
| exit 1 |
| fi |
| |
| if [ ! -f "${mapred_jobclient_test_jar}" ]; then |
| echo "Specified MapReduce jobclient test jar is not a file." >&2 |
| exit 1 |
| fi |
| |
| if [ -z "${working_dir}" ]; then |
| if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then |
| echo "Failed to create temporary working directory. Please specify via --working-dir" >&2 |
| exit 1 |
| fi |
| else |
| # absolutes please |
| working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")" |
| if [ ! -d "${working_dir}" ]; then |
| echo "passed working directory '${working_dir}' must already exist." >&2 |
| exit 1 |
| fi |
| fi |
| |
| if [ -z "${zk_data_dir}" ]; then |
| zk_data_dir="${working_dir}/zk-data" |
  mkdir -p "${zk_data_dir}"
| else |
| # absolutes please |
| zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")" |
| if [ ! -d "${zk_data_dir}" ]; then |
| echo "passed directory for unpacking the source tarball '${zk_data_dir}' must already exist." |
| exit 1 |
| fi |
| fi |
| |
| if [ -z "${hbase_client}" ]; then |
| hbase_client="${component_install}" |
| else |
| echo "Using HBase client-side artifact" |
| # absolutes please |
| hbase_client="$(cd "$(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")" |
| if [ ! -d "${hbase_client}" ]; then |
| echo "If given hbase client install should be a directory with contents of the client tarball." >&2 |
| exit 1 |
| fi |
| fi |
| |
| if [ -n "${hadoop_jars}" ]; then |
  declare -a tmp_jars
  for entry in $(echo "${hadoop_jars}" | tr ':' '\n'); do
    tmp_jars+=("$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
  done
| hadoop_jars="$(IFS=:; echo "${tmp_jars[*]}")" |
| fi |
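# e.g. a (hypothetical) --hadoop-client-classpath of 'lib/a.jar:../b.jar' given from
# /opt/hadoop becomes '/opt/hadoop/lib/a.jar:/opt/b.jar'.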
| |
| |
| echo "You'll find logs and temp files in ${working_dir}" |
| |
| function redirect_and_run { |
| log_base=$1 |
| shift |
| echo "$*" >"${log_base}.err" |
| "$@" >"${log_base}.out" 2>>"${log_base}.err" |
| } |
| |
| (cd "${working_dir}" |
| |
| echo "Hadoop version information:" |
| "${hadoop_exec}" version |
| hadoop_version=$("${hadoop_exec}" version | head -n 1) |
| hadoop_version="${hadoop_version#Hadoop }" |
| if [ "${hadoop_version%.*.*}" -gt 2 ]; then |
| "${hadoop_exec}" envvars |
| else |
| echo "JAVA_HOME: ${JAVA_HOME}" |
| fi |
| |
| # Ensure that if some other Hadoop install happens to be present in the environment we ignore it. |
| HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true" |
| export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP |
| |
| if [ -n "${clean}" ]; then |
| echo "Cleaning out ZooKeeper..." |
  rm -rf "${zk_data_dir:?}"/*
| fi |
| |
| echo "HBase version information:" |
| "${component_install}/bin/hbase" version 2>/dev/null |
hbase_version=$("${component_install}/bin/hbase" version 2>/dev/null | head -n 1)
| hbase_version="${hbase_version#HBase }" |
| |
| if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then |
| echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2 |
| exit 1 |
| fi |
| |
| if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then |
| echo "HBase binary install doesn't appear to include a shaded client artifact." >&2 |
| exit 1 |
| fi |
| |
| if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then |
| echo "HBase binary install doesn't appear to include a shaded client artifact." >&2 |
| exit 1 |
| fi |
| |
| echo "Writing out configuration for HBase." |
| rm -rf "${working_dir}/hbase-conf" |
| mkdir "${working_dir}/hbase-conf" |
| |
| if [ -f "${component_install}/conf/log4j.properties" ]; then |
| cp "${component_install}/conf/log4j.properties" "${working_dir}/hbase-conf/log4j.properties" |
| else |
| cat >"${working_dir}/hbase-conf/log4j.properties" <<EOF |
| # Define some default values that can be overridden by system properties |
| hbase.root.logger=INFO,console |
| |
| # Define the root logger to the system property "hbase.root.logger". |
log4j.rootLogger=\${hbase.root.logger}
| |
| # Logging Threshold |
| log4j.threshold=ALL |
| # console |
| log4j.appender.console=org.apache.log4j.ConsoleAppender |
| log4j.appender.console.target=System.err |
| log4j.appender.console.layout=org.apache.log4j.PatternLayout |
| log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n |
| EOF |
| fi |
| |
| cat >"${working_dir}/hbase-conf/hbase-site.xml" <<EOF |
| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| <!-- |
| /** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| --> |
| <configuration> |
| <property> |
| <name>hbase.rootdir</name> |
| <!-- We rely on the defaultFS being set in our hadoop confs --> |
| <value>/hbase</value> |
| </property> |
| <property> |
| <name>hbase.zookeeper.property.dataDir</name> |
| <value>${zk_data_dir}</value> |
| </property> |
| <property> |
| <name>hbase.cluster.distributed</name> |
| <value>${distributed}</value> |
| </property> |
| </configuration> |
| EOF |
| |
| if [ "true" = "${distributed}" ]; then |
| cat >"${working_dir}/hbase-conf/regionservers" <<EOF |
| localhost |
| EOF |
| fi |
| |
function cleanup {
  echo "Shutting down HBase"
| HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/stop-hbase.sh" |
| |
| if [ -f "${working_dir}/hadoop.pid" ]; then |
| echo "Shutdown: listing HDFS contents" |
| redirect_and_run "${working_dir}/hadoop_listing_at_end" \ |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R / |
| |
| echo "Shutting down Hadoop" |
| kill -6 "$(cat "${working_dir}/hadoop.pid")" |
| fi |
| } |
| |
| trap cleanup EXIT SIGQUIT |
| |
| echo "Starting up Hadoop" |
| |
| if [ "${hadoop_version%.*.*}" -gt 2 ]; then |
| "${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" & |
| else |
| HADOOP_CLASSPATH="${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" & |
| fi |
| |
| echo "$!" > "${working_dir}/hadoop.pid" |
| |
| sleep_time=2 |
| until [ -s "${working_dir}/hbase-conf/core-site.xml" ]; do |
| printf '\twaiting for Hadoop to finish starting up.\n' |
| sleep "${sleep_time}" |
| sleep_time="$((sleep_time*2))" |
| done |
| |
| if [ "${hadoop_version%.*.*}" -gt 2 ]; then |
| echo "Verifying configs" |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest |
| fi |
| |
| if [ -n "${clean}" ]; then |
| echo "Cleaning out HDFS..." |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/ |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data |
| fi |
| |
| echo "Listing HDFS contents" |
| redirect_and_run "${working_dir}/hadoop_cluster_smoke" \ |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R / |
| |
| echo "Starting up HBase" |
| HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/start-hbase.sh" |
| |
| sleep_time=2 |
| until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <<EOF |
| count 'hbase:meta' |
| EOF |
| do |
| printf '\tretry waiting for hbase to come up.\n' |
| sleep "${sleep_time}" |
| sleep_time="$((sleep_time*2))" |
| done |
| |
| echo "Setting up table 'test:example' with 1,000 regions" |
| "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/table_create.log" 2>&1 <<EOF |
| create_namespace 'test' |
| create 'test:example', 'family1', 'family2', {NUMREGIONS => 1000, SPLITALGO => 'UniformSplit'} |
| EOF |
| |
| echo "writing out example TSV to example.tsv" |
| cat >"${working_dir}/example.tsv" <<EOF |
| row1 value8 value8 |
| row3 value2 |
| row2 value9 |
| row10 value1 |
| pow1 value8 value8 |
| pow3 value2 |
| pow2 value9 |
| pow10 value1 |
| paw1 value8 value8 |
| paw3 value2 |
| paw2 value9 |
| paw10 value1 |
| raw1 value8 value8 |
| raw3 value2 |
| raw2 value9 |
| raw10 value1 |
| aow1 value8 value8 |
| aow3 value2 |
| aow2 value9 |
| aow10 value1 |
| aaw1 value8 value8 |
| aaw3 value2 |
| aaw2 value9 |
| aaw10 value1 |
| how1 value8 value8 |
| how3 value2 |
| how2 value9 |
| how10 value1 |
| zow1 value8 value8 |
| zow3 value2 |
| zow2 value9 |
| zow10 value1 |
| zaw1 value8 value8 |
| zaw3 value2 |
| zaw2 value9 |
| zaw10 value1 |
| haw1 value8 value8 |
| haw3 value2 |
| haw2 value9 |
| haw10 value1 |
| low1 value8 value8 |
| low3 value2 |
| low2 value9 |
| low10 value1 |
| law1 value8 value8 |
| law3 value2 |
| law2 value9 |
| law10 value1 |
| EOF |
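# 48 data rows in total (12 key prefixes x 4 rows each); the post-import row count
# check below relies on this number.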
| |
| echo "uploading example.tsv to HDFS" |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -mkdir example |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyFromLocal "${working_dir}/example.tsv" "example/" |
| |
| echo "Importing TSV via shaded client artifact for HBase - MapReduce integration." |
| # hbase_thirdparty_jars=("${component_install}"/lib/htrace-core4*.jar \ |
| # "${component_install}"/lib/slf4j-api-*.jar \ |
| # "${component_install}"/lib/commons-logging-*.jar \ |
| # "${component_install}"/lib/slf4j-log4j12-*.jar \ |
| # "${component_install}"/lib/log4j-1.2.*.jar \ |
| # "${working_dir}/hbase-conf/log4j.properties") |
| # hbase_dep_classpath=$(IFS=:; echo "${hbase_thirdparty_jars[*]}") |
| hbase_dep_classpath="$("${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" mapredcp)" |
| HADOOP_CLASSPATH="${hbase_dep_classpath}" redirect_and_run "${working_dir}/mr-importtsv" \ |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" jar "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" importtsv -Dimporttsv.columns=HBASE_ROW_KEY,family1:column1,family1:column4,family1:column3 test:example example/ -libjars "${hbase_dep_classpath}" |
| "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" <<EOF |
| scan 'test:example' |
| EOF |
| |
| echo "Verifying row count from import." |
| import_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1) |
| if [ ! "${import_rowcount}" -eq 48 ]; then |
| echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}." |
| exit 2 |
| fi |
| |
| if [ -z "${hadoop_jars}" ]; then |
| echo "Hadoop client jars not given; getting them from 'hadoop classpath' for the example." |
| hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf/" classpath) |
| fi |
| |
| echo "Building shaded client example." |
| cat >"${working_dir}/HBaseClientReadWriteExample.java" <<EOF |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.FSDataInputStream; |
| import org.apache.hadoop.fs.FSDataOutputStream; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hbase.Cell; |
| import org.apache.hadoop.hbase.CellBuilder; |
| import org.apache.hadoop.hbase.CellBuilderFactory; |
| import org.apache.hadoop.hbase.CellBuilderType; |
| import org.apache.hadoop.hbase.ClusterMetrics; |
| import org.apache.hadoop.hbase.HBaseConfiguration; |
| import org.apache.hadoop.hbase.RegionMetrics; |
| import org.apache.hadoop.hbase.ServerMetrics; |
| import org.apache.hadoop.hbase.TableName; |
| import org.apache.hadoop.hbase.client.Admin; |
| import org.apache.hadoop.hbase.client.Connection; |
| import org.apache.hadoop.hbase.client.ConnectionFactory; |
| import org.apache.hadoop.hbase.client.Put; |
| import org.apache.hadoop.hbase.client.Table; |
| import org.apache.hadoop.hbase.util.Bytes; |
| |
| import java.util.LinkedList; |
| import java.util.List; |
| |
| |
| public class HBaseClientReadWriteExample { |
| private static final byte[] FAMILY_BYTES = Bytes.toBytes("family2"); |
| |
| public static void main(String[] args) throws Exception { |
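    // Two configs on purpose: HBaseConfiguration.create() layers hbase-site.xml over the
    // Hadoop defaults for the HBase connection, while the plain Configuration drives
    // direct HDFS access through core-site.xml.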
| Configuration hbase = HBaseConfiguration.create(); |
| Configuration hadoop = new Configuration(); |
| try (Connection connection = ConnectionFactory.createConnection(hbase)) { |
| System.out.println("Generating list of regions"); |
| final List<String> regions = new LinkedList<>(); |
| try (Admin admin = connection.getAdmin()) { |
| final ClusterMetrics cluster = admin.getClusterMetrics(); |
| System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount())); |
| for (ServerMetrics server : cluster.getLiveServerMetrics().values()) { |
| for (RegionMetrics region : server.getRegionMetrics().values()) { |
| regions.add(region.getNameAsString()); |
| } |
| } |
| } |
| final Path listing = new Path("example-region-listing.data"); |
| System.out.println("Writing list to HDFS"); |
| try (FileSystem fs = FileSystem.newInstance(hadoop)) { |
| final Path path = fs.makeQualified(listing); |
| try (FSDataOutputStream out = fs.create(path)) { |
| out.writeInt(regions.size()); |
| for (String region : regions) { |
| out.writeUTF(region); |
| } |
| out.hsync(); |
| } |
| } |
| final List<Put> puts = new LinkedList<>(); |
| final Put marker = new Put(new byte[] { (byte)0 }); |
| System.out.println("Reading list from HDFS"); |
| try (FileSystem fs = FileSystem.newInstance(hadoop)) { |
| final Path path = fs.makeQualified(listing); |
| final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); |
| try (FSDataInputStream in = fs.open(path)) { |
| final int count = in.readInt(); |
| marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count)); |
| for(int i = 0; i < count; i++) { |
| builder.clear(); |
| final byte[] row = Bytes.toBytes(in.readUTF()); |
| final Put put = new Put(row); |
| builder.setRow(row); |
| builder.setFamily(FAMILY_BYTES); |
| builder.setType(Cell.Type.Put); |
| put.add(builder.build()); |
| puts.add(put); |
| } |
| } |
| } |
| System.out.println("Writing list into HBase table"); |
| try (Table table = connection.getTable(TableName.valueOf("test:example"))) { |
| table.put(marker); |
| table.put(puts); |
| } |
| } |
| } |
| } |
| EOF |
| redirect_and_run "${working_dir}/hbase-shaded-client-compile" \ |
| javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java" |
| echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table." |
| # The order of classpath entries here is important. if we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190. |
| redirect_and_run "${working_dir}/hbase-shaded-client-example" \ |
| java -cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample |
| |
| echo "Checking on results of example program." |
| "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data" |
| |
| "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" <<EOF |
| scan 'test:example' |
| EOF |
| |
| echo "Verifying row count from example." |
| example_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1) |
| if [ "${example_rowcount}" -gt "1050" ]; then |
| echo "Found ${example_rowcount} rows, which is enough to cover 48 for import, 1000 example's use of user table regions, 2 for example's use of meta/namespace regions, and 1 for example's count record" |
| else |
| echo "ERROR: Only found ${example_rowcount} rows." |
| fi |
| |
| ) |