BIGTOP-2325. Deployment recipes for HAWQ
diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
index de98502..cf8ffcb 100644
--- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
+++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
@@ -174,3 +174,9 @@
zeppelin::server::hiveserver2_url: "jdbc:hive2://%{hiera('hadoop-hive::common::hiveserver2_host')}:%{hiera('hadoop-hive::common::hiveserver2_port')}"
zeppelin::server::hiveserver2_user: "%{hiera('bigtop::hiveserver2_user')}"
zeppelin::server::hiveserver2_password: "%{hiera('bigtop::hiveserver2_password')}"
+
+# hawq (master defaults to the hadoop head node; override bigtop::hawq_master_node in site.yaml)
+bigtop::hawq_master_node: "%{hiera('bigtop::hadoop_head_node')}"
+bigtop::hawq_master_port: "5432"
+bigtop::hawq_master_datadir: "/var/run/hawq/work/masterdd"
+bigtop::hawq_segment_datadir: "/var/run/hawq/work/segmentdd"
diff --git a/bigtop-deploy/puppet/manifests/cluster.pp b/bigtop-deploy/puppet/manifests/cluster.pp
index a0be567..f80ef5a 100644
--- a/bigtop-deploy/puppet/manifests/cluster.pp
+++ b/bigtop-deploy/puppet/manifests/cluster.pp
@@ -105,6 +105,9 @@
zeppelin => {
master => ["zeppelin-server"],
},
+ hawq => {
+ master => ["hawq"],
+ },
}
class hadoop_cluster_node (
@@ -159,6 +162,7 @@
"hadoop_hive",
"hadoop_oozie",
"hadoop_pig",
+ "hawq",
"sqoop2",
"hadoop_zookeeper",
"hcatalog",
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf b/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf
new file mode 100644
index 0000000..11ae02f
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf
@@ -0,0 +1,58 @@
+[global]
+configfile_version = 4
+
+[linux.mount]
+mount.points = /
+
+[linux.sysctl]
+sysctl.kernel.shmmax = 500000000
+sysctl.kernel.shmmni = 4096
+sysctl.kernel.shmall = 4000000000
+sysctl.kernel.sem = 250 512000 100 2048
+sysctl.kernel.sysrq = 1
+sysctl.kernel.core_uses_pid = 1
+sysctl.kernel.msgmnb = 65536
+sysctl.kernel.msgmax = 65536
+sysctl.kernel.msgmni = 2048
+sysctl.net.ipv4.tcp_syncookies = 0
+sysctl.net.ipv4.ip_forward = 0
+sysctl.net.ipv4.conf.default.accept_source_route = 0
+sysctl.net.ipv4.tcp_tw_recycle = 1
+sysctl.net.ipv4.tcp_max_syn_backlog = 200000
+sysctl.net.ipv4.conf.all.arp_filter = 1
+sysctl.net.ipv4.ip_local_port_range = 1281 65535
+sysctl.net.core.netdev_max_backlog = 200000
+sysctl.vm.overcommit_memory = 2
+sysctl.fs.nr_open = 3000000
+sysctl.kernel.threads-max = 798720
+sysctl.kernel.pid_max = 798720
+# increase network
+sysctl.net.core.rmem_max = 2097152
+sysctl.net.core.wmem_max = 2097152
+
+[linux.limits]
+soft.nofile = 2900000
+hard.nofile = 2900000
+soft.nproc = 131072
+hard.nproc = 131072
+
+[linux.diskusage]
+diskusage.monitor.mounts = /
+diskusage.monitor.usagemax = 90%
+
+[hdfs]
+dfs.mem.namenode.heap = 40960
+dfs.mem.datanode.heap = 6144
+# in hdfs-site.xml
+dfs.support.append = true
+dfs.client.enable.read.from.local = true
+dfs.block.local-path-access.user = gpadmin
+dfs.datanode.max.transfer.threads = 40960
+dfs.client.socket-timeout = 300000000
+dfs.datanode.socket.write.timeout = 7200000
+dfs.namenode.handler.count = 60
+ipc.server.handler.queue.size = 3300
+dfs.datanode.handler.count = 60
+ipc.client.connection.maxidletime = 3600000
+dfs.namenode.accesstime.precision = -1
+
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml b/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml
new file mode 100644
index 0000000..713fa40
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<configuration>
+ <property>
+ <name>hawq_master_address_host</name>
+ <value><%= @hawq_head %></value>
+ <description>The host name of hawq master.</description>
+ </property>
+
+ <property>
+ <name>hawq_master_address_port</name>
+ <value><%= @hawq_head_port %></value>
+ <description>The port of hawq master.</description>
+ </property>
+
+ <property>
+ <name>hawq_standby_address_host</name>
+ <value>none</value>
+ <description>The host name of hawq standby master.</description>
+ </property>
+
+ <property>
+ <name>hawq_segment_address_port</name>
+ <value>40000</value>
+ <description>The port of hawq segment.</description>
+ </property>
+
+ <property>
+ <name>hawq_dfs_url</name>
+ <value><%= @hadoop_head_node %>:<%= @hadoop_namenode_port %>/hawq_default</value>
+ <description>URL for accessing HDFS.</description>
+ </property>
+
+ <property>
+ <name>hawq_master_directory</name>
+ <value><%= @hawq_masterdata_dir %></value>
+ <description>The directory of hawq master.</description>
+ </property>
+
+ <property>
+ <name>hawq_segment_directory</name>
+ <value><%= @hawq_segmentdata_dir %></value>
+ <description>The directory of hawq segment.</description>
+ </property>
+
+ <property>
+ <name>hawq_master_temp_directory</name>
+ <value>/tmp</value>
+ <description>The temporary directory reserved for hawq master.</description>
+ </property>
+
+ <property>
+ <name>hawq_segment_temp_directory</name>
+ <value>/tmp</value>
+ <description>The temporary directory reserved for hawq segment.</description>
+ </property>
+
+ <!-- HAWQ resource manager parameters -->
+ <property>
+ <name>hawq_global_rm_type</name>
+ <value>none</value>
+ <description>The resource manager type to start for allocating resource.
+ 'none' means hawq resource manager exclusively uses whole
+ cluster; 'yarn' means hawq resource manager contacts YARN
+ resource manager to negotiate resource.
+ </description>
+ </property>
+
+ <property>
+ <name>hawq_rm_memory_limit_perseg</name>
+ <value>64GB</value>
+ <description>The limit of memory usage in a hawq segment when
+ hawq_global_rm_type is set 'none'.
+ </description>
+ </property>
+
+ <property>
+ <name>hawq_rm_nvcore_limit_perseg</name>
+ <value>16</value>
+ <description>The limit of virtual core usage in a hawq segment when
+ hawq_global_rm_type is set 'none'.
+ </description>
+ </property>
+
+ <property>
+ <name>hawq_rm_yarn_address</name>
+ <value><%= @hawq_yarn_rm_host %>:<%= @hawq_yarn_rm_port %></value>
+ <description>The address of YARN resource manager server.</description>
+ </property>
+
+ <property>
+ <name>hawq_rm_yarn_scheduler_address</name>
+ <value>localhost:8030</value>
+ <description>The address of YARN scheduler server.</description>
+ </property>
+
+ <property>
+ <name>hawq_rm_yarn_queue_name</name>
+ <value>default</value>
+ <description>The YARN queue name to register hawq resource manager.</description>
+ </property>
+
+ <property>
+ <name>hawq_rm_yarn_app_name</name>
+ <value>hawq</value>
+ <description>The application name to register hawq resource manager in YARN.</description>
+ </property>
+ <!-- HAWQ resource manager parameters end here. -->
+
+ <!-- HAWQ resource enforcement parameters -->
+ <property>
+ <name>hawq_re_cpu_enable</name>
+ <value>false</value>
+ <description>The control to enable/disable CPU resource enforcement.</description>
+ </property>
+
+ <property>
+ <name>hawq_re_cgroup_mount_point</name>
+ <value>/sys/fs/cgroup</value>
+ <description>The mount point of CGroup file system for resource enforcement.
+ For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system.
+ </description>
+ </property>
+
+ <property>
+ <name>hawq_re_cgroup_hierarchy_name</name>
+ <value>hawq</value>
+ <description>The name of the hierarchy to accommodate CGroup directories/files for resource enforcement.
+ For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system.
+ </description>
+ </property>
+
+ <property>
+ <name>hawq_rm_nvseg_perquery_perseg_limit</name>
+ <value>8</value>
+ <description>This is something that init tries to push in</description>
+ </property>
+ <!-- HAWQ resource enforcement parameters end here. -->
+</configuration>
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hawq.default b/bigtop-deploy/puppet/modules/hawq/templates/hawq.default
new file mode 100644
index 0000000..8190260
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/hawq.default
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export HAWQ_HOME="/usr/lib/hawq"
+## Due to some weird scripting, hawq doesn't work without GPHOME
+export GPHOME=$HAWQ_HOME
+export HAWQ_CONF_DIR="/etc/hawq/conf"
+
+export HAWQ_PID_DIR="/var/run/hawq"
+export HAWQ_LOG_DIR="/var/log/hawq"
+export HAWQ_WORK_DIR="/var/run/hawq/work"
+export HAWQ_IDENT_STRING=hawq
+
+export PATH=$HAWQ_HOME/bin:$HAWQ_HOME/ext/python/bin:$PATH
+export LD_LIBRARY_PATH=$HAWQ_HOME/lib:$HAWQ_HOME/ext/python/lib:$LD_LIBRARY_PATH
+export PYTHONPATH=$HAWQ_HOME/lib/python:$HAWQ_HOME/lib/python/pygresql:$PYTHONPATH
+export OPENSSL_CONF=$HAWQ_CONF_DIR/openssl.cnf
+export LIBHDFS3_CONF=$HAWQ_CONF_DIR/hdfs-client.xml
+export LIBYARN_CONF=$HAWQ_CONF_DIR/yarn-client.xml
+export HAWQSITE_CONF=$HAWQ_CONF_DIR/hawq-site.xml
+
+export HAWQ_MASTER_PORT=5432
+export HAWQ_SEGMENT_PORT=40000
+export HAWQ_MASTERDATA_DIR=<%= @hawq_masterdata_dir %>
+export HAWQ_SEGMENTDATA_DIR=<%= @hawq_segmentdata_dir %>
+
+export HAWQ_TIMEOUT=30
+export HAWQ_SHUTDOWN_MODE=smart
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml b/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml
new file mode 100644
index 0000000..3f08696
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml
@@ -0,0 +1,331 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+<configuration>
+
+ <!-- KDC
+ <property>
+ <name>hadoop.security.authentication</name>
+ <value>kerberos</value>
+ </property>
+ KDC -->
+
+ <!-- HA
+ <property>
+ <name>dfs.nameservices</name>
+ <value>phdcluster</value>
+ </property>
+
+ <property>
+ <name>dfs.ha.namenodes.phdcluster</name>
+ <value>nn1,nn2</value>
+ </property>
+
+ <property>
+ <name>dfs.namenode.rpc-address.phdcluster.nn1</name>
+ <value>mdw:9000</value>
+ </property>
+
+ <property>
+ <name>dfs.namenode.rpc-address.phdcluster.nn2</name>
+ <value>smdw:9000</value>
+ </property>
+
+<property>
+<name>dfs.namenode.http-address.phdcluster.nn1</name>
+<value>mdw:50070</value>
+</property>
+
+<property>
+<name>dfs.namenode.http-address.phdcluster.nn2</name>
+<value>smdw:50070</value>
+</property>
+
+HA -->
+
+ <!-- RPC client configuration -->
+ <property>
+ <name>rpc.client.timeout</name>
+ <value>3600000</value>
+ <description>
+ timeout interval of a RPC invocation in millisecond. default is 3600000.
+ </description>
+ </property>
+ <property>
+ <name>rpc.client.connect.tcpnodelay</name>
+ <value>true</value>
+ <description>
+ whether set socket TCP_NODELAY to true when connect to RPC server. default is true.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.max.idle</name>
+ <value>10000</value>
+ <description>
+ the max idle time of a RPC connection in millisecond. default is 10000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.ping.interval</name>
+ <value>10000</value>
+ <description>
+ the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.connect.timeout</name>
+ <value>600000</value>
+ <description>
+ the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.connect.retry</name>
+ <value>10</value>
+ <description>
+ the max retry times if the RPC client fail to setup the connection to server. default is 10.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.read.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.write.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.socket.linger.timeout</name>
+ <value>-1</value>
+ <description>
+ set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1.
+ </description>
+ </property>
+
+ <!-- dfs client configuration -->
+ <property>
+ <name>dfs.client.read.shortcircuit</name>
+ <value>true</value>
+ <description>
+ whether reading block file bypass datanode if the block and the client are on the same node. default is true.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.default.replica</name>
+ <value>3</value>
+ <description>
+ the default number of replica. default is 3.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.prefetchsize</name>
+ <value>10</value>
+ <description>
+ the default number of blocks which information will be prefetched. default is 10.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.client.failover.max.attempts</name>
+ <value>15</value>
+ <description>
+ if multiple namenodes are configured, it is the max retry times when the dfs client tries to issue a RPC call. default is 15.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.default.blocksize</name>
+ <value>134217728</value>
+ <description>
+ default block size. default is 134217728.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.client.log.severity</name>
+ <value>INFO</value>
+ <description>
+ the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO.
+ </description>
+ </property>
+
+ <!-- input client configuration -->
+ <property>
+ <name>input.connect.timeout</name>
+ <value>600000</value>
+ <description>
+ the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000.
+ </description>
+ </property>
+
+ <property>
+ <name>input.read.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>input.write.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>input.localread.default.buffersize</name>
+ <value>2097152</value>
+ <description>
+ number of bytes of the buffer which is used to hold the data from block file and verify checksum.
+ it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576.
+ </description>
+ </property>
+
+ <property>
+ <name>input.localread.blockinfo.cachesize</name>
+ <value>1000</value>
+ <description>
+ the size of block file path information cache. default is 1000.
+ </description>
+ </property>
+
+ <property>
+ <name>input.read.getblockinfo.retry</name>
+ <value>3</value>
+ <description>
+ the max retry times when the client fail to get block information from namenode. default is 3.
+ </description>
+ </property>
+
+ <!-- output client configuration -->
+ <property>
+ <name>output.replace-datanode-on-failure</name>
+ <value>false</value>
+ <description>
+ whether the client add new datanode into pipeline if the number of nodes in pipeline is less than the specified number of replicas. default is true.
+ </description>
+ </property>
+
+ <property>
+ <name>output.default.chunksize</name>
+ <value>512</value>
+ <description>
+ the number of bytes of a chunk in pipeline. default is 512.
+ </description>
+ </property>
+
+ <property>
+ <name>output.default.packetsize</name>
+ <value>65536</value>
+ <description>
+ the number of bytes of a packet in pipeline. default is 65536.
+ </description>
+ </property>
+
+ <property>
+ <name>output.default.write.retry</name>
+ <value>10</value>
+ <description>
+ the max retry times when the client fail to setup the pipeline. default is 10.
+ </description>
+ </property>
+
+ <property>
+ <name>output.connect.timeout</name>
+ <value>600000</value>
+ <description>
+ the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000.
+ </description>
+ </property>
+
+ <property>
+ <name>output.read.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>output.write.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>output.packetpool.size</name>
+ <value>1024</value>
+ <description>
+ the max number of packets in a file's packet pool. default is 1024.
+ </description>
+ </property>
+
+ <property>
+ <name>output.close.timeout</name>
+ <value>900000</value>
+ <description>
+ the timeout interval in millisecond when close an output stream. default is 900000.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.domain.socket.path</name>
+ <value>/var/lib/hadoop-hdfs/dn_socket</value>
+ <description>
+ Optional. This is a path to a UNIX domain socket that will be used for
+ communication between the DataNode and local HDFS clients.
+ If the string "_PORT" is present in this path, it will be replaced by the
+ TCP port of the DataNode.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.client.use.legacy.blockreader.local</name>
+ <value>false</value>
+ <description>
+ Legacy short-circuit reader implementation based on HDFS-2246 is used
+ if this configuration parameter is true.
+ This is for the platforms other than Linux
+ where the new implementation based on HDFS-347 is not available.
+ </description>
+ </property>
+
+</configuration>
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf b/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf
new file mode 100644
index 0000000..2ac1598
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf
@@ -0,0 +1,24 @@
+kernel.shmmax = 1000000000
+kernel.shmmni = 4096
+kernel.shmall = 4000000000
+kernel.sem = 250 512000 100 2048
+kernel.sysrq = 1
+kernel.core_uses_pid = 1
+kernel.msgmnb = 65536
+kernel.msgmax = 65536
+kernel.msgmni = 2048
+net.ipv4.tcp_syncookies = 0
+net.ipv4.ip_forward = 0
+net.ipv4.conf.default.accept_source_route = 0
+net.ipv4.tcp_tw_recycle = 1
+net.ipv4.tcp_max_syn_backlog = 200000
+net.ipv4.conf.all.arp_filter = 1
+net.ipv4.ip_local_port_range = 1281 65535
+net.core.netdev_max_backlog = 200000
+vm.overcommit_memory = 2
+fs.nr_open = 3000000
+kernel.threads-max = 798720
+kernel.pid_max = 798720
+# increase network
+net.core.rmem_max=2097152
+net.core.wmem_max=2097152
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml b/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml
new file mode 100644
index 0000000..a5f9df7
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml
@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+<configuration>
+
+ <!-- KDC
+ <property>
+ <name>hadoop.security.authentication</name>
+ <value>kerberos</value>
+ </property>
+ KDC -->
+
+ <!-- HA
+ <property>
+ <name>yarn.resourcemanager.ha</name>
+ <value>%RESOURCEMANAGER%:8032,%RESOURCEMANAGER2%:8032</value>
+ </property>
+ <property>
+ <name>yarn.resourcemanager.scheduler.ha</name>
+ <value>%RESOURCEMANAGER%:8030,%RESOURCEMANAGER2%:8030</value>
+ </property>
+ HA -->
+
+ <!-- RPC client configuration -->
+ <property>
+ <name>rpc.client.timeout</name>
+ <value>3600000</value>
+ <description>
+ timeout interval of a RPC invocation in millisecond. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.connect.tcpnodelay</name>
+ <value>true</value>
+ <description>
+ whether set socket TCP_NODELAY to true when connect to RPC server. default is true.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.max.idle</name>
+ <value>10000</value>
+ <description>
+ the max idle time of a RPC connection in millisecond. default is 10000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.ping.interval</name>
+ <value>10000</value>
+ <description>
+ the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.connect.timeout</name>
+ <value>600000</value>
+ <description>
+ the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.connect.retry</name>
+ <value>10</value>
+ <description>
+ the max retry times if the RPC client fail to setup the connection to server. default is 10.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.read.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.write.timeout</name>
+ <value>3600000</value>
+ <description>
+ the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000.
+ </description>
+ </property>
+
+ <property>
+ <name>rpc.client.socket.linger.timeout</name>
+ <value>-1</value>
+ <description>
+ set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1.
+ </description>
+ </property>
+
+ <property>
+ <name>yarn.client.failover.max.attempts</name>
+ <value>15</value>
+ <description>
+ if multiple resource managers are configured, it is the max retry times when the yarn client tries to issue a RPC call. default is 15.
+ </description>
+ </property>
+</configuration>
diff --git a/bigtop-deploy/puppet/modules/hawq/tests/init.pp b/bigtop-deploy/puppet/modules/hawq/tests/init.pp
new file mode 100644
index 0000000..37a5a08
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/tests/init.pp
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+hawq::cluster_node { "test-hawq-node": }
diff --git a/bigtop-packages/src/common/hadoop/init-hcfs.json b/bigtop-packages/src/common/hadoop/init-hcfs.json
index f71f385..dea5f90 100644
--- a/bigtop-packages/src/common/hadoop/init-hcfs.json
+++ b/bigtop-packages/src/common/hadoop/init-hcfs.json
@@ -64,6 +64,7 @@
["/tmp/hadoop-yarn","777","mapred","mapred"],
["/var/log/hadoop-yarn/apps","1777","yarn","mapred"],
["/hbase",null,"hbase","hbase"],
+ ["/hawq_default","755","hawq","hawq"],
["/solr",null,"solr","solr"],
["/benchmarks","777",null,null],
["/user","755","HCFS_SUPER_USER",null],
diff --git a/bigtop-packages/src/common/hawq/hawq-master.svc b/bigtop-packages/src/common/hawq/hawq-master.svc
index 4fa5097..e5aca0d 100644
--- a/bigtop-packages/src/common/hawq/hawq-master.svc
+++ b/bigtop-packages/src/common/hawq/hawq-master.svc
@@ -34,7 +34,7 @@
exit 0
fi
- su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} start $SVC_OPTS"
+ su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} start master $SVC_OPTS"
checkstatusofproc
RETVAL=$?
@@ -51,7 +51,7 @@
stop() {
log_success_msg "Stopping $DESC (${DAEMON}): "
- su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} stop $SVC_OPTS"
+ su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} stop master $SVC_OPTS"
sleep 3
RETVAL=$?
diff --git a/bigtop-packages/src/common/hawq/hawq.default b/bigtop-packages/src/common/hawq/hawq.default
index f8cf6e1..574e845 100644
--- a/bigtop-packages/src/common/hawq/hawq.default
+++ b/bigtop-packages/src/common/hawq/hawq.default
@@ -21,6 +21,8 @@
export HAWQ_WORK_DIR="/var/run/hawq/work"
export HAWQ_IDENT_STRING=hawq
+export HAWQ_DATA_DIR=$HAWQ_WORK_DIR/hawq-data-directory
+
export PATH=$HAWQ_HOME/bin:$HAWQ_HOME/ext/python/bin:$PATH
export LD_LIBRARY_PATH=$HAWQ_HOME/lib:$HAWQ_HOME/ext/python/lib:$LD_LIBRARY_PATH
export PYTHONPATH=$HAWQ_HOME/lib/python:$HAWQ_HOME/lib/python/pygresql:$PYTHONPATH
@@ -28,3 +30,11 @@
export LIBHDFS3_CONF=$HAWQ_CONF_DIR/hdfs-client.xml
export LIBYARN_CONF=$HAWQ_CONF_DIR/yarn-client.xml
export HAWQSITE_CONF=$HAWQ_CONF_DIR/hawq-site.xml
+
+export HAWQ_MASTER_PORT=5432
+export HAWQ_SEGMENT_PORT=40000
+export HAWQ_MASTERDATA_DIR=$HAWQ_DATA_DIR/masterdd
+export HAWQ_SEGMENTDATA_DIR=$HAWQ_DATA_DIR/segmentdd
+
+export HAWQ_TIMEOUT=30
+export HAWQ_SHUTDOWN_MODE=smart
diff --git a/bigtop-packages/src/deb/hawq/rules b/bigtop-packages/src/deb/hawq/rules
index b4a3c9a..1e256a1 100755
--- a/bigtop-packages/src/deb/hawq/rules
+++ b/bigtop-packages/src/deb/hawq/rules
@@ -30,7 +30,7 @@
dh $@
override_dh_auto_build:
- bash debian/do-component-build
+ bash debian/do-component-build
override_dh_auto_install:
@@ -46,7 +46,7 @@
bash debian/init.d.tmpl debian/hawq-master.svc deb debian/${hawq_pkg_name}-master-service.init
bash debian/init.d.tmpl debian/hawq-segment.svc deb debian/${hawq_pkg_name}-segment-service.init
-## Let's override the auto_configure and auto_clean to make sure existing
+## Let's override the auto_configure and auto_clean to make sure existing
## top-level Makefile doesn't interfere with the package creation
override_dh_auto_configure:
diff --git a/bigtop.bom b/bigtop.bom
index f8a6878..5670546 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -90,7 +90,7 @@
zookeeper:['hadoop', 'hbase'],
hadoop:['ignite-hadoop', 'hbase', 'crunch', 'pig', 'hive', 'tez', 'sqoop', 'sqoop2',
'oozie', 'mahout', 'flume', 'giraph', 'solr', 'crunch', 'spark',
- 'phoenix', 'tachyon', 'kafka', 'ycsb', 'kite', 'hama', 'zeppelin',
+ 'phoenix', 'tachyon', 'kafka', 'ycsb', 'kite', 'hama', 'zeppelin', 'hawq',
'tajo', 'apex'
],
hbase:['phoenix','giraph','ycsb'],
diff --git a/bigtop_toolchain/manifests/libhdfs.pp b/bigtop_toolchain/manifests/libhdfs.pp
index 4698108..eef5dd9 100644
--- a/bigtop_toolchain/manifests/libhdfs.pp
+++ b/bigtop_toolchain/manifests/libhdfs.pp
@@ -18,8 +18,32 @@
include bigtop_toolchain::packages
-
case $operatingsystem {
+ /Ubuntu|Debian/: {
+ $hackrepourl = "https://bintray.com/artifact/download/wangzw/deb/dists/trusty/contrib/binary-amd64"
+ $libhdfs = "libhdfs3_2.2.31-1_amd64.deb"
+ $libhdfs_dev = "libhdfs3-dev_2.2.31-1_amd64.deb"
+
+ $bisonurl_dev = "http://launchpadlibrarian.net/140087283"
+ $bisonurl = "http://launchpadlibrarian.net/140087282"
+ $bison = "bison_2.7.1.dfsg-1_amd64.deb"
+ $bison_dev = "libbison-dev_2.7.1.dfsg-1_amd64.deb"
+
+ exec {"install":
+ path => "/usr/bin:/bin:/usr/sbin:/sbin",
+ cwd => "/usr/src",
+ command => "dpkg --install $libhdfs $libhdfs_dev $bison $bison_dev",
+ require => [ Exec[ 'download', 'download-bison' ], Package[ $packages::pkgs ] ],
+ }
+
+ exec {"download-bison":
+ path => "/usr/bin",
+ cwd => "/usr/src",
+ command => "curl -L $bisonurl/$bison -o $bison ; curl -L $bisonurl_dev/$bison_dev -o $bison_dev",
+ creates => ["/usr/src/$bison", "/usr/src/$bison_dev"], # one path per element; a comma-joined string never matches a file
+ }
+ }
+
/(?i:(centos|fedora|amazon))/: {
$apache_prefix = nearest_apache_mirror()
@@ -27,12 +51,6 @@
$libhdfs = "libhdfs3-2.2.31-1.el7.centos.x86_64.rpm"
$libhdfs_dev = "libhdfs3-devel-2.2.31-1.el7.centos.x86_64.rpm"
- exec {"download":
- path => "/usr/bin",
- cwd => "/usr/src",
- command => "curl -L $hackrepourl/$libhdfs -o $libhdfs ; curl -L $hackrepourl/$libhdfs_dev -o $libhdfs_dev",
- creates => "/usr/src/$libhdfs, /usr/src/$libhdfs_dev",
- }
exec {"install":
path => "/usr/bin",
@@ -42,4 +60,11 @@
}
}
}
+
+ exec {"download":
+ path => "/usr/bin",
+ cwd => "/usr/src",
+ command => "curl -L $hackrepourl/$libhdfs -o $libhdfs ; curl -L $hackrepourl/$libhdfs_dev -o $libhdfs_dev",
+ creates => ["/usr/src/$libhdfs", "/usr/src/$libhdfs_dev"], # one path per element; a comma-joined string never matches a file
+ }
}
diff --git a/bigtop_toolchain/manifests/packages.pp b/bigtop_toolchain/manifests/packages.pp
index d80a3ed..817c89d 100644
--- a/bigtop_toolchain/manifests/packages.pp
+++ b/bigtop_toolchain/manifests/packages.pp
@@ -14,7 +14,7 @@
# limitations under the License.
class bigtop_toolchain::packages {
- case $operatingsystem{
+ case $operatingsystem {
/(?i:(centos|fedora))/: {
# Fedora 20 and CentOS 7 or above are using mariadb, while CentOS 6 is still mysql
if ($operatingsystem == "CentOS") and ($operatingsystemmajrelease <=6) {