bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml - bigtop - Git at Google

 ---
 ### This file implements defaults and some dependent parameter defaulting logic.
 ### Every parameter can be overridden using the hiera lookup hierarchy. The enclosed
 ### hiera.yaml provides for this by adding a site.yaml to the lookup where
 ### site-specific overrides can be placed. Therefore this file should never need
 ### changing by site admins.

 # FQDN of Namenode
 #bigtop::hadoop_head_node: "hadoopmaster.example.com"
 # FQDN of standby node (enables HA if set)
 #bigtop::hadoop_standby_head_node: "standbyNN.example.com"
 # FQDN of gateway node (if separate from NN)
 #bigtop::hadoop_gateway_node: "gateway.example.com"

 # A list of the stack's components to be deployed can be specified via the special
 # "$components" list. If $components isn't set then everything in the stack will
 # be installed as usual. Otherwise only the specified components will be installed.
 # Possible elements:
 # hadoop,yarn,hbase,alluxio,flink,solrcloud,spark,oozie,hcat,sqoop,httpfs,
 # pig,hive,zookeeper,ycsb,qfs
 # Example (to deploy only HDFS and YARN server and gateway parts)
 # This can be a comma-separated list or an array.
 #hadoop_cluster_node::cluster_components:
 #  - hadoop
 #  - yarn

 # A list of roles can be assigned to each node in a cluster via "bigtop::roles"
 # hiera conf. This is useful if you want to run specific daemons/processes of a component
 # (lets say hadoop) on specific nodes. To enable roles, you need to explicitly set
 # "bigtop::roles_enabled" conf to true. By default, it is not enabled. If not enabled,
 # bigtop will honour the hadoop_cluster_node::cluster_components parameter and assume
 # respective roles to each node based on hadoop_head_node, hadoop_gateway_node
 # and other nodes (worker nodes) classification. For full list of roles
 # supported by each component, refer to $roles_map datastructure in cluster.pp file
 # for example:

 #bigtop::roles:
 # - namenode
 # - resourcemanager

 # By specifying the above roles, you instruct a node to run only namenode and resourcemanager.
 # When roles are disabled, all worker daemons for each component assume hadoop_head_node as
 # their master. With roles enabled, this assumption may no longer be valid and it is up to the user
 # to specify the FQDN of each master daemon to all nodes so that worker daemons can communicate
 # with their master. The FQDN and port for the master daemons of each component can be specified
 # using the configurations below.

 # Storage directories (will be created if they don't exist)
 #hadoop::hadoop_storage_dirs:
 #  - /data/1
 #  - /data/2
 #  - /data/3
 #  - /data/4

 #bigtop::bigtop_repo_uri: "http://mirror.example.com/path/to/mirror/"

 # Use a pre-installed java environment. The default value of 'false' will cause
 # the default jdk provided by bigtop to be installed. Setting
 # this to 'true' requires a compatible java environment to be available prior to Bigtop
 # installation.
 #bigtop::jdk_preinstalled: false

 # Test-only variable controls if user hdfs' sshkeys should be installed to allow
 # for passwordless login across the cluster. Required by some integration tests
 #hadoop::common_hdfs::testonly_hdfs_sshkeys: "no"

 # Default
 #hadoop::common_hdfs::ha: "disabled"

 # Kerberos
 #hadoop::hadoop_security_authentication: "kerberos"
 #kerberos::krb_site::domain: "bigtop.apache.org"
 #kerberos::krb_site::realm: "BIGTOP.APACHE.ORG"
 #kerberos::krb_site::kdc_server: "%{hiera('bigtop::hadoop_head_node')}"
 #kerberos::krb_site::kdc_port: "88"
 #kerberos::krb_site::admin_port: "749"
 #kerberos::krb_site::keytab_export_dir: "/var/lib/bigtop_keytabs"

 # Kerberos realm for the core Hadoop services; it is looked up from
 # kerberos::krb_site::realm.
 # applies to hdfs, yarn, mapred, kms and httpfs
 hadoop::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

 # The namenode host defaults to the cluster head node.
 hadoop::common_hdfs::hadoop_namenode_host: "%{hiera('bigtop::hadoop_head_node')}"
 # actually default but needed for hadoop_namenode_uri here
 hadoop::common_hdfs::hadoop_namenode_port: "8020"

 # If there is a datanode/network failure in the write pipeline, DFSClient will try
 # to remove the failed datanode from the pipeline and then continue writing with the
 # remaining datanodes. As a result, the number of datanodes in the pipeline is decreased.
 # The feature is to add new datanodes to the pipeline. This is a site-wide property to
 # enable/disable the feature. When the cluster size is extremely small, e.g. 3 nodes or
 # less, cluster administrators may want to set the policy to NEVER in the default
 # configuration file or disable this feature. Otherwise, users may experience an unusually
 # high rate of pipeline failures since it is impossible to find new datanodes for replacement.
 # See also dfs.client.block.write.replace-datanode-on-failure.policy
 # hadoop::common_hdfs::hdfs_replace_datanode_on_failure: "false"

 # Set as shown below in site.yaml to also enable Kerberos authentication
 # on the web GUIs of journalnode, namenode, datanode, resourcemanager and
 # nodemanager when you enable Kerberos for Hadoop API communication. This
 # intentionally is not the default right now.
 #hadoop::common_hdfs::hadoop_http_authentication_type: "%{hiera('hadoop::hadoop_security_authentication')}"
 #
 # A secret is necessary for the cross-service-cross-node session cookie.
 # Provide this by setting the following to something long and secret:
 #hadoop::common_hdfs::hadoop_http_authentication_signature_secret:

 # ... or have trocla generate secrets for Hadoop components automatically. For
 # this to work, the trocla puppet module must be installed.
 #hadoop::generate_secrets: true

 # YARN and MapReduce master hosts; each one defaults to the cluster head node.
 hadoop::common_yarn::hadoop_ps_host: "%{hiera('bigtop::hadoop_head_node')}"
 hadoop::common_yarn::hadoop_rm_host: "%{hiera('bigtop::hadoop_head_node')}"
 hadoop::common_yarn::hadoop_rm_port: "8032"

 hadoop::common_mapred_app::jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}"
 hadoop::common_mapred_app::mapreduce_jobhistory_host: "%{hiera('bigtop::hadoop_head_node')}"
 # actually default but needed for hadoop::common_yarn::yarn_log_server_url here
 bigtop::hadoop_history_server_port: "19888"
 # History server URL, assembled from the job history host and port above.
 bigtop::hadoop_history_server_url: "http://%{hiera('hadoop::common_mapred_app::mapreduce_jobhistory_host')}:%{hiera('bigtop::hadoop_history_server_port')}"
 # The YARN log server URL is the history server URL plus "/jobhistory/logs".
 hadoop::common_yarn::yarn_log_server_url: "%{hiera('bigtop::hadoop_history_server_url')}/jobhistory/logs"

 # HttpFS port.
 hadoop::httpfs::hadoop_httpfs_port: "14000"

 # The KMS host defaults to the cluster head node.
 hadoop::kms_host: "%{hiera('bigtop::hadoop_head_node')}"
 hadoop::kms_port: "9600"

 bigtop::hadoop_zookeeper_port: "2181"
 # "host:port" string built from the head node and bigtop::hadoop_zookeeper_port.
 hadoop::zk: "%{hiera('bigtop::hadoop_head_node')}:%{hiera('bigtop::hadoop_zookeeper_port')}"

 # HDFS URI of the namenode, assembled from the namenode host and port.
 bigtop::hadoop_namenode_uri: "hdfs://%{hiera('hadoop::common_hdfs::hadoop_namenode_host')}:%{hiera('hadoop::common_hdfs::hadoop_namenode_port')}"
 hadoop_hbase::base_relative_rootdir: "/hbase"
 # HBase root directory: the namenode URI followed by the relative root dir.
 hadoop_hbase::common_config::rootdir: "%{hiera('bigtop::hadoop_namenode_uri')}%{hiera('hadoop_hbase::base_relative_rootdir')}"
 # The HBase ZooKeeper quorum defaults to the cluster head node.
 hadoop_hbase::common_config::zookeeper_quorum: "%{hiera('bigtop::hadoop_head_node')}"
 # Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm
 # so that the one documented realm setting applies to HBase as well.
 hadoop_hbase::common_config::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
 hadoop_hbase::client::thrift: true
 hadoop_hbase::deploy::auxiliary: true

 # Solr server: the root URL is the namenode URI and the ZooKeeper address is
 # taken from hadoop::zk.
 solr::server::root_url: "%{hiera('bigtop::hadoop_namenode_uri')}"
 solr::server::zk: "%{hiera('hadoop::zk')}"
 # Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm.
 solr::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
 # Default but needed here to make sure, hue uses the same port
 solr::server::port: "8983"

 # Oozie and HCatalog/WebHCat take their Kerberos realm from the same key
 # (kerberos::krb_site::realm) as hadoop::kerberos_realm.
 hadoop_oozie::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

 hcatalog::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
 hcatalog::webhcat::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

 # spark: the master host defaults to the cluster head node
 spark::common::master_host: "%{hiera('bigtop::hadoop_head_node')}"
 # to enable spark HA, ensure zookeeper is available and uncomment the line below
 #spark::common::zookeeper_connection_string: "%{hiera('hadoop::zk')}"

 # alluxio: the master host defaults to the cluster head node
 alluxio::common::master_host: "%{hiera('bigtop::hadoop_head_node')}"

 # qfs: the metaserver host defaults to the cluster head node
 qfs::common::metaserver_host: "%{hiera('bigtop::hadoop_head_node')}"
 # NOTE(review): metaserver_port and chunkserver_port share the default 30000;
 # confirm this is intended when both run on the same host.
 qfs::common::metaserver_port: "30000"
 qfs::common::chunkserver_port: "30000"
 qfs::common::metaserver_client_port: "20000"
 qfs::common::chunkserver_client_port: "22000"

 # Default ZooKeeper ensemble: a single server with id "0" on the head node.
 hadoop_zookeeper::server::myid: "0"
 # Each entry is a [server id, "host:port:port"] pair.
 hadoop_zookeeper::server::ensemble:
   - ["0", "%{hiera('bigtop::hadoop_head_node')}:2888:3888"]
 # Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm.
 hadoop_zookeeper::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

 # those are only here because they were present as extlookup keys previously
 # (note that the RM http port and the RM proxy port share the default 8088)
 bigtop::hadoop_rm_http_port: "8088"
 bigtop::hadoop_rm_proxy_port: "8088"
 bigtop::hbase_thrift_port: "9090"
 bigtop::hadoop_oozie_port: "11000"

 # Hive reuses the HBase ZooKeeper quorum setting.
 hadoop_hive::common_config::hbase_zookeeper_quorum: "%{hiera('hadoop_hbase::common_config::zookeeper_quorum')}"
 # Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm.
 hadoop_hive::common_config::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
 # The metastore is addressed via thrift on the head node, port 9083.
 hadoop_hive::common_config::metastore_uris: "thrift://%{hiera('bigtop::hadoop_head_node')}:9083"
 # set this to true in production to avoid potential metastore corruption
 hadoop_hive::common_config::metastore_schema_verification: false

 # tez: default configuration directory and jar location
 hadoop::common::tez_conf_dir: "/etc/tez/conf"
 hadoop::common::tez_jars: "/usr/lib/tez"

 # to enable tez in hadoop, uncomment the lines below
 # hadoop::common::use_tez: true
 # hadoop::common_mapred_app::mapreduce_framework_name: "yarn-tez"

 # to enable tez in hive, uncomment the lines below
 # hadoop_hive::common_config::hive_execution_engine: "tez"

 # kafka
 kafka::server::port: "9092"
 # ZooKeeper address: the head node plus bigtop::hadoop_zookeeper_port rather
 # than a hard-coded 2181, so overriding that port also takes effect for Kafka.
 kafka::server::zookeeper_connection_string: "%{hiera('bigtop::hadoop_head_node')}:%{hiera('bigtop::hadoop_zookeeper_port')}"

 # Zeppelin server defaults.
 zeppelin::server::spark_master_url: "yarn-client"
 # NOTE(review): these lookups use a "hadoop-hive::" prefix (hyphen) while the
 # other Hive keys in this file use "hadoop_hive::" (underscore) -- confirm that
 # the keys exist under this spelling.
 zeppelin::server::hiveserver2_url: "jdbc:hive2://%{hiera('hadoop-hive::common::hiveserver2_host')}:%{hiera('hadoop-hive::common::hiveserver2_port')}"
 # bigtop::hiveserver2_user and bigtop::hiveserver2_password are not set in the
 # visible part of this file; presumably they have to be provided elsewhere in
 # the hiera hierarchy -- TODO confirm.
 zeppelin::server::hiveserver2_user: "%{hiera('bigtop::hiveserver2_user')}"
 zeppelin::server::hiveserver2_password: "%{hiera('bigtop::hiveserver2_password')}"

 # Flink: the jobmanager host defaults to the cluster head node; all values
 # (ports, memory sizes, counts) are given as quoted strings.
 flink::common::jobmanager_host: "%{hiera('bigtop::hadoop_head_node')}"
 flink::common::jobmanager_port: "6123"
 flink::common::jobmanager_memory: "1600m"
 flink::common::taskmanager_memory: "1728m"
 flink::common::taskmanager_number_of_taskslots: "1"
 flink::common::parallelism_default: "1"
 flink::common::jobmanager_failover_strategy: "region"
 flink::common::rest_port: "8081"

 # GPDB
 # The first element is FQDN for master node and the succeeding ones are for segment nodes.
 # By default the head node is listed twice, i.e. it is both the master and the
 # only segment node.
 gpdb::common::nodes: ["%{hiera('bigtop::hadoop_head_node')}", "%{hiera('bigtop::hadoop_head_node')}"]
 gpdb::common::gp_home: "/usr/lib/gpdb"
 gpdb::common::db_base_dir: "/data_gp"
 gpdb::common::master_db_port: "5432"
 gpdb::common::segment_db_port_prefix: "4000"

 # The server host used by the Ambari agent defaults to the cluster head node.
 ambari::agent::server_host: "%{hiera('bigtop::hadoop_head_node')}"
	---
	### This file implements defaults and some dependent parameter defaulting logic.
	### Every parameter can be overridden using the hiera lookup hierarchy. The enclosed
	### hiera.yaml provides for this by adding a site.yaml to the lookup where
	### site-specific overrides can be placed. Therefore this file should never need
	### changing by site admins.

	# FQDN of Namenode
	#bigtop::hadoop_head_node: "hadoopmaster.example.com"
	# FQDN of standby node (enables HA if set)
	#bigtop::hadoop_standby_head_node: "standbyNN.example.com"
	# FQDN of gateway node (if separate from NN)
	#bigtop::hadoop_gateway_node: "gateway.example.com"

	# A list of the stack's components to be deployed can be specified via the special
	# "$components" list. If $components isn't set then everything in the stack will
	# be installed as usual. Otherwise only the specified components will be installed.
	# Possible elements:
	# hadoop,yarn,hbase,alluxio,flink,solrcloud,spark,oozie,hcat,sqoop,httpfs,
	# pig,hive,zookeeper,ycsb,qfs
	# Example (to deploy only HDFS and YARN server and gateway parts)
	# This can be a comma-separated list or an array.
	#hadoop_cluster_node::cluster_components:
	# - hadoop
	# - yarn

	# A list of roles can be assigned to each node in a cluster via "bigtop::roles"
	# hiera conf. This is useful if you want to run specific daemons/processes of a component
	# (lets say hadoop) on specific nodes. To enable roles, you need to explicitly set
	# "bigtop::roles_enabled" conf to true. By default, it is not enabled. If not enabled,
	# bigtop will honour the hadoop_cluster_node::cluster_components parameter and assume
	# respective roles to each node based on hadoop_head_node, hadoop_gateway_node
	# and other nodes (worker nodes) classification. For full list of roles
	# supported by each component, refer to $roles_map datastructure in cluster.pp file
	# for example:

	#bigtop::roles:
	# - namenode
	# - resourcemanager

	# By specifying the above roles, you instruct a node to run only namenode and resourcemanager.
	# When roles are disabled, all worker daemons for each component assume hadoop_head_node as
	# their master. With roles enabled, this assumption may no longer be valid and it is up to the user
	# to specify the FQDN of each master daemon to all nodes so that worker daemons can communicate
	# with their master. The FQDN and port for the master daemons of each component can be specified
	# using the configurations below.

	# Storage directories (will be created if they don't exist)
	#hadoop::hadoop_storage_dirs:
	# - /data/1
	# - /data/2
	# - /data/3
	# - /data/4

	#bigtop::bigtop_repo_uri: "http://mirror.example.com/path/to/mirror/"

	# Use a pre-installed java environment. The default value of 'false' will cause
	# the default jdk provided by bigtop to be installed. Setting
	# this to 'true' requires a compatible java environment to be available prior to Bigtop
	# installation.
	#bigtop::jdk_preinstalled: false

	# Test-only variable controls if user hdfs' sshkeys should be installed to allow
	# for passwordless login across the cluster. Required by some integration tests
	#hadoop::common_hdfs::testonly_hdfs_sshkeys: "no"

	# Default
	#hadoop::common_hdfs::ha: "disabled"

	# Kerberos
	#hadoop::hadoop_security_authentication: "kerberos"
	#kerberos::krb_site::domain: "bigtop.apache.org"
	#kerberos::krb_site::realm: "BIGTOP.APACHE.ORG"
	#kerberos::krb_site::kdc_server: "%{hiera('bigtop::hadoop_head_node')}"
	#kerberos::krb_site::kdc_port: "88"
	#kerberos::krb_site::admin_port: "749"
	#kerberos::krb_site::keytab_export_dir: "/var/lib/bigtop_keytabs"

	# Kerberos realm for the core Hadoop services; it is looked up from
	# kerberos::krb_site::realm.
	# applies to hdfs, yarn, mapred, kms and httpfs
	hadoop::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

	# The namenode host defaults to the cluster head node.
	hadoop::common_hdfs::hadoop_namenode_host: "%{hiera('bigtop::hadoop_head_node')}"
	# actually default but needed for hadoop_namenode_uri here
	hadoop::common_hdfs::hadoop_namenode_port: "8020"

	# If there is a datanode/network failure in the write pipeline, DFSClient will try
	# to remove the failed datanode from the pipeline and then continue writing with the
	# remaining datanodes. As a result, the number of datanodes in the pipeline is decreased.
	# The feature is to add new datanodes to the pipeline. This is a site-wide property to
	# enable/disable the feature. When the cluster size is extremely small, e.g. 3 nodes or
	# less, cluster administrators may want to set the policy to NEVER in the default
	# configuration file or disable this feature. Otherwise, users may experience an unusually
	# high rate of pipeline failures since it is impossible to find new datanodes for replacement.
	# See also dfs.client.block.write.replace-datanode-on-failure.policy
	# hadoop::common_hdfs::hdfs_replace_datanode_on_failure: "false"

	# Set as shown below in site.yaml to also enable Kerberos authentication
	# on the web GUIs of journalnode, namenode, datanode, resourcemanager and
	# nodemanager when you enable Kerberos for Hadoop API communication. This
	# intentionally is not the default right now.
	#hadoop::common_hdfs::hadoop_http_authentication_type: "%{hiera('hadoop::hadoop_security_authentication')}"
	#
	# A secret is necessary for the cross-service-cross-node session cookie.
	# Provide this by setting the following to something long and secret:
	#hadoop::common_hdfs::hadoop_http_authentication_signature_secret:

	# ... or have trocla generate secrets for Hadoop components automatically. For
	# this to work, the trocla puppet module must be installed.
	#hadoop::generate_secrets: true

	# YARN and MapReduce master hosts; each one defaults to the cluster head node.
	hadoop::common_yarn::hadoop_ps_host: "%{hiera('bigtop::hadoop_head_node')}"
	hadoop::common_yarn::hadoop_rm_host: "%{hiera('bigtop::hadoop_head_node')}"
	hadoop::common_yarn::hadoop_rm_port: "8032"

	hadoop::common_mapred_app::jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}"
	hadoop::common_mapred_app::mapreduce_jobhistory_host: "%{hiera('bigtop::hadoop_head_node')}"
	# actually default but needed for hadoop::common_yarn::yarn_log_server_url here
	bigtop::hadoop_history_server_port: "19888"
	# History server URL, assembled from the job history host and port above.
	bigtop::hadoop_history_server_url: "http://%{hiera('hadoop::common_mapred_app::mapreduce_jobhistory_host')}:%{hiera('bigtop::hadoop_history_server_port')}"
	# The YARN log server URL is the history server URL plus "/jobhistory/logs".
	hadoop::common_yarn::yarn_log_server_url: "%{hiera('bigtop::hadoop_history_server_url')}/jobhistory/logs"

	# HttpFS port.
	hadoop::httpfs::hadoop_httpfs_port: "14000"

	# The KMS host defaults to the cluster head node.
	hadoop::kms_host: "%{hiera('bigtop::hadoop_head_node')}"
	hadoop::kms_port: "9600"

	bigtop::hadoop_zookeeper_port: "2181"
	# "host:port" string built from the head node and bigtop::hadoop_zookeeper_port.
	hadoop::zk: "%{hiera('bigtop::hadoop_head_node')}:%{hiera('bigtop::hadoop_zookeeper_port')}"

	# HDFS URI of the namenode, assembled from the namenode host and port.
	bigtop::hadoop_namenode_uri: "hdfs://%{hiera('hadoop::common_hdfs::hadoop_namenode_host')}:%{hiera('hadoop::common_hdfs::hadoop_namenode_port')}"
	hadoop_hbase::base_relative_rootdir: "/hbase"
	# HBase root directory: the namenode URI followed by the relative root dir.
	hadoop_hbase::common_config::rootdir: "%{hiera('bigtop::hadoop_namenode_uri')}%{hiera('hadoop_hbase::base_relative_rootdir')}"
	# The HBase ZooKeeper quorum defaults to the cluster head node.
	hadoop_hbase::common_config::zookeeper_quorum: "%{hiera('bigtop::hadoop_head_node')}"
	# Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm
	# so that the one documented realm setting applies to HBase as well.
	hadoop_hbase::common_config::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
	hadoop_hbase::client::thrift: true
	hadoop_hbase::deploy::auxiliary: true

	# Solr server: the root URL is the namenode URI and the ZooKeeper address is
	# taken from hadoop::zk.
	solr::server::root_url: "%{hiera('bigtop::hadoop_namenode_uri')}"
	solr::server::zk: "%{hiera('hadoop::zk')}"
	# Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm.
	solr::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
	# Default but needed here to make sure, hue uses the same port
	solr::server::port: "8983"

	# Oozie and HCatalog/WebHCat take their Kerberos realm from the same key
	# (kerberos::krb_site::realm) as hadoop::kerberos_realm.
	hadoop_oozie::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

	hcatalog::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
	hcatalog::webhcat::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

	# spark: the master host defaults to the cluster head node
	spark::common::master_host: "%{hiera('bigtop::hadoop_head_node')}"
	# to enable spark HA, ensure zookeeper is available and uncomment the line below
	#spark::common::zookeeper_connection_string: "%{hiera('hadoop::zk')}"

	# alluxio: the master host defaults to the cluster head node
	alluxio::common::master_host: "%{hiera('bigtop::hadoop_head_node')}"

	# qfs: the metaserver host defaults to the cluster head node
	qfs::common::metaserver_host: "%{hiera('bigtop::hadoop_head_node')}"
	# NOTE(review): metaserver_port and chunkserver_port share the default 30000;
	# confirm this is intended when both run on the same host.
	qfs::common::metaserver_port: "30000"
	qfs::common::chunkserver_port: "30000"
	qfs::common::metaserver_client_port: "20000"
	qfs::common::chunkserver_client_port: "22000"

	# Default ZooKeeper ensemble: a single server with id "0" on the head node.
	hadoop_zookeeper::server::myid: "0"
	# Each entry is a [server id, "host:port:port"] pair.
	hadoop_zookeeper::server::ensemble:
	- ["0", "%{hiera('bigtop::hadoop_head_node')}:2888:3888"]
	# Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm.
	hadoop_zookeeper::server::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"

	# those are only here because they were present as extlookup keys previously
	# (note that the RM http port and the RM proxy port share the default 8088)
	bigtop::hadoop_rm_http_port: "8088"
	bigtop::hadoop_rm_proxy_port: "8088"
	bigtop::hbase_thrift_port: "9090"
	bigtop::hadoop_oozie_port: "11000"

	# Hive reuses the HBase ZooKeeper quorum setting.
	hadoop_hive::common_config::hbase_zookeeper_quorum: "%{hiera('hadoop_hbase::common_config::zookeeper_quorum')}"
	# Uses the same realm key (kerberos::krb_site::realm) as hadoop::kerberos_realm.
	hadoop_hive::common_config::kerberos_realm: "%{hiera('kerberos::krb_site::realm')}"
	# The metastore is addressed via thrift on the head node, port 9083.
	hadoop_hive::common_config::metastore_uris: "thrift://%{hiera('bigtop::hadoop_head_node')}:9083"
	# set this to true in production to avoid potential metastore corruption
	hadoop_hive::common_config::metastore_schema_verification: false

	# tez: default configuration directory and jar location
	hadoop::common::tez_conf_dir: "/etc/tez/conf"
	hadoop::common::tez_jars: "/usr/lib/tez"

	# to enable tez in hadoop, uncomment the lines below
	# hadoop::common::use_tez: true
	# hadoop::common_mapred_app::mapreduce_framework_name: "yarn-tez"

	# to enable tez in hive, uncomment the lines below
	# hadoop_hive::common_config::hive_execution_engine: "tez"

	# kafka
	kafka::server::port: "9092"
	# ZooKeeper address: the head node plus bigtop::hadoop_zookeeper_port rather
	# than a hard-coded 2181, so overriding that port also takes effect for Kafka.
	kafka::server::zookeeper_connection_string: "%{hiera('bigtop::hadoop_head_node')}:%{hiera('bigtop::hadoop_zookeeper_port')}"

	# Zeppelin server defaults.
	zeppelin::server::spark_master_url: "yarn-client"
	# NOTE(review): these lookups use a "hadoop-hive::" prefix (hyphen) while the
	# other Hive keys in this file use "hadoop_hive::" (underscore) -- confirm that
	# the keys exist under this spelling.
	zeppelin::server::hiveserver2_url: "jdbc:hive2://%{hiera('hadoop-hive::common::hiveserver2_host')}:%{hiera('hadoop-hive::common::hiveserver2_port')}"
	# bigtop::hiveserver2_user and bigtop::hiveserver2_password are not set in the
	# visible part of this file; presumably they have to be provided elsewhere in
	# the hiera hierarchy -- TODO confirm.
	zeppelin::server::hiveserver2_user: "%{hiera('bigtop::hiveserver2_user')}"
	zeppelin::server::hiveserver2_password: "%{hiera('bigtop::hiveserver2_password')}"

	# Flink: the jobmanager host defaults to the cluster head node; all values
	# (ports, memory sizes, counts) are given as quoted strings.
	flink::common::jobmanager_host: "%{hiera('bigtop::hadoop_head_node')}"
	flink::common::jobmanager_port: "6123"
	flink::common::jobmanager_memory: "1600m"
	flink::common::taskmanager_memory: "1728m"
	flink::common::taskmanager_number_of_taskslots: "1"
	flink::common::parallelism_default: "1"
	flink::common::jobmanager_failover_strategy: "region"
	flink::common::rest_port: "8081"

	# GPDB
	# The first element is FQDN for master node and the succeeding ones are for segment nodes.
	# By default the head node is listed twice, i.e. it is both the master and the
	# only segment node.
	gpdb::common::nodes: ["%{hiera('bigtop::hadoop_head_node')}", "%{hiera('bigtop::hadoop_head_node')}"]
	gpdb::common::gp_home: "/usr/lib/gpdb"
	gpdb::common::db_base_dir: "/data_gp"
	gpdb::common::master_db_port: "5432"
	gpdb::common::segment_db_port_prefix: "4000"

	# The server host used by the Ambari agent defaults to the cluster head node.
	ambari::agent::server_host: "%{hiera('bigtop::hadoop_head_node')}"