| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| class spark { |
| |
| class deploy ($roles) { |
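    # Maps the Bigtop roles assigned to this node onto the Spark classes
    # defined below. A hypothetical invocation from site.pp:
    #
    #   class { 'spark::deploy':
    #     roles => ['spark-master', 'spark-worker'],
    #   }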
| if ('spark-client' in $roles) { |
| include spark::client |
| } |
| |
| if ('spark-on-yarn' in $roles) { |
| include spark::yarn |
| } |
| |
| if ('spark-yarn-slave' in $roles) { |
| include spark::yarn_slave |
| } |
| |
| if ('spark-master' in $roles) { |
| include spark::master |
| } |
| |
| if ('spark-worker' in $roles) { |
| include spark::worker |
| } |
| |
| if ('spark-history-server' in $roles) { |
| include spark::history_server |
| } |
| |
| if ('spark-thriftserver' in $roles) { |
| include spark::spark_thriftserver |
| } |
| |
| } |
| |
| class spark_thriftserver { |
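    # Spark Thrift JDBC/ODBC server; the service restarts whenever its
    # package or the shared Spark configuration changes.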
| include spark::common |
| |
| package { 'spark-thriftserver': |
| ensure => latest, |
| } |
| |
| service { 'spark-thriftserver': |
| ensure => running, |
| subscribe => [ |
| Package['spark-thriftserver'], |
| File['/etc/spark/conf/spark-env.sh'], |
| File['/etc/spark/conf/spark-defaults.conf'], |
| ], |
| hasrestart => true, |
| hasstatus => true, |
| } |
| |
| } |
| |
| class client { |
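    # Client-side installation: Spark core plus the PySpark, external
    # integration, and SparkR packages.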
| include spark::common |
| include spark::sparkr |
| |
| package { 'spark-python': |
| ensure => latest, |
| require => Package['spark-core'], |
| } |
| |
| package { 'spark-external': |
| ensure => latest, |
| require => Package['spark-core'], |
| } |
| } |
| |
| class master { |
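    # Spark standalone master daemon.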
| include spark::common |
| |
| package { "spark-master": |
| ensure => latest, |
| } |
| |
| service { 'spark-master': |
| ensure => running, |
| subscribe => [ |
| Package['spark-master'], |
| File['/etc/spark/conf/spark-env.sh'], |
| File['/etc/spark/conf/spark-defaults.conf'], |
| ], |
| hasrestart => true, |
| hasstatus => true, |
| } |
| } |
| |
| class worker { |
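    # Spark standalone worker daemon.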
| include spark::common |
| |
| package { "spark-worker": |
| ensure => latest, |
| } |
| |
| service { 'spark-worker': |
| ensure => running, |
| subscribe => [ |
| Package['spark-worker'], |
| File['/etc/spark/conf/spark-env.sh'], |
| File['/etc/spark/conf/spark-defaults.conf'], |
| ], |
| hasrestart => true, |
| hasstatus => true, |
| } |
| } |
| |
| class history_server { |
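    # Spark history server for browsing completed applications.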
| include spark::common |
| |
| package { 'spark-history-server': |
| ensure => latest, |
| } |
| |
| service { 'spark-history-server': |
| ensure => running, |
| subscribe => [ |
| Package['spark-history-server'], |
| File['/etc/spark/conf/spark-env.sh'], |
| File['/etc/spark/conf/spark-defaults.conf'], |
| ], |
| hasrestart => true, |
| hasstatus => true, |
| } |
| } |
| |
| class yarn { |
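    # Spark on YARN: the client bits plus the DataNucleus JARs.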
| include spark::common |
| include spark::datanucleus |
| } |
| |
| class yarn_slave { |
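    # YARN NodeManager side of Spark on YARN: the external shuffle service
    # plus the DataNucleus JARs.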
| include spark::yarn_shuffle |
| include spark::datanucleus |
| } |
| |
| class yarn_shuffle { |
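    # JAR for the Spark external shuffle service, loaded by the YARN
    # NodeManager.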
| package { 'spark-yarn-shuffle': |
| ensure => latest, |
| } |
| } |
| |
| class datanucleus { |
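    # DataNucleus JARs needed for Spark's Hive metastore integration.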
| package { 'spark-datanucleus': |
| ensure => latest, |
| } |
| } |
| |
| class sparkr { |
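    # Installs SparkR, building R from source first on distros whose
    # packaged R is too old (see below).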
    # BIGTOP-3579. On these distros the default version of R is older than
    # 3.5.0, which SparkR requires, so a newer version of R is built from
    # source here.
| if (($operatingsystem == 'Ubuntu' and versioncmp($operatingsystemmajrelease, '18.04') <= 0) or |
| ($operatingsystem == 'Debian' and versioncmp($operatingsystemmajrelease, '10') < 0)) { |
| $url = "http://cran.r-project.org/src/base/R-3/" |
| $rfile = "R-3.6.3.tar.gz" |
| $rdir = "R-3.6.3" |
| |
      $pkgs = [
        'g++',
        'gcc',
        'gfortran',
        'libbz2-dev',
        'libcurl4-gnutls-dev',
        'liblzma-dev',
        'libpcre3-dev',
        'libreadline-dev',
        'libz-dev',
        'make',
      ]
      # Build dependencies for compiling R from source
      package { $pkgs:
        ensure => installed,
        before => Exec['install_R'],
      }

      # Fetch and unpack the R source tree
      exec { 'download_R':
        cwd     => '/usr/src',
        command => "/usr/bin/wget ${url}/${rfile} && mkdir -p ${rdir} && /bin/tar -xvzf ${rfile} -C ${rdir} --strip-components=1",
        creates => "/usr/src/${rdir}",
      }
      # Configure, build, and install R under /usr/local
      exec { 'install_R':
        cwd     => "/usr/src/${rdir}",
        command => "/usr/src/${rdir}/configure --with-recommended-packages=yes --without-x --with-cairo --with-libpng --with-libtiff --with-jpeglib --with-tcltk --with-blas --with-lapack --enable-R-shlib --prefix=/usr/local && /usr/bin/make && /usr/bin/make install && /sbin/ldconfig",
        creates => '/usr/local/bin/R',
        require => Exec['download_R'],
        before  => Package['spark-sparkr'],
        timeout => 3000,
      }
| } |
| |
| package { 'spark-sparkr': |
| ensure => latest, |
| } |
| } |
| |
| class common( |
| $spark_thrift_server_without_hive = true, |
| $spark_hadoop_javax_jdo_option_ConnectionURL = undef, |
| $spark_hadoop_javax_jdo_option_ConnectionDriverName = undef, |
| $spark_hive_server2_thrift_port = undef, |
| $spark_sql_warehouse_dir = undef, |
| $master_url = undef, |
| $master_host = $fqdn, |
| $zookeeper_connection_string = undef, |
| $master_port = 7077, |
| $worker_port = 7078, |
| $master_ui_port = 8080, |
| $worker_ui_port = 8081, |
| $history_ui_port = 18080, |
| $use_yarn_shuffle_service = false, |
| $event_log_dir = "hdfs:///var/log/spark/apps", |
| $history_log_dir = "hdfs:///var/log/spark/apps", |
| $extra_lib_dirs = "/usr/lib/hadoop/lib/native", |
| $driver_mem = "1g", |
| $executor_mem = "1g", |
| ) { |
| |
    ### This is an ungodly hack to deal with the consequences of adding
    ### unconditional Hive support into Spark.
    ### The addition is tracked by BIGTOP-2154;
    ### the real fix will come in BIGTOP-2268.
| include spark::datanucleus |
| |
| package { 'spark-core': |
| ensure => latest, |
| } |
| |
    # Standalone-master recovery: recover state through ZooKeeper when a
    # connection string is supplied, otherwise disable recovery. The embedded
    # escaped quotes are preserved when the value is rendered into
    # spark-env.sh.
    if $zookeeper_connection_string == undef {
| $spark_daemon_java_opts = "\"-Dspark.deploy.recoveryMode=NONE\"" |
| } else { |
| $spark_daemon_java_opts = "\"-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=${zookeeper_connection_string}\"" |
| } |
| |
    # Render the shared configuration files from this module's templates;
    # the service classes above subscribe to them.
    file { '/etc/spark/conf/spark-env.sh':
| content => template('spark/spark-env.sh'), |
| require => Package['spark-core'], |
| } |
| |
| file { '/etc/spark/conf/spark-defaults.conf': |
| content => template('spark/spark-defaults.conf'), |
| require => Package['spark-core'], |
| } |
| |
    # Seed log4j.properties from the template shipped with the package
    file { '/etc/spark/conf/log4j.properties':
| source => '/etc/spark/conf/log4j.properties.template', |
| require => Package['spark-core'], |
| } |
| } |
| } |