AMBARI-24843. Make Ambaripreupload.py more configurable (#2563)

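This change drops the hardcoded SQL_DRIVER_PATH constant and the hardcoded wasb:// check, making both configurable:

- New -d/--database-driver option overrides the SQL JDBC driver path. The default is read from ambari.properties via JDBC_DRIVER_PATH_PROPERTY and falls back to /var/lib/ambari-server/resources/sqljdbc41.jar; the script now exits with an error if the driver file does not exist.
- New -f/--fs-type option sets the protocol that fs.defaultFS is expected to use (default "wasb"), so the script no longer waits only for wasb:// URLs in core-site.xml.
- print statements are replaced with Logger calls, and unused imports are cleaned up.

Example invocation — a sketch only; the HDP version, driver jar, filesystem type, and HDFS path prefix below are hypothetical and should be replaced with values for your cluster:

    python Ambaripreupload.py -v 2.6.5.0-292 -f adl \
        -d /var/lib/ambari-server/resources/mysql-connector-java.jar /apps/prefix
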
diff --git a/ambari-server/src/main/resources/scripts/Ambaripreupload.py b/ambari-server/src/main/resources/scripts/Ambaripreupload.py
index e0595a8..69eb14e 100644
--- a/ambari-server/src/main/resources/scripts/Ambaripreupload.py
+++ b/ambari-server/src/main/resources/scripts/Ambaripreupload.py
@@ -7,57 +7,48 @@
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.  You may obtain a copy of the License at
- 
+
     http://www.apache.org/licenses/LICENSE-2.0
- 
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
- 
+
 """
+import functools
+import glob
 import os
+import re
 import sys
 import tarfile
+import tempfile
+import time
+
 from contextlib import closing
 from optparse import OptionParser
+from xml.dom import minidom
+
 os.environ["PATH"] += os.pathsep + "/var/lib/ambari-agent"
 sys.path.append("/usr/lib/ambari-server/lib")
 
-import glob
-from logging import thread
-import re
-import tempfile
-import time
-import functools
-from xml.dom import minidom
-from xml.dom.minidom import parseString
- 
-from resource_management import *
+from ambari_server.serverClassPath import JDBC_DRIVER_PATH_PROPERTY
+from ambari_server.serverConfiguration import get_value_from_properties, get_ambari_properties
+
+from resource_management.core import File
 from resource_management.core import shell
-from resource_management.core.base import Resource, ForcedListArgument, ResourceArgument, BooleanArgument
-from resource_management.core.exceptions import Fail
+from resource_management.core.environment import Environment
 from resource_management.core.logger import Logger
-from resource_management.core.resources.system import Execute, Directory
-from resource_management.libraries.functions.default import default
+from resource_management.core.resources.system import Directory
+from resource_management.core.resources.system import Execute
+from resource_management.core.source import StaticFile
+from resource_management.libraries import ConfigDictionary
 from resource_management.libraries.functions.format import format
 from resource_management.libraries.functions.oozie_prepare_war import prepare_war
 from resource_management.libraries.resources.hdfs_resource import HdfsResource
-from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop
-from resource_management import Script
 
-SQL_DRIVER_PATH = "/var/lib/ambari-server/resources/sqljdbc41.jar"
- 
-"""
-This file provides helper methods needed for the versioning of RPMs. Specifically, it does dynamic variable
-interpretation to replace strings like {{ stack_version_formatted }}  where the value of the
-variables cannot be determined ahead of time, but rather, depends on what files are found.
- 
-It assumes that {{ stack_version_formatted }} is constructed as ${major.minor.patch.rev}-${build_number}
-E.g., 998.2.2.1.0-998
-Please note that "-${build_number}" is optional.
-"""
+DEFAULT_SQL_DRIVER_PATH = get_value_from_properties(get_ambari_properties(), JDBC_DRIVER_PATH_PROPERTY, "/var/lib/ambari-server/resources/sqljdbc41.jar")
 
 with Environment() as env:
   def get_stack_version():
@@ -74,34 +65,44 @@
         Logger.warning("Could not verify HDP version by calling '%s'. Return Code: %s, Output: %s." %
                        (get_stack_version_cmd, str(code), str(out)))
         return 1
-     
+
       matches = re.findall(r"([\d\.]+\-\d+)", out)
       stack_version = matches[0] if matches and len(matches) > 0 else None
-     
+
       if not stack_version:
         Logger.error("Could not parse HDP version from output of hdp-select: %s" % str(out))
         return 1
     else:
       stack_version = options.hdp_version
-      
+
     return stack_version
-  
+
   parser = OptionParser()
+  parser.add_option("-d", "--database-driver", dest="sql_driver_path", default=DEFAULT_SQL_DRIVER_PATH,
+                    help="Path to JDBC driver")
+  parser.add_option("-f", "--fs-type", dest="fs_type", default="wasb",
+                    help="Expected protocol of fs.defaultFS")
   parser.add_option("-v", "--hdp-version", dest="hdp_version", default="",
                     help="hdp-version used in path of tarballs")
   parser.add_option("-u", "--upgrade", dest="upgrade", action="store_true",
-                    help="flag to indicate script is being run for upgrade", default=False)  
+                    help="flag to indicate script is being run for upgrade", default=False)
   (options, args) = parser.parse_args()
 
-  
+  if not os.path.exists(options.sql_driver_path):
+    Logger.error("SQL driver file {} does not exist".format(options.sql_driver_path))
+    sys.exit(1)
+
+  Logger.info("Using SQL driver from {}".format(options.sql_driver_path))
+  sql_driver_filename = os.path.basename(options.sql_driver_path)
+
   # See if hdfs path prefix is provided on the command line. If yes, use that value, if no
   # use empty string as default.
   hdfs_path_prefix = ""
   if len(args) > 0:
     hdfs_path_prefix = args[0]
-  
+
   stack_version = get_stack_version()
-  
+
   def getPropertyValueFromConfigXMLFile(xmlfile, name, defaultValue=None):
     xmldoc = minidom.parse(xmlfile)
     propNodes = [node.parentNode for node in xmldoc.getElementsByTagName("name") if node.childNodes[0].nodeValue == name]
@@ -113,27 +114,27 @@
           else:
             return defaultValue
     return defaultValue
-  
+
   def get_fs_root(fsdefaultName=None):
     fsdefaultName = "fake"
-     
+    expected_fs_protocol = options.fs_type + '://'
+
     while True:
       fsdefaultName =  getPropertyValueFromConfigXMLFile("/etc/hadoop/conf/core-site.xml", "fs.defaultFS")
-  
-      if fsdefaultName and fsdefaultName.startswith("wasb://"):
+
+      if fsdefaultName and fsdefaultName.startswith(expected_fs_protocol):
         break
-        
-      print "Waiting to read appropriate value of fs.defaultFS from /etc/hadoop/conf/core-site.xml ..."
+
+      Logger.info("Waiting to read appropriate value of fs.defaultFS from /etc/hadoop/conf/core-site.xml ...")
       time.sleep(10)
-      pass
-  
-    print "Returning fs.defaultFS -> " + fsdefaultName
+
+    Logger.info("Returning fs.defaultFS -> " + fsdefaultName)
     return fsdefaultName
-   
+
   # These values must be the suffix of the properties in cluster-env.xml
   TAR_SOURCE_SUFFIX = "_tar_source"
   TAR_DESTINATION_FOLDER_SUFFIX = "_tar_destination_folder"
-  
+
   class params:
     hdfs_path_prefix = hdfs_path_prefix
     hdfs_user = "hdfs"
@@ -145,7 +146,7 @@
     oozie_user = "oozie"
     execute_path = "/usr/hdp/" + stack_version + "/hadoop/bin"
     ambari_libs_dir = "/var/lib/ambari-agent/lib"
-    hdfs_site = ConfigDictionary({'dfs.webhdfs.enabled':False, 
+    hdfs_site = ConfigDictionary({'dfs.webhdfs.enabled':False,
     })
     fs_default = get_fs_root()
     slider_home_dir = '/usr/hdp/' + stack_version + '/slider'
@@ -160,7 +161,7 @@
     oozie_env_sh_template = \
   '''
   #!/bin/bash
-  
+
   export OOZIE_CONFIG=${{OOZIE_CONFIG:-/usr/hdp/{0}/oozie/conf}}
   export OOZIE_DATA=${{OOZIE_DATA:-/var/lib/oozie/data}}
   export OOZIE_LOG=${{OOZIE_LOG:-/var/log/oozie}}
@@ -169,7 +170,7 @@
   export CATALINA_PID=${{CATALINA_PID:-/var/run/oozie/oozie.pid}}
   export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat
   '''.format(stack_version)
-    
+
     HdfsResource = functools.partial(
       HdfsResource,
       user=hdfs_user,
@@ -183,7 +184,7 @@
       default_fs = fs_default,
       hdfs_resource_ignore_file = "/var/lib/ambari-agent/data/.hdfs_resource_ignore",
     )
-   
+
   def _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed):
     """
     :param source_and_dest_pairs: List of tuples (x, y), where x is the source file in the local file system,
@@ -192,10 +193,10 @@
     :param group_owner: Owning group to set for the file copied to HDFS (typically hadoop group)
     :param kinit_if_needed: kinit command if it is needed, otherwise an empty string
     :return: Returns 0 if at least one file was copied and no exceptions occurred, and 1 otherwise.
-   
+
     Must kinit before calling this function.
     """
-   
+
     for (source, destination) in source_and_dest_pairs:
       params.HdfsResource(destination,
                     action="create_on_execute",
@@ -205,8 +206,8 @@
                     group=group_owner,
                     source=source,
       )
-   
-   
+
+
   def copy_tarballs_to_hdfs(source, dest, stack_select_component_name, component_user, file_owner, group_owner):
     """
     :param tarball_prefix: Prefix of the tarball must be one of tez, hive, mr, pig
@@ -215,40 +216,40 @@
     :param file_owner: Owner of the files copied to HDFS (typically hdfs account)
     :param group_owner: Group owner of the files copied to HDFS (typically hadoop group)
     :return: Returns 0 on success, 1 if no files were copied, and in some cases may raise an exception.
-   
+
     In order to call this function, params.py must have all of the following,
     stack_version_formatted, kinit_path_local, security_enabled, hdfs_user, hdfs_principal_name, hdfs_user_keytab,
     hadoop_bin_dir, hadoop_conf_dir, and HdfsDirectory as a partial function.
     """
-   
+
     component_tar_source_file, component_tar_destination_folder = source, dest
-   
+
     if not os.path.exists(component_tar_source_file):
       Logger.warning("Could not find file: %s" % str(component_tar_source_file))
       return 1
-   
-  
-   
+
+
+
     file_name = os.path.basename(component_tar_source_file)
     destination_file = os.path.join(component_tar_destination_folder, file_name)
     destination_file = destination_file.replace("{{ stack_version_formatted }}", stack_version)
-   
-  
+
+
     kinit_if_needed = ""
     if params.security_enabled:
       kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
-   
+
     if kinit_if_needed:
       Execute(kinit_if_needed,
               user=component_user,
               path='/bin'
       )
-   
+
     source_and_dest_pairs = [(component_tar_source_file, destination_file), ]
     return _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed)
-  
+
   def createHdfsResources():
-    print "Creating hdfs directories..."
+    Logger.info("Creating hdfs directories...")
     params.HdfsResource(format('{hdfs_path_prefix}/atshistory'), user='hdfs', change_permissions_for_parents=True, owner='yarn', group='hadoop', type='directory', action= ['create_on_execute'], mode=0755)
     params.HdfsResource(format('{hdfs_path_prefix}/user/hcat'), owner='hcat', type='directory', action=['create_on_execute'], mode=0755)
     params.HdfsResource(format('{hdfs_path_prefix}/hive/warehouse'), owner='hive', type='directory', action=['create_on_execute'], mode=0777)
@@ -289,20 +290,20 @@
     if not 'hdfs_files' in env.config:
       Logger.info("Not creating .hdfs_resource_ignore as no resources to use.")
       return
-    
+
     file_content = ""
     for file in env.config['hdfs_files']:
       if not file['target'].startswith(hdfs_path_prefix):
         raise Exception("Something created outside hdfs_path_prefix!")
       file_content += file['target'][len(hdfs_path_prefix):]
       file_content += "\n"
-      
+
     with open("/var/lib/ambari-agent/data/.hdfs_resource_ignore", "a+") as fp:
       fp.write(file_content)
-      
+
   def putSQLDriverToOozieShared():
-    params.HdfsResource(hdfs_path_prefix + '/user/oozie/share/lib/sqoop/{0}'.format(os.path.basename(SQL_DRIVER_PATH)),
-                        owner='hdfs', type='file', action=['create_on_execute'], mode=0644, source=SQL_DRIVER_PATH)
+    params.HdfsResource(hdfs_path_prefix + '/user/oozie/share/lib/sqoop/{0}'.format(sql_driver_filename),
+                        owner='hdfs', type='file', action=['create_on_execute'], mode=0644, source=options.sql_driver_path)
 
   def recreate_slider_tarball():
     """
@@ -347,13 +348,12 @@
   )
 
   oozie_libext_dir = params.oozie_libext_dir
-  sql_driver_filename = os.path.basename(SQL_DRIVER_PATH)
   oozie_home=params.oozie_home
   configure_cmds = []
   configure_cmds.append(('tar','-xvf', oozie_home + '/oozie-sharelib.tar.gz','-C', oozie_home))
-  configure_cmds.append(('cp', "/usr/share/HDP-oozie/ext-2.2.zip", SQL_DRIVER_PATH, oozie_libext_dir))
+  configure_cmds.append(('cp', "/usr/share/HDP-oozie/ext-2.2.zip", options.sql_driver_path, oozie_libext_dir))
   configure_cmds.append(('chown', 'oozie:hadoop', oozie_libext_dir + "/ext-2.2.zip", oozie_libext_dir + "/" + sql_driver_filename))
-   
+
   no_op_test = "ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1"
 
   File("/etc/oozie/conf/oozie-env.sh",
@@ -365,14 +365,14 @@
   skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")
 
   Execute( configure_cmds,
-           not_if  = format("{no_op_test} || {skip_recreate_sharelib}"), 
+           not_if  = format("{no_op_test} || {skip_recreate_sharelib}"),
            sudo = True,
            )
-  
+
   File(hashcode_file,
        mode = 0644,
   )
-  
+
   prepare_war(params)
 
   oozie_shared_lib = format("/usr/hdp/{stack_version}/oozie/share")
@@ -415,7 +415,7 @@
         try:
           Execute(format("rm -f {oozie_shared_lib}/lib/spark/spark-examples*.jar"))
         except:
-          print "No spark-examples jar files found in Spark client lib."
+          Logger.warning("No spark-examples jar files found in Spark client lib.")
 
         # Copy /usr/hdp/{stack_version}/spark-client/python/lib/*.zip & *.jar to /usr/hdp/{stack_version}/oozie/share/lib/spark
         Execute(format("cp -f {spark_client_dir}/python/lib/*.zip {oozie_shared_lib}/lib/spark"))
@@ -423,7 +423,7 @@
         try:
           Execute(format("cp -f {spark_client_dir}/python/lib/*.jar {oozie_shared_lib}/lib/spark"))
         except:
-          print "No jar files found in Spark client python lib."
+          Logger.warning("No jar files found in Spark client python lib.")
 
         Execute(("chmod", "-R", "0755", format('{oozie_shared_lib}/lib/spark')),
                 sudo=True)
@@ -436,7 +436,7 @@
         #          format("{oozie_shared_lib}/lib_{millis}")),
         #         sudo=True)
       except Exception, e:
-        print 'Exception occurred while preparing oozie share lib: '+ repr(e)
+        Logger.warning('Exception occurred while preparing oozie share lib: '+ repr(e))
 
     params.HdfsResource(format("{oozie_hdfs_user_dir}/share"),
       action="create_on_execute",
@@ -447,7 +447,7 @@
       source = oozie_shared_lib,
     )
 
-  print "Copying tarballs..."
+  Logger.info("Copying tarballs...")
   # TODO, these shouldn't hardcode the stack root or destination stack name.
   copy_tarballs_to_hdfs(format("/usr/hdp/{stack_version}/hadoop/mapreduce.tar.gz"), hdfs_path_prefix+"/hdp/apps/{{ stack_version_formatted }}/mapreduce/", 'hadoop-mapreduce-historyserver', params.mapred_user, params.hdfs_user, params.user_group)
   copy_tarballs_to_hdfs(format("/usr/hdp/{stack_version}/tez/lib/tez.tar.gz"), hdfs_path_prefix+"/hdp/apps/{{ stack_version_formatted }}/tez/", 'hadoop-mapreduce-historyserver', params.mapred_user, params.hdfs_user, params.user_group)
@@ -460,7 +460,7 @@
   copy_tarballs_to_hdfs(format("/usr/hdp/{stack_version}/hadoop-mapreduce/hadoop-streaming.jar"), hdfs_path_prefix+"/hdp/apps/{{ stack_version_formatted }}/mapreduce/", 'hadoop-mapreduce-historyserver', params.mapred_user, params.hdfs_user, params.user_group)
   copy_tarballs_to_hdfs(format("/usr/hdp/{stack_version}/sqoop/sqoop.tar.gz"), hdfs_path_prefix+"/hdp/apps/{{ stack_version_formatted }}/sqoop/", 'hadoop-mapreduce-historyserver', params.mapred_user, params.hdfs_user, params.user_group)
   copy_tarballs_to_hdfs(format("/usr/hdp/{stack_version}/slider/lib/slider.tar.gz"), hdfs_path_prefix+"/hdp/apps/{{ stack_version_formatted }}/slider/", 'hadoop-mapreduce-historyserver', params.hdfs_user, params.hdfs_user, params.user_group)
-  
+
   createHdfsResources()
   copy_zeppelin_dependencies_to_hdfs(format("/usr/hdp/{stack_version}/zeppelin/interpreter/spark/dep/zeppelin-spark-dependencies*.jar"))
   putSQLDriverToOozieShared()
@@ -474,20 +474,20 @@
   # Create everything in one jar call (this is fast).
   # (! Before everything should be executed with action="create_on_execute/delete_on_execute" for this time-optimization to work)
   try:
-    params.HdfsResource(None, 
+    params.HdfsResource(None,
                  logoutput=True,
                  action="execute"
     )
   except:
     os.remove("/var/lib/ambari-agent/data/.hdfs_resource_ignore")
     raise
-  print "Completed tarball copy."
+  Logger.info("Completed tarball copy.")
 
   if not options.upgrade:
-    print "Executing stack-selector-tool for stack {0} ...".format(stack_version)
+    Logger.info("Executing stack-selector-tool for stack {0} ...".format(stack_version))
     Execute(
       ('/usr/bin/hdp-select', 'set', 'all', stack_version),
       sudo = True
     )
 
-  print "Ambari preupload script completed."
+  Logger.info("Ambari preupload script completed.")