| #!/usr/bin/env python |
| # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Main Madpack installation executable. |
| # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| import sys |
| import getpass |
| import re |
| import os |
| import glob |
| import traceback |
| import subprocess |
| import datetime |
| import tempfile |
| import shutil |
| import unittest |
| |
| from upgrade_util import ChangeHandler |
| from upgrade_util import ViewDependency |
| from upgrade_util import TableDependency |
| from upgrade_util import ScriptCleaner |
| |
| from itertools import izip_longest |
| |
# Minimum Python version required to run madpack, as [major, minor].
# Kept as a list: _plpy_check() compares its py_min_ver argument against a
# list of ints.
py_min_ver = [2, 6]

# Abort early on an unsupported interpreter.
# sys.version_info is a tuple while py_min_ver is a list. CPython 2 orders
# values of different types by type name, so "tuple < list" is never true and
# the check could not fire; Python 3 raises TypeError. Convert before comparing.
if list(sys.version_info[:2]) < py_min_ver:
    print("ERROR: python version too old (%s). You need %s or greater." %
          ('.'.join(str(i) for i in sys.version_info[:3]),
           '.'.join(str(i) for i in py_min_ver)))
    sys.exit(1)
| |
# Locate the MADlib root directory. This file is installed as
# $MADLIB_ROOT/madpack/madpack.py, so the root is the parent of the directory
# holding this file. realpath() resolves a symlinked madpack.py, and __file__
# (rather than sys.argv[0]) stays correct when this module is imported instead
# of being run as the main module.
maddir = os.path.abspath(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))  # MADlib root dir
sys.path.append(maddir + "/madpack")
| |
| # Import MADlib python modules |
| import argparse |
| import configyml |
| |
# Name under which this script was invoked; used as the prefix of the
# messages printed by _error() and _info()
this = os.path.basename(sys.argv[0])

# Default locations below the MADlib root
maddir_conf = maddir + "/config"            # Config dir
maddir_lib = maddir + "/lib/libmadlib.so"   # C/C++ libraries

# Load the configuration files
ports = configyml.get_ports(maddir_conf)    # object made of Ports.yml
rev = configyml.get_version(maddir_conf)    # MADlib OS-level version
portid_list = list(ports)                   # values obtained by iterating over ports

SUPPORTED_PORTS = ('postgres', 'greenplum', 'hawq')

# Module-level state shared by the functions below
portid = None       # Target port ID (eg: pg90, gp40)
dbconn = None       # DB Connection object
dbver = None        # DB version
con_args = {}       # DB connection arguments
verbose = None      # Verbose flag
keeplogs = None
tmpdir = None
is_hawq2 = False
| |
| |
| def _make_dir(dir): |
| """ |
| # Create a temp dir |
| # @param dir temp directory path |
| """ |
| if not os.path.isdir(dir): |
| try: |
| os.makedirs(dir) |
| except: |
| print "ERROR: can not create directory: %s. Check permissions." % dir |
| exit(1) |
| # ------------------------------------------------------------------------------ |
| |
| |
def _error(msg, stop):
    """
    Report an error and optionally terminate the program.
        @param msg error message
        @param stop when true, exit with status 2 after printing
    """
    # The message goes to stdout; no stack trace is printed
    print(this + ' : ERROR : ' + msg)
    if not stop:
        return
    exit(2)
| # ------------------------------------------------------------------------------ |
| |
| |
def _info(msg, verbose=True):
    """
    Print an informational message to stdout.
        @param msg info message
        @param verbose the message is printed only if this is true
    """
    if not verbose:
        return
    print(this + ' : INFO : ' + msg)
| # ------------------------------------------------------------------------------ |
| |
| |
def run_query(sql, show_error, con_args=con_args):
    """
    Run a single SQL statement through the psql command-line client and
    return its result set.

    Limitation: result values must not contain new-line characters, because
    the output is parsed line by line.

        @param sql query text to execute
        @param show_error when true, print the SQL and psql's stderr on failure
        @param con_args connection arguments; reads 'host' (in the form
                        "hostname:port"), 'database', 'user' and, if present,
                        'password'. Defaults to the module-level con_args
                        dict as bound when this function was defined.
        @returns list of rows, each a dict mapping column name to the value
                 text printed by psql
        @raises EnvironmentError if psql's stderr output mentions 'password'
        @raises Exception for any other output on psql's stderr
    """
    sqlcmd = 'psql'
    delimiter = ' <$madlib_delimiter$> '

    # Test the DB cmd line utility
    std, err = subprocess.Popen(['which', sqlcmd], stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE).communicate()
    if not std:
        _error("Command not found: %s" % sqlcmd, True)

    # Run the query
    host_parts = con_args['host'].split(':')
    runcmd = [sqlcmd,
              '-h', host_parts[0],
              '-p', host_parts[1],
              '-d', con_args['database'],
              '-U', con_args['user'],
              '-F', delimiter,
              '--no-password',
              '--no-psqlrc',
              '--no-align',
              '-c', sql]
    # Work on a copy: assigning into os.environ itself would leak the
    # password and PGOPTIONS into this process and every later subprocess.
    runenv = os.environ.copy()
    if 'password' in con_args:
        runenv["PGPASSWORD"] = con_args['password']
    runenv["PGOPTIONS"] = '-c search_path=public -c client_min_messages=error'
    std, err = subprocess.Popen(runcmd, env=runenv, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE).communicate()

    if err:
        if show_error:
            _error("SQL command failed: \nSQL: %s \n%s" % (sql, err), False)
        if 'password' in err:
            raise EnvironmentError
        else:
            raise Exception

    # Convert the delimited output into a list of dictionaries: the first
    # line holds the column names, every following line is one row.
    lines = std.splitlines()
    if not lines:
        return []
    cols = lines[0].split(delimiter)
    # zip() pairs names with values; fields beyond the known columns are ignored
    results = [dict(zip(cols, line.split(delimiter))) for line in lines[1:]]

    # Drop the last line: "(X rows)"
    if results:
        results.pop()

    return results
| # ------------------------------------------------------------------------------ |
| |
| |
def _internal_run_query(sql, show_error):
    """
    Execute a SQL query on the target platform DB by delegating to
    run_query() with the module-level connection arguments.
    Very limited:
      - no text output with "new line" characters allowed
        @param sql query text to execute
        @param show_error displays the SQL error msg
    """
    query_result = run_query(sql, show_error, con_args)
    return query_result
| # ------------------------------------------------------------------------------ |
| |
| |
| def _get_relative_maddir(maddir, port): |
| """ Return a relative path version of maddir |
| |
| GPDB and HAWQ installations have a symlink outside of GPHOME that |
| links to the current GPHOME. After a DB upgrade, this symlink is updated to |
| the new GPHOME. |
| |
| 'maddir_lib', which uses the absolute path of GPHOME, is hardcoded into each |
| madlib function definition. Replacing the GPHOME path with the equivalent |
| relative path makes it simpler to perform DB upgrades without breaking MADlib. |
| """ |
| if port not in ('greenplum', 'hawq'): |
| # do nothing for postgres |
| return maddir |
| |
| # e.g. maddir_lib = $GPHOME/madlib/Versions/1.9/lib/libmadlib.so |
| # 'madlib' is supposed to be in this path, which is the default folder |
| # used by GPPKG to install madlib |
| try: |
| abs_gphome, tail = maddir.split('madlib/') |
| except ValueError: |
| return maddir |
| |
| link_name = 'greenplum-db' if port == 'greenplum' else 'hawq' |
| |
| # Check outside $GPHOME if there is a symlink to this absolute path |
| # os.pardir is equivalent to .. |
| # os.path.normpath removes the extraneous .. from that path |
| rel_gphome = os.path.normpath(os.path.join(abs_gphome, os.pardir, link_name)) |
| if os.path.islink(rel_gphome) and os.path.realpath(rel_gphome) == os.path.realpath(abs_gphome): |
| # if the relative link exists and is pointing to current location |
| return os.path.join(rel_gphome, 'madlib', tail) |
| else: |
| return maddir |
| # ------------------------------------------------------------------------------ |
| |
| |
def _run_sql_file(schema, maddir_mod_py, module, sqlfile,
                  tmpfile, logfile, pre_sql, upgrade=False,
                  sc=None):
    """
    Preprocess a module SQL file with m4 and execute the result with psql.
        @param schema name of the target schema
        @param maddir_mod_py name of the module dir with Python code
        @param module  name of the module
        @param sqlfile name of the file to parse
        @param tmpfile name of the temp file to run
        @param logfile name of the log file (stdout)
        @param pre_sql optional SQL to run before executing the file
        @param upgrade are we upgrading as part of this sql run
        @param sc object of ScriptCleaner (used only when upgrade is true)
        @returns exit status of the psql process
        @raises ValueError if sqlfile does not exist
        @raises Exception if m4 preprocessing, creating the log file or
                launching psql fails, or if the port is not supported
    """

    # Check if the SQL file exists
    if not os.path.isfile(sqlfile):
        _error("Missing module SQL file (%s)" % sqlfile, False)
        raise ValueError("Missing module SQL file (%s)" % sqlfile)

    # Prepare the file using M4
    try:
        # 'with' closes the temp file even when m4 cannot be started
        with open(tmpfile, 'w') as f:
            # Add the before SQL
            if pre_sql:
                f.writelines([pre_sql, '\n\n'])
            # Push our buffered writes out before m4 writes through the
            # same file descriptor
            f.flush()

            # Find the madpack dir (platform specific or generic)
            port_madpack = maddir + "/ports/" + portid + "/" + dbver + "/madpack"
            if os.path.isdir(port_madpack):
                maddir_madpack = port_madpack
            else:
                maddir_madpack = maddir + "/madpack"
            maddir_ext_py = maddir + "/lib/python"

            m4args = ['m4',
                      '-P',
                      '-DMADLIB_SCHEMA=' + schema,
                      '-DPLPYTHON_LIBDIR=' + maddir_mod_py,
                      '-DEXT_PYTHON_LIBDIR=' + maddir_ext_py,
                      '-DMODULE_PATHNAME=' + maddir_lib,
                      '-DMODULE_NAME=' + module,
                      '-I' + maddir_madpack,
                      sqlfile]

            _info("> ... parsing: " + " ".join(m4args), verbose)

            # NOTE(review): m4's exit status is not checked; a failed
            # expansion is only noticed when psql runs the generated file.
            subprocess.call(m4args, stdout=f)
    except Exception:
        _error("Failed executing m4 on %s" % sqlfile, False)
        raise Exception("Failed executing m4 on %s" % sqlfile)

    # Only update function definition
    if upgrade:
        # get filename from complete path without the extension
        sub_module = os.path.splitext(os.path.basename(sqlfile))[0]
        if sub_module not in sc.get_change_handler().newmodule:
            with open(tmpfile) as f:
                sql = f.read()
            sql = sc.cleanup(sql)
            with open(tmpfile, 'w') as f:
                f.write(sql)

    # Run the SQL using DB command-line utility
    if portid not in ('greenplum', 'postgres', 'hawq'):
        # Without this guard the code below would fail on an undefined
        # command and report it as a generic execution failure.
        _error("Unsupported database port: %s" % portid, False)
        raise Exception("Unsupported database port: %s" % portid)

    sqlcmd = 'psql'
    # Test the DB cmd line utility
    std, err = subprocess.Popen(['which', sqlcmd], stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE).communicate()
    if not std:
        _error("Command not found: %s" % sqlcmd, True)

    host_parts = con_args['host'].split(':')
    runcmd = [sqlcmd, '-a',
              '-v', 'ON_ERROR_STOP=1',
              '-h', host_parts[0],
              '-p', host_parts[1],
              '-d', con_args['database'],
              '-U', con_args['user'],
              '--no-password',
              '-f', tmpfile]
    # Copy the environment so the password and PGOPTIONS do not leak into
    # os.environ of this process
    runenv = os.environ.copy()
    if 'password' in con_args:
        runenv["PGPASSWORD"] = con_args['password']
    runenv["PGOPTIONS"] = '-c client_min_messages=notice'

    # Open log file
    try:
        log = open(logfile, 'w')
    except (IOError, OSError):
        _error("Cannot create log file: %s" % logfile, False)
        raise Exception("Cannot create log file: %s" % logfile)

    # Run the SQL; psql's stdout and stderr both go to the log file
    try:
        _info("> ... executing " + tmpfile, verbose)
        retval = subprocess.call(runcmd, env=runenv, stdout=log, stderr=log)
    except Exception:
        _error("Failed executing %s" % tmpfile, False)
        raise Exception("Failed executing %s" % tmpfile)
    finally:
        log.close()

    return retval
| # ------------------------------------------------------------------------------ |
| |
| |
def _get_madlib_dbrev(schema):
    """
    Look up the MADlib version recorded in the database.
        @param schema MADlib schema name
        @returns the version of the most recently applied entry in
                 <schema>.migrationhistory, or None when that table does
                 not exist or holds no rows
    Any failure while querying is reported through _error() with the stop
    flag set.
    """
    try:
        found = _internal_run_query("SELECT count(*) AS cnt FROM pg_tables " +
                                    "WHERE schemaname='" + schema + "' AND " +
                                    "tablename='migrationhistory'", True)
        has_history = int(found[0]['cnt']) > 0
        if has_history:
            latest = _internal_run_query("""SELECT version FROM %s.migrationhistory
                ORDER BY applied DESC LIMIT 1""" % schema, True)
            if latest:
                return latest[0]['version']
    except:
        # bare except kept on purpose: every failure ends the program here
        _error("Failed reading MADlib db version", True)

    return None
| # ------------------------------------------------------------------------------ |
| |
| |
def _get_dbver():
    """ Read the database version number (form X.Y; X.Y.Z on Greenplum)

    Returns None when the version string does not match the pattern of the
    current port. Any failure is reported through _error() with the stop
    flag set.
    """
    version_patterns = {
        'postgres': r"PostgreSQL[a-zA-Z\s]*(\d+\.\d+)",
        # for Greenplum the 3rd digit is necessary to differentiate
        # 4.3.5+ from versions < 4.3.5
        'greenplum': r"Greenplum[a-zA-Z\s]*(\d+\.\d+\.\d+)",
        'hawq': r"HAWQ[a-zA-Z\s]*(\d+\.\d+)",
    }
    try:
        versionStr = _internal_run_query("SELECT pg_catalog.version()", True)[0]['version']
        # an unknown portid raises here and is handled like any other failure
        found = re.search(version_patterns[portid], versionStr)
        if found is None:
            return None
        return found.group(1)
    except:
        _error("Failed reading database version", True)
| # ------------------------------------------------------------------------------ |
| |
| |
def _check_db_port(portid):
    """
    Verify that the connected database is the expected DB platform.
        @param portid expected DB port id - to be validated
        @returns True if the lower-cased version string names the port and
                 does not name a derived platform ('greenplum' for postgres,
                 'hawq' for greenplum); False otherwise
    """
    try:
        rows = _internal_run_query("SELECT version() AS version", True)
    except:
        _error("Cannot validate DB platform type", True)

    if not rows:
        return False
    banner = rows[0]['version'].lower()
    if portid not in banner:
        return False

    if portid == 'postgres':
        return 'greenplum' not in banner
    if portid == 'greenplum':
        return 'hawq' not in banner
    return portid == 'hawq'
| # ------------------------------------------------------------------------------ |
| |
| |
| def _is_rev_gte(left, right): |
| """ Return if left >= right |
| |
| Args: |
| @param left: list. Revision numbers in a list form (as returned by |
| _get_rev_num). |
| @param right: list. Revision numbers in a list form (as returned by |
| _get_rev_num). |
| |
| Returns: |
| Boolean |
| |
| If left and right are all numeric then regular list comparison occurs. |
| If either one contains a string, then comparison occurs till both have int. |
| First list to have a string is considered smaller |
| (including if the other does not have an element in corresponding index) |
| |
| Examples: |
| [1, 9, 0] >= [1, 9, 0] |
| [1, 9, 1] >= [1, 9, 0] |
| [1, 9, 1] >= [1, 9] |
| [1, 10] >= [1, 9, 1] |
| [1, 9, 0] >= [1, 9, 0, 'dev'] |
| [1, 9, 1] >= [1, 9, 0, 'dev'] |
| [1, 9, 0] >= [1, 9, 'dev'] |
| [1, 9, 'rc'] >= [1, 9, 'dev'] |
| [1, 9, 'rc', 0] >= [1, 9, 'dev', 1] |
| [1, 9, 'rc', '1'] >= [1, 9, 'rc', '1'] |
| """ |
| def all_numeric(l): |
| return not l or all(isinstance(i, int) for i in l) |
| |
| if all_numeric(left) and all_numeric(right): |
| return left >= right |
| else: |
| for i, (l_e, r_e) in enumerate(izip_longest(left, right)): |
| if isinstance(l_e, int) and isinstance(r_e, int): |
| if l_e == r_e: |
| continue |
| else: |
| return l_e > r_e |
| elif isinstance(l_e, int) or isinstance(r_e, int): |
| # [1, 9, 0] > [1, 9, 'dev'] |
| # [1, 9, 0] > [1, 9] |
| return isinstance(l_e, int) |
| else: |
| # both are not int |
| if r_e is None: |
| # [1, 9, 'dev'] < [1, 9] |
| return False |
| else: |
| return l_e is None or left[i:] >= right[i:] |
| return True |
| # ---------------------------------------------------------------------- |
| |
| |
| def _get_rev_num(rev): |
| """ |
| Convert version string into number for comparison |
| @param rev version text |
| It is expected to follow Semantic Versioning (semver.org) |
| Valid inputs: |
| 1.9.0, 1.10.0, 2.5.0 |
| 1.0.0-alpha, 1.0.0-alpha.1, 1.0.0-0.3.7, 1.0.0-x.7.z.92 |
| 1.0.0+20130313144700, 1.0.0-beta+exp.sha.5114f85 |
| """ |
| try: |
| rev_parts = re.split('[-+_]', rev) |
| # get numeric part of the version string |
| num = [int(i) for i in rev_parts[0].split('.')] |
| num += [0] * (3 - len(num)) # normalize num to be of length 3 |
| # get identifier part of the version string |
| if len(rev_parts) > 1: |
| num.extend(map(str, rev_parts[1:])) |
| if not num: |
| num = [0] |
| return num |
| except: |
| # invalid revision |
| return [0] |
| # ------------------------------------------------------------------------------ |
| |
| |
def _print_revs(rev, dbrev, con_args, schema):
    """
    Print version information
        @param rev OS-level MADlib version
        @param dbrev DB-level MADlib version
        @param con_args database connection arguments; 'host' and 'database'
                        are read when the mapping is non-empty
        @param schema MADlib schema name
    """
    _info("MADlib tools version = %s (%s)" % (str(rev), sys.argv[0]), True)
    if con_args:
        try:
            _info("MADlib database version = %s (host=%s, db=%s, schema=%s)"
                  % (dbrev, con_args['host'], con_args['database'], schema), True)
        except Exception:
            # The fallback has three placeholders, so it must receive exactly
            # three values (passing dbrev as well raised TypeError). Use
            # .get() so a missing key cannot fail a second time.
            _info("MADlib database version = [Unknown] (host=%s, db=%s, schema=%s)"
                  % (con_args.get('host'), con_args.get('database'), schema), True)
    return
| # ------------------------------------------------------------------------------ |
| |
| |
def _plpy_check(py_min_ver):
    """
    Check pl/python existence and version
        @param py_min_ver min Python version to run MADlib, as a list of ints
        @raises Exception if the plpythonu language cannot be created or the
                PL/Python version is older than py_min_ver
    """

    _info("Testing PL/Python environment...", True)

    # Check PL/Python existence
    rv = _internal_run_query("SELECT count(*) AS CNT FROM pg_language "
                             "WHERE lanname = 'plpythonu'", True)
    if int(rv[0]['cnt']) > 0:
        _info("> PL/Python already installed", verbose)
    else:
        _info("> PL/Python not installed", verbose)
        _info("> Creating language PL/Python...", True)
        try:
            _internal_run_query("CREATE LANGUAGE plpythonu;", True)
        except Exception:
            _error('Cannot create language plpythonu. Stopping installation...', False)
            raise Exception('Cannot create language plpythonu')

    # Check PL/Python version through a temporary helper function
    _internal_run_query("DROP FUNCTION IF EXISTS plpy_version_for_madlib();", False)
    try:
        _internal_run_query("""
            CREATE OR REPLACE FUNCTION plpy_version_for_madlib()
            RETURNS TEXT AS
            $$
                import sys
                # return '.'.join(str(item) for item in sys.version_info[:3])
                return str(sys.version_info[:3]).replace(',','.').replace(' ','').replace(')','').replace('(','')
            $$
            LANGUAGE plpythonu;
        """, True)
        rv = _internal_run_query("SELECT plpy_version_for_madlib() AS ver;", True)
        python = rv[0]['ver']
        py_cur_ver = [int(i) for i in python.split('.')]
    finally:
        # Always remove the helper, so a failed check does not leave it
        # behind in the database
        _internal_run_query("DROP FUNCTION IF EXISTS plpy_version_for_madlib();", False)

    if py_cur_ver < py_min_ver:
        _error("PL/Python version too old: %s. You need %s or greater"
               % (python, '.'.join(str(i) for i in py_min_ver)), False)
        raise Exception("PL/Python version too old: %s" % python)

    _info("> PL/Python version: %s" % python, verbose)
    _info("> PL/Python environment OK (version: %s)" % python, True)
| # ------------------------------------------------------------------------------ |
| |
| |
def _db_install(schema, dbrev, testcase):
    """
    Install MADlib into a schema.
        @param schema MADlib schema name
        @param dbrev DB-level MADlib version (None when MADlib is not installed)
        @param testcase command-line args for a subset of modules

    Three situations are handled:
      1. the schema is writable and holds MADlib objects: after confirmation
         the objects are overwritten (HAWQ) or the schema is renamed and a
         fresh one is created;
      2. the schema is writable without MADlib objects: objects are created;
      3. the schema is not writable: it is created, then objects are created.
    """
    stars = "***************************************************************************"

    def _user_declined():
        """Prompt for Y/N; report and return True if the user refuses."""
        _info("Would you like to continue? [Y/N]", True)
        answer = raw_input('>>> ').upper()
        while answer not in ('Y', 'YES', 'N', 'NO'):
            answer = raw_input('Yes or No >>> ').upper()
        if answer in ('N', 'NO'):
            _info('Installation stopped.', True)
            return True
        return False

    _info("Installing MADlib into %s schema..." % schema.upper(), True)

    # Name an existing MADlib schema is renamed to during installation
    temp_schema = schema + '_v' + ''.join(map(str, _get_rev_num(dbrev)))
    # Check the status of MADlib objects in database
    madlib_exists = dbrev is not None

    # Test if schema is writable
    try:
        _internal_run_query("CREATE TABLE %s.__madlib_test_table (A INT);" % schema, False)
        _internal_run_query("DROP TABLE %s.__madlib_test_table;" % schema, False)
    except:
        schema_writable = False
    else:
        schema_writable = True

    if schema_writable and madlib_exists:
        # CASE #1: Target schema exists with MADlib objects
        if portid == 'hawq':
            # work-around before UDT is available in HAWQ
            _info(stars, True)
            _info("* Schema MADLIB already exists", True)
            _info("* For HAWQ, MADlib objects will be overwritten to the 'MADLIB' schema", True)
            _info("* It may drop any database objects (tables, views, etc.) that depend on 'MADLIB' SCHEMA!!!!!!!!!!!!!", True)
            _info(stars, True)
            if _user_declined():
                return
            # Rolling back in HAWQ will drop catalog functions. For exception, we
            # simply push the exception to the caller to terminate the install
            _db_create_objects(schema, None, testcase=testcase, hawq_debug=True)
        else:
            _info(stars, True)
            _info("* Schema %s already exists" % schema.upper(), True)
            _info("* Installer will rename it to %s" % temp_schema.upper(), True)
            _info(stars, True)
            if _user_declined():
                return

            # Rename MADlib schema
            _db_rename_schema(schema, temp_schema)

            # Create MADlib schema
            try:
                _db_create_schema(schema)
            except:
                _db_rollback(schema, temp_schema)

            # Create MADlib objects
            try:
                _db_create_objects(schema, temp_schema, testcase=testcase)
            except:
                _db_rollback(schema, temp_schema)

    elif schema_writable:
        # CASE #2: Target schema exists w/o MADlib objects.
        # For HAWQ, after the DB initialization, there is no
        # madlib.migrationhistory table, thus madlib_exists is False
        try:
            _db_create_objects(schema, None, testcase=testcase)
        except:
            _error("Building database objects failed. "
                   "Before retrying: drop %s schema OR install MADlib into "
                   "a different schema." % schema.upper(), True)

    else:
        # CASE #3: Target schema does not exist
        if portid == 'hawq' and not is_hawq2:
            # Rolling back in HAWQ will drop catalog functions. For exception, we
            # simply push the exception to the caller to terminate the install
            raise Exception("MADLIB schema is required for HAWQ")

        _info("> Schema %s does not exist" % schema.upper(), verbose)

        # Create MADlib schema
        try:
            _db_create_schema(schema)
        except:
            _db_rollback(schema, None)

        # Create MADlib objects
        try:
            _db_create_objects(schema, None, testcase=testcase)
        except:
            _db_rollback(schema, None)

    _info("MADlib %s installed successfully in %s schema." % (str(rev), schema.upper()), True)
| # ------------------------------------------------------------------------------ |
| |
| |
def _db_upgrade(schema, dbrev):
    """
    Upgrade the MADlib objects in a schema to the OS-level version.
        @param schema MADlib schema name
        @param dbrev DB-level MADlib version

    Nothing is done when the database version is already up to date.
    Before anything is changed, user tables/indexes and views that depend on
    changed MADlib objects are detected; any such dependency aborts the
    upgrade. Otherwise the changed objects are dropped and all objects are
    (re)created.
    """
    def _overlap(dependents, changed):
        """Return the entries of dependents that also occur in changed."""
        return [entry for entry in dependents if entry in changed]

    if _is_rev_gte(_get_rev_num(dbrev), _get_rev_num(rev)):
        _info("Current MADlib version already up to date.", True)
        return

    if _is_rev_gte([1, 7, 1], _get_rev_num(dbrev)):
        _error("""
            MADlib versions prior to v1.8 are not supported for upgrade.
            Please try upgrading to v1.9.1 and then upgrade to this version.
            """, True)
        return

    _info("Upgrading MADlib into %s schema..." % schema.upper(), True)
    _info("\tDetecting dependencies...", True)

    _info("\tLoading change list...", True)
    ch = ChangeHandler(schema, portid, con_args, maddir, dbrev, is_hawq2)

    _info("\tDetecting table dependencies...", True)
    td = TableDependency(schema, portid, con_args)

    _info("\tDetecting view dependencies...", True)
    vd = ViewDependency(schema, portid, con_args)

    separator = "*" * 50
    abort = False

    if td.has_dependency():
        _info(separator, True)
        _info("\tFollowing user tables/indexes are dependent on MADlib objects:", True)
        _info(td.get_dependency_str(), True)
        _info(separator, True)

        # user-defined types
        cd_udt = _overlap(td.get_depended_udt(), ch.udt)
        if cd_udt:
            _error("""
                User has objects dependent on following updated MADlib types!
                        {0}
                These objects need to be dropped before upgrading.
                """.format('\n\t\t\t'.join(cd_udt)), False)

            # we add special handling for 'linregr_result'
            if 'linregr_result' in cd_udt:
                _info("""Dependency on 'linregr_result' could be due to objects
                        created from the output of the aggregate 'linregr'.
                        Please refer to the Linear Regression documentation
                        <http://madlib.incubator.apache.org/docs/latest/group__grp__linreg.html#warning>
                        for the recommended solution.
                        """, False)
            abort = True

        # operator classes
        c_udoc = ch.get_udoc_oids()
        d_udoc = td.get_depended_udoc_oids()
        cd_udoc = _overlap(d_udoc, c_udoc)
        if cd_udoc:
            _error("""
                User has objects dependent on the following updated MADlib operator classes!
                        oid={0}
                These objects need to be dropped before upgrading.
                """.format('\n\t\t\t'.join(cd_udoc)), False)
            abort = True

    if vd.has_dependency():
        _info(separator, True)
        _info("\tFollowing user views are dependent on MADlib objects:", True)
        _info(vd.get_dependency_graph_str(), True)
        _info(separator, True)

        # functions
        c_udf = ch.get_udf_signature()
        d_udf = vd.get_depended_func_signature('UDF')
        cd_udf = _overlap(d_udf, c_udf)
        if cd_udf:
            _error("""
                User has objects dependent on following updated MADlib functions!
                    {0}
                These objects will fail to work with the updated functions and
                need to be dropped before starting upgrade again.
                """.format('\n\t\t\t\t\t'.join(cd_udf)), False)
            abort = True

        # aggregates
        c_uda = ch.get_uda_signature()
        d_uda = vd.get_depended_func_signature('UDA')
        cd_uda = _overlap(d_uda, c_uda)
        if cd_uda:
            _error("""
                User has objects dependent on following updated MADlib aggregates!
                    {0}
                These objects will fail to work with the new aggregates and
                need to be dropped before starting upgrade again.
                """.format('\n\t\t\t\t\t'.join(cd_uda)), False)
            abort = True

        # operators
        c_udo = ch.get_udo_oids()
        d_udo = vd.get_depended_opr_oids()
        cd_udo = _overlap(d_udo, c_udo)
        if cd_udo:
            _error("""
                User has objects dependent on following updated MADlib operators!
                    oid={0}
                These objects will fail to work with the new operators and
                need to be dropped before starting upgrade again.
                """.format('\n\t\t\t\t\t'.join(cd_udo)), False)
            abort = True

    if abort:
        _error("""------- Upgrade aborted. -------
                Backup and drop all objects that depend on MADlib before trying upgrade again.
                Use madpack reinstall to automatically drop these objects only if appropriate.""", True)
    else:
        _info("No dependency problem found, continuing to upgrade ...", True)

    _info("\tReading existing UDAs/UDTs...", False)
    sc = ScriptCleaner(schema, portid, con_args, ch)
    _info("Script Cleaner initialized ...", False)

    # Drop every changed object before recreating the modules
    ch.drop_changed_uda()
    ch.drop_changed_udoc()
    ch.drop_changed_udo()
    ch.drop_changed_udc()
    ch.drop_changed_udf()
    ch.drop_changed_udt()  # assume dependent udf for udt does not change
    ch.drop_traininginfo_4dt()  # used types: oid, text, integer, float
    _db_create_objects(schema, None, True, sc)

    _info("MADlib %s upgraded successfully in %s schema." % (str(rev), schema.upper()), True)
| # ------------------------------------------------------------------------------ |
| |
| |
def _db_rename_schema(from_schema, to_schema):
    """
    Rename a database schema.
        @param from_schema name of the schema to rename
        @param to_schema new name for the schema
    Raises Exception if the rename statement fails.
    """
    announcement = "> Renaming schema %s to %s" % (from_schema.upper(), to_schema.upper())
    _info(announcement, True)
    try:
        rename_sql = "ALTER SCHEMA %s RENAME TO %s;" % (from_schema, to_schema)
        _internal_run_query(rename_sql, True)
    except:
        _error('Cannot rename schema. Stopping installation...', False)
        raise Exception
| # ------------------------------------------------------------------------------ |
| |
| |
def _db_create_schema(schema):
    """
    Create schema
        @param schema name of the schema to create
        @raises Exception (as raised by the failed query) when the schema
                cannot be created
    """

    _info("> Creating %s schema" % schema.upper(), True)
    try:
        _internal_run_query("CREATE SCHEMA %s;" % schema, True)
    except Exception:
        _info('Cannot create new schema. Rolling back installation...', True)
        # Propagate the failure: _db_install() wraps this call in try/except
        # and starts the rollback only when an exception reaches it.
        raise
| # ------------------------------------------------------------------------------ |
| |
| |
def _db_create_objects(schema, old_schema, upgrade=False, sc=None, testcase="",
                       hawq_debug=False):
    """
    Create MADlib DB objects in the schema
        @param schema Name of the target schema
        @param old_schema Name of a schema whose migrationhistory rows are
                          copied into the new table (skipped when empty/None)
        @param upgrade True when called as part of an upgrade; the
                       MigrationHistory table is then kept as it is
        @param sc ScriptCleaner object (passed on to _run_sql_file)
        @param testcase Command-line args for modules to install: a comma
                        separated list of "module" or "module/algorithm"
        @param hawq_debug when True the MigrationHistory table is not
                          recreated either
        @raises Exception if a MigrationHistory statement or a module SQL
                file fails
    """
    if not upgrade and not hawq_debug:
        # Create MigrationHistory table
        try:
            _info("> Creating %s.MigrationHistory table" % schema.upper(), True)
            _internal_run_query("DROP TABLE IF EXISTS %s.migrationhistory;" % schema, True)
            sql = """CREATE TABLE %s.migrationhistory
                   (id serial, version varchar(255),
                    applied timestamp default current_timestamp);""" % schema
            _internal_run_query(sql, True)
        except Exception:
            _error("Cannot create MigrationHistory table", False)
            raise Exception("Cannot create MigrationHistory table")

        # Copy MigrationHistory table for record keeping purposes
        if old_schema:
            try:
                _info("> Saving data from %s.MigrationHistory table" % old_schema.upper(), True)
                sql = """INSERT INTO %s.migrationhistory (version, applied)
                       SELECT version, applied FROM %s.migrationhistory
                       ORDER BY id;""" % (schema, old_schema)
                _internal_run_query(sql, True)
            except Exception:
                _error("Cannot copy MigrationHistory table", False)
                raise Exception("Cannot copy MigrationHistory table")

    # Stamp the DB installation
    try:
        _info("> Writing version info in MigrationHistory table", True)
        _internal_run_query("INSERT INTO %s.migrationhistory(version) "
                            "VALUES('%s')" % (schema, str(rev)), True)
    except Exception:
        _error("Cannot insert data into %s.migrationhistory table" % schema, False)
        raise Exception("Cannot insert data into %s.migrationhistory table" % schema)

    # Run migration SQLs
    if upgrade:
        _info("> Creating/Updating objects for modules:", True)
    else:
        _info("> Creating objects for modules:", True)

    caseset = (set([test.strip() for test in testcase.split(',')])
               if testcase != "" else set())

    # module name -> list of requested algorithms (empty list: whole module)
    modset = {}
    for case in caseset:
        if case.find('/') > -1:
            [mod, algo] = case.split('/')
            if mod not in modset:
                modset[mod] = []
            if algo not in modset[mod]:
                modset[mod].append(algo)
        else:
            modset[case] = []

    # Loop through all modules
    # portspecs is a global variable
    for moduleinfo in portspecs['modules']:

        # Get the module name
        module = moduleinfo['name']

        # Skip if doesn't meet specified modules
        if modset and module not in modset:
            continue

        _info("> - %s" % module, True)

        # Find the Python module dir (platform specific or generic)
        if os.path.isdir(maddir + "/ports/" + portid + "/" + dbver + "/modules/" + module):
            maddir_mod_py = maddir + "/ports/" + portid + "/" + dbver + "/modules"
        else:
            maddir_mod_py = maddir + "/modules"

        # Find the SQL module dir (platform specific or generic)
        if os.path.isdir(maddir + "/ports/" + portid + "/modules/" + module):
            maddir_mod_sql = maddir + "/ports/" + portid + "/modules"
        elif os.path.isdir(maddir + "/modules/" + module):
            maddir_mod_sql = maddir + "/modules"
        else:
            # This was a platform-specific module, for which no default exists.
            # We can just skip this module.
            continue

        # Make a temp dir for log files
        cur_tmpdir = tmpdir + "/" + module
        _make_dir(cur_tmpdir)

        # Loop through all SQL files for this module
        mask = maddir_mod_sql + '/' + module + '/*.sql_in'
        sql_files = glob.glob(mask)

        if not sql_files:
            _error("No files found in: %s" % mask, True)

        # Execute all SQL files for the module
        for sqlfile in sql_files:
            algoname = os.path.basename(sqlfile).split('.')[0]
            # Must be a real tuple: "algoname in ('svec')" is a substring
            # test against the string 'svec' and would also skip files named
            # e.g. 's' or 'vec'.
            if portid == 'hawq' and not is_hawq2 and algoname in ('svec',):
                continue

            # run only algo specified
            if module in modset and len(modset[module]) > 0 \
                    and algoname not in modset[module]:
                continue

            # Set file names
            tmpfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.tmp'
            logfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.log'
            retval = _run_sql_file(schema, maddir_mod_py, module, sqlfile,
                                   tmpfile, logfile, None, upgrade,
                                   sc)
            # Check the exit status
            if retval != 0:
                _error("Failed executing %s" % tmpfile, False)
                _error("Check the log at %s" % logfile, False)
                raise Exception("Failed executing %s" % tmpfile)
| # ------------------------------------------------------------------------------ |
| |
| |
def _db_rollback(drop_schema, keep_schema):
    """
    Undo an installation attempt by dropping the schema it created.
        @param drop_schema name of the schema to drop
        @param keep_schema name of the schema to rename back to drop_schema
                           afterwards; the rename is skipped when this is empty

    The function never returns normally: after reporting a finished rollback
    it raises a bare Exception.
    """
    _info("Rolling back the installation...", True)

    if not drop_schema:
        _error('No schema name to drop. Stopping rollback...', True)

    # Upper-cased form is used for messages only; the SQL keeps the name as given
    shown_name = drop_schema.upper()

    # Step 1: remove the current schema
    _info("> Dropping schema %s" % shown_name, verbose)
    try:
        drop_stmt = "DROP SCHEMA %s CASCADE;" % (drop_schema)
        _internal_run_query(drop_stmt, True)
    except:
        _error("Cannot drop schema %s. Stopping rollback..." % shown_name, True)

    # Step 2: move the preserved schema back under the original name
    if keep_schema:
        _db_rename_schema(keep_schema, drop_schema)

    _info("Rollback finished successfully.", True)
    raise Exception
| # ------------------------------------------------------------------------------ |
| |
| |
def unescape(string):
    r"""
    Unescape separation characters in connection strings, i.e., remove first
    backslash from "\/", "\@", "\:", and "\\".
        @param string escaped text, or None
        @returns the text with one level of escaping removed; None is passed
                 through unchanged
    """
    if string is None:
        return None
    # Both the pattern and the replacement are raw strings so that the
    # backslashes reach the re module untouched (a non-raw '\g' is an invalid
    # string escape and triggers a warning on newer interpreters).
    return re.sub(r'\\(?P<char>[/@:\\])', r'\g<char>', string)
| # ------------------------------------------------------------------------------ |
| |
| |
def parseConnectionStr(connectionStr):
    r"""
    @brief Parse connection strings of the form
           <tt>[username[/password]@][hostname][:port][/database]</tt>

    Separation characters (/@:) and the backslash (\) need to be escaped.
    @param connectionStr the connection string; None is treated like ""
    @returns A tuple (username, password, hostname, port, database). Field not
             specified will be None. The port is returned as the matched digit
             string; the other fields are unescaped.
    """
    if connectionStr is None:
        connectionStr = ""

    # One character of a field: either an ordinary character or one of the
    # escaped separators  \/  \@  \:  \\
    token = r'([^/@:\\]|\\/|\\@|\\:|\\\\)'
    pattern = (
        r'((?P<user>' + token + r'+)' +
        r'(/(?P<password>' + token + r'*))?@)?' +
        r'(?P<host>' + token + r'+)?' +
        r'(:(?P<port>[0-9]+))?' +
        r'(/(?P<database>' + token + r'+))?')

    # Every component of the pattern is optional, so the search always
    # succeeds (with an empty match at worst) and `match` is never None.
    match = re.search(pattern, connectionStr)
    return (
        unescape(match.group('user')),
        unescape(match.group('password')),
        unescape(match.group('host')),
        match.group('port'),
        unescape(match.group('database')))
| # ------------------------------------------------------------------------------ |
| |
| |
def parse_arguments():
    """
    Build the madpack command-line parser and parse the process arguments.
        @returns the argparse namespace with the attributes command, connstr,
                 schema, platform, verbose, keeplogs, tmpdir and testcase.
                 Options declared with nargs=1 (command, connstr, schema,
                 platform) hold a one-element list when given on the command
                 line; schema falls back to the plain string 'madlib' and
                 connstr to None when omitted.
    """
    epilog_msg = (
        "Example:\n"
        "\n"
        "$ madpack install -s madlib -p greenplum -c gpadmin@mdw:5432/testdb\n"
        "\n"
        "This will install MADlib objects into a Greenplum database called TESTDB\n"
        "running on server MDW:5432. Installer will try to login as GPADMIN\n"
        "and will prompt for password. The target schema will be MADLIB.\n")

    parser = argparse.ArgumentParser(
        prog="madpack",
        description='MADlib package manager (' + str(rev) + ')',
        argument_default=False,
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=epilog_msg)

    help_msg = (
        "One of the following options:\n"
        "install : run sql scripts to load into DB\n"
        "upgrade : run sql scripts to upgrade\n"
        "uninstall : run sql scripts to uninstall from DB\n"
        "reinstall : performs uninstall and install\n"
        "version : compare and print MADlib version (binaries vs database objects)\n"
        "install-check : test all installed modules\n"
        "\n"
        "(uninstall is currently unavailable for the HAWQ port)")
    # 'update' is accepted as well although the help text does not list it
    choice_list = ['install', 'update', 'upgrade', 'uninstall',
                   'reinstall', 'version', 'install-check']

    parser.add_argument('command', metavar='COMMAND', nargs=1,
                        choices=choice_list, help=help_msg)

    conn_help = (
        "Connection string of the following syntax:\n"
        "[user[/password]@][host][:port][/database]\n"
        "If not provided default values will be derived for PostgreSQL and Greenplum:\n"
        "- user: PGUSER or USER env variable or OS username\n"
        "- pass: PGPASSWORD env variable or runtime prompt\n"
        "- host: PGHOST env variable or 'localhost'\n"
        "- port: PGPORT env variable or '5432'\n"
        "- db: PGDATABASE env variable or OS username")
    parser.add_argument(
        '-c', '--conn', metavar='CONNSTR', nargs=1, dest='connstr', default=None,
        help=conn_help)

    parser.add_argument('-s', '--schema', nargs=1, dest='schema',
                        metavar='SCHEMA', default='madlib',
                        help="Target schema for the database objects.")

    parser.add_argument('-p', '--platform', nargs=1, dest='platform',
                        metavar='PLATFORM', choices=portid_list,
                        help="Target database platform, current choices: " + str(portid_list))

    parser.add_argument('-v', '--verbose', dest='verbose',
                        action="store_true", help="Verbose mode.")

    parser.add_argument('-l', '--keeplogs', dest='keeplogs', default=False,
                        action="store_true", help="Do not remove installation log files.")

    parser.add_argument('-d', '--tmpdir', dest='tmpdir', default='/tmp/',
                        help="Temporary directory location for installation log files.")

    parser.add_argument('-t', '--testcase', dest='testcase', default="",
                        help="Module names to test, comma separated. Effective only for install-check.")

    # Get the arguments
    return parser.parse_args()
| |
| |
def main(argv):
    """
    Command-line entry point: parse the arguments, probe the target database
    and run the requested madpack command (version, install, reinstall,
    uninstall, upgrade/update or install-check).
        @param argv command-line arguments; not read here, because
                    parse_arguments() lets argparse read sys.argv itself

    Module-level variables assigned by this function: verbose, keeplogs,
    tmpdir, portid, con_args, dbver, is_hawq2, maddir, maddir_conf,
    maddir_lib and portspecs.
    """
    global verbose, keeplogs, tmpdir, portid, con_args
    global dbver, is_hawq2, maddir, maddir_conf, maddir_lib, portspecs

    args = parse_arguments()
    command = args.command[0]

    verbose = args.verbose
    _info("Arguments: " + str(args), verbose)
    keeplogs = args.keeplogs

    try:
        tmpdir = tempfile.mkdtemp('', 'madlib.', args.tmpdir)
    except OSError as e:
        tmpdir = e.filename
        _error("cannot create temporary directory: '%s'." % tmpdir, True)

    # Parse SCHEMA
    # -s is declared with nargs=1, so a value given on the command line is a
    # one-element list while the default is a plain string. Test the type
    # rather than the length of the first element, which broke for
    # one-character schema names.
    if isinstance(args.schema, (list, tuple)):
        schema = args.schema[0].lower()
    else:
        schema = args.schema.lower()

    # Parse DB Platform (== PortID) and compare with Ports.yml
    if args.platform:
        try:
            # Get the DB platform name == DB port id
            portid = args.platform[0].lower()
            # Lookup only; fails when the port is not described in Ports.yml
            ports[portid]
        except:
            portid = None
            _error("Can not find specs for port %s" % (args.platform[0]), True)
    else:
        portid = None

    # Parse CONNSTR (only if PLATFORM and DBAPI2 are defined)
    if portid:
        connStr = "" if args.connstr is None else args.connstr[0]
        (c_user, c_pass, c_host, c_port, c_db) = parseConnectionStr(connStr)

        # Find the default values for PG and GP
        if portid in SUPPORTED_PORTS:
            if c_user is None:
                c_user = os.environ.get('PGUSER', getpass.getuser())
            if c_pass is None:
                c_pass = os.environ.get('PGPASSWORD', None)
            if c_host is None:
                c_host = os.environ.get('PGHOST', 'localhost')
            if c_port is None:
                c_port = os.environ.get('PGPORT', '5432')
            if c_db is None:
                c_db = os.environ.get('PGDATABASE', c_user)

        # Set connection variables
        con_args['host'] = c_host + ':' + c_port
        con_args['database'] = c_db
        con_args['user'] = c_user
        if c_pass is not None:
            con_args['password'] = c_pass

        # Try connecting to the database
        _info("Testing database connection...", verbose)

        try:
            # check for password only if required
            _internal_run_query("SELECT 1", False)
        except EnvironmentError:
            con_args['password'] = getpass.getpass("Password for user %s: " % c_user)
            _internal_run_query("SELECT 1", False)
        except:
            _error('Failed to connect to database', True)

        # Get DB version
        dbver = _get_dbver()
        if portid == "hawq" and _is_rev_gte(_get_rev_num(dbver), _get_rev_num('2.0')):
            is_hawq2 = True
        else:
            is_hawq2 = False

        # HAWQ < 2.0 has hard-coded schema name 'madlib'
        if portid == 'hawq' and not is_hawq2 and schema.lower() != 'madlib':
            _error("*** Installation is currently restricted only to 'madlib' schema ***", True)

        # update maddir to use a relative path if available
        maddir = _get_relative_maddir(maddir, portid)

        # Get MADlib version in DB
        dbrev = _get_madlib_dbrev(schema)

        # Sub-directories of the port directory whose name starts with a digit
        # are the DB versions for which support files are installed
        portdir = os.path.join(maddir, "ports", portid)
        supportedVersions = [dirItem for dirItem in os.listdir(portdir)
                             if os.path.isdir(os.path.join(portdir, dirItem)) and
                             re.match(r"^\d+", dirItem)]
        if dbver is None:
            # NOTE(review): the components are compared as strings here, so
            # e.g. '10' sorts before '9' - confirm this is acceptable.
            dbver = ".".join(
                map(str, max([versionStr.split('.')
                              for versionStr in supportedVersions])))
            _info("Could not parse version string reported by {DBMS}. Will "
                  "default to newest supported version of {DBMS} "
                  "({version}).".format(DBMS=ports[portid]['name'],
                                        version=dbver), True)
        else:
            _info("Detected %s version %s." % (ports[portid]['name'], dbver),
                  True)

            if portid == "hawq":
                # HAWQ (starting 2.0) and GPDB (starting 5.0) uses semantic versioning,
                # which implies all HAWQ 2.x or GPDB 5.x versions will have binary
                # compatibility. Hence, we can keep single folder for all 2.X / 5.X.
                if (_is_rev_gte(_get_rev_num(dbver), _get_rev_num('2.0')) and
                        not _is_rev_gte(_get_rev_num(dbver), _get_rev_num('3.0'))):
                    is_hawq2 = True
                    dbver = '2'
            elif portid == 'greenplum':
                # similar to HAWQ above, collapse all 5.X versions
                if (_is_rev_gte(_get_rev_num(dbver), _get_rev_num('5.0')) and
                        not _is_rev_gte(_get_rev_num(dbver), _get_rev_num('6.0'))):
                    dbver = '5'
                # Due to the ABI incompatibility between 4.3.4 and 4.3.5,
                # MADlib treats 4.3.5+ as DB version 4.3ORCA which is different
                # from 4.3. The name is suffixed with ORCA since optimizer (ORCA) is
                # 'on' by default in 4.3.5
                elif _is_rev_gte(_get_rev_num(dbver), _get_rev_num('4.3.4')):
                    dbver = '4.3ORCA'
                else:
                    # only need the first two digits for <= 4.3.4
                    dbver = '.'.join(dbver.split('.')[:2])

            if not os.path.isdir(os.path.join(portdir, dbver)):
                _error("This version is not among the %s versions for which "
                       "MADlib support files have been installed (%s)." %
                       (ports[portid]['name'], ", ".join(supportedVersions)), True)

        # Validate that db platform is correct
        if not _check_db_port(portid):
            _error("Invalid database platform specified.", True)

        # Adjust MADlib directories for this port (if they exist)
        port_ver_dir = maddir + "/ports/" + portid + "/" + dbver
        if os.path.isdir(port_ver_dir + "/config"):
            maddir_conf = port_ver_dir + "/config"
        else:
            maddir_conf = maddir + "/config"

        if os.path.isfile(port_ver_dir + "/lib/libmadlib.so"):
            maddir_lib = port_ver_dir + "/lib/libmadlib.so"
        else:
            maddir_lib = maddir + "/lib/libmadlib.so"

        # Get the list of modules for this port
        portspecs = configyml.get_modules(maddir_conf)
    else:
        con_args = None
        dbrev = None

    # Parse COMMAND argument and compare with Ports.yml

    # Make sure we have the necessary parameters to continue
    if command != 'version':
        if not portid:
            _error("Missing -p/--platform parameter.", True)
        if not con_args:
            _error("Unknown problem with database connection string: %s" % con_args, True)

    # COMMAND: version
    if command == 'version':
        _print_revs(rev, dbrev, con_args, schema)

    # COMMAND: uninstall/reinstall
    if command == 'uninstall' and (portid == 'hawq' and not is_hawq2):
        _error("madpack uninstall is currently not available for HAWQ", True)

    if command in ('uninstall', 'reinstall') and (portid != 'hawq' or is_hawq2):
        if _get_rev_num(dbrev) == [0]:
            _info("Nothing to uninstall. No version found in schema %s." % schema.upper(), True)
            return

        # Find any potential data to lose: columns of ordinary tables (in any
        # schema) whose type lives in the MADlib schema
        affected_objects = _internal_run_query("""
            SELECT
                n1.nspname AS schema,
                relname AS relation,
                attname AS column,
                typname AS type
            FROM
                pg_attribute a,
                pg_class c,
                pg_type t,
                pg_namespace n,
                pg_namespace n1
            WHERE
                n.nspname = '%s'
                AND t.typnamespace = n.oid
                AND a.atttypid = t.oid
                AND c.oid = a.attrelid
                AND c.relnamespace = n1.oid
                AND c.relkind = 'r'
            ORDER BY
                n1.nspname, relname, attname, typname""" % schema.lower(), True)

        _info("*** Uninstalling MADlib ***", True)
        _info("***********************************************************************************", True)
        _info("* Schema %s and all database objects depending on it will be dropped!" % schema.upper(), True)
        if affected_objects:
            _info("* If you continue the following data will be lost (schema : table.column : type):", True)
            for ao in affected_objects:
                _info('* - ' + ao['schema'] + ' : ' + ao['relation'] + '.' +
                      ao['column'] + ' : ' + ao['type'], True)
        _info("***********************************************************************************", True)
        _info("Would you like to continue? [Y/N]", True)
        go = raw_input('>>> ').upper()
        while go != 'Y' and go != 'N':
            go = raw_input('Yes or No >>> ').upper()

        # 2) Do the uninstall/drop
        if go == 'N':
            _info('No problem. Nothing dropped.', True)
            return

        elif go == 'Y':
            _info("> dropping schema %s" % schema.upper(), verbose)
            try:
                _internal_run_query("DROP SCHEMA %s CASCADE;" % (schema), True)
            except:
                _error("Cannot drop schema %s." % schema.upper(), True)

            _info('Schema %s (and all dependent objects) has been dropped.' % schema.upper(), True)
            _info('MADlib uninstalled successfully.', True)

        else:
            return

    # COMMAND: install/reinstall
    if command in ('install', 'reinstall'):
        # Refresh MADlib version in DB, None for GP/PG
        if command == 'reinstall':
            print("Setting MADlib database version to be None for reinstall")
            dbrev = None

        _info("*** Installing MADlib ***", True)

        # 1) Compare OS and DB versions.
        # noop if OS <= DB.
        _print_revs(rev, dbrev, con_args, schema)
        if _is_rev_gte(_get_rev_num(dbrev), _get_rev_num(rev)):
            _info("Current MADlib version already up to date.", True)
            return
        # proceed to create objects if nothing installed in DB or for HAWQ < 2.0
        elif dbrev is None or (portid == 'hawq' and not is_hawq2):
            pass
        # error and refer to upgrade if OS > DB
        else:
            _error(("Aborting installation: existing MADlib version detected in {0} schema\n"
                    "To upgrade the {0} schema to MADlib v{1} please run the following command:\n"
                    "madpack upgrade -s {0} -p {2} [-c ...]\n"
                    ).format(schema, rev, portid), True)

        # 2) Run installation
        # NOTE(review): the bare except is kept unchanged; narrowing it could
        # change which failures raised inside the install path get reported
        # here - confirm before tightening.
        try:
            _plpy_check(py_min_ver)
            _db_install(schema, dbrev, args.testcase)
        except:
            _error("MADlib installation failed.", True)

    # COMMAND: upgrade
    if command in ('upgrade', 'update'):
        _info("*** Upgrading MADlib ***", True)
        dbrev = _get_madlib_dbrev(schema)

        # 1) Check DB version. If None, nothing to upgrade.
        if not dbrev:
            _info("MADlib is not installed in {schema} schema and there "
                  "is nothing to upgrade. Please use install "
                  "instead.".format(schema=schema.upper()),
                  True)
            return

        # 2) Compare OS and DB versions. Continue if OS > DB.
        _print_revs(rev, dbrev, con_args, schema)
        if _is_rev_gte(_get_rev_num(dbrev), _get_rev_num(rev)):
            _info("Current MADlib version is already up-to-date.", True)
            return

        # Only the first two dot-separated components are compared numerically
        if float('.'.join(dbrev.split('.')[0:2])) < 1.0:
            _info("The version gap is too large, upgrade is supported only for "
                  "packages greater than or equal to v1.0.", True)
            return

        # 3) Run upgrade
        try:
            _plpy_check(py_min_ver)
            _db_upgrade(schema, dbrev)
        except Exception as e:
            # Diagnostic output (currently always printed) before reporting
            # the failure
            print("Exception: " + str(e))
            print(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])
            _error("MADlib upgrade failed.", True)

    # COMMAND: install-check
    if command == 'install-check':

        # 1) Compare OS and DB versions. Continue if OS = DB.
        if _get_rev_num(dbrev) != _get_rev_num(rev):
            _print_revs(rev, dbrev, con_args, schema)
            _info("Versions do not match. Install-check stopped.", True)
            return

        # Create install-check user
        test_user = ('madlib_' +
                     rev.replace('.', '').replace('-', '_') +
                     '_installcheck')
        try:
            _internal_run_query("DROP USER IF EXISTS %s;" % (test_user), False)
        except:
            # The plain drop failed; remove the objects the user owns first
            _internal_run_query("DROP OWNED BY %s CASCADE;" % (test_user), True)
            _internal_run_query("DROP USER IF EXISTS %s;" % (test_user), True)
        _internal_run_query("CREATE USER %s;" % (test_user), True)

        _internal_run_query("GRANT USAGE ON SCHEMA %s TO %s;" % (schema, test_user), True)

        # 2) Run test SQLs
        _info("> Running test scripts for:", verbose)

        caseset = (set([test.strip() for test in args.testcase.split(',')])
                   if args.testcase != "" else set())

        # modset maps module name -> list of algorithm names to run; an empty
        # list means every test file of that module
        modset = {}
        for case in caseset:
            if case.find('/') > -1:
                [mod, algo] = case.split('/')
                if mod not in modset:
                    modset[mod] = []
                if algo not in modset[mod]:
                    modset[mod].append(algo)
            else:
                modset[case] = []

        # Loop through all modules
        for moduleinfo in portspecs['modules']:

            # Get module name
            module = moduleinfo['name']

            # Skip if doesn't meet specified modules
            if modset is not None and len(modset) > 0 and module not in modset:
                continue
            # JIRA: MADLIB-1078 fix
            # Skip pmml during install-check (when run without the -t option).
            # We can still run install-check on pmml with '-t' option.
            if not modset and module in ['pmml']:
                continue
            _info("> - %s" % module, verbose)

            # Make a temp dir for this module (if doesn't exist)
            cur_tmpdir = tmpdir + '/' + module + '/test'  # tmpdir is a global variable
            _make_dir(cur_tmpdir)

            # Find the Python module dir (platform specific or generic)
            if os.path.isdir(maddir + "/ports/" + portid + "/" + dbver + "/modules/" + module):
                maddir_mod_py = maddir + "/ports/" + portid + "/" + dbver + "/modules"
            else:
                maddir_mod_py = maddir + "/modules"

            # Find the SQL module dir (platform specific or generic)
            if os.path.isdir(maddir + "/ports/" + portid + "/modules/" + module):
                maddir_mod_sql = maddir + "/ports/" + portid + "/modules"
            else:
                maddir_mod_sql = maddir + "/modules"

            # Prepare test schema
            test_schema = "madlib_installcheck_%s" % (module)
            _internal_run_query("DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;" %
                                (test_schema, test_schema), True)
            _internal_run_query("GRANT ALL ON SCHEMA %s TO %s;" %
                                (test_schema, test_user), True)

            # Switch to test user and prepare the search_path
            pre_sql = '-- Switch to test user:\n' \
                      'SET ROLE %s;\n' \
                      '-- Set SEARCH_PATH for install-check:\n' \
                      'SET search_path=%s,%s;\n' \
                      % (test_user, test_schema, schema)

            # Loop through all test SQL files for this module
            sql_files = maddir_mod_sql + '/' + module + '/test/*.sql_in'
            for sqlfile in sorted(glob.glob(sql_files), reverse=True):
                algoname = os.path.basename(sqlfile).split('.')[0]
                # run only algo specified
                if module in modset and len(modset[module]) > 0 \
                        and algoname not in modset[module]:
                    continue

                # Set file names
                tmpfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.tmp'
                logfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.log'

                # Run the SQL
                run_start = datetime.datetime.now()
                retval = _run_sql_file(schema, maddir_mod_py, module,
                                       sqlfile, tmpfile, logfile, pre_sql)
                # Runtime evaluation
                run_end = datetime.datetime.now()
                milliseconds = round((run_end - run_start).seconds * 1000 +
                                     (run_end - run_start).microseconds / 1000)

                # Check the exit status
                if retval != 0:
                    _error("Failed executing %s" % tmpfile, False)
                    _error("Check the log at %s" % logfile, False)
                    result = 'FAIL'
                    # Keep the logs of a failed test regardless of -l
                    keeplogs = True
                # Since every single statement in the test file gets logged,
                # an empty log file indicates an empty or a failed test
                elif os.path.isfile(logfile) and os.path.getsize(logfile) > 0:
                    result = 'PASS'
                # Otherwise
                else:
                    result = 'ERROR'

                # Spit the line
                print("TEST CASE RESULT|Module: " + module +
                      "|" + os.path.basename(sqlfile) + "|" + result +
                      "|Time: %d milliseconds" % (milliseconds))

            # Cleanup test schema for the module
            _internal_run_query("DROP SCHEMA IF EXISTS %s CASCADE;" % (test_schema), True)

        # Drop install-check user
        _internal_run_query("DROP OWNED BY %s CASCADE;" % (test_user), True)
        _internal_run_query("DROP USER %s;" % (test_user), True)
| |
| |
| # ----------------------------------------------------------------------- |
| # Unit tests |
| # ----------------------------------------------------------------------- |
class RevTest(unittest.TestCase):
    # Unit tests for the revision helpers _get_rev_num and _is_rev_gte.

    def setUp(self):
        # no fixtures required
        pass

    def tearDown(self):
        # nothing to clean up
        pass

    def test_get_rev_num(self):
        parse = _get_rev_num

        # not using assertGreaterEqual to keep Python 2.6 compatibility
        self.assertTrue(parse('4.3.10') >= parse('4.3.5'))
        self.assertTrue(parse('1.9.10-dev') >= parse('1.9.9'))
        self.assertNotEqual(parse('1.9.10-dev'), parse('1.9.10'))
        self.assertEqual(parse('1.9.10'), [1, 9, 10])
        self.assertEqual(parse('1.0.0+20130313144700'),
                         [1, 0, 0, '20130313144700'])
        self.assertNotEqual(parse('1.0.0+20130313144700'),
                            parse('1.0.0-beta+exp.sha.5114f85'))

    def test_is_rev_gte(self):
        parse = _get_rev_num
        gte = _is_rev_gte

        # Expected ordering:
        # 1.0.0-alpha < 1.0.0-alpha.1 < 1.0.0-alpha.beta <
        # 1.0.0-beta < 1.0.0-beta.2 < 1.0.0-beta.11 < 1.0.0-rc.1 < 1.0.0

        # Raw component lists, including empty and None components
        self.assertTrue(gte([], []))
        self.assertTrue(gte([1, 9], [1, None]))
        self.assertFalse(gte([1, None], [1, 9]))

        # Left side is greater than or equal to the right side
        self.assertTrue(gte(parse('4.3.10'), parse('4.3.5')))
        self.assertTrue(gte(parse('1.9.0'), parse('1.9.0')))
        self.assertTrue(gte(parse('1.9.1'), parse('1.9.0')))
        self.assertTrue(gte(parse('1.9.1'), parse('1.9')))
        self.assertTrue(gte(parse('1.9.0'), parse('1.9.0-dev')))
        self.assertTrue(gte(parse('1.9.1'), parse('1.9-dev')))
        self.assertTrue(gte(parse('1.9.0-dev'), parse('1.9.0-dev')))
        self.assertTrue(gte([1, 9, 'rc', 1], [1, 9, 'dev', 0]))

        # Left side is strictly smaller than the right side
        self.assertFalse(gte(parse('1.9.1'), parse('1.10')))
        self.assertFalse(gte([1, 9, 'dev', 1], [1, 9, 'rc', 0]))
        self.assertFalse(gte([1, 9, 'alpha'], [1, 9, 'alpha', 0]))
        self.assertFalse(gte([1, 9, 'alpha', 1], [1, 9, 'alpha', 'beta']))
        self.assertFalse(gte([1, 9, 'alpha.1'], [1, 9, 'alpha.beta']))
        self.assertFalse(gte([1, 9, 'beta', 2], [1, 9, 'beta', 4]))
        self.assertFalse(gte([1, 9, 'beta', '1'], [1, 9, 'rc', '0']))
        self.assertFalse(gte([1, 9, 'rc', 1], [1, 9, 0]))
        self.assertFalse(gte([1, 9, '0.2'], [1, 9, '0.3']))
        self.assertFalse(gte([1, 9, 'build2'], [1, 9, 'build3']))

        self.assertFalse(gte(parse('1.0.0+20130313144700'),
                             parse('1.0.0-beta+exp.sha.5114f85')))
| |
| |
| # ------------------------------------------------------------------------------ |
| # Start Here |
| # ------------------------------------------------------------------------------ |
if __name__ == "__main__":
    # Set to True to run the RevTest unit tests instead of madpack itself
    RUN_TESTS = False

    if not RUN_TESTS:
        # Normal operation: run the requested madpack command
        main(sys.argv[1:])

        # Optional log files cleanup
        # (keeplogs and tmpdir are global variables assigned by main)
        if keeplogs:
            print("INFO: Log files saved in " + tmpdir)
        else:
            shutil.rmtree(tmpdir)
    else:
        unittest.main()