| #!/usr/bin/env python |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Main Madpack installation executable. |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| import sys |
| import getpass |
| import re |
| import os |
| import glob |
| import traceback |
| import subprocess |
| import datetime |
| from time import strftime |
| import tempfile |
| import shutil |
| import yaml |
| from collections import defaultdict |
| from gppylib.operations.gpMigratorUtil import * |
| |
| logger = get_default_logger() |
| |
| # ---------------------------------------------------------------------- |
| |
| def run_sql(sql, portid, con_args): |
| """ |
| @brief Wrapper function for ____run_sql_query |
| """ |
| return ____run_sql_query(sql, True, portid, con_args) |
| |
| # ---------------------------------------------------------------------- |
| |
| def get_signature_for_compare(schema, proname, rettype, argument): |
| """ |
| @brief Get the signature of a UDF/UDA for comparison |
| """ |
| signature = '{0} {1}.{2}({3})'.format(rettype.strip(), schema.strip(), |
| proname.strip(), argument.strip()) |
| signature = re.sub('"', '', signature) |
| return signature.lower() |
| |
| # ---------------------------------------------------------------------- |
| |
| class UpgradeBase: |
| """ |
| @brief Base class for handling the upgrade |
| """ |
| def __init__(self, schema, portid, con_args): |
| self._schema = schema.lower() |
| self._portid = portid |
| self._con_args = con_args |
| self._schema_oid = None |
| self._get_schema_oid() |
| |
| # ================================ |
| |
| """ |
| @brief Wrapper function for run_sql |
| """ |
| def _run_sql(self, sql): |
| return run_sql(sql, self._portid, self._con_args) |
| |
| # ================================ |
| |
| """ |
| @brief Get the oids of some objects from the catalog in the current version |
| """ |
| def _get_schema_oid(self): |
| self._schema_oid = self._run_sql(""" |
| SELECT oid FROM pg_namespace WHERE nspname = '{schema}' |
| """.format(schema=self._schema))[0]['oid'] |
| |
| # ================================ |
| |
| def _get_function_info(self, oid): |
| """ |
| @brief Get the function name, return type, and arguments given an oid |
| @note The function can only handle the case that proallargtypes is null, |
| refer to pg_catalog.pg_get_function_identity_argument and |
| pg_catalog.pg_get_function_result in PG for a complete implementation, which are |
| not supported by GP |
| """ |
| row = self._run_sql(""" |
| SELECT |
| max(proname) AS proname, |
| max(rettype) AS rettype, |
| array_to_string( |
| array_agg(argname || ' ' || argtype order by i), ',') AS argument |
| FROM |
| ( |
| SELECT |
| proname, |
| textin(regtypeout(prorettype::regtype)) AS rettype, |
| CASE array_upper(proargtypes,1) WHEN -1 THEN '' |
| ELSE textin(regtypeout(unnest(proargtypes)::regtype)) |
| END AS argtype, |
| CASE WHEN proargnames IS NULL THEN '' |
| ELSE unnest(proargnames) |
| END AS argname, |
| CASE array_upper(proargtypes,1) WHEN -1 THEN 1 |
| ELSE generate_series(0, array_upper(proargtypes, 1)) |
| END AS i |
| FROM |
| pg_proc AS p |
| WHERE |
| oid = {oid} |
| ) AS f |
| """.format(oid=oid)) |
| return {"proname": row[0]['proname'], |
| "rettype": row[0]['rettype'], |
| "argument": row[0]['argument']} |
| |
| # ---------------------------------------------------------------------- |
| |
| class ChangeHandler(UpgradeBase): |
| """ |
| @brief This class reads changes from the configuration file and handles |
| the dropping of objects |
| """ |
| def __init__(self, schema, portid, con_args, maddir): |
| UpgradeBase.__init__(self, schema, portid, con_args) |
| self._opr_ind_svec = None |
| self._get_opr_indepent_svec() |
| self._maddir = maddir |
| #self._mad_dbrev = mad_dbrev |
| self._newmodule = None |
| self._udt = None |
| self._udf = None |
| self._uda = None |
| self._udc = None |
| self._load() |
| |
| # ================================ |
| |
| def _get_opr_indepent_svec(self): |
| """ |
| @brief Get the User Defined Operators independent of svec in the current version |
| """ |
| rows = self._run_sql(""" |
| SELECT |
| oprname, |
| tl.typname AS typ_left, |
| nspl.nspname AS nsp_left, |
| tr.typname AS typ_right, |
| nspr.nspname AS nsp_right |
| FROM |
| pg_operator AS o, |
| pg_type AS tl, |
| pg_type AS tr, |
| pg_namespace AS nspl, |
| pg_namespace AS nspr |
| WHERE |
| oprnamespace = {schema_oid} AND |
| ( |
| oprleft <> '{schema_madlib}.svec'::regtype AND |
| oprright <> '{schema_madlib}.svec'::regtype AND |
| oprleft = tl.oid AND |
| oprright = tr.oid AND |
| tl.typnamespace = nspl.oid AND |
| tr.typnamespace = nspr.oid |
| ) |
| """.format(schema_madlib=self._schema, schema_oid=self._schema_oid)) |
| self._opr_ind_svec = {} |
| for row in rows: |
| self._opr_ind_svec[row['oprname']] = row |
| |
| # ================================ |
| |
| def _load_config_param(self, config_iterable): |
| """ |
| Replace schema_madlib with the appropriate schema name and |
| make all function names lower case to ensure ease of comparison. |
| |
| Args: |
| @param config_iterable is an iterable of dictionaries, each with |
| key = object name (eg. function name) and value = details |
| for the object. The details for the object are assumed to |
| be in a dictionary with following keys: |
| rettype: Return type |
| argument: List of arguments |
| |
| Returns: |
| A dictionary that lists all specific objects (functions, aggregates, etc) |
| with object name as key and a list as value, where the list |
| contains all the items present in |
| |
| another dictionary with objects details |
| as the value. |
| """ |
| _return_obj = defaultdict(list) |
| if config_iterable is not None: |
| for each_config in config_iterable: |
| for obj_name, obj_details in each_config.iteritems(): |
| rettype = obj_details['rettype'].lower().replace( |
| 'schema_madlib', self._schema) |
| if obj_details['argument'] is not None: |
| argument = obj_details['argument'].lower().replace( |
| 'schema_madlib', self._schema) |
| all_arguments = [each_arg.strip() |
| for each_arg in argument.split(',')] |
| else: |
| all_arguments = [] |
| _return_obj[obj_name].append( |
| {'rettype': rettype, |
| 'argument': ','.join(all_arguments)}) |
| return _return_obj |
| |
| # ================================ |
| |
| def _load(self): |
| """ |
| @brief Load the configuration file |
| """ |
| filename = os.path.join(self._maddir, 'changelist.yaml') |
| |
| config = yaml.load(open(filename)) |
| self._newmodule = config['new module'] if config['new module'] else {} |
| self._udt = config['udt'] if config['udt'] else {} |
| self._udc = config['udc'] if config['udc'] else {} |
| self._udf = self._load_config_param(config['udf']) |
| self._uda = self._load_config_param(config['uda']) |
| |
| # ================================ |
| |
| @property |
| def newmodule(self): |
| return self._newmodule |
| |
| # ================================ |
| |
| @property |
| def udt(self): |
| return self._udt |
| |
| # ================================ |
| |
| @property |
| def uda(self): |
| return self._uda |
| |
| # ================================ |
| |
| @property |
| def udf(self): |
| return self._udf |
| |
| # ================================ |
| |
| @property |
| def udc(self): |
| return self._udc |
| |
| # ================================ |
| |
| def get_udf_signature(self): |
| """ |
| @brief Get the list of UDF signatures for comparison |
| """ |
| res = defaultdict(bool) |
| for udf in self._udf: |
| for item in self._udf[udf]: |
| signature = get_signature_for_compare( |
| self._schema, udf, item['rettype'], item['argument']) |
| res[signature] = True |
| return res |
| |
| # ================================ |
| |
| def get_uda_signature(self): |
| """ |
| @brief Get the list of UDA signatures for comparison |
| """ |
| res = defaultdict(bool) |
| for uda in self._uda: |
| for item in self._uda[uda]: |
| signature = get_signature_for_compare( |
| self._schema, uda, item['rettype'], item['argument']) |
| res[signature] = True |
| return res |
| |
| # ---------------------------------------------------------------------- |
| |
| class ViewDependency(UpgradeBase): |
| """ |
| @brief This class detects the direct/recursive view dependencies on MADLib |
| UDFs/UDAs defined in the current version |
| """ |
| def __init__(self, schema, portid, con_args, ch): |
| UpgradeBase.__init__(self, schema, portid, con_args) |
| self._ch = ch |
| self._view2proc = None |
| self._view2view = None |
| self._view2def = None |
| self._detect_direct_view_dependency() |
| self._detect_recursive_view_dependency() |
| self._filter_recursive_view_dependency() |
| |
| # ================================ |
| |
| """ |
| @brief Detect direct view dependencies on MADLib UDFs/UDAs |
| """ |
| def _detect_direct_view_dependency(self): |
| rows = self._run_sql(""" |
| SELECT |
| view, nsp.nspname AS schema, procname, procoid, proisagg |
| FROM |
| pg_namespace nsp, |
| ( |
| SELECT |
| c.relname AS view, |
| c.relnamespace AS namespace, |
| p.proname As procname, |
| p.oid AS procoid, |
| p.proisagg AS proisagg |
| FROM |
| pg_class AS c, |
| pg_rewrite AS rw, |
| pg_depend AS d, |
| pg_proc AS p |
| WHERE |
| c.oid = rw.ev_class AND |
| rw.oid = d.objid AND |
| d.classid = 'pg_rewrite'::regclass AND |
| d.refclassid = 'pg_proc'::regclass AND |
| d.refobjid = p.oid AND |
| p.pronamespace = {schema_madlib_oid} |
| ) t1 |
| WHERE |
| t1.namespace = nsp.oid |
| """.format(schema_madlib_oid=self._schema_oid)) |
| |
| self._view2proc = defaultdict(list) |
| for row in rows: |
| if row['procname'] in self._ch.udf.keys() + self._ch.uda.keys(): |
| key = (row['schema'], row['view']) |
| self._view2proc[key].append( |
| (row['procname'], row['procoid'], |
| True if row['proisagg'] == 't' else False)) |
| |
| # ================================ |
| |
| """ |
| @brief Detect recursive view dependencies (view on view) |
| """ |
| def _detect_recursive_view_dependency(self): |
| rows = self._run_sql(""" |
| SELECT |
| nsp1.nspname AS depender_schema, |
| depender, |
| nsp2.nspname AS dependee_schema, |
| dependee |
| FROM |
| pg_namespace AS nsp1, |
| pg_namespace AS nsp2, |
| ( |
| SELECT |
| c.relname depender, |
| c.relnamespace AS depender_nsp, |
| c1.relname AS dependee, |
| c1.relnamespace AS dependee_nsp |
| FROM |
| pg_rewrite AS rw, |
| pg_depend AS d, |
| pg_class AS c, |
| pg_class AS c1 |
| WHERE |
| rw.ev_class = c.oid AND |
| rw.oid = d.objid AND |
| d.classid = 'pg_rewrite'::regclass AND |
| d.refclassid = 'pg_class'::regclass AND |
| d.refobjid = c1.oid AND |
| c1.relkind = 'v' AND |
| c.relname <> c1.relname |
| GROUP BY |
| depender, depender_nsp, dependee, dependee_nsp |
| ) t1 |
| WHERE |
| t1.depender_nsp = nsp1.oid AND |
| t1.dependee_nsp = nsp2.oid |
| """) |
| |
| self._view2view = defaultdict(list) |
| for row in rows: |
| key = (row['depender_schema'], row['depender']) |
| val = (row['dependee_schema'], row['dependee']) |
| self._view2view[key].append(val) |
| |
| # ================================ |
| |
| """ |
| @brief Filter out recursive view dependencies which are independent of |
| MADLib UDFs/UDAs |
| """ |
| def _filter_recursive_view_dependency(self): |
| # Get recursive dependee list |
| dependeelist = [] |
| checklist = self._view2proc |
| while True: |
| dependeelist.extend(checklist.keys()) |
| new_checklist = defaultdict(bool) |
| for depender in self._view2view.keys(): |
| for dependee in self._view2view[depender]: |
| if dependee in checklist: |
| new_checklist[depender] = True |
| break |
| if len(new_checklist) == 0: |
| break |
| else: |
| checklist = new_checklist |
| |
| # Filter recursive dependencies not related with MADLib UDF/UDAs |
| fil_view2view = defaultdict(list) |
| for depender in self._view2view: |
| dependee = self._view2view[depender] |
| dependee = [r for r in dependee if r in dependeelist] |
| if len(dependee) > 0: |
| fil_view2view[depender] = dependee |
| |
| self._view2view = fil_view2view |
| |
| # ================================ |
| |
| """ |
| @brief Build the dependency graph (depender-to-dependee adjacency list) |
| """ |
| def _build_dependency_graph(self, hasProcDependency=False): |
| der2dee = self._view2view.copy() |
| for view in self._view2proc: |
| if view not in self._view2view: |
| der2dee[view] = [] |
| if hasProcDependency: |
| der2dee[view].extend(self._view2proc[view]) |
| |
| graph = der2dee.copy() |
| for der in der2dee: |
| for dee in der2dee[der]: |
| if dee not in graph: |
| graph[dee] = [] |
| return graph |
| |
| # ================================ |
| |
| """ |
| @brief Check dependencies |
| """ |
| def has_dependency(self): |
| return len(self._view2proc) > 0 |
| |
| # ================================ |
| |
| """ |
| @brief Get the depended UDF/UDA signatures for comparison |
| """ |
| def get_depended_func_signature(self, aggregate=True): |
| res = {} |
| for procs in self._view2proc.values(): |
| for proc in procs: |
| if proc[2] is aggregate and (self._schema, proc) not in res: |
| funcinfo = self._get_function_info(proc[1]) |
| if aggregate: |
| funcinfo['argument'] = ','.join([s.strip() for s in funcinfo['argument'].split(',')]) |
| else: |
| funcinfo['argument'] = (','.join([m.group(1).strip() |
| for m in re.finditer(r"[\"\w]+([^,]*)", funcinfo['argument']) |
| if m.group(1) != ''])) |
| signature = get_signature_for_compare(self._schema, proc[0], |
| funcinfo['rettype'], |
| funcinfo['argument']) |
| res[signature] = True |
| return res |
| |
| # ================================ |
| |
| def get_proc_w_dependency(self, aggregate=True): |
| res = [] |
| for procs in self._view2proc.values(): |
| for proc in procs: |
| if proc[2] is aggregate and (self._schema, proc) not in res: |
| res.append((self._schema, proc)) |
| res.sort() |
| return res |
| |
| # ================================ |
| |
| def get_depended_uda(self): |
| """ |
| @brief Get dependent UDAs |
| """ |
| self.get_proc_w_dependency(aggregate=True) |
| |
| # ================================ |
| |
| def get_depended_udf(self): |
| """ |
| @brief Get dependent UDFs |
| """ |
| self.get_proc_w_dependency(aggregate=False) |
| |
| # ================================ |
| |
| def _node_to_str(self, node): |
| if len(node) == 2: |
| res = '%s.%s' % (node[0], node[1]) |
| else: |
| res = '%s.%s' % (self._schema, node[0]) |
| return res |
| |
| # ================================ |
| |
| def _nodes_to_str(self, nodes): |
| return [self._node_to_str(i) for i in nodes] |
| |
| # ================================ |
| |
| def get_dependency_graph_str(self): |
| """ |
| @brief Get the dependency graph string for print |
| """ |
| graph = self._build_dependency_graph(True) |
| nodes = list(graph.keys()) |
| nodes.sort() |
| res = ["Dependency Graph (Depender-Dependee Adjacency List):\n"] |
| for node in nodes: |
| depends = self._nodes_to_str(graph[node]) |
| if len(depends) > 0: |
| res.append("{0} -> [{1}]".format( |
| self._node_to_str(node), |
| ','.join(depends))) |
| return "\n\t\t\t\t".join(res) |
| |
| # ---------------------------------------------------------------------- |
| |
| class TableDependency(UpgradeBase): |
| """ |
| @brief This class detects the table dependencies on MADLib UDTs defined in the |
| current version |
| """ |
| def __init__(self, schema, portid, con_args, ch): |
| UpgradeBase.__init__(self, schema, portid, con_args) |
| self._ch = ch |
| self._table2type = None |
| self._detect_table_dependency() |
| |
| # ================================ |
| |
| def _detect_table_dependency(self): |
| """ |
| @brief Detect the table dependencies on MADLib UDTs |
| """ |
| rows = self._run_sql(""" |
| SELECT |
| nsp.nspname AS schema, |
| relname AS relation, |
| attname AS column, |
| typname AS type |
| FROM |
| pg_attribute a, |
| pg_class c, |
| pg_type t, |
| pg_namespace nsp |
| WHERE |
| t.typnamespace = {schema_madlib_oid} |
| AND a.atttypid = t.oid |
| AND c.oid = a.attrelid |
| AND c.relnamespace = nsp.oid |
| AND c.relkind = 'r' |
| ORDER BY |
| nsp.nspname, relname, attname, typname |
| """.format(schema_madlib_oid=self._schema_oid)) |
| |
| self._table2type = defaultdict(list) |
| for row in rows: |
| key = (row['schema'], row['relation']) |
| if (row['type'] != 'svec' and row['type'] != 'bytea8' and |
| row['type'] in [i.keys()[0] for i in self._ch.udt]): |
| self._table2type[key].append( |
| (row['column'], row['type'])) |
| |
| # ================================ |
| |
| def has_dependency(self): |
| """ |
| @brief Check dependencies |
| """ |
| return len(self._table2type) > 0 |
| |
| # ================================ |
| |
| def get_depended_udt(self): |
| """ |
| @brief Get the list of depended UDTs |
| """ |
| res = defaultdict(bool) |
| for table in self._table2type: |
| for (col, typ) in self._table2type[table]: |
| if typ not in res: |
| res[typ] = True |
| return res |
| |
| # ================================ |
| |
| def get_dependency_str(self): |
| """ |
| @brief Get the dependencies in string for print |
| """ |
| res = ['Table Dependency (schema.table.column -> MADlib type):\n'] |
| for table in self._table2type: |
| for (col, udt) in self._table2type[table]: |
| res.append("{0}.{1}.{2} -> {3}".format(table[0], table[1], col, |
| udt)) |
| return "\n\t\t\t\t".join(res) |
| |
| # ---------------------------------------------------------------------- |
| |
| # Required Python version |
| py_min_ver = [2,6] |
| |
| # Check python version |
| if sys.version_info[:2] < py_min_ver: |
| print "ERROR: python version too old (%s). You need %s or greater." \ |
| % ('.'.join(str(i) for i in sys.version_info[:3]), '.'.join(str(i) for i in py_min_ver)) |
| exit(1) |
| |
| # The default path for this script and changelist file is hardcoded here |
| maddir = os.path.dirname(os.path.abspath(__file__)) |
| |
| # Import MADlib python modules |
| import argparse |
| |
| # ---------------------------------------------------------------------- |
| |
| # Some read-only variables |
| this = os.path.basename(sys.argv[0]) # name of this script |
| |
| # Global variables |
| portid = None # Target port ID (eg: pg90, gp40) |
| dbconn = None # DB Connection object |
| dbver = None # DB version |
| con_args = {} # DB connection arguments |
| verbose = None # Verbose flag |
| |
| # ---------------------------------------------------------------------- |
| |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Create a temp dir |
| # @param dir temp directory path |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| def __make_dir(dir): |
| if not os.path.isdir(dir): |
| try: |
| os.makedirs(dir) |
| except: |
| print "ERROR: can not create directory: %s. Check permissions." % dir |
| exit(1) |
| |
| # ---------------------------------------------------------------------- |
| |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Runs a SQL query on the target platform DB |
| # using the default command-line utility. |
| # Very limited: |
| # - no text output with "new line" characters allowed |
| # @param sql query text to execute |
| # @param show_error displays the SQL error msg |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| def __run_sql_query(sql, show_error): |
| global portid |
| global con_args |
| return ____run_sql_query(sql, show_error, portid, con_args) |
| |
| # ---------------------------------------------------------------------- |
| |
| def ____run_sql_query(sql, show_error, portid = portid, con_args = con_args): |
| # psql |
| if portid in ('greenplum', 'postgres', 'hawq'): |
| |
| # Define sqlcmd |
| sqlcmd = 'psql' |
| delimiter = '|' |
| |
| # Test the DB cmd line utility |
| std, err = subprocess.Popen(['which', sqlcmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() |
| if std == '': |
| logger.fatal("Command not found: %s" % sqlcmd) |
| raise RuntimeError("Command not found: %s" % sqlcmd) |
| |
| # Run the query |
| runcmd = [ sqlcmd, |
| '-h', con_args['host'].split(':')[0], |
| '-p', con_args['host'].split(':')[1], |
| '-d', con_args['database'], |
| '-U', con_args['user'], |
| '-F', delimiter, |
| '-Ac', "set CLIENT_MIN_MESSAGES=error; " + sql] |
| runenv = os.environ |
| runenv["PGPASSWORD"] = con_args['password'] |
| runenv["PGOPTIONS"] = '-c search_path=public' |
| std, err = subprocess.Popen(runcmd, env=runenv, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() |
| if err: |
| logger.fatal("SQL command failed: \nSQL: %s \n%s" % (sql, err)) |
| raise RuntimeError("SQL command failed: \nSQL: %s \n%s" % (sql, err)) |
| |
| # Convert the delimited output into a dictionary |
| results = [] # list of rows |
| i = 0 |
| for line in std.splitlines(): |
| if i == 0: |
| cols = [name for name in line.split(delimiter)] |
| else: |
| row = {} # dict of col_name:col_value pairs |
| c = 0 |
| for val in line.split(delimiter): |
| row[cols[c]] = val |
| c += 1 |
| results.insert(i,row) |
| i += 1 |
| # Drop the last line: "(X rows)" |
| try: |
| results.pop() |
| except: |
| pass |
| |
| return results |
| |
| # ---------------------------------------------------------------------- |
| |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Upgrade MADlib |
| # @param schema MADlib schema name |
| # @param dbrev DB-level MADlib version |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| def __db_upgrade(schema): |
| #logger.info("Detecting dependencies...") |
| |
| #logger.info("Loading change list...") |
| ch = ChangeHandler(schema, portid, con_args, maddir) |
| |
| #logger.info("Detecting table dependencies...") |
| td = TableDependency(schema, portid, con_args, ch) |
| |
| #logger.info("Detecting view dependencies...") |
| vd = ViewDependency(schema, portid, con_args, ch) |
| |
| has_dependency = False |
| |
| if td.has_dependency(): |
| logger.info("*"*50) |
| logger.info("Following user tables are dependent on MADlib types:") |
| logger.info(td.get_dependency_str() + "\n") |
| logger.info("*"*50) |
| cd_udt = [udt for udt in td.get_depended_udt() |
| if udt in [i.keys()[0] for i in ch.udt]] |
| if len(cd_udt) > 0: |
| has_dependency = True |
| logger.info(""" |
| |
| User has objects dependent on the following MADlib types |
| which are to be dropped during migration! |
| |
| {0} |
| |
| These objects need to be dropped before proceeding with |
| migration. Please backup them if necessary. |
| """.format('\n\t\t\t'.join(cd_udt))) |
| |
| if vd.has_dependency(): |
| logger.info("*"*50) |
| logger.info("Following user views are dependent on MADlib objects:") |
| logger.info(vd.get_dependency_graph_str() + "\n") |
| logger.info("*"*50) |
| |
| c_udf = ch.get_udf_signature() |
| d_udf = vd.get_depended_func_signature(False) |
| cd_udf = [udf for udf in d_udf if udf in c_udf] |
| if len(cd_udf) > 0: |
| has_dependency = True |
| logger.info(""" |
| |
| User has objects dependent on the following MADlib functions |
| which are to be dropped during migration! |
| |
| {0} |
| |
| These objects need to be dropped before proceeding with |
| migration. Please backup them if necessary. |
| """.format('\n\t\t\t'.join(cd_udf))) |
| |
| c_uda = ch.get_uda_signature() |
| d_uda = vd.get_depended_func_signature(True) |
| cd_uda = [uda for uda in d_uda if uda in c_uda] |
| if len(cd_uda) > 0: |
| has_dependency = True |
| logger.info(""" |
| |
| User has objects dependent on the following MADlib aggregates |
| which are to be dropped during migration! |
| |
| {0} |
| |
| These objects need to be dropped before proceeding with |
| migration. Please backup them if necessary. |
| """.format('\n\t\t\t'.join(cd_uda))) |
| |
| return has_dependency |
| |
| # ---------------------------------------------------------------------- |
| |
| def unescape(string): |
| """ |
| Unescape separation characters in connection strings, i.e., remove first |
| backslash from "\/", "\@", "\:", and "\\". |
| """ |
| if string is None: |
| return None |
| else: |
| return re.sub(r'\\(?P<char>[/@:\\])', '\g<char>', string) |
| |
| # ---------------------------------------------------------------------- |
| |
| def parseConnectionStr(connectionStr): |
| """ |
| @brief Parse connection strings of the form |
| <tt>[username[/password]@][hostname][:port][/database]</tt> |
| |
| Separation characters (/@:) and the backslash (\) need to be escaped. |
| @returns A tuple (username, password, hostname, port, database). Field not |
| specified will be None. |
| """ |
| match = re.search( |
| r'((?P<user>([^/@:\\]|\\/|\\@|\\:|\\\\)+)' + |
| r'(/(?P<password>([^/@:\\]|\\/|\\@|\\:|\\\\)*))?@)?' + |
| r'(?P<host>([^/@:\\]|\\/|\\@|\\:|\\\\)+)?' + |
| r'(:(?P<port>[0-9]+))?' + |
| r'(/(?P<database>([^/@:\\]|\\/|\\@|\\:|\\\\)+))?', connectionStr) |
| return ( |
| unescape(match.group('user')), |
| unescape(match.group('password')), |
| unescape(match.group('host')), |
| match.group('port'), |
| unescape(match.group('database'))) |
| |
| # ---------------------------------------------------------------------- |
| |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Main |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| def depcheck_for_madlib(argv = ''): |
| |
| parser = argparse.ArgumentParser( |
| description='MADlib package manager (' + '' + ')', |
| argument_default=False, |
| formatter_class=argparse.RawTextHelpFormatter, |
| epilog="""Example: |
| |
| $ ./depcheck.py -c gpadmin@mdw:5432 |
| |
| This will detect all the dependencies on MADlib objects in all the |
| database running on server mdw:5432. Installer will try to login as |
| GPADMIN and will prompt for password. |
| """) |
| |
| parser.add_argument( |
| '-c', '--conn', metavar='CONNSTR', nargs=1, dest='connstr', default=None, |
| help= "Connection string of the following syntax:\n" |
| + " [user[/password]@][host][:port]\n" |
| + "If not provided default values will be derived for PostgerSQL and Greenplum:\n" |
| + "- user: PGUSER or USER env variable or OS username\n" |
| + "- pass: PGPASSWORD env variable or runtime prompt\n" |
| + "- host: PGHOST env variable or 'localhost'\n" |
| + "- port: PGPORT env variable or '5432'\n" |
| ) |
| |
| args = parser.parse_args(argv) |
| schema = "madlib" |
| |
| ## |
| # Parse DB Platform (== PortID) and compare with Ports.yml |
| ## |
| global portid |
| global dbver |
| |
| portid = "postgres" |
| |
| ## |
| # Parse CONNSTR (only if PLATFORM and DBAPI2 are defined) |
| ## |
| if portid: |
| connStr = "" if args.connstr is None else args.connstr[0] |
| (c_user, c_pass, c_host, c_port, c_db) = parseConnectionStr(connStr) |
| |
| # Find the default values for PG and GP |
| if portid in ('postgres', 'greenplum', 'hawq'): |
| if c_user is None: |
| c_user = os.environ.get('PGUSER', getpass.getuser()) |
| if c_pass is None: |
| c_pass = os.environ.get('PGPASSWORD', None) |
| if c_host is None: |
| c_host = os.environ.get('PGHOST', 'localhost') |
| if c_port is None: |
| c_port = os.environ.get('PGPORT', '5432') |
| if c_db is None: |
| c_db = os.environ.get('PGDATABASE', c_user) |
| |
| # Get password |
| if c_pass is None: |
| c_pass = getpass.getpass("Password for user %s: " % c_user) |
| |
| # Set connection variables |
| global con_args |
| con_args['host'] = c_host + ':' + c_port |
| con_args['database'] = "template1" |
| con_args['user'] = c_user |
| con_args['password'] = c_pass |
| else: |
| con_args = None |
| dbrev = None |
| |
| db_list = __run_sql_query("SELECT d.datname as Name FROM pg_catalog.pg_database d ORDER BY 1;", True) |
| |
| has_dependency = False |
| try: |
| for c_db in db_list: |
| con_args['database'] = c_db['name'] |
| try: |
| logger.info("Detecting the dependencies on MADlib in database " + c_db['name'] + " ... ") |
| schema_exist = __run_sql_query("SELECT n.nspname AS Name FROM pg_catalog.pg_namespace n WHERE n.nspname = 'madlib';", False) |
| if len(schema_exist) > 0: |
| if __db_upgrade(schema): |
| has_dependency = True |
| else: |
| logger.info("No dependency on MADlib objects in database " + c_db['name'] + " is detected.") |
| else: |
| logger.info("No MADlib is installed and no need to check dependencies.") |
| except: |
| logger.info("No MADlib is installed and no need to check dependencies.") |
| pass |
| except Exception as e: |
| #Uncomment the following lines when debugging |
| #print "Exception: " + str(e) |
| #print sys.exc_info() |
| #traceback.print_tb(sys.exc_info()[2]) |
| logger.fatal("Dependency detection failed.") |
| raise RuntimeError("Dependency detection failed.") |
| |
| if has_dependency: |
| raise RuntimeError("User has objects that depend on MADlib objects to be dropped in the migration." |
| " Please remove those dependencies before proceeding with the migration." |
| " Make backups for these objects if necessary.") |
| |
| # ---------------------------------------------------------------------- |
| |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| # Start Here |
| ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # |
| if __name__ == "__main__": |
| |
| # Run main |
| depcheck_for_madlib(sys.argv[1:]) |
| |