Madpack: Add UDO and UDOC automation

JIRA: MADLIB-1281

- Add scripts for detecting changed/dropped UDOs and UDOCs.
- Expand the create_changelist.py file to consume these scripts and
create changelists with these fields filled if necessary.
- Fix the update_util.py to use the correct dictionary key.
- Add drop operator class command to the svac.sql_in to make sure the
old class is removed before creating the updated one.

Closes #337
diff --git a/methods/svec/src/pg_gp/svec.sql_in b/methods/svec/src/pg_gp/svec.sql_in
index 6e9825d..2d0e21e 100644
--- a/methods/svec/src/pg_gp/svec.sql_in
+++ b/methods/svec/src/pg_gp/svec.sql_in
@@ -1188,6 +1188,8 @@
 	leftarg = int4, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_concat_replicate
 );
 
+DROP OPERATOR CLASS IF EXISTS MADLIB_SCHEMA.svec_ops USING btree;
+
 CREATE OPERATOR CLASS MADLIB_SCHEMA.svec_ops
 DEFAULT FOR TYPE MADLIB_SCHEMA.svec USING btree AS
 OPERATOR        1       MADLIB_SCHEMA.< ,
diff --git a/src/madpack/create_changelist.py b/src/madpack/create_changelist.py
index a689319..a88b71d 100644
--- a/src/madpack/create_changelist.py
+++ b/src/madpack/create_changelist.py
@@ -63,7 +63,16 @@
     raise SystemExit
 
 print "Creating changelist {0}".format(ch_filename)
-os.system("rm -f /tmp/madlib_tmp_nm.txt /tmp/madlib_tmp_udf.txt /tmp/madlib_tmp_udt.txt")
+os.system("""
+    rm -f /tmp/madlib_tmp_nm.txt \
+    /tmp/madlib_tmp_udf.txt \
+    /tmp/madlib_tmp_udt.txt \
+    /tmp/madlib_tmp_udo.txt \
+    /tmp/madlib_tmp_udoc.txt \
+    /tmp/madlib_tmp_typedep.txt \
+    /tmp/madlib_tmp_typedep_udo.txt \
+    /tmp/madlib_tmp_typedep_udoc.txt
+    """)
 try:
     # Find the new modules using the git diff
     err1 = os.system("git diff {old_vers} {new_vers} --name-only --diff-filter=A > /tmp/madlib_tmp_nm.txt".format(**locals()))
@@ -224,8 +233,87 @@
 # List of the UDC, UDO and UDOC changes.
 """)
     f.write("udc:\n")
+
+    # Find the changed operators
+
+    f.write("\n# Changes in the operators (UDO)\n")
     f.write("udo:\n")
+
+    os.system("psql {0} -f diff_udo.sql > /tmp/madlib_tmp_udo.txt".format(database))
+    with open('/tmp/madlib_tmp_udo.txt') as fp:
+        for line in fp:
+            if ' | ' in line:
+                    sp = line.split(' | ')
+
+                    if sp[0].strip() == 'name':
+                        f.write("    - '" + sp[1].strip() + "':\n")
+                    elif sp[0].strip() == 'rettype':
+                        f.write('        rettype: ' + sp[1].strip() + "\n")
+                    elif sp[0].strip() == 'oprright':
+                        f.write('        rightarg: ' + sp[1].strip() + "\n")
+                    elif sp[0].strip() == 'oprleft':
+                        f.write('        leftarg: ' + sp[1].strip() + "\n")
+
+    for t in udt_list:
+
+        os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ " > /dev/null """.format(**locals()))
+
+        os.system("""psql {database} -c "SELECT get_udos('__tmp__madlib__', 'madlib_old_vers', '{t}')" > /dev/null """.format(**locals()))
+
+        os.system("""psql {database} -x -c "SELECT name, rettype, oprright, oprleft FROM __tmp__madlib__ ORDER BY name DESC" > /tmp/madlib_tmp_typedep_udo.txt """.format(**locals()))
+
+        os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ " > /dev/null """.format(**locals()))
+
+        with open('/tmp/madlib_tmp_typedep_udo.txt') as fp:
+            for line in fp:
+                if '|' in line:
+                    sp = line.split('|')
+
+                    if sp[0].strip() == 'name':
+                        f.write("    - '" + sp[1].strip() + "':\n")
+                    elif sp[0].strip() == 'rettype':
+                        f.write('        rettype: ' + sp[1].strip() + "\n")
+                    elif sp[0].strip() == 'oprright':
+                        f.write('        rightarg: ' + sp[1].strip() + "\n")
+                    elif sp[0].strip() == 'oprleft':
+                        f.write('        leftarg: ' + sp[1].strip() + "\n")
+
+
+    # Find the changed operator classes
+
+    f.write("\n# Changes in the operator classes (UDOC)\n")
+    os.system("psql {0} -f diff_udoc.sql > /tmp/madlib_tmp_udoc.txt".format(database))
     f.write("udoc:\n")
+    with open('/tmp/madlib_tmp_udoc.txt') as fp:
+        for line in fp:
+            if '|' in line:
+                    sp = line.split('|')
+
+                    if sp[0].strip() == 'opfamily_name':
+                        f.write("    - '" + sp[1].strip() + "':\n")
+                    elif sp[0].strip() == 'index_method':
+                        f.write('        index_method: ' + sp[1].strip() + "\n")
+
+    for t in udt_list:
+
+        os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ " > /dev/null """.format(**locals()))
+
+        os.system("""psql {database} -c "SELECT get_udocs('__tmp__madlib__', 'madlib_old_vers', '{t}')" > /dev/null """.format(**locals()))
+
+        os.system("""psql {database} -x -c "SELECT opfamily_name, index_method FROM __tmp__madlib__ ORDER BY name DESC" > /tmp/madlib_tmp_typedep_udoc.txt """.format(**locals()))
+
+        os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ " > /dev/null """.format(**locals()))
+
+        with open('/tmp/madlib_tmp_typedep_udoc.txt') as fp:
+            for line in fp:
+                if '|' in line:
+                    sp = line.split('|')
+
+                    if sp[0].strip() == 'opfamily_name':
+                        f.write("    - " + sp[1].strip() + ":\n")
+                    elif sp[0].strip() == 'index':
+                        f.write('        index: ' + sp[1].strip() + "\n")
+
     f.close()
 
     # Copy the new changelist file to its proper location
@@ -237,6 +325,14 @@
     print "Something went wrong! The changelist might be wrong/corrupted."
     raise
 finally:
-    os.system("rm -f /tmp/madlib_tmp_nm.txt /tmp/madlib_tmp_udf.txt "
-              "/tmp/madlib_tmp_udt.txt /tmp/madlib_tmp_cl.yaml "
-              "/tmp/madlib_tmp_typedep.txt")
+    os.system("""
+        rm -f /tmp/madlib_tmp_nm.txt \
+        /tmp/madlib_tmp_udf.txt \
+        /tmp/madlib_tmp_udt.txt \
+        /tmp/madlib_tmp_udo.txt \
+        /tmp/madlib_tmp_udoc.txt \
+        /tmp/madlib_tmp_typedep.txt \
+        /tmp/madlib_tmp_typedep_udo.txt \
+        /tmp/madlib_tmp_typedep_udoc.txt \
+        /tmp/madlib_tmp_cl.yaml
+        """)
diff --git a/src/madpack/diff_udo.sql b/src/madpack/diff_udo.sql
new file mode 100644
index 0000000..affcca9
--- /dev/null
+++ b/src/madpack/diff_udo.sql
@@ -0,0 +1,81 @@
+------------------------------------------------------------------------------
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+
+--   http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+------------------------------------------------------------------------------
+
+SET client_min_messages to ERROR;
+\x on
+
+CREATE OR REPLACE FUNCTION filter_schema(argstr text, schema_name text)
+RETURNS text AS $$
+    if argstr is None:
+        return "NULL"
+    return argstr.replace(schema_name + ".", '')
+$$ LANGUAGE plpythonu;
+
+CREATE OR REPLACE FUNCTION alter_schema(argstr text, schema_name text)
+RETURNS text AS $$
+    if argstr is None:
+        return "NULL"
+    return argstr.replace(schema_name + ".", 'schema_madlib.')
+$$ LANGUAGE plpythonu;
+
+
+CREATE OR REPLACE FUNCTION get_udos(table_name text, schema_name text,
+                                         type_filter text)
+RETURNS VOID AS
+$$
+    import plpy
+
+    plpy.execute("""
+        create table {table_name} AS
+            SELECT *
+            FROM (
+                SELECT n.nspname AS "Schema",
+                       o.oprname AS name,
+                       filter_schema(o.oprcode::text, '{schema_name}') AS oprcode,
+                       alter_schema(pg_catalog.format_type(o.oprleft, NULL), '{schema_name}') AS oprleft,
+                       alter_schema(pg_catalog.format_type(o.oprright, NULL), '{schema_name}') AS oprright,
+                       alter_schema(pg_catalog.format_type(o.oprresult, NULL), '{schema_name}') AS rettype
+                FROM pg_catalog.pg_operator o
+                    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace
+                WHERE n.nspname OPERATOR(pg_catalog.~) '^({schema_name})$'
+                ) q
+            WHERE oprleft LIKE 'schema_madlib.{type_filter}'
+                  OR oprleft LIKE 'schema_madlib.{type_filter}[]'
+                  OR oprright LIKE 'schema_madlib.{type_filter}'
+                  OR oprright LIKE 'schema_madlib.{type_filter}[]'
+                  OR rettype LIKE 'schema_madlib.{type_filter}'
+                  OR rettype LIKE 'schema_madlib.{type_filter}[]'
+                  OR '{type_filter}' LIKE 'Full'
+    """.format(table_name=table_name, schema_name=schema_name, type_filter=type_filter))
+$$ LANGUAGE plpythonu;
+
+
+DROP TABLE if exists udo_madlib_old_version;
+DROP TABLE if exists udo_madlib_new_version;
+
+SELECT get_udos('udo_madlib_old_version','madlib_old_vers','Full');
+SELECT get_udos('udo_madlib_new_version','madlib','Full');
+
+
+SELECT old.name AS name , old.oprright AS oprright,
+       old.oprleft AS oprleft, old.rettype AS rettype
+FROM udo_madlib_old_version AS old LEFT JOIN udo_madlib_new_version
+    USING (name, oprcode, oprright, oprleft, rettype)
+WHERE udo_madlib_new_version.name is NULL
+ORDER BY old.name;
diff --git a/src/madpack/diff_udoc.sql b/src/madpack/diff_udoc.sql
new file mode 100644
index 0000000..c1312dc
--- /dev/null
+++ b/src/madpack/diff_udoc.sql
@@ -0,0 +1,79 @@
+------------------------------------------------------------------------------
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+
+--   http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+------------------------------------------------------------------------------
+
+SET client_min_messages to ERROR;
+\x on
+
+
+CREATE OR REPLACE FUNCTION filter_schema(argstr text, schema_name text)
+RETURNS text AS $$
+    if argstr is None:
+        return "NULL"
+    return argstr.replace(schema_name + ".", '')
+$$ LANGUAGE plpythonu;
+
+CREATE OR REPLACE FUNCTION alter_schema(argstr text, schema_name text)
+RETURNS text AS $$
+    if argstr is None:
+        return "NULL"
+    return argstr.replace(schema_name + ".", 'schema_madlib.')
+$$ LANGUAGE plpythonu;
+
+CREATE OR REPLACE FUNCTION get_udocs(table_name text, schema_name text,
+                                         type_filter text)
+RETURNS VOID AS
+$$
+    import plpy
+
+    plpy.execute("""
+	CREATE TABLE {table_name} AS
+	SELECT * FROM (
+		SELECT index_method, opfamily_name,
+			   array_to_string(array_agg(alter_schema(opfamily_operator::text, '{schema_name}')), ',')
+			   	AS operators
+		FROM (
+			SELECT am.amname AS index_method, opf.opfname AS opfamily_name,
+		       	   amop.amopopr::regoperator AS opfamily_operator
+		    FROM pg_am am, pg_opfamily opf, pg_amop amop, pg_namespace n
+		    WHERE opf.opfmethod = am.oid AND
+		          amop.amopfamily = opf.oid AND
+		          n.oid = opf.opfnamespace AND
+		          n.nspname OPERATOR(pg_catalog.~) '^({schema_name})$'
+		    ORDER BY index_method, opfamily_name, opfamily_operator
+		    ) q
+		GROUP BY (index_method, opfamily_name)
+		) qq
+	WHERE operators LIKE '%schema_madlib.{type_filter}%' OR '{type_filter}' LIKE 'Full'
+	""".format(table_name=table_name, schema_name=schema_name, type_filter=type_filter))
+
+$$ LANGUAGE plpythonu;
+
+DROP TABLE if exists udoc_madlib_old_version;
+DROP TABLE if exists udoc_madlib_new_version;
+
+SELECT get_udocs('udoc_madlib_old_version','madlib_old_vers','Full');
+SELECT get_udocs('udoc_madlib_new_version','madlib','Full');
+
+
+SELECT old.opfamily_name, old.index_method
+FROM udoc_madlib_old_version AS old LEFT JOIN udoc_madlib_new_version
+	USING (index_method, opfamily_name, operators)
+WHERE udoc_madlib_new_version.opfamily_name is NULL
+ORDER BY 1;
+
diff --git a/src/madpack/upgrade_util.py b/src/madpack/upgrade_util.py
index e50e7b9..90511a3 100644
--- a/src/madpack/upgrade_util.py
+++ b/src/madpack/upgrade_util.py
@@ -214,6 +214,8 @@
         self._add_to_dict(config['udc'], self._udc)
         self._add_to_dict(self._load_config_param(config['udf']), self._udf)
         self._add_to_dict(self._load_config_param(config['uda']), self._uda)
+        self._add_to_dict(self._load_config_param(config['udo']), self._udo)
+        self._add_to_dict(self._load_config_param(config['udoc']), self._udoc)
 
     def _get_relevant_filenames(self, upgrade_from):
         """ Get all changelist files that together describe the upgrade process
@@ -473,7 +475,7 @@
         """
         for op_cls in self._udoc:
             for value in self._udoc[op_cls]:
-                index = value['index']
+                index = value['index_method']
                 _write_to_file(self.output_filehandle, """
                     DROP OPERATOR CLASS IF EXISTS {schema}.{op_cls} USING {index};
                     """.format(schema=self._schema, **locals()))