#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script generates data for the Hive metastore, inserts it into the DB,
# and verifies that the data still exists after the schema has been upgraded.
# It's a multi-step process.
# 1) This script parses the schema file for a database specified via the --db
# option and generates a file that contains just the "create table" statements
# from the schema.
# 2) This file is then cleansed and sorted according to the foreign
# key dependencies. The tables that have no foreign keys float above the
# ones that depend on them.
# 3) The file created in step 2 is string-substituted to convert the create table
# statements into "insert into <table>" statements. During this stage, string and int
# data is generated randomly. These statements are saved to a file. This stage also
# generates another file containing the select count(*) statements.
# *************************************** README ************************************************
# This script is used as part of the HIVE METASTORE PRE-COMMIT build to ensure that
# any changes to the HMS schema are backward compatible with the older schema and that
# the changes do not delete any data from the current HMS datastore.
# For example, dropping a column from an existing schema table is something we want to
# avoid. Adding a column should be fine.
# This script does the following:
# * Parses the hive schema file (starting at hive-0.10.0) and then compiles a list of tables
# in this schema (into /tmp/<dbVendor>-<schemaVersion>-create.sql)
# * It then generates 1 row (for now) with random column data for each of the tables (into
# dbs/<db>/<db>-<version>-insert.sql) for each version of the hive schema.
# * It then generates a "select count(*)" query statement for the above row so the query output
# should be exactly ONE (1) in all cases.
# * For example:
# * INSERT INTO TBLS ( `TBL_ID`, `CREATE_TIME`, `DB_ID`, `LAST_ACCESS_TIME`, `OWNER`, `RETENTION`,
# `SD_ID`, `TBL_NAME`, `TBL_TYPE`, `VIEW_EXPANDED_TEXT`, `VIEW_ORIGINAL_TEXT`, `LINK_TARGET_ID` )
# VALUES ( 1, 35142, 1, 33771, 'NOLxH4BzdiBbE8i', 83343, 88353, 'hive_testtable', 'bewmH7XTBmxb23C',
# 'JWKYyQDDYB9UsyV', 'E3AFymvLju857DD', 1 );
# * SELECT COUNT(*) FROM TBLS where `TBL_ID` = 1 and `CREATE_TIME` = 35142 and `DB_ID` = 1 and
# `LAST_ACCESS_TIME` = 33771 and `OWNER` = 'NOLxH4BzdiBbE8i' and `RETENTION` = 83343 and
# `SD_ID` = 88353 and `TBL_NAME` = 'hive_testtable' and `TBL_TYPE` = 'bewmH7XTBmxb23C' and
# `VIEW_EXPANDED_TEXT` = 'JWKYyQDDYB9UsyV' and `VIEW_ORIGINAL_TEXT` = 'E3AFymvLju857DD' ;
# * The schema is then installed using dbs/<db>/execute.sh.
# * The generated row data is then inserted into the DB.
# * The schema is then upgraded to the next version, say hive-schema-0.11.0. After the upgrade, this script
# then executes the SELECT COUNT(*) queries for the tables from the prior version of the schema. All tables are
# expected to pass the test with a rowcount of 1. If not, then we know we have lost data in the process of
# the upgrade.
# * This process of upgrading schema and executing select queries continues until it has reached the latest
# version of the schema.
set -e
cd "$(dirname "$0")"
usage() {
echo "Usage: "
echo " $0 --db DB_SERVER"
echo
echo "Options"
echo " --db DB_SERVER Specifies a database server where to run the"
echo " upgrade tests."
echo
}
if [ $# -lt 2 ]; then
usage && exit 1
fi
if [ "$1" != "--db" ]; then
echo "Error: unrecognized option: $1."
echo
usage && exit 1
fi
DB_SERVER="$2"
if [ ! -d "dbs/$DB_SERVER" ]; then
log "Error: $DB_SERVER upgrade tests is not supported yet."
echo
echo "You must select from any of the following tests:"
ls dbs/
exit 1
fi
#
# These tests work by calling special commands from specific
# servers that are stored as scripts in a directory name
# related to the server to test.
#
# 2 for INFO, 4 for DEBUG
LOGLEVEL=4
CONSOLELEVEL=2
# default length for int datatype
DEFAULT_INT_SIZE=5
# default length for string datatype
DEFAULT_STRING_SIZE=15
# path to the log file
OUT_LOG="/tmp/$(basename $0)-$2.log"
rm -f $OUT_LOG
getTime() {
date +"[%m-%d-%Y %H:%M:%S]"
}
# method to print messages to the log file
log() {
echo "$(getTime) $@" >> $OUT_LOG
}
# method to print messages to STDOUT
CONSOLE() {
echo "$(getTime) $@"
}
# log method to print error-level messages to the log file and console
error() {
if [[ "$LOGLEVEL" -ge 1 ]]
then
log $@
fi
if [[ "$CONSOLELEVEL" -ge 1 ]]
then
CONSOLE $@
fi
}
# log method to print info-level messages to the log file and console
info() {
if [[ "$LOGLEVEL" -ge 2 ]]
then
log $@
fi
if [[ "$CONSOLELEVEL" -ge 2 ]]
then
CONSOLE $@
fi
}
# log method to print debug-level messages to the log file and console
debug() {
if [[ "$LOGLEVEL" -ge 4 ]]
then
log $@
fi
if [[ "$CONSOLELEVEL" -ge 4 ]]
then
CONSOLE $@
fi
}
# method that takes in 2 arguments and sets the global isContains flag to 1 if
# the second argument is a substring of the first, 0 otherwise.
isContains=0
contains() {
isContains=0
string="$1"
substring="$2"
if test "${string#*$substring}" != "$string"
then
isContains=1
else
isContains=0
fi
}
# self test to ensure function is working as expected.
str1="INTEGER NOT NULL"
str2="INTEGER"
contains "$str1" "$str2"
if [[ "$isContains" -eq 1 ]]
then
str2="VARCHAR"
contains "$str1" "$str2"
if [[ "$isContains" -eq 1 ]]
then
CONSOLE "contains self-test1 failed"
exit 1
fi
else
CONSOLE "contains self-test2 failed"
exit 1
fi
FCOUNT=0
# function to count the number of occurrences of a string within another string
num_occurances() {
fulltext="$1"
searchChars="$2"
FCOUNT=0
debug "[num_occurances] Search params $fulltext:$searchChars"
FCOUNT=$(grep -o "$searchChars" <<< "$fulltext" | wc -l)
debug "[num_occurances] Returning $FCOUNT"
}
# self test to ensure function is working as expected.
str1="( INTEGER ())) NOT NULL )"
num_occurances "$str1" "("
if [[ $FCOUNT -eq 2 ]]
then
num_occurances "$str1" ")"
if [[ $FCOUNT -ne 4 ]]
then
CONSOLE "num_occurances self-test failed"
exit 1
fi
else
CONSOLE "num_occurances self-test failed"
exit 1
fi
# array that stores all legal datatypes from all DB vendors
TYPES=( "bigint"
"smallint"
"int"
"integer"
"tinyint"
"boolean"
"long varchar"
"varchar"
"char"
"character varying"
"character"
"bytea"
"binary"
"varbinary"
"mediumtext"
"double"
"double precision"
"serial"
"bigserial"
"bit"
"BIGINT"
"SMALLINT"
"LONG VARCHAR"
"VARCHAR"
"INT"
"INTEGER"
"TINYINT"
"BOOLEAN"
"CHAR"
"CHARACTER VARYING"
"CHARACTER"
"BYTEA"
"BINARY"
"VARBINARY"
"MEDIUMTEXT"
"DOUBLE"
"DOUBLE PRECISION"
"SERIAL"
"BIGSERIAL"
"BIT"
)
# function to create a new map-type datastructure.
# this is implemented as a file on the local filesystem that contains name/value pairs.
# the first argument is the name of the map. The filename matches the map name.
newhash() {
rm -f /tmp/hashmap.$1
touch /tmp/hashmap.$1
}
# function to add an entry to the map. An entry is always added to the end of the map,
# so if you delete and re-add an entry, its position within the file can change.
# arg1 is name of the map to add to.
# arg2 is the key for entry.
# arg3 is the value for the map entry.
put() {
echo "$2=$3" >> /tmp/hashmap.$1
}
# function to retrieve an entry from the map.
# arg1 is name of the map to retrieve from.
# arg2 is the key for the entry.
get() {
grep "^$2=" /tmp/hashmap.$1 | cut -d'=' -f2
}
# function that returns the size of the map (aka the number of entries)
# arg1 is name of the map to retrieve from.
size() {
echo `cat /tmp/hashmap.$1 | wc -l`
}
# function that returns the key for the first entry in the map.
# maps to the key of the first line in the file on disk.
# arg1 is name of the map to retrieve from.
firstKey() {
head -n 1 /tmp/hashmap.$1 | cut -d'=' -f1
}
# function that returns the value of the first entry in the map.
# maps to the value of the first line in the file on disk.
# arg1 is name of the map to retrieve from.
firstValue() {
head -n 1 /tmp/hashmap.$1 | cut -d'=' -f2
}
# function that returns true if the map contains the specified key
# arg1 is name of the map to retrieve from.
# arg2 key to be found in the map.
containsKey() {
# find a line that contains the key value at the beginning of line.
ret=$(grep "^$2=" /tmp/hashmap.$1 | cut -d'=' -f1)
if [[ "$2" = "$ret" ]]
then
echo "true"
else
echo "false"
fi
}
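# Illustrative usage of the map functions above (a sketch; "example" is a
# hypothetical map name, and these calls are not part of the test run):
#   newhash "example"
#   put "example" "k1" "v1"
#   get "example" "k1"            # prints: v1
#   size "example"                # prints: 1
#   containsKey "example" "k1"    # prints: true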
# function that is specific to a map named "f_keys"
# This map contains foreign key references. Given a table name, this function
# returns a COMMA-separated list of the tables its foreign keys reference.
# arg1 is name of the map to retrieve from.
# arg2 name of the table to return references for.
get_referenced_tables() {
if [[ ! "$1" = "f_keys" ]]
then
echo "NONE"
return
fi
tables=""
while read fkc
do
key=`echo $fkc | cut -d'=' -f1`
table=`echo $key | cut -d'.' -f1`
if [[ "$table" != "$2" ]]
then
continue
fi
value=`echo $fkc | cut -d'=' -f2`
reftable=`echo $value | cut -d'.' -f1`
if [[ "$reftable" = "$table" ]]
then
continue
fi
if [[ "$tables" = "" ]]
then
tables="$reftable"
else
tables="$tables,$reftable"
fi
done < /tmp/hashmap.$1;
echo "$tables"
}
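# Worked example (based on the f_keys entries loaded further below): for
# arg2="TBLS", the entries TBLS.DB_ID=DBS.DB_ID and TBLS.SD_ID=SDS.SD_ID
# contribute "DBS" and "SDS", while the self-reference
# TBLS.LINK_TARGET_ID=TBLS.TBL_ID is skipped, so the function echoes "DBS,SDS".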
# function that deletes an entry from a named map.
# arg1 is name of the map to delete from.
# arg2 key of the entry to delete from the map.
delete() {
if [[ "$1" = "" ]]
then
return
fi
debug "[delete] deleting entry for $2"
todo=$(size "$1")
exists=`cat /tmp/hashmap.$1 | grep "^$2=" | wc -l`
debug "[delete] entry found in map=$exists"
if [[ "$exists" -ge "1" ]]
then
if [[ "$todo" == 1 ]]
then
rm /tmp/hashmap.$1
touch /tmp/hashmap.$1
return
fi
fi
cat /tmp/hashmap.$1 | grep -v "^$2=" > /tmp/hashmap.$1.tmp
mv /tmp/hashmap.$1.tmp /tmp/hashmap.$1
todo=$(size "unsorted")
debug "[delete] map size after delete:$todo"
}
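# function that checks whether arg2 appears as a table name on the key side of
# any entry in the named map (arg1) and echoes 1 for a match, 0 otherwise.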
is_key_like() {
if [[ "$1" = "" ]]
then
return 0
fi
matched=0
while read fkc
do
local tabvalue=$(echo $fkc | grep "^$2." | cut -d'=' -f1 | cut -d'.' -f1)
if [[ "$2" = "$tabvalue" ]]
then
matched=1
break
fi
done < /tmp/hashmap.$1;
echo "[is_key_like] found match for $2:$matched"
}
# a map that maps SQL datatypes to datatype for the values to be generated.
newhash typemap
put typemap "bigint" "int"
put typemap "smallint" "smallint"
put typemap "int" "int"
put typemap "integer" "int"
put typemap "tinyint" "tinyint"
put typemap "bit" "bit"
put typemap "boolean" "boolean"
put typemap "mediumint" "int"
put typemap "long varchar" "longstring"
put typemap "varchar" "string"
put typemap "char" "string"
put typemap "character" "string"
put typemap "bytea" "string"
put typemap "binary" "string"
put typemap "varbinary" "string"
put typemap "character varying" "string"
put typemap "mediumtext" "string"
put typemap "double" "int"
put typemap "double precision" "int"
put typemap "serial" "int"
put typemap "bigserial" "int"
put typemap "BIGINT" "int"
put typemap "SMALLINT" "smallint"
put typemap "MEDIUMINT" "int"
put typemap "INT" "int"
put typemap "INTEGER" "int"
put typemap "TINYINT" "tinyint"
put typemap "BIT" "bit"
put typemap "BOOLEAN" "boolean"
put typemap "LONG VARCHAR" "longstring"
put typemap "VARCHAR" "string"
put typemap "CHAR" "string"
put typemap "BYTEA" "string"
put typemap "BINARY" "string"
put typemap "VARBINARY" "string"
put typemap "CHARACTER" "string"
put typemap "CHARACTER VARYING" "string"
put typemap "MEDIUMTEXT" "string"
put typemap "DOUBLE" "int"
put typemap "DOUBLE PRECISION" "int"
put typemap "SERIAL" "int"
put typemap "BIGSERIAL" "int"
# a map that contains mappings of foreign key references within the tables.
# this is sort of hard to parse out of SQL. At some point in the future I will
# look to have this map auto-generated.
newhash "f_keys"
put "f_keys" "BUCKETING_COLS.SD_ID" "SDS.SD_ID"
put "f_keys" "COLUMNS.SD_ID" "SDS.SD_ID"
put "f_keys" "COLUMNS_OLD.SD_ID" "SDS.SD_ID"
put "f_keys" "COLUMNS_V2.CD_ID" "CDS.CD_ID"
put "f_keys" "DATABASE_PARAMS.DB_ID" "DBS.DB_ID"
put "f_keys" "DB_PRIVS.DB_ID" "DBS.DB_ID"
put "f_keys" "FUNCS.DB_ID" "DBS.DB_ID"
put "f_keys" "FUNC_RU.FUNC_ID" "FUNCS.FUNC_ID"
put "f_keys" "IDXS.INDEX_TBL_ID" "TBLS.TBL_ID"
put "f_keys" "IDXS.ORIG_TBL_ID" "TBLS.TBL_ID"
put "f_keys" "IDXS.SD_ID" "SDS.SD_ID"
put "f_keys" "INDEX_PARAMS.INDEX_ID" "IDXS.INDEX_ID"
put "f_keys" "PARTITIONS.LINK_TARGET_ID" "PARTITIONS.PART_ID"
put "f_keys" "PARTITIONS.SD_ID" "SDS.SD_ID"
put "f_keys" "PARTITIONS.TBL_ID" "TBLS.TBL_ID"
put "f_keys" "PARTITION_KEYS.TBL_ID" "TBLS.TBL_ID"
put "f_keys" "PARTITION_KEY_VALS.PART_ID" "PARTITIONS.PART_ID"
put "f_keys" "PARTITION_PARAMS.PART_ID" "PARTITIONS.PART_ID"
put "f_keys" "PART_COL_PRIVS.PART_ID" "PARTITIONS.PART_ID"
put "f_keys" "PART_COL_STATS.PART_ID" "PARTITIONS.PART_ID"
put "f_keys" "PART_PRIVS.PART_ID" "PARTITIONS.PART_ID"
put "f_keys" "ROLE_MAP.ROLE_ID" "ROLES.ROLE_ID"
put "f_keys" "SDS.CD_ID" "CDS.CD_ID"
put "f_keys" "SDS.SERDE_ID" "SERDES.SERDE_ID"
put "f_keys" "SD_PARAMS.SD_ID" "SDS.SD_ID"
put "f_keys" "SERDE_PARAMS.SERDE_ID" "SERDES.SERDE_ID"
put "f_keys" "SKEWED_COL_NAMES.SD_ID" "SDS.SD_ID"
put "f_keys" "SKEWED_COL_VALUE_LOC_MAP.SD_ID" "SDS.SD_ID"
put "f_keys" "SKEWED_COL_VALUE_LOC_MAP.STRING_LIST_ID_KID" "SKEWED_STRING_LIST.STRING_LIST_ID"
put "f_keys" "SKEWED_STRING_LIST_VALUES.STRING_LIST_ID" "SKEWED_STRING_LIST.STRING_LIST_ID"
put "f_keys" "SKEWED_VALUES.SD_ID_OID" "SDS.SD_ID"
put "f_keys" "SKEWED_VALUES.STRING_LIST_ID_EID" "SKEWED_STRING_LIST.STRING_LIST_ID"
put "f_keys" "SORT_COLS.SD_ID" "SDS.SD_ID"
put "f_keys" "TABLE_PARAMS.TBL_ID" "TBLS.TBL_ID"
put "f_keys" "TAB_COL_STATS.TBL_ID" "TBLS.TBL_ID"
put "f_keys" "TBLS.DB_ID" "DBS.DB_ID"
put "f_keys" "TBLS.LINK_TARGET_ID" "TBLS.TBL_ID"
put "f_keys" "TBLS.SD_ID" "SDS.SD_ID"
put "f_keys" "TBL_COL_PRIVS.TBL_ID" "TBLS.TBL_ID"
put "f_keys" "TBL_PRIVS.TBL_ID" "TBLS.TBL_ID"
put "f_keys" "TXN_COMPONENTS.TC_TXNID" "TXNS.TXN_ID"
put "f_keys" "TYPE_FIELDS.TYPE_NAME" "TYPES.TYPES_ID"
# store generated values in a map and print them to a file
# resetting the values from any prior runs
newhash "genvalues"
# store names of the tables that have already been sorted
# resetting the values from any prior runs
newhash "sorted"
skipParsing=0
skipGeneration=0
if [ "$3" = "--skipParsing" ]; then
debug "skipParsing is true"
skipParsing=1
fi
if [ "$3" = "--skipProcessing" ]; then
debug "skipProcessing is true"
skipParsing=1
skipGeneration=1
fi
VERSION_BASE="0.10.0"
VERSION_CURR="0.10.0"
SQL_OUT="/tmp/$DB_SERVER-$VERSION_CURR-create.sql"
SQL_SORTED="/tmp/$DB_SERVER-$VERSION_CURR-sortedcreate.sql"
SCRIPT_INSERT="$(pwd)/dbs/$DB_SERVER/$DB_SERVER-$VERSION_CURR-insert.sql"
SCRIPT_QUERY="$(pwd)/dbs/$DB_SERVER/$DB_SERVER-$VERSION_CURR-query.sql"
INSERTALL="$(pwd)/dbs/$DB_SERVER/$DB_SERVER-insertall.sql"
QUERYALL="$(pwd)/dbs/$DB_SERVER/$DB_SERVER-queryall.sql"
printSQL() {
echo $@ >> $SQL_OUT
}
# method to delete a file from the file system.
rmfs() {
if [[ -e "$1" ]]
then
debug "Deleting generated file $1"
rm "$1"
fi
}
DATAFILE="$(pwd)/dataload.properties"
# method that returns a static value loaded from the properties file for a column. These values are read from a
# file on disk
getStaticValue() {
grep -i "^$1=" "$DATAFILE" | cut -d'=' -f2
}
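# Example: a hypothetical line "TBLS.TBL_NAME=hive_testtable" in
# dataload.properties would make getStaticValue "TBLS.TBL_NAME" print
# hive_testtable (the lookup is case-insensitive and keyed on table.column).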
HMS_UPGRADE_DIR="$(pwd)/../../metastore/scripts/upgrade"
if [ ! -d "$HMS_UPGRADE_DIR/$DB_SERVER" ]; then
error "Error: $DB_SERVER upgrade scripts are not found on $HMS_UPGRADE_DIR."
exit 1
fi
HIVE_SCHEMA_BASE="$HMS_UPGRADE_DIR/$DB_SERVER/hive-schema-$VERSION_BASE.$DB_SERVER.sql"
SCRIPT_SRC="$HIVE_SCHEMA_BASE"
if [ "$skipParsing" -eq "0" ]; then
rmfs "$SQL_OUT"
rmfs "$SQL_SORTED"
rmfs "$SCRIPT_INSERT"
rmfs "$SCRIPT_QUERY"
rmfs "$INSERTALL"
rmfs "$QUERYALL"
else
info "skipParsing flag enabled, skipping parsing for $SCRIPT_SRC"
fi
if [ "$skipGeneration" -eq "0" ] && [ "$skipParsing" -eq "1" ]; then
rmfs "$SCRIPT_INSERT"
rmfs "$SCRIPT_QUERY"
rmfs "$INSERTALL"
rmfs "$QUERYALL"
fi
#
# Before running any test, we need to prepare the environment.
# This is done by calling the prepare script. This script must
# install, configure and start the database server where to run
# the upgrade script.
#
execute_test() {
log "Executing sql test: $DB_SERVER/$(basename $1)"
if ! bash -x -e $SCRIPT_EXECUTE $1; then
log "Test failed: $DB_SERVER/$(basename $1)"
return 1
fi
}
# this method parses the SQL schema file and generates a file with all the
# "create table" definitions. This file is later string-subbed to generate
# a file with a bunch of "insert into <table>" statements.
parse_schema() {
if [ -e $SQL_OUT ] && [ "$2" = 0 ]
then
debug "[parse_schema] Resetting position in file"
rmfs "$SQL_OUT"
fi
info "[parse_schema] Parsing sql file: $1"
CREATE_OPEN=false
TABLE_DEF=""
COUNTER=0
shopt -s nocasematch
while read line
do
copy=$line
# if the line is a comment (begins with --), skip it
if [[ "$copy" =~ ^-- ]]
then
debug "[parse_schema] Comment, skipping ..."
continue
fi
# The following code counts the number of open and close parentheses within
# each line read from the schema file. When the openparen count matches the
# closeparen count, it assumes that the create table definition has ended.
debug "[parse_schema] Processing $line with $COUNTER and $CREATE_OPEN"
if [[ "$CREATE_OPEN" = true ]]
then
ignore=$(is_throwaway "$copy")
debug "ignore=$ignore=line=$copy="
if [[ "$ignore" = "TRUE" ]]
then
debug "ignoring line"
continue
fi
#if [[ "$copy" =~ "(" ]]
num_occurances "$copy" "("
if [[ $FCOUNT -gt 0 ]]
then
COUNTER=$((COUNTER+FCOUNT))
debug "[parse_schema] count=$COUNTER"
fi
#if [[ "$copy" =~ ")" ]]
num_occurances "$copy" ")"
if [[ $FCOUNT -gt 0 ]]
then
if [[ "$COUNTER" -gt 1 ]]
then
debug "[parse_schema] Adding line to table: $line :: $COUNTER"
TABLE_DEF="$TABLE_DEF $line"
debug "[parse_schema] TABLEDEF=$TABLE_DEF"
COUNTER=$((COUNTER-FCOUNT))
debug "[parse_schema] count=$COUNTER"
elif [[ "$COUNTER" -eq 1 ]]
then
copy=$line
debug "[parse_schema] CLOSE FOUND $line"
PART=`echo $copy | cut -d')' -f1 | sed 's/$/)/'`
debug "[parse_schema] $PART appended to table def"
TABLE_DEF="$TABLE_DEF $PART"
COUNTER=0
CREATE_OPEN=false
debug "[parse_schema] Full Table definition is : $TABLE_DEF"
printSQL "$TABLE_DEF"
fi
else
debug "[parse_schema] Adding line to table: $line :: $COUNTER"
TABLE_DEF="$TABLE_DEF $line"
debug "[parse_schema] TABLEDEF=$TABLE_DEF"
fi
else
if [[ "$copy" =~ "create table" ]];
then
debug "[parse_schema] OPEN FOUND $line"
TABLE_DEF="$line"
num_occurances "$copy" ")"
local closeCount=$FCOUNT
num_occurances "$copy" "("
local openCount=$FCOUNT
debug "[parse_schema] matches $openCount $closeCount"
if [ "$openCount" -eq "$closeCount" ] && [ "$openCount" -gt 0 ]
then
PART=`echo $copy | cut -d';' -f1 | sed 's/$/;/'`
printSQL "$PART"
COUNTER=0
CREATE_OPEN=false
continue
fi
CREATE_OPEN=true
if [[ "$FCOUNT" -gt 0 ]]
then
COUNTER=$((COUNTER+FCOUNT))
debug "[parse_schema] count=$COUNTER"
fi
fi
fi
done < "$1";
}
# function to determine if an argument is a number. This is used to determine the
# length of the column values that are to be generated.
is_int() {
test "$@" -eq "$@" > /dev/null 2>&1
}
# self-test to ensure function is working as expected
if is_int "500";
then
if is_int "foo";
then
# function not working properly
CONSOLE "is_int self-test failed"
exit 1
fi
else
# function not working properly
CONSOLE "is_int self-test failed"
exit 1
fi
# trims the leading and trailing spaces from a line
trim() {
local var="$*"
var="${var#"${var%%[![:space:]]*}"}" # remove leading whitespace characters
var="${var%"${var##*[![:space:]]}"}" # remove trailing whitespace characters
echo -n "$var"
}
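# self test (added) to ensure the trim function works as expected; this mirrors
# the self-tests for contains, num_occurances and is_int above.
if [[ "$(trim '   foo bar  ')" != "foo bar" ]]
then
CONSOLE "trim self-test failed"
exit 1
fi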
# function that generates a random int value of default size
int() {
limit="$DEFAULT_INT_SIZE"
if $(is_int "${1}");
then
if [[ "$1" -lt "$limit" ]]
then
limit="$1"
fi
fi
limit_int "$limit"
}
# function that generates a random int value of a specified length.
# if the limit passed in is NOT a number, an int of default length is returned.
limit_int() {
if $(is_int "${1}");
then
limit="$1"
else
limit="$DEFAULT_INT_SIZE"
fi
if [ "$(uname)" == "Darwin" ]; then
cat /dev/urandom | env LC_CTYPE=C tr -dc '1-9' | fold -w ${limit:-15} | head -n 1
elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then
cat /dev/urandom | tr -dc '1-9' | fold -w ${limit:-15} | head -n 1
fi
#cat /dev/urandom | env LC_CTYPE=C tr -dc '1-9' | fold -w ${1:-15} | head -n 1
}
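# Example: "limit_int 3" prints a random 3-character string of the digits 1-9
# (e.g. 474), while "limit_int foo" falls back to the default length.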
# function that returns a random bit value of either 0 or 1
bit() {
r_int=$(int)
echo $(( r_int % 2 ))
}
# function that returns a random boolean value of TRUE or FALSE
boolean() {
trueval="TRUE"
falseval="FALSE"
# have to special case derby because derby has no native bit datatype
# it uses a char of length 1 to store Y or N for booleans.
if [[ "$DB_SERVER" = "derby" ]]
then
trueval="Y"
falseval="N"
fi
r_bit=$(bit)
if [[ "$r_bit" == 1 ]]
then
echo "$trueval"
else
echo "$falseval"
fi
}
# function that returns a random tinyint value (0-126)
tinyint() {
r_int=$(int)
echo $(( r_int % 127 ))
}
# function that returns a random smallint value (0-31999)
smallint() {
r_int=$(int)
echo $(( r_int % 32000 ))
}
# function that returns the hex encoding of its argument (note: the here-string
# appends a trailing newline, which is included in the encoding)
hex() {
input="$1"
hexval=$(xxd -pu <<< "$input")
echo "$hexval"
}
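# Example: hex "ab" prints the hex encoding of 'a', 'b' and the trailing
# newline (e.g. 61620a, letter case depending on the xxd flags).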
limit_string() {
if $(is_int "${1}");
then
limit="$1"
else
limit=5
fi
# for derby, a char of length 1 generates either a Y or N, which
# serves well for boolean types in the schema
if [[ ( "$DB_SERVER" = "derby" ) && ( "$limit" -eq 1 ) ]]
then
boolean
return
fi
if [ "$(uname)" == "Darwin" ]; then
cat /dev/urandom | env LC_CTYPE=C tr -dc 'a-z0-9' | fold -w ${limit:-32} | head -n 1
elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then
cat /dev/urandom | tr -dc 'a-z0-9A-Z' | fold -w ${limit:-32} | head -n 1
fi
}
# function that returns a random string of default length.
string()
{
limit="$DEFAULT_STRING_SIZE"
if $(is_int "${1}");
then
if [[ "$1" -lt "$limit" ]]
then
limit="$1"
fi
fi
limit_string "$limit"
}
# function to determine if a token within a column definition is to be ignored.
# this is to differentiate column definitions from PRIMARY/FOREIGN KEY specifications.
is_throwaway() {
coldef=$(trim "$1")
if [[ ( "$coldef" =~ ^PRIMARY\ KEY.* ) || ( "$coldef" =~ FOREIGN\ KEY ) ]]
then
echo "TRUE"
else
echo "FALSE"
fi
}
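# Example: is_throwaway "  PRIMARY KEY (TBL_ID)" prints TRUE, as does a line
# containing a FOREIGN KEY clause, while is_throwaway "TBL_ID bigint" prints FALSE.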
# extracts the name of the columns from a column definition within SQL.
# Assumes that the first token is the name of the column
extract_colname() {
coldef=$(trim "$1")
echo $coldef | cut -d' ' -f1
}
# returns a properly quoted column name, with backticks (mysql) or double quotes
# (derby) according to the DB's preference.
# Assumes that the first token is the name of the column
localize_colname() {
coldef=$(trim "$1")
if [[ "$DB_SERVER" = "mysql" ]]
then
echo "\`$coldef\`"
elif [[ "$DB_SERVER" = "derby" ]]
then
echo "\"$coldef\""
else
echo "$coldef"
fi
}
# extracts the name of the table from a create statement from the parsed SQL.
# Assumes that the third SPACE-separated token is the name of the table.
# ex: CREATE TABLE DBS ( DB_ID bigint(20) );
extract_tablename() {
if [[ "$DB_SERVER" = "derby" ]]
then
stmt=`echo "$1" | cut -d' ' -f3 | cut -d'(' -f1 | sed -e 's/APP.//g'`
else
#stmt=`echo "$1" | cut -d' ' -f3`
stmt=`echo "$1" | cut -d'(' -f1 | cut -d' ' -f3`
fi
echo "$stmt" | tr -d '"'
}
# this function inserts the comma-separated column names for the table
# into the insert statement
# first argument is the current SQL insert statement, the second is a list of columns for this table
# ex: INSERT INTO foo VALUES (1, "ABC")
# becomes
# INSERT INTO foo (id, name) VALUES (1, "ABC")
insert_column_names() {
line="$1"
colnames="$2"
merged=`echo "$line" | sed -e "s/\ VALUES\ /\ ${colnames}\ VALUES\ /g"`
echo "$merged"
}
# extracts the name of the table in its native form from a create statement from the parsed SQL.
# Assumes that the third SPACE-separated token is the name of the table. If there are quotes
# around it, they are returned too.
# ex: CREATE TABLE DBS ( DB_ID bigint(20) );
extract_raw_tablename() {
if [[ "$DB_SERVER" = "derby" ]]
then
stmt=`echo "$1" | cut -d' ' -f3 | cut -d'(' -f1 | sed -e 's/APP.//g'`
else
stmt=`echo "$1" | cut -d'(' -f1 | cut -d' ' -f3`
fi
echo "$stmt"
}
# function to extract the maxlength of the column from the column definition.
# if the column definition contains (), then the value within the () is the length.
# 100 otherwise.
extract_columnsize() {
if test "${1#*(}" != "$1"
then
echo "$1" | cut -d "(" -f2 | cut -d ")" -f1
else
echo "100"
fi
}
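# Example: extract_columnsize "TBL_NAME varchar(128)" prints 128, while
# extract_columnsize "DB_ID bigint" has no parentheses and prints 100.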
# this function reads all the create table statements from the file generated by
# parse_schema and then sorts them according to the order in which the insert
# statements are to be generated. It uses the foreign key references to sort the tables.
# Tables without FK references appear above the tables with FKs to them.
# if table B's column has a FK reference to table A's column, this sort function
# guarantees that table A appears before table B in the sorted file.
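# Worked example (using the f_keys map above): CDS, SERDES and DBS have no
# outgoing FKs and sort first; SDS (SDS.CD_ID -> CDS.CD_ID,
# SDS.SERDE_ID -> SERDES.SERDE_ID) can then commit; TBLS (TBLS.DB_ID -> DBS.DB_ID,
# TBLS.SD_ID -> SDS.SD_ID) commits only after DBS and SDS are sorted.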
sort_tabledefs() {
if [[ -e $SQL_SORTED ]] && [[ "$1" = 0 ]]
then
debug "[sort] Resetting the position in file"
rmfs "$SQL_SORTED"
#return
fi
if [ ! -e $SQL_OUT ]
then
info "[sort] $SQL_OUT does not exist, returning .."
return
fi
info "[sort] Sorting table definitions based on dependencies"
newhash "unsorted"
while read line
do
table=$(extract_tablename "$line")
debug "[sort] Extracted table name=$table from $line"
put "unsorted" "$table" "$line"
done < $SQL_OUT;
table=""
todo=$(size "unsorted")
debug "[sort] $todo entries are to be sorted"
while [[ "$todo" > 0 ]]
do
table=$(firstKey "unsorted")
line=$(firstValue "unsorted")
debug "[sort] table name $table"
references=$(get_referenced_tables "f_keys" "$table")
debug "[sort] table $table references $references"
if [[ ! "$references" = "" ]]
then
commit="true"
OLDIFS=$IFS
IFS=',' eval 'array=($references)'
IFS=$OLDIFS
for i in "${!array[@]}"
do
hashsize=$(size "sorted")
if [[ "$hashsize" > 0 ]]
then
hasKey=$(containsKey "sorted" "${array[i]}")
debug "[sort] is table ${array[i]} sorted:$hasKey"
if [[ ! "$hasKey" = true ]]
then
commit="false"
break
fi
else
commit="false"
fi
done
debug "[sort] commit table $table:$commit"
if [[ "$commit" = "true" ]]
then
echo "$line" >> $SQL_SORTED
put "sorted" "$table" "$line"
debug "[sort] deleting table $table from unsorted"
delete "unsorted" "$table"
debug "[sort] table $table committed"
else
delete "unsorted" "$table"
put "unsorted" "$table" "$line"
debug "[sort] table $table deferred"
fi
else
echo "$line" >> $SQL_SORTED
put "sorted" "$table" "$line"
delete "unsorted" "$table"
debug "[sort] table $table committed as it has no references"
fi
todo=$(size "unsorted")
debug "[sort] todo size=$todo"
done
info "[sort] Sorting Complete!!!"
}
# this function performs some cleansing to bring the SQL statements into a unified format.
# for example, the mysql schema file contains "create table if not exists" while PG and derby
# just have "create table"
cleanse_sql() {
info "[Cleansing] Cleansing data"
TMP="/tmp/insert-$DB_SERVER-$VERSION_CURR-tmp.sql"
TMP2="/tmp/insert-$DB_SERVER-$VERSION_CURR-tmp2.sql"
if [[ -e "$SQL_OUT" ]]
then
if [[ "$DB_SERVER" = "postgres" ]]
then
# TODO perhaps there is a better means to do this with just cat and sed
# but due to inconsistencies in the schema files for postgres, some tables are upper case and some tables are lowercase
# CREATE TABLE TXNS --> creates a lower case "txns" table
# CREATE TABLE "TXNS" --> creates a upper case "TXNS" table
# so we have retain the quotes or lack thereof from the original create table command.
while read line; do
# so we cut the statement into 2 parts and only massage the second part.
part1="$( cut -d '(' -f 1 <<< "$line" )"
part1modified=$(echo $part1 | sed -e 's/IF\ NOT\ EXISTS\ //g')
part2="$( cut -d '(' -f 2- <<< "$line" )"
echo "$part1modified( $part2" >> $TMP
done < $SQL_OUT
else
cat $SQL_OUT | sed -e 's/"//g' | sed -e 's/`//g' | sed -e 's/IF\ NOT\ EXISTS\ //g' > $TMP
fi
if [[ "$DB_SERVER" = "mysql" ]]
then
cat $TMP | tr -s ' ' | sed 's/,\ PRIMARY\ KEY.*$/)/g' > $TMP2
mv $TMP2 $TMP
fi
mv $TMP $SQL_OUT
fi
info "[Cleansing] Cleansing complete!!"
}
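# Example of the cleansing for mysql: a parsed line such as
#   CREATE TABLE IF NOT EXISTS `TBLS` ( `TBL_ID` bigint(20), PRIMARY KEY (`TBL_ID`) )
# comes out as
#   CREATE TABLE TBLS ( TBL_ID bigint(20))
# (quotes/backticks and IF NOT EXISTS removed, PRIMARY KEY clause dropped).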
# this is the third and final step in converting the file with "create table" statements
# into a file with "insert into" statements where the column values are either looked up
# from the static data file or generated according to their datatype determined in step 1
generate_data() {
info "[generate_data] SQL inserts will be saved to $SCRIPT_INSERT, SQL query to $SCRIPT_QUERY"
if [ -e $SCRIPT_INSERT ] && [ -e $SCRIPT_QUERY ] && [ "$1" = 0 ]
then
debug "[sort] Skipping data generation"
rmfs "$SCRIPT_INSERT"
rmfs "$SCRIPT_QUERY"
fi
if [[ -e "$QUERYALL" ]]
then
cp "$QUERYALL" "$SCRIPT_QUERY"
fi
if [[ ! -e $SQL_SORTED ]]
then
info "[generate_data] $SQL_SORTED does not exist, returning .."
return
fi
info "[generate_data] Generating table data for $DB_SERVER-$VERSION_CURR"
INSERT="/tmp/$DB_SERVER-$VERSION_CURR-insert.sql"
TMP="/tmp/insert-$DB_SERVER-$VERSION_CURR-tmp.sql"
cat $SQL_SORTED | sed -e 's/IF\ NOT\ EXISTS\ //g' | sed -e 's/CREATE\ TABLE/INSERT\ INTO/g' | sed -e 's/(/\ VALUES\ \(/' > $INSERT
while read line
do
debug "[generate_data] Processing...$line"
BEGIN=`echo $line | cut -d'(' -f1 | sed 's/$/(/'`
insert_cols="("
select="SELECT COUNT(*) FROM"
REST=`echo $line | cut -d'(' -f2-`
table=$(extract_raw_tablename "$BEGIN")
select="$select $table where"
table=`echo $table | tr -d '"'`
OLDIFS=$IFS
IFS=',' eval 'array=($REST)'
IFS=$OLDIFS
hasOpen=0
hasClose=0
tokenbuffer=""
for i in "${!array[@]}"
do
token="${array[i]}"
debug "[generate_data] Processing table:$table, token=>${array[i]},hasOpen=$hasOpen,hasClose=$hasClose"
debug "token value before extracting column=$token"
colname=$(extract_colname "$token")
unquotedColname=$(echo $colname | sed -e 's/"//g')
sqlColname=$(localize_colname "$colname")
debug "[generate_data] Processing table:$table, column:$colname, tablename.columnname is $table.$colname"
# the value for this column is generated by the DB. The column def for such DB-managed
# columns is very different. A value of DEFAULT has to be supplied for this column.
if [[ "$table.$colname" == "MASTER_KEYS.KEY_ID" ]]
then
info "$table.$colname is generated value"
BEGIN="$BEGIN DEFAULT,"
insert_cols="$insert_cols $sqlColname,"
continue
fi
for type in "${!TYPES[@]}"
do
fk=""
val=""
# TODO we are over-iterating here for each column
debug "[generate_data] column name=$colname"
staticValue=$(getStaticValue "$table.$colname")
if [[ ! "$staticValue" = "" ]]
then
info "pre-loaded value for $table.$colname is $staticValue"
put "genvalues" "$table.$colname" "$staticValue"
BEGIN="$BEGIN $staticValue,"
insert_cols="$insert_cols $sqlColname,"
if [ ! "$table.$colname" = "TBLS.LINK_TARGET_ID" ] && [ ! "$table.$colname" = "PARTITIONS.LINK_TARGET_ID" ]
then
select="$select $sqlColname = $staticValue and"
fi
continue 2
fi
contains "$token" "${TYPES[type]}"
if [[ $isContains -eq 1 ]]
then
mappedtype=$(get "typemap" "${TYPES[type]}")
if [[ "$DB_SERVER" -eq "derby" ]]
then
contains "$token" "for bit data"
if [[ $isContains -eq 1 ]]
then
mappedtype="derbybinary"
fi
fi
debug "[generate_data] column type for $table.$colname is ${TYPES[type]}, mapped type=$mappedtype"
colsize=$(extract_columnsize "$token")
debug "[generate_data] columnsize=$colsize for $table.$colname"
fk=$(get "f_keys" "$table.$unquotedColname")
if [[ ! "$fk" = "" ]]
then
val=$(get "genvalues" "$fk")
info "[generate_data] column $table.$unquotedColname references $fk whose value is $val"
fi
if [[ "$val" != "" ]]
then
BEGIN="$BEGIN $val,"
insert_cols="$insert_cols $sqlColname,"
if [ ! "$table.$colname" = "TBLS.LINK_TARGET_ID" ] && [ ! "$table.$colname" = "PARTITIONS.LINK_TARGET_ID" ]
then
select="$select $sqlColname = $val and"
fi
continue 2
fi
info "[generate_data] Generating random value for column $table.$unquotedColname"
case "$mappedtype" in
int )
if [[ "$val" = "" ]]
then
val=$(int "$colsize")
put "genvalues" "$table.$unquotedColname" "$val"
fi
;;
longstring )
if [[ "$val" = "" ]]
then
val=$(string "$colsize")
val="'$val'"
put "genvalues" "$table.$unquotedColname" "$val"
fi
BEGIN="$BEGIN $val,"
insert_cols="$insert_cols $sqlColname,"
select="$select TRIM($sqlColname) = TRIM($val) and"
continue 2
;;
string )
if [[ "$val" = "" ]]
then
val=$(string "$colsize")
val="'$val'"
put "genvalues" "$table.$unquotedColname" "$val"
fi
;;
bit )
if [[ "$val" = "" ]]
then
val=$(bit)
put "genvalues" "$table.$unquotedColname" "$val"
fi
BEGIN="$BEGIN $val,"
insert_cols="$insert_cols $sqlColname,"
select="$select CAST( $sqlColname AS UNSIGNED) = '$val' and"
continue 2
;;
boolean )
if [[ "$val" = "" ]]
then
val=$(boolean)
put "genvalues" "$table.$unquotedColname" "$val"
fi
;;
tinyint )
if [[ "$val" = "" ]]
then
val=$(tinyint)
put "genvalues" "$table.$unquotedColname" "$val"
fi
;;
smallint )
if [[ "$val" = "" ]]
then
val=$(smallint)
put "genvalues" "$table.$unquotedColname" "$val"
fi
;;
derbybinary )
if [[ "$val" = "" ]]
then
val=$(string "$colsize")
val=$(hex "$val")
val="X'$val'"
put "genvalues" "$table.$unquotedColname" "$val"
fi
;;
esac
BEGIN="$BEGIN $val,"
insert_cols="$insert_cols $sqlColname,"
if [ ! "$table.$unquotedColname" = "TBLS.LINK_TARGET_ID" ] && [ ! "$table.$unquotedColname" = "PARTITIONS.LINK_TARGET_ID" ]
then
select="$select $sqlColname = $val and"
fi
break
fi
done
done
BEGIN="${BEGIN: 0:${#BEGIN} - 1} );"
insert_cols="${insert_cols: 0:${#insert_cols} - 1} )"
foo=$(insert_column_names "$BEGIN" "$insert_cols")
select="${select: 0:${#select} - 3} ;"
echo "$foo" >> $SCRIPT_INSERT
echo "$select" >> $SCRIPT_QUERY
echo "$select" >> $QUERYALL
done < $INSERT;
}
if [ ! -f $HIVE_SCHEMA_BASE ]; then
log "Error: $HIVE_SCHEMA_BASE is not found."
exit 1
fi
processFile() {
debug "[processFile] $VERSION_CURR"
debug "parsed sql output file is $SQL_OUT, sorted file is $SQL_SORTED"
debug "generated sql script is $SCRIPT_INSERT, query file is $SCRIPT_QUERY"
if [ -e "$SQL_OUT" ] && [ -e "$SQL_SORTED" ] && [ "$skipParsing" -eq "1" ];
then
info "skipParsing flag enabled, skipping parsing for $HIVE_SCHEMA_BASE"
else
debug "Invoking the parsing routine for [$1]"
parse_schema "$1" "$2"
debug "Invoking the cleansing routine for[$1]"
cleanse_sql
debug "Sorting tables for [$1]"
sort_tabledefs "$2"
fi
if [ -e "$SQL_SORTED" ] && [ "$skipGeneration" -eq "0" ];
then
debug "Invoking the generate routine for[$1]"
generate_data "$2"
fi
debug "[$1] DONE!!"
}
parseAndAppend()
{
info "[parseAndAppend] $SQL_OUT:$SCRIPT_INSERT:$SQL_SORTED for $VERSION_CURR"
if [ -e $SQL_OUT ] && [ -e $SQL_SORTED ] && [ "$skipParsing" -eq "1" ];
then
info "[parseAndAppend] Files exist, skipping parsing for $VERSION_CURR"
return
fi
parse_schema "$1" "$2"
}
sortAndGenerate()
{
debug "sortAndGenerate for $SQL_SORTED::$SCRIPT_INSERT"
if [ -e "$SQL_SORTED" ] && [ "$skipParsing" -eq "1" ];
then
debug "$SQL_SORTED exists and skipParsing is true, skipping sorting."
else
cleanse_sql
sort_tabledefs "$1"
fi
if [ -e "$SQL_SORTED" ] && [ "$skipGeneration" -eq "0" ];
then
generate_data "$1"
else
debug "SQL file not found or skipGeneration is true, skipping generation..."
fi
}
clearFiles() {
rmfs "$SQL_OUT"
rmfs "$SQL_SORTED"
rmfs "$SCRIPT_INSERT"
rmfs "$SCRIPT_QUERY"
}
info "Processing $HIVE_SCHEMA_BASE"
processFile "$HIVE_SCHEMA_BASE" "0"
if [[ -e "$SCRIPT_INSERT" ]]
then
debug "Appending file to master:$SCRIPT_INSERT"
cat "$SCRIPT_INSERT" >> "$INSERTALL"
fi
FILE_LIST="$HMS_UPGRADE_DIR/$DB_SERVER/upgrade.order.$DB_SERVER"
prefix=""
case "$DB_SERVER" in
mysql)
# TODO perform a case-insensitive match
prefix="SOURCE"
;;
postgres)
prefix="i\ "
;;
derby)
# TODO perform a case-insensitive match
prefix="RUN"
;;
oracle)
prefix="NOTSUPPORTED"
;;
mssql)
prefix="NOTSUPPORTED"
;;
esac
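# Example (a hypothetical mysql upgrade script line):
#   SOURCE 011-HIVE-3649.mysql.sql;
# matches the "SOURCE" prefix above; the loop below then strips the prefix and
# the trailing semicolon, yielding minisql=011-HIVE-3649.mysql.sql, which is
# parsed from $HMS_UPGRADE_DIR/$DB_SERVER.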
# iterate through every upgrade file listed in the upgrade.order.<db> file and for each
# upgrade file, parse the schema file and generate the data and a query file.
while read order
do
curr="$VERSION_CURR-to-"
if test "${order#*$curr}" != "$order"
then
VERSION_CURR="${order#*$curr}"
SQL_OUT="/tmp/$DB_SERVER-$VERSION_CURR-create.sql"
SQL_SORTED="/tmp/$DB_SERVER-$VERSION_CURR-sortedcreate.sql"
SCRIPT_INSERT="$(pwd)/dbs/$DB_SERVER/$DB_SERVER-$VERSION_CURR-insert.sql"
SCRIPT_QUERY="$(pwd)/dbs/$DB_SERVER/$DB_SERVER-$VERSION_CURR-query.sql"
upgrade="$HMS_UPGRADE_DIR/$DB_SERVER/upgrade-$order.$DB_SERVER.sql"
append=0
if [ -e $SQL_OUT ] && [ -e $SCRIPT_INSERT ] && [ -e $SQL_SORTED ] && [ "$skipParsing" -eq "1" ] && [ "$skipGeneration" -eq "1" ];
then
info "Files exist, Skipping parsing/generation for $VERSION_CURR"
continue
fi
if [ "$skipParsing" -eq "0" ];
then
clearFiles
elif [ "$skipGeneration" -eq "0" ];
then
rmfs "$SCRIPT_INSERT"
rmfs "$SCRIPT_QUERY"
fi
while read line
do
debug "$line"
if [[ $line =~ $prefix.* ]]
then
debug "upgrade.order processing $line prefix=$prefix"
minisql=`echo $line | cut -d' ' -f2- | cut -d';' -f1`
minisql="${minisql%\'}"
minisql="${minisql#\'}"
info "Processing $DB_SERVER/$minisql"
parseAndAppend "$HMS_UPGRADE_DIR/$DB_SERVER/$minisql" "$append"
append=1
fi
done < $upgrade;
sortAndGenerate "$append"
if [[ -e $SCRIPT_INSERT ]]
then
info "Appending file to master $SCRIPT_INSERT"
cat "$SCRIPT_INSERT" >> "$INSERTALL"
fi
append=0
fi
done < $FILE_LIST;
info "Hive Metastore data validation test successful."