blob: 69049302e9317d5e67e25cafcdfdffe9e0d8c9c8 [file]
-- A postgreSQL script to update Hive Metastore for HIVE-2246
SET client_min_messages = 'warning';
SELECT '< HIVE-2246: Dedupe tables column schemas from partitions in the metastore db >';
--
-- Creates the following tables:
-- - CDS
-- - COLUMNS_V2
-- The new columns table is called COLUMNS_V2
--
CREATE TABLE "CDS" (
"CD_ID" bigint NOT NULL,
PRIMARY KEY ("CD_ID")
);
CREATE TABLE "COLUMNS_V2" (
"CD_ID" bigint NOT NULL,
"COMMENT" varchar(4000),
"COLUMN_NAME" varchar(128) NOT NULL,
"TYPE_NAME" varchar(4000),
"INTEGER_IDX" INTEGER NOT NULL,
PRIMARY KEY ("CD_ID", "COLUMN_NAME")
);
ALTER TABLE "COLUMNS_V2" ADD FOREIGN KEY ("CD_ID")
REFERENCES "CDS" ("CD_ID") DEFERRABLE;
SELECT 'Tables Created';
-- Alter the SDS table to:
-- - add the column CD_ID
-- - add a foreign key on CD_ID
-- - create an index on CD_ID
--
ALTER TABLE "SDS" ADD COLUMN "CD_ID" bigint;
ALTER TABLE "SDS" ADD FOREIGN KEY ("CD_ID") REFERENCES "CDS" ("CD_ID") DEFERRABLE;
SELECT 'SDS Table Updated';
--
-- Migrate the TBLS table
-- Add entries into CDS.
-- Populate the CD_ID field in SDS for tables
-- Add entires to COLUMNS_V2 based on this table's sd's columns
--
-- In the migration, there is a 1:1 mapping between CD_ID and SD_ID
-- for tables. For speed, just let CD_ID = SD_ID for tables
--
INSERT INTO "CDS" ("CD_ID")
SELECT t."SD_ID" FROM "TBLS" t WHERE t."SD_ID" IS NOT NULL ORDER BY t."SD_ID";
SELECT 'CDS populated';
UPDATE "SDS"
SET "CD_ID" = "SD_ID"
WHERE "SD_ID" in
(SELECT t."SD_ID" FROM "TBLS" t WHERE t."SD_ID" IS NOT NULL ORDER BY t."SD_ID");
INSERT INTO "COLUMNS_V2"
("CD_ID", "COMMENT", "COLUMN_NAME", "TYPE_NAME", "INTEGER_IDX")
SELECT
c."SD_ID", c."COMMENT", c."COLUMN_NAME", c."TYPE_NAME", c."INTEGER_IDX"
FROM
"COLUMNS" c
JOIN
"TBLS" t
ON
t."SD_ID" = c."SD_ID"
;
SELECT 'Tables Migrated';
--
-- Migrate the partitions.
-- Update the partitions' SDS to use the parent tables' CD_ID BEGIN
-- Derby does not allow joins in update statements,
-- so we have to make a temporary tableh
--
CREATE GLOBAL TEMPORARY TABLE "TMP_TBL" (
"SD_ID" bigint not null,
"CD_ID" bigint not null
) ON COMMIT PRESERVE ROWS;
INSERT INTO "TMP_TBL" SELECT
p."SD_ID", sds."CD_ID"
FROM "PARTITIONS" p
JOIN "TBLS" t ON t."TBL_ID" = p."TBL_ID"
JOIN "SDS" sds on t."SD_ID" = sds."SD_ID"
WHERE p."SD_ID" IS NOT NULL;
UPDATE "SDS" sd
SET "CD_ID" = (SELECT tt."CD_ID" FROM "TMP_TBL" tt WHERE tt."SD_ID" = sd."SD_ID")
WHERE sd."SD_ID" IN (SELECT "SD_ID" FROM "TMP_TBL");
SELECT 'Partitions Migrated';
--
-- Migrate IDXS
--
INSERT INTO "CDS" ("CD_ID")
SELECT i."SD_ID" FROM "IDXS" i WHERE i."SD_ID" IS NOT NULL ORDER BY i."SD_ID";
UPDATE "SDS"
SET "CD_ID" = "SD_ID"
WHERE "SD_ID" in
(SELECT i."SD_ID" FROM "IDXS" i WHERE i."SD_ID" IS NOT NULL ORDER BY i."SD_ID");
INSERT INTO "COLUMNS_V2"
("CD_ID", "COMMENT", "COLUMN_NAME", "TYPE_NAME", "INTEGER_IDX")
SELECT
c."SD_ID", c."COMMENT", c."COLUMN_NAME", c."TYPE_NAME", c."INTEGER_IDX"
FROM
"COLUMNS" c
JOIN
"IDXS" i
ON
i."SD_ID" = c."SD_ID"
;
SELECT 'Indexes Migrated';
--
-- rename the old COLUMNS table
--
ALTER TABLE "COLUMNS" RENAME TO "COLUMNS_OLD";
SELECT 'Columns Table Renamed';