blob: 78159f194ffab6e006fb37f88562b3d2e0dd0ff3 [file] [log] [blame]
set hive.strict.checks.cartesian.product=false;
CREATE TABLE `customer_removal_n0`(
`c_custkey` bigint,
`c_name` string,
`c_address` string,
`c_city` string,
`c_nation` string,
`c_region` string,
`c_phone` string,
`c_mktsegment` string,
primary key (`c_custkey`) disable rely)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
CREATE TABLE `dates_removal_n0`(
`d_datekey` bigint,
`d_date` string,
`d_dayofweek` string,
`d_month` string,
`d_year` int,
`d_yearmonthnum` int,
`d_yearmonth` string,
`d_daynuminweek` int,
`d_daynuminmonth` int,
`d_daynuminyear` int,
`d_monthnuminyear` int,
`d_weeknuminyear` int,
`d_sellingseason` string,
`d_lastdayinweekfl` int,
`d_lastdayinmonthfl` int,
`d_holidayfl` int ,
`d_weekdayfl`int,
primary key (`d_datekey`) disable rely)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
CREATE TABLE `lineorder_removal_n0`(
`lo_orderkey` bigint,
`lo_linenumber` int,
`lo_custkey` bigint not null disable rely,
`lo_partkey` bigint not null disable rely,
`lo_suppkey` bigint not null disable rely,
`lo_orderdate` bigint,
`lo_ordpriority` string,
`lo_shippriority` string,
`lo_quantity` double,
`lo_extendedprice` double,
`lo_ordtotalprice` double,
`lo_discount` double,
`lo_revenue` double,
`lo_supplycost` double,
`lo_tax` double,
`lo_commitdate` bigint,
`lo_shipmode` string,
primary key (`lo_orderkey`) disable rely,
constraint fk1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely,
constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
-- CAN BE REMOVED AND DOES NOT NEED FILTER ON JOIN COLUMN
-- AS COLUMN IS ALREADY NOT NULLABLE
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
-- CAN BE REMOVED AND INTRODUCES A FILTER ON JOIN COLUMN
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
-- REMOVES THE JOIN
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
-- TRANSFORMS THE JOIN
EXPLAIN
SELECT `lo_linenumber`, `c_region`
FROM `lineorder_removal_n0`
LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
-- NOT TRANSFORMED INTO INNER JOIN SINCE JOIN COLUMN IS NULLABLE
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
-- REMOVES BOTH JOINS
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`
JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
-- REMOVES BOTH JOINS
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`
JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
-- REMOVE INNER AND NOT TRANFORM OUTER
EXPLAIN
SELECT `lo_linenumber` FROM
(SELECT *
FROM `lineorder_removal_n0`
JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq
LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
-- REMOVE FIRST OUTER AND NOT TRANFORM SECOND OUTER
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`
LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
-- REMOVE SECOND OUTER AND NOT TRANFORM FIRST OUTER
EXPLAIN
SELECT `lo_linenumber`
FROM `lineorder_removal_n0`
LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`
LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
-- SWAP AND REMOVE
EXPLAIN
SELECT `lo_linenumber`, `c_custkey`
FROM `lineorder_removal_n0`
JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
-- FK-PK JOIN with FK side removal
EXPLAIN
SELECT customer_removal_n0.*
FROM customer_removal_n0
JOIN
(SELECT lo_custkey
FROM lineorder_removal_n0
WHERE lo_custkey IS NOT NULL
GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
-- FK-PK JOIN with FK side removal
EXPLAIN
SELECT customer_removal_n0.*
FROM customer_removal_n0
JOIN
(SELECT lo_custkey, sum(lo_discount) sm
FROM lineorder_removal_n0
WHERE lo_custkey IS NOT NULL
GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
WHERE fkSide.sm > 0;
-- FK-PK JOIN with FK side removal, BUT without explicit IS NOT NULL on join column
EXPLAIN
SELECT customer_removal_n0.*
FROM customer_removal_n0
JOIN (SELECT lo_custkey
FROM lineorder_removal_n0
GROUP BY lo_custkey) fkSide on fkSide.lo_custkey = customer_removal_n0.c_custkey;
-- NEGATIVE for FK-PK JOIN with FK side removal, FK JOIN COL might not be distinct
EXPLAIN
SELECT customer_removal_n0.*
FROM customer_removal_n0
JOIN (SELECT lo_linenumber,lo_custkey
FROM lineorder_removal_n0
WHERE lo_custkey IS NOT NULL
GROUP BY lo_linenumber, lo_custkey,lo_custkey
) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
-- FK side join col is PK as well (thus providing uniqueness without GROUP BY)
CREATE TABLE `t1`(
`lo_orderkey` bigint,
`lo_linenumber` int,
`lo_custkey` bigint not null disable rely,
`lo_partkey` bigint not null disable rely,
`lo_orderdate` bigint,
`lo_revenue` double,
primary key (`lo_custkey`) disable rely,
constraint fkt1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely,
constraint fkt2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
EXPLAIN
SELECT customer_removal_n0.*
FROM customer_removal_n0
JOIN
(SELECT lo_custkey,lo_linenumber
FROM t1
WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
DROP TABLE t1;
-- FK side join col is UNIQUE and NOT NULL(thus providing uniqueness without GROUP BY)
CREATE TABLE `t1`(
`lo_orderkey` bigint,
`lo_linenumber` int,
`lo_custkey` bigint not null disable rely,
`lo_partkey` bigint not null disable rely,
`lo_orderdate` bigint,
`lo_revenue` double,
UNIQUE (`lo_custkey`) disable rely,
constraint fkt1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely,
constraint fkt2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
EXPLAIN
SELECT customer_removal_n0.*
FROM customer_removal_n0
JOIN
(SELECT lo_custkey,lo_linenumber
FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
DROP TABLE t1;