blob: 42fb720a3536d8fb849d2748e7aa8318bf2475c2 [file] [log] [blame]
-- start_ignore
create schema subselect_gp;
set search_path to subselect_gp, public;
-- end_ignore
set optimizer_enable_master_only_queries = on;
set optimizer_segments = 3;
set optimizer_nestloop_factor = 1.0;
--
-- Base tables for CSQ tests
--
drop table if exists csq_t1_base;
create table csq_t1_base(x int, y int);
insert into csq_t1_base values(1,2);
insert into csq_t1_base values(2,1);
insert into csq_t1_base values(4,2);
drop table if exists csq_t2_base;
create table csq_t2_base(x int, y int);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,1);
--
-- Correlated subqueries
--
drop table if exists csq_t1;
drop table if exists csq_t2;
create table csq_t1(x int, y int);
create table csq_t2(x int, y int);
insert into csq_t1 select * from csq_t1_base;
insert into csq_t2 select * from csq_t2_base;
select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1; -- expected (4,2)
--
-- correlations in the targetlist
--
select csq_t1.x, (select sum(bar.x) from csq_t1 bar where bar.x >= csq_t1.x) as sum from csq_t1 order by csq_t1.x;
select csq_t1.x, (select sum(bar.x) from csq_t1 bar where bar.x = csq_t1.x) as sum from csq_t1 order by csq_t1.x;
select csq_t1.x, (select bar.x from csq_t1 bar where bar.x = csq_t1.x) as sum from csq_t1 order by csq_t1.x;
--
-- CSQs with partitioned tables
--
drop table if exists csq_t1;
drop table if exists csq_t2;
create table csq_t1(x int, y int)
partition by range (y) ( start (0) end (4) every (1))
;
create table csq_t2(x int, y int)
partition by range (y) ( start (0) end (4) every (1))
;
insert into csq_t1 select * from csq_t1_base;
insert into csq_t2 select * from csq_t2_base;
explain select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1;
select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1; -- expected (4,2)
drop table if exists csq_t1;
drop table if exists csq_t2;
drop table if exists csq_t1_base;
drop table if exists csq_t2_base;
--
-- Multi-row subqueries
--
drop table if exists mrs_t1;
create table mrs_t1(x int);
insert into mrs_t1 select generate_series(1,20);
analyze mrs_t1;
explain select * from mrs_t1 where exists (select x from mrs_t1 where x < -1);
select * from mrs_t1 where exists (select x from mrs_t1 where x < -1) order by 1;
explain select * from mrs_t1 where exists (select x from mrs_t1 where x = 1);
select * from mrs_t1 where exists (select x from mrs_t1 where x = 1) order by 1;
explain select * from mrs_t1 where x in (select x-95 from mrs_t1) or x < 5;
select * from mrs_t1 where x in (select x-95 from mrs_t1) or x < 5 order by 1;
drop table if exists mrs_t1;
--
-- Multi-row subquery from MSTR
--
drop table if exists mrs_u1;
drop table if exists mrs_u2;
create TABLE mrs_u1 (a int, b int);
create TABLE mrs_u2 (a int, b int);
insert into mrs_u1 values (1,2),(11,22);
insert into mrs_u2 values (1,2),(11,22),(33,44);
select * from mrs_u1 join mrs_u2 on mrs_u1.a=mrs_u2.a where mrs_u1.a in (1,11) or mrs_u2.a in (select a from mrs_u1 where a=1) order by 1;
drop table if exists mrs_u1;
drop table if exists mrs_u2;
--
-- MPP-13758
--
drop table if exists csq_m1;
create table csq_m1();
set allow_system_table_mods=true;
delete from gp_distribution_policy where localoid='csq_m1'::regclass;
reset allow_system_table_mods;
alter table csq_m1 add column x int;
insert into csq_m1 values(1);
analyze csq_m1;
drop table if exists csq_d1;
create table csq_d1(x int);
insert into csq_d1 select * from csq_m1;
analyze csq_d1;
explain select array(select x from csq_m1); -- no initplan
select array(select x from csq_m1); -- {1}
explain select array(select x from csq_d1); -- initplan
select array(select x from csq_d1); -- {1}
--
-- CSQs involving master-only and distributed tables
--
drop table if exists t3cozlib;
create table t3cozlib (c1 int , c2 varchar) with (appendonly=true, compresstype=zlib, orientation=column);
drop table if exists pg_attribute_storage;
create table pg_attribute_storage (attrelid int, attnum int, attoptions text[]);
insert into pg_attribute_storage values ('t3cozlib'::regclass, 1, E'{\'something\'}');
insert into pg_attribute_storage values ('t3cozlib'::regclass, 2, E'{\'something2\'}');
SELECT a.attname
, pg_catalog.format_type(a.atttypid, a.atttypmod)
, ( SELECT substring(pg_catalog.pg_get_expr(d.adbin, d.adrelid) for 128)
FROM pg_catalog.pg_attrdef d
WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef
)
, a.attnotnull
, a.attnum
, a.attstorage
, pg_catalog.col_description(a.attrelid, a.attnum)
, ( SELECT s.attoptions
FROM pg_attribute_storage s
WHERE s.attrelid = a.attrelid AND s.attnum = a.attnum
) newcolumn
FROM pg_catalog.pg_attribute a
WHERE a.attrelid = 't3cozlib'::regclass AND a.attnum > 0 AND NOT a.attisdropped
ORDER BY a.attnum
; -- expect to see 2 rows
--
-- More CSQs involving master-only and distributed relations
--
drop table if exists csq_m1;
create table csq_m1();
set allow_system_table_mods=true;
delete from gp_distribution_policy where localoid='csq_m1'::regclass;
reset allow_system_table_mods;
alter table csq_m1 add column x int;
insert into csq_m1 values(1),(2),(3);
analyze csq_m1;
drop table if exists csq_d1;
create table csq_d1(x int);
insert into csq_d1 select * from csq_m1 where x < 3;
insert into csq_d1 values(4);
analyze csq_d1;
select * from csq_m1;
select * from csq_d1;
--
-- outer plan node is master-only and CSQ has distributed relation
--
explain select * from csq_m1 where x not in (select x from csq_d1) or x < -100; -- gather motion
select * from csq_m1 where x not in (select x from csq_d1) or x < -100; -- (3)
--
-- outer plan node is master-only and CSQ has distributed relation
--
explain select * from csq_d1 where x not in (select x from csq_m1) or x < -100; -- broadcast motion
select * from csq_d1 where x not in (select x from csq_m1) or x < -100; -- (4)
-- drop csq_m1 since we deleted its gp_distribution_policy entry
drop table csq_m1;
--
-- MPP-14441 Don't lose track of initplans
--
drop table if exists csq_t1;
CREATE TABLE csq_t1 (a int, b int, c int, d int, e text);
INSERT INTO csq_t1 SELECT i, i/3, i%2, 100-i, 'text'||i FROM generate_series(1,100) i;
select count(*) from csq_t1 t1 where a > (SELECT x.b FROM ( select avg(a)::int as b,'haha'::text from csq_t1 t2 where t2.a=t1.d) x ) ;
select count(*) from csq_t1 t1 where a > ( select avg(a)::int from csq_t1 t2 where t2.a=t1.d) ;
--
-- correlation in a func expr
--
CREATE OR REPLACE FUNCTION csq_f(a int) RETURNS int AS $$ select $1 $$ LANGUAGE SQL CONTAINS SQL;
DROP TABLE IF EXISTS csq_r;
CREATE TABLE csq_r(a int);
INSERT INTO csq_r VALUES (1);
-- subqueries shouldn't be pulled into a join if the from clause has a function call
-- with a correlated argument
-- force_explain
explain SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a));
SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a));
-- force_explain
explain SELECT * FROM csq_r WHERE a not IN (SELECT * FROM csq_f(csq_r.a));
SELECT * FROM csq_r WHERE a not IN (SELECT * FROM csq_f(csq_r.a));
-- force_explain
explain SELECT * FROM csq_r WHERE exists (SELECT * FROM csq_f(csq_r.a));
SELECT * FROM csq_r WHERE exists (SELECT * FROM csq_f(csq_r.a));
-- force_explain
explain SELECT * FROM csq_r WHERE not exists (SELECT * FROM csq_f(csq_r.a));
SELECT * FROM csq_r WHERE not exists (SELECT * FROM csq_f(csq_r.a));
-- force_explain
explain SELECT * FROM csq_r WHERE a > (SELECT csq_f FROM csq_f(csq_r.a) limit 1);
SELECT * FROM csq_r WHERE a > (SELECT csq_f FROM csq_f(csq_r.a) limit 1);
-- force_explain
explain SELECT * FROM csq_r WHERE a < ANY (SELECT csq_f FROM csq_f(csq_r.a));
SELECT * FROM csq_r WHERE a < ANY (SELECT csq_f FROM csq_f(csq_r.a));
-- force_explain
explain SELECT * FROM csq_r WHERE a <= ALL (SELECT csq_f FROM csq_f(csq_r.a));
SELECT * FROM csq_r WHERE a <= ALL (SELECT csq_f FROM csq_f(csq_r.a));
-- force_explain
explain SELECT * FROM csq_r WHERE a IN (SELECT csq_f FROM csq_f(csq_r.a),csq_r);
SELECT * FROM csq_r WHERE a IN (SELECT csq_f FROM csq_f(csq_r.a),csq_r);
--
-- Test pullup of expr CSQs to joins
--
--
-- Test data
--
drop table if exists csq_pullup;
create table csq_pullup(t text, n numeric, i int, v varchar(10));
insert into csq_pullup values ('abc',1, 2, 'xyz');
analyze csq_pullup;
insert into csq_pullup values ('xyz',2, 3, 'def');
insert into csq_pullup values ('def',3, 1, 'abc');
--
-- Expr CSQs to joins
--
--
-- text, text
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t);
--
-- text, varchar
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.v);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.v);
--
-- numeric, numeric
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n);
--
-- function(numeric), function(numeric)
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1);
--
-- function(numeric), function(int)
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1);
--
-- Test a few cases where pulling up an aggregate subquery is not possible
--
-- subquery contains a LIMIT
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t LIMIT 1);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t LIMIT 1);
-- subquery contains a HAVING clause
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t HAVING count(*) < 10);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t HAVING count(*) < 10);
-- subquery contains quals of form 'function(outervar, innervar1) = innvervar2'
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + t1.n =t1.i);
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + t1.n =t1.i);
--
-- NOT EXISTS CSQs to joins
--
--
-- text, text
--
explain select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.t=t1.t and t1.i = 1);
select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.t=t1.t and t1.i = 1);
--
-- int, function(int)
--
explain select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.i=t1.i + 1);
select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.i=t1.i + 1);
--
-- wrong results bug MPP-16477
--
drop table if exists subselect_t1;
drop table if exists subselect_t2;
create table subselect_t1(x int);
insert into subselect_t1 values(1),(2);
create table subselect_t2(y int);
insert into subselect_t2 values(1),(2),(2);
analyze subselect_t1;
analyze subselect_t2;
explain select * from subselect_t1 where x in (select y from subselect_t2);
select * from subselect_t1 where x in (select y from subselect_t2);
-- start_ignore
-- Known_opt_diff: MPP-21351
-- end_ignore
explain select * from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
select * from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
explain select count(*) from subselect_t1 where x in (select y from subselect_t2);
select count(*) from subselect_t1 where x in (select y from subselect_t2);
-- start_ignore
-- Known_opt_diff: MPP-21351
-- end_ignore
explain select count(*) from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
select count(*) from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
select count(*) from
( select 1 as FIELD_1 union all select 2 as FIELD_1 ) TABLE_1
where FIELD_1 in ( select 1 as FIELD_1 union all select 1 as FIELD_1 union all select 1 as FIELD_1 );
--
-- Query was deadlocking because of not squelching subplans (MPP-18936)
--
drop table if exists t1;
drop table if exists t2;
drop table if exists t3;
drop table if exists t4;
CREATE TABLE t1 AS (SELECT generate_series(1, 5000) AS i, generate_series(5001, 10000) AS j);
CREATE TABLE t2 AS (SELECT * FROM t1 WHERE gp_segment_id = 0);
CREATE TABLE t3 AS (SELECT * FROM t1 WHERE gp_segment_id = 1);
CREATE TABLE t4 (i1 int, i2 int);
set gp_interconnect_queue_depth=1;
-- This query was deadlocking on a 2P system
INSERT INTO t4
(
SELECT t1.i, (SELECT t3.i FROM t3 WHERE t3.i + 1 = t1.i + 1)
FROM t1, t3
WHERE t1.i = t3.i
)
UNION
(
SELECT t1.i, (SELECT t2.i FROM t2 WHERE t2.i + 1 = t1.i + 1)
FROM t1, t2
WHERE t1.i = t2.i
);
drop table if exists t1;
drop table if exists t2;
drop table if exists t3;
drop table if exists t4;
--
-- Initplans with no corresponding params should be removed MPP-20600
--
drop table if exists t1;
drop table if exists t2;
create table t1(a int);
create table t2(b int);
select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
explain select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
explain select * from t1 where a=1 and a=2 and a > (select t2.b from t2)
union all
select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
select * from t1 where a=1 and a=2 and a > (select t2.b from t2)
union all
select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
explain select * from t1,
(select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo
where t1.a = foo.a;
select * from t1,
(select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo
where t1.a = foo.a;
--
-- Correlated subqueries with limit/offset clause must not be pulled up as join
--
insert into t1 values (1);
insert into t2 values (1);
explain select 1 from t1 where a in (select b from t2 where a = 1 limit 1);
explain select 1 from t1 where a in (select b from t2 where a = 1 offset 1);
select 1 from t1 where a in (select b from t2 where a = 1 limit 1);
select 1 from t1 where a in (select b from t2 where a = 1 offset 1);
drop table if exists t1;
drop table if exists t2;
--
-- Test for a bug we used to have with eliminating InitPlans. The subplan,
-- (select max(content) from y), was eliminated when it shouldn't have been.
-- The query is supposed to return 0 rows, but returned > 0 when the bug was
-- present.
--
CREATE TABLE initplan_x (i int4, t text);
insert into initplan_x values
(1, 'foobar1'),
(2, 'foobar2'),
(3, 'foobar3'),
(4, 'foobar4'),
(5, 'foobar5');
CREATE TABLE initplan_y (content int4);
insert into initplan_y values (5);
select i, t from initplan_x
except
select g, t from initplan_x,
generate_series(0, (select max(content) from initplan_y)) g
order by 1;
drop table if exists initplan_x;
drop table if exists initplan_y;
--
-- Test Initplans that return multiple params.
--
create table initplan_test(i int, j int, m int);
insert into initplan_test values (1,1,1);
select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1);
drop table initplan_test;
--
-- apply parallelization for subplan MPP-24563
--
create table t1_mpp_24563 (id int, value int);
insert into t1_mpp_24563 values (1, 3);
create table t2_mpp_24563 (id int, value int, seq int);
insert into t2_mpp_24563 values (1, 7, 5);
explain select row_number() over (order by seq asc) as id, foo.cnt
from
(select seq, (select count(*) from t1_mpp_24563 t1 where t1.id = t2.id) cnt from
t2_mpp_24563 t2 where value = 7) foo;
drop table t1_mpp_24563;
drop table t2_mpp_24563;
--
-- MPP-20470 update the flow of node after parallelizing subplan.
--
CREATE TABLE t_mpp_20470 (
col_date timestamp without time zone,
col_name character varying(6),
col_expiry date
) PARTITION BY RANGE(col_date)
(
START ('2013-05-10 00:00:00'::timestamp without time zone) END ('2013-05-11
00:00:00'::timestamp without time zone) WITH (tablename='t_mpp_20470_ptr1', appendonly=false ),
START ('2013-05-24 00:00:00'::timestamp without time zone) END ('2013-05-25
00:00:00'::timestamp without time zone) WITH (tablename='t_mpp_20470_ptr2', appendonly=false )
);
COPY t_mpp_20470 from STDIN delimiter '|' null '';
2013-05-10 00:00:00|OPTCUR|2013-05-29
2013-05-10 04:35:20|OPTCUR|2013-05-29
2013-05-24 03:10:30|FUTCUR|2014-04-28
2013-05-24 05:32:34|OPTCUR|2013-05-29
\.
create view v1_mpp_20470 as
SELECT
CASE
WHEN b.col_name::text = 'FUTCUR'::text
THEN ( SELECT count(a.col_expiry) AS count FROM t_mpp_20470 a WHERE
a.col_name::text = b.col_name::text)::text
ELSE 'Q2'::text END AS cc, 1 AS nn
FROM t_mpp_20470 b;
explain SELECT cc, sum(nn) over() FROM v1_mpp_20470;
drop view v1_mpp_20470;
drop table t_mpp_20470;
create table tbl_25484(id int, num int);
insert into tbl_25484 values(1, 1), (2, 2), (3, 3);
select id from tbl_25484 where 3 = (select 3 where 3 = (select num));
drop table tbl_25484;
reset optimizer_segments;
reset optimizer_nestloop_factor;
--
-- Test case that once triggered a bug in the IN-clause pull-up code.
--
SELECT p.id
FROM (SELECT * FROM generate_series(1,10) id
WHERE id IN (
SELECT 1
UNION ALL
SELECT 0)) p;
--
-- Verify another bug in the IN-clause pull-up code. This returned some
-- rows from xsupplier twice, because of a bug in detecting whether a
-- Redistribute node was needed.
--
CREATE TABLE xlineitem (l_orderkey int4, l_suppkey int4);
insert into xlineitem select g+3, g from generate_series(10,100) g;
insert into xlineitem select g+1, g from generate_series(10,100) g;
insert into xlineitem select g, g from generate_series(10,100) g;
CREATE TABLE xsupplier (s_suppkey int4, s_name text);
insert into xsupplier select g, 'foo' || g from generate_series(1,10) g;
select s_name from xsupplier
where s_suppkey in (
select g.l_suppkey from xlineitem g
) ;
--
-- Another case that failed at one point. (A planner bug in pulling up a
-- subquery with constant distribution key, 1, in the outer queries.)
--
create table nested_in_tbl(tc1 int, tc2 int);
select * from nested_in_tbl t1 where tc1 in
(select 1 from nested_in_tbl t2 where tc1 in
(select 1 from nested_in_tbl t3 where t3.tc2 = t2.tc2));
drop table nested_in_tbl;
--
-- Window query with a function scan that has non-correlated subquery.
--
SELECT rank() over (partition by min(c) order by min(c)) AS p_rank FROM (SELECT d AS c FROM (values(1)) d1, generate_series(0,(SELECT 2)) AS d) tt GROUP BY c;
--
-- Remove unused subplans
--
create table foo(a int, b int) partition by range(b) (start(1) end(3) every(1));
create table bar(a int, b int);
with CT as (select a from foo except select a from bar)
select * from foo
where exists (select 1 from CT where CT.a = foo.a);
drop table foo;
drop table bar;
--
-- Multiple SUBPLAN nodes referring to the same plan_id
--
CREATE TABLE bar_s (c integer, d character varying(10));
INSERT INTO bar_s VALUES (9,9);
ANALYZE bar_s;
SELECT * FROM bar_s T1 WHERE c = (SELECT max(c) FROM bar_s T2 WHERE T2.d = T1.d GROUP BY c) AND c < 10;
CREATE TABLE foo_s (a integer, b integer) PARTITION BY RANGE(b)
(PARTITION sub_one START (1) END (10),
PARTITION sub_two START (11) END (22));
INSERT INTO foo_s VALUES (9,9);
INSERT INTO foo_s VALUES (2,9);
SELECT bar_s.c from bar_s, foo_s WHERE foo_s.a=2 AND foo_s.b = (SELECT max(b) FROM foo_s WHERE bar_s.c = 9);
CREATE TABLE baz_s (i int4);
INSERT INTO baz_s VALUES (9);
ANALYZE baz_s;
-- In this query, the planner avoids using SubPlan 1 in the qual in the join,
-- because it avoids picking SubPlans from an equivalence class, when it has
-- other choices.
SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = bar_s.d::int4;
-- Same as above, but with another subquery, so it must use a SubPlan. There
-- are two references to the same SubPlan in the plan, on different slices.
-- GPDB_96_MERGE_FIXME: this EXPLAIN output should become nicer-looking once we
-- merge upstream commit 4d042999f9, to suppress the SubPlans from being
-- printed twice.
explain SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4);
SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4);
DROP TABLE bar_s;
DROP TABLE foo_s;
DROP TABLE baz_s;
--
-- EXPLAIN tests for queries in subselect.sql to significant plan changes
--
-- Set up some simple test tables
CREATE TABLE SUBSELECT_TBL (
f1 integer,
f2 integer,
f3 float
);
INSERT INTO SUBSELECT_TBL VALUES (1, 2, 3);
INSERT INTO SUBSELECT_TBL VALUES (2, 3, 4);
INSERT INTO SUBSELECT_TBL VALUES (3, 4, 5);
INSERT INTO SUBSELECT_TBL VALUES (1, 1, 1);
INSERT INTO SUBSELECT_TBL VALUES (2, 2, 2);
INSERT INTO SUBSELECT_TBL VALUES (3, 3, 3);
INSERT INTO SUBSELECT_TBL VALUES (6, 7, 8);
INSERT INTO SUBSELECT_TBL VALUES (8, 9, NULL);
ANALYZE SUBSELECT_TBL;
-- Uncorrelated subselects
EXPLAIN SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL) ORDER BY 2;
EXPLAIN SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE
f2 IN (SELECT f1 FROM SUBSELECT_TBL)) ORDER BY 2;
EXPLAIN SELECT '' AS three, f1, f2
FROM SUBSELECT_TBL
WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL) ORDER BY 2,3;
ANALYZE tenk1;
EXPLAIN SELECT * FROM tenk1 a, tenk1 b
WHERE (a.unique1,b.unique2) IN (SELECT unique1,unique2 FROM tenk1 c);
-- Correlated subselects
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1) ORDER BY 2,3;
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN
(SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3) ORDER BY 2,3;
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
WHERE f2 = CAST(f3 AS integer)) ORDER BY 2,3;
EXPLAIN SELECT '' AS five, f1 AS "Correlated Field"
FROM SUBSELECT_TBL
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL) ORDER BY 2;
--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.
--
EXPLAIN select count(*) from
(select 1 from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
EXPLAIN select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
EXPLAIN select count(*) from
(select 1 from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
EXPLAIN select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
--
-- In case of simple exists query, planner can generate alternative
-- subplans and choose one of them during execution based on the cost.
-- The below test check that we are generating alternative subplans,
-- we should see 2 subplans in the explain
--
EXPLAIN SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1;
SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1;
--
-- Ensure that NOT is not lost during subquery pull-up
--
SELECT 1 AS col1 WHERE NOT (SELECT 1 = 1);
--
-- Test sane behavior in case of semi join semantics
--
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
DROP TABLE IF EXISTS dedup_test2;
DROP TABLE IF EXISTS dedup_test3;
-- end_ignore
CREATE TABLE dedup_test1 ( a int, b int );
CREATE TABLE dedup_test2 ( e int, f int );
CREATE TABLE dedup_test3 ( a int, b int, c int) PARTITION BY RANGE(c) (START(1) END(2) EVERY(1));
INSERT INTO dedup_test1 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test2 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test3 select 1, 1, 1 from generate_series(1,10);
ANALYZE dedup_test1;
ANALYZE dedup_test2;
ANALYZE dedup_test3;
EXPLAIN SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
-- Test planner to check if it optimizes the join and marks it as a dummy join
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1);
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1);
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1);
-- More dedup semi-join tests.
create table dedup_tab (a int4) ;
insert into dedup_tab select g from generate_series(1,100) g;
analyze dedup_tab;
create table dedup_reptab (a int4) ;
insert into dedup_reptab select generate_series(1,1);
analyze dedup_reptab;
-- Replicated table on the inner side of the join. The replicated table needs
-- be broadcast from a single node to the others, with a unique RowIdExpr
-- tacked on, because even though all the rows are available in all the
-- segments, you cannot distinguish join rows generated by the same "logical"
-- row otherwise.
explain (costs off)
select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
-- Try the same with a General-locus function. In GPDB 6 and below, this
-- generated a plan that did create the same logical row ID on each segment,
-- on the assumption that an immutable function generates the result rows
-- in the same order on all segments. We no longer assume that, and generate
-- the same plan with a broadcast as the case with a replicated table.
--
-- We have to create a custom function for this, instead of using
-- generate_series() directly, because the rows-estimate for generate_series()
-- is so high that we don't get the plan we want. (After PostgreSQL v12 we
-- could though, because the cost estimation of functions was improved.)
create function dedup_srf() RETURNS SETOF int AS $$
begin
return query select generate_series(1, 3);
end;
$$ LANGUAGE plpgsql IMMUTABLE ROWS 3;
create function dedup_srf_stable() RETURNS SETOF int AS $$
begin
return query select generate_series(1, 3);
end;
$$ LANGUAGE plpgsql STABLE ROWS 3;
create function dedup_srf_volatile() RETURNS SETOF int AS $$
begin
return query select generate_series(1, 3);
end;
$$ LANGUAGE plpgsql VOLATILE ROWS 3;
explain (costs off)
select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t);
explain (costs off)
select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
explain (costs off)
select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
-- Also test it with non-SRFs. In principle, since the function returns exactly
-- one row, no deduplication would be needed in these cases. But the planner
-- doesn't recognize that currently, so you get the same kind of plan as with
-- set-returning functions.
create function dedup_func() RETURNS int AS $$
select 5;
$$ LANGUAGE SQL IMMUTABLE;
create function dedup_func_stable() RETURNS int AS $$
select 5;
$$ LANGUAGE SQL STABLE;
create function dedup_func_volatile() RETURNS int AS $$
select 5;
$$ LANGUAGE SQL VOLATILE;
explain (costs off)
select * from dedup_func() r(a) where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_func() r(a) where r.a in (select t.a/10 from dedup_tab t);
explain (costs off)
select * from dedup_func_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_func_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
explain (costs off)
select * from dedup_func_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
select * from dedup_func_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
--
-- Test init/main plan are not both parallel
--
create table init_main_plan_parallel (c1 int, c2 int);
-- case 1: init plan is parallel, main plan is not.
select relname from pg_class where exists(select * from init_main_plan_parallel);
-- case2: init plan is not parallel, main plan is parallel
select * from init_main_plan_parallel where exists (select * from pg_class);
-- A subplan whose targetlist might be expanded to make sure all entries of its
-- hashExpr are in its targetlist, test the motion node above it also updated
-- its targetlist, otherwise, a wrong answer or a crash happens.
DROP TABLE IF EXISTS TEST_IN;
CREATE TABLE TEST_IN(
C01 FLOAT,
C02 NUMERIC(10,0)
);
--insert repeatable records:
INSERT INTO TEST_IN
SELECT
ROUND(RANDOM()*1E1),ROUND(RANDOM()*1E1)
FROM GENERATE_SERIES(1,1E4::BIGINT) I;
ANALYZE TEST_IN;
SELECT COUNT(*) FROM
TEST_IN A
WHERE A.C01 IN(SELECT C02 FROM TEST_IN);
--
-- Variant of the test in upstream 'subselect' test, for PostgreSQL bug #14924
-- At one point, this produced wrong results on GPDB for different reasons than
-- the original bug: we forgot to handle the VALUES list in the function to
-- mutate a plan tree (plan_tree_mutator()).
--
create temp table onerowtmp as select 1;
select val.x
from generate_series(1,10) as s(i),
lateral (
values ((select s.i + 1 from onerowtmp)), (s.i + 101)
) as val(x)
where s.i < 10 and val.x < 110;
-- EXISTS sublink simplication
drop table if exists simplify_sub;
create table simplify_sub (i int);
insert into simplify_sub values (1);
insert into simplify_sub values (2);
analyze simplify_sub;
-- limit n
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
-- aggregates without GROUP BY or HAVING
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
drop table if exists simplify_sub;
--
-- Test a couple of cases where a SubPlan is used in a Motion's hash key.
--
create table foo (i int4, j int4);
create table bar (i int4, j int4);
create table baz (i int4, j int4);
insert into foo select g, g from generate_series(1, 10) g;
insert into bar values (1, 1);
insert into baz select g, g from generate_series(5, 100) g;
analyze foo;
analyze bar;
analyze baz;
explain (verbose, costs off)
select * from foo left outer join baz on (select bar.i from bar where bar.i = foo.i) + 1 = baz.j;
select * from foo left outer join baz on (select bar.i from bar where bar.i = foo.i) + 1 = baz.j;
-- This is a variant of a query in the upstream 'subselect' test, with the
-- twist that baz.i is the distribution key for the table. In the plan, the
-- CASE WHEN construct with SubPlan is used as Hash Key in the Redistribute
-- Motion. It is a planned as a hashed SubPlan. (We had a bug at one point,
-- where the hashed SubPlan was added to the target list twice, which
-- caused an error at runtime when the executor tried to build the hash
-- table twice, because the Motion in the SubPlan couldn't be rescanned.)
explain (verbose, costs off)
select * from foo where
(case when foo.i in (select a.i from baz a) then foo.i else null end) in
(select b.i from baz b);
select * from foo where
(case when foo.i in (select a.i from baz a) then foo.i else null end) in
(select b.i from baz b);
-- When creating plan with subquery and CTE, it sets the useless flow for the plan.
-- But we only need flow for the topmost plan and child of the motion. See commit
-- https://github.com/greenplum-db/gpdb/commit/93abe741cd67f04958e2951edff02b45ab6e280f for detail
-- The extra flow will cause subplan set wrong motionType and cause an ERROR
-- unexpected gang size: XX
-- This related to issue: https://github.com/greenplum-db/gpdb/issues/12371
create table extra_flow_dist(a int, b int, c date);
create table extra_flow_dist1(a int, b int);
insert into extra_flow_dist select i, i, '1949-10-01'::date from generate_series(1, 10)i;
insert into extra_flow_dist1 select i, i from generate_series(20, 22)i;
-- case 1 subplan with outer general locus (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below general locus path
) dt
from (select ( max(1) ) x) a -- general locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
with run_dt as (
select
(
select c from extra_flow_dist where b = x
) dt
from (select ( max(1) ) x) a
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
create table extra_flow_rand(a int) ;
insert into extra_flow_rand values (1);
-- case 2 for subplan with outer segment general locus (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below segment general locus path
) dt
from (select a x from extra_flow_rand) a -- segment general locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
with run_dt as (
select
(
select c from extra_flow_dist where b = x
) dt
from (select a x from extra_flow_rand) a
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
-- case 3 for subplan with outer entry locus (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below entry locus path
) dt
from (select 1 x from pg_class limit 1) a -- entry locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below entry locus path
) dt
from (select 1 x from pg_class limit 1) a -- entry locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
-- case 4 subplan with outer segment general locus without param in subplan (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select x, y dt
from (select a x from extra_flow_rand ) a -- segment general locus
left join (select max(1) y) aaa
on a.x > any (select random() from extra_flow_dist) -- subplan's outer is the above segment general locus path
)
select * from run_dt, extra_flow_dist1
where dt < extra_flow_dist1.a;
-- case 5 for subplan with outer entry locus without param in subplan (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select x, y dt
from (select relnatts x from pg_class ) a -- entry locus
left join (select max(1) y) aaa
on a.x > any (select random() from extra_flow_dist) -- subplan's outer is the above entry loucs
)
select * from run_dt, extra_flow_dist1
where dt < extra_flow_dist1.a;
-- case 6 without CTE, nested subquery should not add extral flow
explain (verbose, costs off) select * from (
select dt from (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below general locus path
) dt
from (select ( max(1) ) x) a -- general locus
union
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below general locus path
) dt
from (select ( max(1) ) x) aa -- general locus
) tbl
) run_dt,
extra_flow_dist1
where dt < '2010-01-01'::date;
-- Check DISTINCT ON clause and ORDER BY clause in SubLink, See https://github.com/greenplum-db/gpdb/issues/12656.
-- For EXISTS SubLink, we don’t need to care about the data deduplication problem, we can delete DISTINCT ON clause and
-- ORDER BY clause with confidence, because we only care about whether the data exists.
-- But for ANY SubLink, wo can't do this, because we not only care about the existence of data, but also the content of
-- the data.
create table issue_12656 (
i int,
j int
);
insert into issue_12656 values (1, 10001), (1, 10002);
-- case 1, check basic DISTINCT ON
explain (costs off, verbose)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656);
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656);
-- case 2, check DISTINCT ON and ORDER BY
explain (costs off, verbose)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j asc);
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j asc);
explain (costs off, verbose)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc);
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc);