blob: a36f1c63574dea89b9bd519ad892ea8060b08dba [file] [log] [blame]
--
-- SUBSELECT
--
SELECT 1 AS one WHERE 1 IN (SELECT 1);
one
-----
1
(1 row)
SELECT 1 AS zero WHERE 1 NOT IN (SELECT 1);
zero
------
(0 rows)
SELECT 1 AS zero WHERE 1 IN (SELECT 2);
zero
------
(0 rows)
-- Check grammar's handling of extra parens in assorted contexts
SELECT * FROM (SELECT 1 AS x) ss;
x
---
1
(1 row)
SELECT * FROM ((SELECT 1 AS x)) ss;
x
---
1
(1 row)
SELECT * FROM ((SELECT 1 AS x)), ((SELECT * FROM ((SELECT 2 AS y))));
x | y
---+---
1 | 2
(1 row)
(SELECT 2) UNION SELECT 2;
?column?
----------
2
(1 row)
((SELECT 2)) UNION SELECT 2;
?column?
----------
2
(1 row)
SELECT ((SELECT 2) UNION SELECT 2);
?column?
----------
2
(1 row)
SELECT (((SELECT 2)) UNION SELECT 2);
?column?
----------
2
(1 row)
SELECT (SELECT ARRAY[1,2,3])[1];
array
-------
1
(1 row)
SELECT ((SELECT ARRAY[1,2,3]))[2];
array
-------
2
(1 row)
SELECT (((SELECT ARRAY[1,2,3])))[3];
array
-------
3
(1 row)
-- Set up some simple test tables
CREATE TABLE SUBSELECT_TBL (
f1 integer,
f2 integer,
f3 float
);
INSERT INTO SUBSELECT_TBL VALUES (1, 2, 3);
INSERT INTO SUBSELECT_TBL VALUES (2, 3, 4);
INSERT INTO SUBSELECT_TBL VALUES (3, 4, 5);
INSERT INTO SUBSELECT_TBL VALUES (1, 1, 1);
INSERT INTO SUBSELECT_TBL VALUES (2, 2, 2);
INSERT INTO SUBSELECT_TBL VALUES (3, 3, 3);
INSERT INTO SUBSELECT_TBL VALUES (6, 7, 8);
INSERT INTO SUBSELECT_TBL VALUES (8, 9, NULL);
SELECT * FROM SUBSELECT_TBL;
f1 | f2 | f3
----+----+----
1 | 2 | 3
2 | 3 | 4
3 | 4 | 5
1 | 1 | 1
2 | 2 | 2
3 | 3 | 3
6 | 7 | 8
8 | 9 |
(8 rows)
-- Uncorrelated subselects
SELECT f1 AS "Constant Select" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT 1);
Constant Select
-----------------
1
1
(2 rows)
SELECT f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL);
Uncorrelated Field
--------------------
1
2
3
1
2
3
(6 rows)
SELECT f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE
f2 IN (SELECT f1 FROM SUBSELECT_TBL));
Uncorrelated Field
--------------------
1
2
3
1
2
3
(6 rows)
SELECT f1, f2
FROM SUBSELECT_TBL
WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL);
f1 | f2
----+----
1 | 2
6 | 7
8 | 9
(3 rows)
-- Correlated subselects
SELECT f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1);
Correlated Field | Second Field
------------------+--------------
1 | 2
2 | 3
3 | 4
1 | 1
2 | 2
3 | 3
(6 rows)
SELECT f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN
(SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
Correlated Field | Second Field
------------------+--------------
2 | 4
3 | 5
1 | 1
2 | 2
3 | 3
(5 rows)
SELECT f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
WHERE f2 = CAST(f3 AS integer));
Correlated Field | Second Field
------------------+--------------
1 | 3
2 | 4
3 | 5
6 | 8
(4 rows)
SELECT f1 AS "Correlated Field"
FROM SUBSELECT_TBL
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL);
Correlated Field
------------------
2
3
1
2
3
(5 rows)
-- Subselects without aliases
SELECT count FROM (SELECT COUNT(DISTINCT name) FROM road);
count
-------
2911
(1 row)
SELECT COUNT(*) FROM (SELECT DISTINCT name FROM road);
count
-------
2911
(1 row)
SELECT * FROM (SELECT * FROM int4_tbl), (VALUES (123456)) WHERE f1 = column1;
f1 | column1
--------+---------
123456 | 123456
(1 row)
CREATE VIEW view_unnamed_ss AS
SELECT * FROM (SELECT * FROM (SELECT abs(f1) AS a1 FROM int4_tbl)),
(SELECT * FROM int8_tbl)
WHERE a1 < 10 AND q1 > a1 ORDER BY q1, q2;
SELECT * FROM view_unnamed_ss;
a1 | q1 | q2
----+------------------+-------------------
0 | 123 | 456
0 | 123 | 4567890123456789
0 | 4567890123456789 | -4567890123456789
0 | 4567890123456789 | 123
0 | 4567890123456789 | 4567890123456789
(5 rows)
\sv view_unnamed_ss
CREATE OR REPLACE VIEW public.view_unnamed_ss AS
SELECT unnamed_subquery.a1,
unnamed_subquery_1.q1,
unnamed_subquery_1.q2
FROM ( SELECT unnamed_subquery_2.a1
FROM ( SELECT abs(int4_tbl.f1) AS a1
FROM int4_tbl) unnamed_subquery_2) unnamed_subquery,
( SELECT int8_tbl.q1,
int8_tbl.q2
FROM int8_tbl) unnamed_subquery_1
WHERE unnamed_subquery.a1 < 10 AND unnamed_subquery_1.q1 > unnamed_subquery.a1
ORDER BY unnamed_subquery_1.q1, unnamed_subquery_1.q2
DROP VIEW view_unnamed_ss;
-- Test matching of locking clause to correct alias
CREATE VIEW view_unnamed_ss_locking AS
SELECT * FROM (SELECT * FROM int4_tbl), int8_tbl AS unnamed_subquery
WHERE f1 = q1
FOR UPDATE OF unnamed_subquery;
\sv view_unnamed_ss_locking
CREATE OR REPLACE VIEW public.view_unnamed_ss_locking AS
SELECT unnamed_subquery.f1,
unnamed_subquery_1.q1,
unnamed_subquery_1.q2
FROM ( SELECT int4_tbl.f1
FROM int4_tbl) unnamed_subquery,
int8_tbl unnamed_subquery_1
WHERE unnamed_subquery.f1 = unnamed_subquery_1.q1
FOR UPDATE OF unnamed_subquery_1
DROP VIEW view_unnamed_ss_locking;
--
-- Use some existing tables in the regression test
--
SELECT ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
FROM SUBSELECT_TBL ss
WHERE f1 NOT IN (SELECT f1+1 FROM INT4_TBL
WHERE f1 != ss.f1 AND f1 < 2147483647);
Correlated Field | Second Field
------------------+--------------
2 | 4
3 | 5
2 | 2
3 | 3
6 | 8
8 |
(6 rows)
select q1, float8(count(*)) / (select count(*) from int8_tbl)
from int8_tbl group by q1 order by q1;
q1 | ?column?
------------------+----------
123 | 0.4
4567890123456789 | 0.6
(2 rows)
-- Unspecified-type literals in output columns should resolve as text
SELECT *, pg_typeof(f1) FROM
(SELECT 'foo' AS f1 FROM generate_series(1,3)) ss ORDER BY 1;
f1 | pg_typeof
-----+-----------
foo | text
foo | text
foo | text
(3 rows)
-- ... unless there's context to suggest differently
explain (verbose, costs off) select '42' union all select '43';
QUERY PLAN
----------------------------
Append
-> Result
Output: '42'::text
-> Result
Output: '43'::text
(5 rows)
explain (verbose, costs off) select '42' union all select 43;
QUERY PLAN
--------------------
Append
-> Result
Output: 42
-> Result
Output: 43
(5 rows)
-- check materialization of an initplan reference (bug #14524)
explain (verbose, costs off)
select 1 = all (select (select 1));
QUERY PLAN
-----------------------------------
Result
Output: (SubPlan 2)
SubPlan 2
-> Materialize
Output: ($0)
InitPlan 1 (returns $0)
-> Result
Output: 1
-> Result
Output: $0
(10 rows)
select 1 = all (select (select 1));
?column?
----------
t
(1 row)
--
-- Check EXISTS simplification with LIMIT
--
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit null);
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (o.f1 = i.f1)
-> Seq Scan on int4_tbl o
-> Hash
-> Seq Scan on int4_tbl i
Optimizer: Postgres query optimizer
(7 rows)
explain (costs off)
select * from int4_tbl o where not exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Right Anti Join
Hash Cond: (i.f1 = o.f1)
-> Seq Scan on int4_tbl i
-> Hash
-> Seq Scan on int4_tbl o
Optimizer: Postgres query optimizer
(7 rows)
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 0);
QUERY PLAN
-------------------------------------
Result
One-Time Filter: false
Optimizer: Postgres query optimizer
(3 rows)
--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.
--
select count(*) from
(select 1 from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
count
-------
100
(1 row)
select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
count
-------
10
(1 row)
select count(*) from
(select 1 from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
count
-------
100
(1 row)
select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
count
-------
10
(1 row)
--
-- Test cases to check for overenthusiastic optimization of
-- "IN (SELECT DISTINCT ...)" and related cases. Per example from
-- Luca Pireddu and Michael Fuhr.
--
CREATE TEMP TABLE foo (id integer);
CREATE TEMP TABLE bar (id1 integer, id2 integer);
INSERT INTO foo VALUES (1);
INSERT INTO bar VALUES (1, 1);
INSERT INTO bar VALUES (2, 2);
INSERT INTO bar VALUES (3, 1);
-- These cases require an extra level of distinct-ing above subquery s
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT DISTINCT id1, id2 FROM bar) AS s);
id
----
1
(1 row)
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id1,id2 FROM bar GROUP BY id1,id2) AS s);
id
----
1
(1 row)
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id1, id2 FROM bar UNION
SELECT id1, id2 FROM bar) AS s);
id
----
1
(1 row)
-- These cases do not
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT DISTINCT ON (id2) id1, id2 FROM bar) AS s);
id
----
1
(1 row)
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id2 FROM bar GROUP BY id2) AS s);
id
----
1
(1 row)
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id2 FROM bar UNION
SELECT id2 FROM bar) AS s);
id
----
1
(1 row)
--
-- Test case to catch problems with multiply nested sub-SELECTs not getting
-- recalculated properly. Per bug report from Didier Moens.
--
CREATE TABLE orderstest (
approver_ref integer,
po_ref integer,
ordercanceled boolean
);
INSERT INTO orderstest VALUES (1, 1, false);
INSERT INTO orderstest VALUES (66, 5, false);
INSERT INTO orderstest VALUES (66, 6, false);
INSERT INTO orderstest VALUES (66, 7, false);
INSERT INTO orderstest VALUES (66, 1, true);
INSERT INTO orderstest VALUES (66, 8, false);
INSERT INTO orderstest VALUES (66, 1, false);
INSERT INTO orderstest VALUES (77, 1, false);
INSERT INTO orderstest VALUES (1, 1, false);
INSERT INTO orderstest VALUES (66, 1, false);
INSERT INTO orderstest VALUES (1, 1, false);
CREATE VIEW orders_view AS
SELECT *,
(SELECT CASE
WHEN ord.approver_ref=1 THEN '---' ELSE 'Approved'
END) AS "Approved",
(SELECT CASE
WHEN ord.ordercanceled
THEN 'Canceled'
ELSE
(SELECT CASE
WHEN ord.po_ref=1
THEN
(SELECT CASE
WHEN ord.approver_ref=1
THEN '---'
ELSE 'Approved'
END)
ELSE 'PO'
END)
END) AS "Status",
(CASE
WHEN ord.ordercanceled
THEN 'Canceled'
ELSE
(CASE
WHEN ord.po_ref=1
THEN
(CASE
WHEN ord.approver_ref=1
THEN '---'
ELSE 'Approved'
END)
ELSE 'PO'
END)
END) AS "Status_OK"
FROM orderstest ord;
SELECT * FROM orders_view;
approver_ref | po_ref | ordercanceled | Approved | Status | Status_OK
--------------+--------+---------------+----------+----------+-----------
1 | 1 | f | --- | --- | ---
66 | 5 | f | Approved | PO | PO
66 | 6 | f | Approved | PO | PO
66 | 7 | f | Approved | PO | PO
66 | 1 | t | Approved | Canceled | Canceled
66 | 8 | f | Approved | PO | PO
66 | 1 | f | Approved | Approved | Approved
77 | 1 | f | Approved | Approved | Approved
1 | 1 | f | --- | --- | ---
66 | 1 | f | Approved | Approved | Approved
1 | 1 | f | --- | --- | ---
(11 rows)
DROP TABLE orderstest cascade;
NOTICE: drop cascades to view orders_view
--
-- Test cases to catch situations where rule rewriter fails to propagate
-- hasSubLinks flag correctly. Per example from Kyle Bateman.
--
create temp table parts (
partnum text,
cost float8
);
create temp table shipped (
ttype char(2),
ordnum int4,
partnum text,
value float8
);
create temp view shipped_view as
select * from shipped where ttype = 'wt';
create rule shipped_view_insert as on insert to shipped_view do instead
insert into shipped values('wt', new.ordnum, new.partnum, new.value);
insert into parts (partnum, cost) values (1, 1234.56);
insert into shipped_view (ordnum, partnum, value)
values (0, 1, (select cost from parts where partnum = '1'));
select * from shipped_view;
ttype | ordnum | partnum | value
-------+--------+---------+---------
wt | 0 | 1 | 1234.56
(1 row)
create rule shipped_view_update as on update to shipped_view do instead
update shipped set partnum = new.partnum, value = new.value
where ttype = new.ttype and ordnum = new.ordnum;
update shipped_view set value = 11
from int4_tbl a join int4_tbl b
on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1))
where ordnum = a.f1;
select * from shipped_view;
ttype | ordnum | partnum | value
-------+--------+---------+-------
wt | 0 | 1 | 11
(1 row)
select f1, ss1 as relabel from
(select *, (select sum(f1) from int4_tbl b where f1 >= a.f1) as ss1
from int4_tbl a) ss;
f1 | relabel
-------------+------------
0 | 2147607103
123456 | 2147607103
-123456 | 2147483647
2147483647 | 2147483647
-2147483647 | 0
(5 rows)
--
-- Test cases involving PARAM_EXEC parameters and min/max index optimizations.
-- Per bug report from David Sanchez i Gregori.
--
select * from (
select max(unique1) from tenk1 as a
where exists (select 1 from tenk1 as b where b.thousand = a.unique2)
) ss;
max
------
9997
(1 row)
select * from (
select min(unique1) from tenk1 as a
where not exists (select 1 from tenk1 as b where b.unique2 = 10000)
) ss;
min
-----
0
(1 row)
--
-- Test that an IN implemented using a UniquePath does unique-ification
-- with the right semantics, as per bug #4113. (Unfortunately we have
-- no simple way to ensure that this test case actually chooses that type
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
-- here might mean that some other plan type is being used, rendering the test
-- pointless.)
--
create temp table numeric_table (num_col numeric);
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
create temp table float_table (float_col float8);
insert into float_table values (1), (2), (3);
select * from float_table
where float_col in (select num_col from numeric_table);
float_col
-----------
1
2
3
(3 rows)
select * from numeric_table
where num_col in (select float_col from float_table);
num_col
-------------------------
1
1.000000000000000000001
2
3
(4 rows)
--
-- Test case for bug #4290: bogus calculation of subplan param sets
--
create temp table ta (id int primary key, val int);
insert into ta values(1,1);
insert into ta values(2,2);
create temp table tb (id int primary key, aval int);
insert into tb values(1,1);
insert into tb values(2,1);
insert into tb values(3,2);
insert into tb values(4,2);
create temp table tc (id int primary key, aid int);
insert into tc values(1,1);
insert into tc values(2,2);
select
( select min(tb.id) from tb
where tb.aval = (select ta.val from ta where ta.id = tc.aid) ) as min_tb_id
from tc;
ERROR: correlated subquery with skip-level correlations is not supported
--
-- Test case for 8.3 "failed to locate grouping columns" bug
--
create temp table t1 (f1 numeric(14,0), f2 varchar(30));
select * from
(select distinct f1, f2, (select f2 from t1 x where x.f1 = up.f1) as fs
from t1 up) ss
group by f1,f2,fs;
f1 | f2 | fs
----+----+----
(0 rows)
--
-- Test case for bug #5514 (mishandling of whole-row Vars in subselects)
--
create temp table table_a(id integer);
insert into table_a values (42);
create temp view view_a as select * from table_a;
select view_a from view_a;
view_a
--------
(42)
(1 row)
select (select view_a) from view_a;
view_a
--------
(42)
(1 row)
select (select (select view_a)) from view_a;
view_a
--------
(42)
(1 row)
select (select (a.*)::text) from view_a a;
a
------
(42)
(1 row)
--
-- Check that whole-row Vars reading the result of a subselect don't include
-- any junk columns therein
--
-- In GPDB, the ORDER BY in the subquery or CTE doesn't force an ordering
-- for the whole query. Mark these with the "order none" gpdiff directive,
-- so that differences in result order are ignored.
select q from (select max(f1) from int4_tbl group by f1 order by f1) q; -- order none
q
---------------
(-2147483647)
(-123456)
(0)
(123456)
(2147483647)
(5 rows)
with q as (select max(f1) from int4_tbl group by f1 order by f1)
select q from q; -- order none
q
---------------
(-2147483647)
(-123456)
(0)
(123456)
(2147483647)
(5 rows)
--
-- Test case for sublinks pulled up into joinaliasvars lists in an
-- inherited update/delete query
--
begin; -- this shouldn't delete anything, but be safe
delete from road
where exists (
select 1
from
int4_tbl cross join
( select f1, array(select q1 from int8_tbl) as arr
from text_tbl ) ss
where road.name = ss.f1 );
rollback;
--
-- Test case for sublinks pushed down into subselects via join alias expansion
--
-- Cloudberry note: This query will only work with ORCA. This type of query
-- was not supported in postgres versions prior to 8.4, and thus was never
-- supported in the planner. After 8.4 versions, the planner works, but
-- the plan it creates is not currently parallel safe.
select
(select sq1) as qq1
from
(select exists(select 1 from int4_tbl where f1 = q2) as sq1, 42 as dummy
from int8_tbl) sq0
join
int4_tbl i4 on dummy = i4.f1;
ERROR: correlated subquery with skip-level correlations is not supported
--
-- Test case for subselect within UPDATE of INSERT...ON CONFLICT DO UPDATE
--
create temp table upsert(key int4 primary key, val text);
insert into upsert values(1, 'val') on conflict (key) do update set val = 'not seen';
insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 order by f1 limit 1)::text;
select * from upsert;
key | val
-----+---------------------------------
1 | seen with subselect -2147483647
(1 row)
with aa as (select 'int4_tbl' u from int4_tbl limit 1)
insert into upsert values (1, 'x'), (999, 'y')
on conflict (key) do update set val = (select u from aa)
returning *;
key | val
-----+----------
1 | int4_tbl
999 | y
(2 rows)
--
-- Test case for cross-type partial matching in hashed subplan (bug #7597)
--
create temp table outer_7597 (f1 int4, f2 int4);
insert into outer_7597 values (0, 0);
insert into outer_7597 values (1, 0);
insert into outer_7597 values (0, null);
insert into outer_7597 values (1, null);
create temp table inner_7597(c1 int8, c2 int8);
insert into inner_7597 values(0, null);
select * from outer_7597 where (f1, f2) not in (select * from inner_7597);
f1 | f2
----+----
1 | 0
1 |
(2 rows)
--
-- Similar test case using text that verifies that collation
-- information is passed through by execTuplesEqual() in nodeSubplan.c
-- (otherwise it would error in texteq())
--
create temp table outer_text (f1 text, f2 text);
insert into outer_text values ('a', 'a');
insert into outer_text values ('b', 'a');
insert into outer_text values ('a', null);
insert into outer_text values ('b', null);
create temp table inner_text (c1 text, c2 text);
insert into inner_text values ('a', null);
insert into inner_text values ('123', '456');
select * from outer_text where (f1, f2) not in (select * from inner_text);
f1 | f2
----+----
b | a
b |
(2 rows)
--
-- Another test case for cross-type hashed subplans: comparison of
-- inner-side values must be done with appropriate operator
--
explain (verbose, costs off)
select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
QUERY PLAN
-------------------------------------
Result
Output: (hashed SubPlan 1)
SubPlan 1
-> Append
-> Result
Output: 'bar'::name
-> Result
Output: 'bar'::name
(8 rows)
select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
?column?
----------
f
(1 row)
--
-- Test that we don't try to hash nested records (bug #17363)
-- (Hashing could be supported, but for now we don't)
--
explain (verbose, costs off)
select row(row(row(1))) = any (select row(row(1)));
QUERY PLAN
-------------------------------------
Result
Output: (SubPlan 1)
SubPlan 1
-> Materialize
Output: (ROW(ROW(1)))
-> Result
Output: ROW(ROW(1))
Optimizer: Postgres query optimizer
(8 rows)
select row(row(row(1))) = any (select row(row(1)));
?column?
----------
t
(1 row)
--
-- Test case for premature memory release during hashing of subplan output
--
select '1'::text in (select '1'::name union all select '1'::name);
?column?
----------
t
(1 row)
--
-- Test that we don't try to use a hashed subplan if the simplified
-- testexpr isn't of the right shape
--
-- this fails by default, of course
select * from int8_tbl where q1 in (select c1 from inner_text);
ERROR: operator does not exist: bigint = text
LINE 1: select * from int8_tbl where q1 in (select c1 from inner_tex...
^
HINT: No operator matches the given name and argument types. You might need to add explicit type casts.
begin;
-- make an operator to allow it to succeed
create function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2';
create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text);
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
QUERY PLAN
-----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on int8_tbl
Filter: (hashed SubPlan 1)
SubPlan 1
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on inner_text
(7 rows)
select * from int8_tbl where q1 in (select c1 from inner_text);
q1 | q2
-----+------------------
123 | 456
123 | 4567890123456789
(2 rows)
-- inlining of this function results in unusual number of hash clauses,
-- which we can still cope with
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2 and $1::text = $2';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
QUERY PLAN
-----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on int8_tbl
Filter: (hashed SubPlan 1)
SubPlan 1
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on inner_text
(7 rows)
select * from int8_tbl where q1 in (select c1 from inner_text);
q1 | q2
-----+------------------
123 | 456
123 | 4567890123456789
(2 rows)
-- inlining of this function causes LHS and RHS to be switched,
-- which we can't cope with, so hashing should be abandoned
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $2 = $1::text';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
QUERY PLAN
-----------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on int8_tbl
Filter: (SubPlan 1)
SubPlan 1
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on inner_text
Optimizer: Postgres query optimizer
(8 rows)
select * from int8_tbl where q1 in (select c1 from inner_text);
q1 | q2
-----+------------------
123 | 456
123 | 4567890123456789
(2 rows)
rollback; -- to get rid of the bogus operator
--
-- Test resolution of hashed vs non-hashed implementation of EXISTS subplan
--
explain (costs off)
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0);
QUERY PLAN
-----------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Seq Scan on tenk1 t
Filter: ((hashed SubPlan 2) OR (ten < 0))
SubPlan 2
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on tenk1 k
(9 rows)
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0);
count
-------
10000
(1 row)
explain (costs off)
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0)
and thousand = 1;
QUERY PLAN
-----------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Bitmap Heap Scan on tenk1 t
Recheck Cond: (thousand = 1)
Filter: ((SubPlan 1) OR (ten < 0))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = 1)
SubPlan 1
-> Result
Filter: (k.unique1 = t.unique2)
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on tenk1 k
(14 rows)
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0)
and thousand = 1;
count
-------
10
(1 row)
-- It's possible for the same EXISTS to get resolved both ways
create temp table exists_tbl (c1 int, c2 int, c3 int) partition by list (c1);
create temp table exists_tbl_null partition of exists_tbl for values in (null);
create temp table exists_tbl_def partition of exists_tbl default;
insert into exists_tbl select x, x/2, x+1 from generate_series(0,10) x;
analyze exists_tbl;
explain (costs off)
select * from exists_tbl t1
where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> Seq Scan on exists_tbl_null t1_1
Filter: ((SubPlan 1 (copy 3)) OR (c3 < 0))
SubPlan 1 (copy 3)
-> Result
Filter: (t1_1.c1 = t2.c2)
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Append
-> Seq Scan on exists_tbl_null t2_1
-> Seq Scan on exists_tbl_def t2_2
-> Seq Scan on exists_tbl_def t1_2
Filter: ((SubPlan 1 (copy 5)) OR (c3 < 0))
SubPlan 1 (copy 5)
-> Result
Filter: (t1_2.c1 = t2_3.c2)
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Append
-> Seq Scan on exists_tbl_null t2_4
-> Seq Scan on exists_tbl_def t2_5
Optimizer: Postgres query optimizer
(23 rows)
select * from exists_tbl t1
where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);
c1 | c2 | c3
----+----+----
0 | 0 | 1
1 | 0 | 2
2 | 1 | 3
3 | 1 | 4
4 | 2 | 5
5 | 2 | 6
(6 rows)
--
-- Test case for planner bug with nested EXISTS handling
--
-- GPDB_92_MERGE_FIXME: ORCA cannot decorrelate this query, and generates
-- correct-but-slow plan that takes 45 minutes. Revisit this when ORCA can
-- reorder anti-joins
set optimizer to off;
select a.thousand from tenk1 a, tenk1 b
where a.thousand = b.thousand
and exists ( select 1 from tenk1 c where b.hundred = c.hundred
and not exists ( select 1 from tenk1 d
where a.thousand = d.thousand ) );
thousand
----------
(0 rows)
reset optimizer;
--
-- Check that nested sub-selects are not pulled up if they contain volatiles
--
explain (verbose, costs off)
select x, x from
(select (select current_database()) as x from (values(1),(2)) v(y)) ss;
QUERY PLAN
--------------------------------------
Values Scan on "*VALUES*"
Output: $0, $1
InitPlan 1 (returns $0)
-> Result
Output: 'regression'::name
InitPlan 2 (returns $1)
-> Result
Output: 'regression'::name
Optimizer: Postgres query optimizer
(9 rows)
explain (verbose, costs off)
select x, x from
(select (select random()) as x from (values(1),(2)) v(y)) ss;
QUERY PLAN
-----------------------------------
Subquery Scan on ss
Output: ss.x, ss.x
-> Values Scan on "*VALUES*"
Output: $0
InitPlan 1 (returns $0)
-> Result
Output: random()
Optimizer: Postgres query optimizer
(8 rows)
explain (verbose, costs off)
select x, x from
(select (select current_database() where y=y) as x from (values(1),(2)) v(y)) ss;
QUERY PLAN
----------------------------------------------------------------------
Values Scan on "*VALUES*"
Output: (SubPlan 1), (SubPlan 2)
SubPlan 1
-> Result
Output: 'regression'::name
One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1)
SubPlan 2
-> Result
Output: 'regression'::name
One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1)
Optimizer: Postgres query optimizer
(11 rows)
explain (verbose, costs off)
select x, x from
(select (select random() where y=y) as x from (values(1),(2)) v(y)) ss;
QUERY PLAN
----------------------------------------------------------------------------
Subquery Scan on ss
Output: ss.x, ss.x
-> Values Scan on "*VALUES*"
Output: (SubPlan 1)
SubPlan 1
-> Result
Output: random()
One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1)
Optimizer: Postgres query optimizer
(9 rows)
--
-- Test rescan of a hashed subplan (the use of random() is to prevent the
-- sub-select from being pulled up, which would result in not hashing)
--
explain (verbose, costs off)
select sum(ss.tst::int) from
onek o cross join lateral (
select i.ten in (select f1 from int4_tbl where f1 <= o.hundred) as tst,
random() as r
from onek i where i.unique1 = o.unique1 ) ss
where o.ten = 0;
ERROR: correlated subquery with skip-level correlations is not supported
select sum(ss.tst::int) from
onek o cross join lateral (
select i.ten in (select f1 from int4_tbl where f1 <= o.hundred) as tst,
random() as r
from onek i where i.unique1 = o.unique1 ) ss
where o.ten = 0;
ERROR: correlated subquery with skip-level correlations is not supported
--
-- Test rescan of a SetOp node
--
explain (costs off)
select count(*) from
onek o cross join lateral (
select * from onek i1 where i1.unique1 = o.unique1
except
select * from onek i2 where i2.unique1 = o.unique2
) ss
where o.ten = 1;
QUERY PLAN
------------------------------------------------------------------------------------------------
Aggregate
-> Nested Loop
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on onek o
Filter: (ten = 1)
-> Materialize
-> Subquery Scan on ss
-> HashSetOp Except
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Result
Filter: (i1.unique1 = o.unique1)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on onek i1
-> Subquery Scan on "*SELECT* 2"
-> Result
Filter: (i2.unique1 = o.unique2)
-> Materialize
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on onek i2
Optimizer: Postgres query optimizer
(22 rows)
select count(*) from
onek o cross join lateral (
select * from onek i1 where i1.unique1 = o.unique1
except
select * from onek i2 where i2.unique1 = o.unique2
) ss
where o.ten = 1;
count
-------
100
(1 row)
--
-- Test rescan of a RecursiveUnion node
--
explain (costs off)
select sum(o.four), sum(ss.a) from
onek o cross join lateral (
with recursive x(a) as
(select o.four as a
union
select a + 1 from x
where a < 10)
select * from x
) ss
where o.ten = 1;
QUERY PLAN
---------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Nested Loop
-> Seq Scan on onek o
Filter: (ten = 1)
-> Materialize
-> Memoize
Cache Key: o.four
Cache Mode: binary
-> Recursive Union
-> Result
-> WorkTable Scan on x
Filter: (a < 10)
Optimizer: Postgres query optimizer
(15 rows)
select sum(o.four), sum(ss.a) from
onek o cross join lateral (
with recursive x(a) as
(select o.four as a
union
select a + 1 from x
where a < 10)
select * from x
) ss
where o.ten = 1;
sum | sum
------+------
1700 | 5350
(1 row)
--
-- Check we don't misoptimize a NOT IN where the subquery returns no rows.
--
create temp table notinouter (a int);
create temp table notininner (b int not null);
insert into notinouter values (null), (1);
select * from notinouter where a not in (select b from notininner);
a
---
1
(2 rows)
--
-- Check we behave sanely in corner case of empty SELECT list (bug #8648)
--
create temp table nocolumns();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
select exists(select * from nocolumns);
exists
--------
f
(1 row)
--
-- Check behavior with a SubPlan in VALUES (bug #14924)
--
select val.x
from generate_series(1,10) as s(i),
lateral (
values ((select s.i + 1)), (s.i + 101)
) as val(x)
where s.i < 10 and (select val.x) < 110;
x
-----
2
102
3
103
4
104
5
105
6
106
7
107
8
108
9
109
10
(17 rows)
-- another variant of that (bug #16213)
explain (verbose, costs off)
select * from
(values
(3 not in (select * from (values (1), (2)) ss1)),
(false)
) ss;
QUERY PLAN
----------------------------------------
Values Scan on "*VALUES*"
Output: "*VALUES*".column1
SubPlan 1
-> Values Scan on "*VALUES*_1"
Output: "*VALUES*_1".column1
(5 rows)
select * from
(values
(3 not in (select * from (values (1), (2)) ss1)),
(false)
) ss;
column1
---------
t
f
(2 rows)
--
-- Check sane behavior with nested IN SubLinks
-- GPDB_94_MERGE_FIXME: ORCA plan is correct but very pricy. Should we fallback to planner?
--
explain (verbose, costs off)
select * from int4_tbl where
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
(select ten from tenk1 b);
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: int4_tbl.f1
-> HashAggregate
Output: int4_tbl.f1
Group Key: (RowIdExpr)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: int4_tbl.f1, (RowIdExpr)
Hash Key: (RowIdExpr)
-> Hash Join
Output: int4_tbl.f1, (RowIdExpr)
Hash Cond: (b.ten = CASE WHEN ((hashed SubPlan 1)) THEN int4_tbl.f1 ELSE NULL::integer END)
-> Seq Scan on public.tenk1 b
Output: b.unique1, b.unique2, b.two, b.four, b.ten, b.twenty, b.hundred, b.thousand, b.twothousand, b.fivethous, b.tenthous, b.odd, b.even, b.stringu1, b.stringu2, b.string4
-> Hash
Output: int4_tbl.f1, ((hashed SubPlan 1)), (RowIdExpr)
-> Broadcast Motion 3:3 (slice3; segments: 3)
Output: int4_tbl.f1, ((hashed SubPlan 1)), (RowIdExpr)
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1, (hashed SubPlan 1), RowIdExpr
SubPlan 1
-> Broadcast Motion 3:3 (slice4; segments: 3)
Output: a.unique1
-> Seq Scan on public.tenk1 a
Output: a.unique1
Optimizer: Postgres query optimizer
(25 rows)
select * from int4_tbl where
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
(select ten from tenk1 b);
f1
----
0
(1 row)
--
-- Check for incorrect optimization when IN subquery contains a SRF
--
explain (verbose, costs off)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
QUERY PLAN
-------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: o.f1
-> Hash Semi Join
Output: o.f1
Hash Cond: (o.f1 = "ANY_subquery".f1)
-> Seq Scan on public.int4_tbl o
Output: o.f1
-> Hash
Output: "ANY_subquery".f1, "ANY_subquery".g
-> Subquery Scan on "ANY_subquery"
Output: "ANY_subquery".f1, "ANY_subquery".g
Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
-> Result
Output: i.f1, ((generate_series(1, 50)) / 10)
-> ProjectSet
Output: generate_series(1, 50), i.f1
-> Seq Scan on public.int4_tbl i
Output: i.f1
Optimizer: Postgres query optimizer
(19 rows)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
f1
----
0
(1 row)
--
-- check for over-optimization of whole-row Var referencing an Append plan
--
select (select q from
(select 1,2,3 where f1 > 0
union all
select 4,5,6.0 where f1 <= 0
) q )
from int4_tbl;
q
-----------
(4,5,6.0)
(1,2,3)
(4,5,6.0)
(1,2,3)
(4,5,6.0)
(5 rows)
--
-- Check for sane handling of a lateral reference in a subquery's quals
-- (most of the complication here is to prevent the test case from being
-- flattened too much)
--
explain (verbose, costs off)
select * from
int4_tbl i4,
lateral (
select i4.f1 > 1 as b, 1 as id
from (select random() order by 1) as t1
union all
select true as b, 2 as id
) as t2
where b and f1 >= 0;
QUERY PLAN
-------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: i4.f1, ((i4.f1 > 1)), (1)
-> Nested Loop
Output: i4.f1, ((i4.f1 > 1)), (1)
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
Filter: (i4.f1 >= 0)
-> Materialize
Output: ((i4.f1 > 1)), (1)
-> Append
-> Subquery Scan on t1
Output: (i4.f1 > 1), 1
Filter: (i4.f1 > 1)
-> Result
Output: $0
InitPlan 1 (returns $0) (slice2)
-> Result
Output: random()
-> Result
Output: true, 2
(21 rows)
select * from
int4_tbl i4,
lateral (
select i4.f1 > 1 as b, 1 as id
from (select random() order by 1) as t1
union all
select true as b, 2 as id
) as t2
where b and f1 >= 0;
f1 | b | id
------------+---+----
0 | t | 2
123456 | t | 1
123456 | t | 2
2147483647 | t | 1
2147483647 | t | 2
(5 rows)
--
-- Check that volatile quals aren't pushed down past a DISTINCT:
-- nextval() should not be called more than the nominal number of times
--
create temp sequence ts1 cache 1;
select * from
(select distinct ten from tenk1) ss
where ten < 10 + nextval('ts1')
order by 1;
ten
-----
0
1
2
3
4
5
6
7
8
9
(10 rows)
select nextval('ts1');
nextval
---------
11
(1 row)
--
-- Check that volatile quals aren't pushed down past a set-returning function;
-- while a nonvolatile qual can be, if it doesn't reference the SRF.
--
create function tattle(x int, y int) returns bool
volatile language plpgsql as $$
begin
raise notice 'x = %, y = %', x, y;
return x > y;
end$$;
explain (verbose, costs off)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
QUERY PLAN
----------------------------------------------------------
Subquery Scan on ss
Output: ss.x, ss.u
Filter: tattle(ss.x, 8)
-> ProjectSet
Output: 9, unnest('{1,2,3,11,12,13}'::integer[])
-> Result
(6 rows)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
NOTICE: x = 9, y = 8
NOTICE: x = 9, y = 8
NOTICE: x = 9, y = 8
NOTICE: x = 9, y = 8
NOTICE: x = 9, y = 8
NOTICE: x = 9, y = 8
x | u
---+----
9 | 1
9 | 2
9 | 3
9 | 11
9 | 12
9 | 13
(6 rows)
-- if we pretend it's stable, we get different results:
alter function tattle(x int, y int) stable;
explain (verbose, costs off)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
NOTICE: x = 9, y = 8
QUERY PLAN
----------------------------------------------------
ProjectSet
Output: 9, unnest('{1,2,3,11,12,13}'::integer[])
-> Result
(4 rows)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
NOTICE: x = 9, y = 8
x | u
---+----
9 | 1
9 | 2
9 | 3
9 | 11
9 | 12
9 | 13
(6 rows)
-- although even a stable qual should not be pushed down if it references SRF
explain (verbose, costs off)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, u);
QUERY PLAN
----------------------------------------------------------
Subquery Scan on ss
Output: ss.x, ss.u
Filter: tattle(ss.x, ss.u)
-> ProjectSet
Output: 9, unnest('{1,2,3,11,12,13}'::integer[])
-> Result
(6 rows)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, u);
NOTICE: x = 9, y = 1
NOTICE: x = 9, y = 2
NOTICE: x = 9, y = 3
NOTICE: x = 9, y = 11
NOTICE: x = 9, y = 12
NOTICE: x = 9, y = 13
x | u
---+---
9 | 1
9 | 2
9 | 3
(3 rows)
drop function tattle(x int, y int);
set optimizer to off;
--
-- Test that LIMIT can be pushed to SORT through a subquery that just projects
-- columns. We check for that having happened by looking to see if EXPLAIN
-- ANALYZE shows that a top-N sort was used. We must suppress or filter away
-- all the non-invariant parts of the EXPLAIN ANALYZE output. Use a replicated
-- table to genarate a plan like: Limit -> Subquery -> Sort
--
create table sq_limit (pk int primary key, c1 int, c2 int) distributed replicated;
insert into sq_limit values
(1, 1, 1),
(2, 2, 2),
(3, 3, 3),
(4, 4, 4),
(5, 1, 1),
(6, 2, 2),
(7, 3, 3),
(8, 4, 4);
create function explain_sq_limit() returns setof text language plpgsql as
$$
declare ln text;
begin
set local enable_parallel=off;
for ln in
explain (analyze, summary off, timing off, costs off)
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3
loop
ln := regexp_replace(ln, 'Memory: \S*', 'Memory: xxx');
return next ln;
end loop;
reset enable_parallel;
end;
$$;
select * from explain_sq_limit();
explain_sq_limit
----------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) (actual rows=3 loops=1)
-> Limit (actual rows=3 loops=1)
-> Subquery Scan on x (actual rows=3 loops=1)
-> Sort (actual rows=3 loops=1)
Sort Key: sq_limit.c1, sq_limit.pk
Sort Method: top-N heapsort Memory: xxx
-> Seq Scan on sq_limit (actual rows=8 loops=1)
Optimizer: Postgres query optimizer
(8 rows)
-- a subpath is sorted under a subqueryscan. however, the subqueryscan is not.
-- whether the order of subpath can applied to the subqueryscan is up-to-implement.
-- now we do not guarantee the order of subpath can apply to the subqueryscan.
-- so the results of bellow is not stable, so we ignore the results
--start_ignore
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
pk | c2
----+----
1 | 1
5 | 1
6 | 2
(3 rows)
--end_ignore
reset optimizer;
drop function explain_sq_limit();
drop table sq_limit;
--
-- Ensure that backward scan direction isn't propagated into
-- expression subqueries (bug #15336)
--
--start_ignore
begin;
declare c1 scroll cursor for
select * from generate_series(1,4) i
where i <> all (values (2),(3));
move forward all in c1;
fetch backward all in c1;
i
---
4
1
(2 rows)
commit;
--end_ignore
--
-- Verify that we correctly flatten cases involving a subquery output
-- expression that doesn't need to be wrapped in a PlaceHolderVar
--
explain (costs off)
select tname, attname from (
select relname::information_schema.sql_identifier as tname, * from
(select * from pg_class c) ss1) ss2
right join pg_attribute a on a.attrelid = ss2.oid
where tname = 'tenk1' and attnum = 1;
QUERY PLAN
-----------------------------------------------------------------------
Hash Join
Hash Cond: (a.attrelid = c.oid)
-> Seq Scan on pg_attribute a
Filter: (attnum = 1)
-> Hash
-> Index Scan using pg_class_relname_nsp_index on pg_class c
Index Cond: (relname = 'tenk1'::name)
Optimizer: Postgres query optimizer
(8 rows)
select tname, attname from (
select relname::information_schema.sql_identifier as tname, * from
(select * from pg_class c) ss1) ss2
right join pg_attribute a on a.attrelid = ss2.oid
where tname = 'tenk1' and attnum = 1;
tname | attname
-------+---------
tenk1 | unique1
(1 row)
-- Check behavior when there's a lateral reference in the output expression
explain (verbose, costs off)
select t1.ten, sum(x) from
tenk1 t1 left join lateral (
select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2
) ss on t1.unique1 = ss.fivethous
group by t1.ten
order by t1.ten;
QUERY PLAN
------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: t1.ten, (sum((t1.ten + t2.ten)))
Merge Key: t1.ten
-> Sort
Output: t1.ten, (sum((t1.ten + t2.ten)))
Sort Key: t1.ten
-> Finalize HashAggregate
Output: t1.ten, sum((t1.ten + t2.ten))
Group Key: t1.ten
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: t1.ten, (PARTIAL sum((t1.ten + t2.ten)))
Hash Key: t1.ten
-> Partial HashAggregate
Output: t1.ten, PARTIAL sum((t1.ten + t2.ten))
Group Key: t1.ten
-> Hash Right Join
Output: t1.ten, t2.ten
Hash Cond: (t2.fivethous = t1.unique1)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: t2.ten, t2.fivethous
Hash Key: t2.fivethous
-> Seq Scan on public.tenk1 t2
Output: t2.ten, t2.fivethous
-> Hash
Output: t1.ten, t1.unique1
-> Seq Scan on public.tenk1 t1
Output: t1.ten, t1.unique1
Optimizer: Postgres query optimizer
(28 rows)
select t1.ten, sum(x) from
tenk1 t1 left join lateral (
select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2
) ss on t1.unique1 = ss.fivethous
group by t1.ten
order by t1.ten;
ten | sum
-----+-------
0 | 0
1 | 2000
2 | 4000
3 | 6000
4 | 8000
5 | 10000
6 | 12000
7 | 14000
8 | 16000
9 | 18000
(10 rows)
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q1+t3.q1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: t1.q1, ((t2.q1 + t3.q1))
Merge Key: t1.q1, ((t2.q1 + t3.q1))
-> Sort
Output: t1.q1, ((t2.q1 + t3.q1))
Sort Key: t1.q1, ((t2.q1 + t3.q1))
-> Hash Right Join
Output: t1.q1, (t2.q1 + t3.q1)
Hash Cond: (t2.q2 = t1.q2)
-> Hash Left Join
Output: t2.q1, t2.q2, t3.q1
Hash Cond: (t2.q2 = t3.q2)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: t2.q1, t2.q2
Hash Key: t2.q2
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Hash
Output: t3.q1, t3.q2
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: t3.q1, t3.q2
Hash Key: t3.q2
-> Seq Scan on public.int8_tbl t3
Output: t3.q1, t3.q2
-> Hash
Output: t1.q1, t1.q2
-> Redistribute Motion 3:3 (slice4; segments: 3)
Output: t1.q1, t1.q2
Hash Key: t1.q2
-> Seq Scan on public.int8_tbl t1
Output: t1.q1, t1.q2
Optimizer: Postgres query optimizer
(32 rows)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q1+t3.q1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
q1 | x
------------------+------------------
123 | 246
123 | 246
123 | 4567890123456912
123 | 4567890123456912
123 | 9135780246913578
4567890123456789 | 246
4567890123456789 | 4567890123456912
4567890123456789 | 4567890123456912
4567890123456789 | 9135780246913578
4567890123456789 | 9135780246913578
4567890123456789 | 9135780246913578
(11 rows)
--
-- Tests for CTE inlining behavior
--
set gp_cte_sharing to on;
-- Basic subquery that can be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
Output: subselect_tbl.f1
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1
Filter: (subselect_tbl.f1 = 1)
Optimizer: Postgres query optimizer
Settings: optimizer=off
(7 rows)
-- Explicitly request materialization
explain (verbose, costs off)
with x as materialized (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
----------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: x.f1
-> Subquery Scan on x
Output: x.f1
Filter: (x.f1 = 1)
-> Shared Scan (share slice:id 1:0)
Output: share0_ref1.f1
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1
Optimizer: Postgres query optimizer
Settings: gp_cte_sharing=on
(11 rows)
-- Stable functions are safe to inline
explain (verbose, costs off)
with x as (select * from (select f1, current_database() from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
Output: subselect_tbl.f1, ('regression'::name)
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, 'regression'::name
Filter: (subselect_tbl.f1 = 1)
Optimizer: Postgres query optimizer
Settings: gp_cte_sharing=on
(7 rows)
-- Volatile functions prevent inlining
-- Prevent inlining happens on GPDB, inlining may cause wrong results.
-- For example, nextval() function.
explain (verbose, costs off)
with x as (select * from (select f1, random() from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: x.f1, x.random
-> Subquery Scan on x
Output: x.f1, x.random
Filter: (x.f1 = 1)
-> Shared Scan (share slice:id 1:0)
Output: share0_ref1.f1, share0_ref1.random
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, random()
Optimizer: Postgres query optimizer
(10 rows)
create temporary sequence ts;
create table vol_test(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
explain (verbose, costs off)
with x as (select * from (select a, nextval('ts') from vol_test) ss)
select * from x where a = 1;
QUERY PLAN
-----------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: x.a, x.nextval
-> Subquery Scan on x
Output: x.a, x.nextval
Filter: (x.a = 1)
-> Shared Scan (share slice:id 1:0)
Output: share0_ref1.a, share0_ref1.nextval
-> Seq Scan on public.vol_test
Output: vol_test.a, nextval('ts'::regclass)
Optimizer: Postgres query optimizer
(10 rows)
drop sequence ts;
drop table vol_test;
-- SELECT FOR UPDATE cannot be inlined
-- GPDB: select statement with locking clause is not easy to fully supported
-- in greenplum. The following case even with GDD enabled greenplum will still
-- lock the table in Exclusive Lock and not generate LockRows plan node.
-- For detail, please refer to checkCanOptSelectLockingClause.
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl for update) ss)
select * from x where f1 = 1;
QUERY PLAN
------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
Output: ss.f1
-> Subquery Scan on ss
Output: ss.f1
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, subselect_tbl.ctid
Filter: (subselect_tbl.f1 = 1)
Optimizer: Postgres query optimizer
Settings: gp_cte_sharing=on, optimizer=off
(9 rows)
-- Multiply-referenced CTEs are inlined only when requested
explain (verbose, costs off)
with x as (select * from (select f1, current_database() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: share0_ref2.f1, share0_ref2.n, share0_ref1.f1, share0_ref1.n
-> Hash Join
Output: share0_ref2.f1, share0_ref2.n, share0_ref1.f1, share0_ref1.n
Hash Cond: (share0_ref2.n = share0_ref1.n)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: share0_ref2.f1, share0_ref2.n
Hash Key: share0_ref2.n
-> Shared Scan (share slice:id 2:0)
Output: share0_ref2.f1, share0_ref2.n
-> Hash
Output: share0_ref1.f1, share0_ref1.n
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: share0_ref1.f1, share0_ref1.n
Hash Key: share0_ref1.n
-> Shared Scan (share slice:id 3:0)
Output: share0_ref1.f1, share0_ref1.n
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, 'regression'::name
Settings: gp_cte_sharing=on
Optimizer: Postgres query optimizer
(21 rows)
explain (verbose, costs off)
with x as not materialized (select * from (select f1, current_database() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: subselect_tbl.f1, ('regression'::name), subselect_tbl_1.f1, ('regression'::name)
-> Hash Join
Output: subselect_tbl.f1, ('regression'::name), subselect_tbl_1.f1, ('regression'::name)
Hash Cond: (('regression'::name) = ('regression'::name))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: subselect_tbl.f1, ('regression'::name)
Hash Key: ('regression'::name)
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, 'regression'::name
-> Hash
Output: subselect_tbl_1.f1, ('regression'::name)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: subselect_tbl_1.f1, ('regression'::name)
Hash Key: ('regression'::name)
-> Seq Scan on public.subselect_tbl subselect_tbl_1
Output: subselect_tbl_1.f1, 'regression'::name
Optimizer: Postgres query optimizer
Settings: gp_cte_sharing=on
(19 rows)
-- Multiply-referenced CTEs can't be inlined if they contain outer self-refs
explain (verbose, costs off)
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z1.a as a from z cross join z as z1
where length(z.a || z1.a) < 5))
select * from x;
QUERY PLAN
---------------------------------------------------
Recursive Union
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
-> Nested Loop
Output: (x.a || x_1.a)
Join Filter: (length((x.a || x_1.a)) < 5)
-> WorkTable Scan on x
Output: x.a
-> WorkTable Scan on x x_1
Output: x_1.a
Settings: optimizer = 'off'
Optimizer: Postgres query optimizer
(12 rows)
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z1.a as a from z cross join z as z1
where length(z.a || z1.a) < 5))
select * from x;
a
------
a
b
aa
ab
ba
bb
aaaa
aaab
aaba
aabb
abaa
abab
abba
abbb
baaa
baab
baba
babb
bbaa
bbab
bbba
bbbb
(22 rows)
explain (verbose, costs off)
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z.a as a from z
where length(z.a || z.a) < 5))
select * from x;
QUERY PLAN
--------------------------------------------------
Recursive Union
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
-> Subquery Scan on z
Output: (z.a || z.a)
-> WorkTable Scan on x
Output: x.a
Filter: (length((x.a || x.a)) < 5)
Settings: optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z.a as a from z
where length(z.a || z.a) < 5))
select * from x;
a
------
a
b
aa
bb
aaaa
bbbb
(6 rows)
-- Check handling of outer references
explain (verbose, costs off)
with x as (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: int4_tbl.f1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
Optimizer: Postgres query optimizer
(5 rows)
explain (verbose, costs off)
with x as materialized (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
QUERY PLAN
--------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: share0_ref1.f1
-> Shared Scan (share slice:id 1:0)
Output: share0_ref1.f1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
Optimizer: Postgres query optimizer
Settings: gp_cte_sharing=on, optimizer=off
(8 rows)
-- Ensure that we inline the currect CTE when there are
-- multiple CTEs with the same name
explain (verbose, costs off)
with x as (select 1 as y)
select * from (with x as (select 2 as y) select * from x) ss;
QUERY PLAN
-------------
Result
Output: 2
(2 rows)
-- Row marks are not pushed into CTEs
explain (verbose, costs off)
with x as (select * from subselect_tbl)
select * from x for update;
QUERY PLAN
----------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
Optimizer: Postgres query optimizer
(5 rows)
set gp_cte_sharing to off;
-- Ensure that both planners produce valid plans for the query with the nested
-- SubLink, which contains attributes referenced in query's GROUP BY clause.
-- Due to presence of non-grouping columns in targetList, ORCA performs query
-- normalization, during which ORCA establishes a correspondence between vars
-- from targetlist entries to grouping attributes. And this process should
-- correctly handle nested structures. The inner part of SubPlan in the test
-- should contain only t.j.
-- start_ignore
drop table if exists t;
NOTICE: table "t" does not exist, skipping
-- end_ignore
create table t (i int, j int) distributed by (i);
insert into t values (1, 2);
explain (verbose, costs off)
select j,
(select j from (select j) q2)
from t
group by i, j;
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: t.j, ((SubPlan 1)), t.i
-> HashAggregate
Output: t.j, (SubPlan 1), t.i
Group Key: t.i, t.j
-> Seq Scan on public.t
Output: t.i, t.j
SubPlan 1
-> Result
Output: t.j
Optimizer: Postgres-based planner
(11 rows)
select j,
(select j from (select j) q2)
from t
group by i, j;
j | j
---+---
2 | 2
(1 row)
-- Ensure that both planners produce valid plans for the query with the nested
-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is
-- not grouping column (1 as c), is added to query targetList to make ORCA
-- perform query normalization. During normalization ORCA modifies the vars of
-- the grouping elements of targetList in order to produce a new Query tree.
-- The modification of vars inside nested part of SubLinks should be handled
-- correctly. ORCA shouldn't fall back due to missing variable entry as a result
-- of incorrect query normalization.
explain (verbose, costs off)
select j, 1 as c,
(select j from (select j) q2) q1
from t
group by j, q1;
QUERY PLAN
------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: t.j, 1, ((SubPlan 1))
-> HashAggregate
Output: t.j, 1, ((SubPlan 1))
Group Key: t.j, ((SubPlan 1))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: t.j, ((SubPlan 1))
Hash Key: t.j, ((SubPlan 1))
-> HashAggregate
Output: t.j, ((SubPlan 1))
Group Key: t.j, (SubPlan 1)
-> Seq Scan on public.t
Output: t.j, (SubPlan 1)
SubPlan 1
-> Result
Output: t.j
Optimizer: Postgres-based planner
(17 rows)
select j, 1 as c,
(select j from (select j) q2) q1
from t
group by j, q1;
j | c | q1
---+---+----
2 | 1 | 2
(1 row)
-- Ensure that both planners produce valid plans for the query with the nested
-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due
-- to missing variable entry as a result of incorrect query normalization. ORCA
-- should correctly process args of the aggregation during normalization.
explain (verbose, costs off)
select (select max((select t.i))) from t;
QUERY PLAN
------------------------------------------------
Finalize Aggregate
Output: (SubPlan 2)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL max((SubPlan 1)))
-> Partial Aggregate
Output: PARTIAL max((SubPlan 1))
-> Seq Scan on public.t
Output: t.i, t.j
SubPlan 1
-> Result
Output: t.i
SubPlan 2
-> Result
Output: max((SubPlan 1))
Optimizer: Postgres-based planner
(15 rows)
select (select max((select t.i))) from t;
max
-----
1
(1 row)
drop table t;