src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out - cloudberry - Git at Google

 --
 -- Tests for legacy cdbhash opclasses
 --
 drop schema if exists gpdist_legacy_opclasses;
 create schema gpdist_legacy_opclasses;
 set search_path to gpdist_legacy_opclasses;
 -- Basic sanity check of all the legacy hash opclasses. Create a table that
 -- uses all of them in the distribution key, and insert a value.
 set gp_use_legacy_hashops=on;
 create table all_legacy_types(
   int2_col int2,
   int4_col int4,
   int8_col int8,
   float4_col float4,
   float8_col float8,
   numeric_col numeric,
   char_col "char",
   bpchar_col char(20),
   text_col text,
   varchar_col varchar(20),
   bytea_col bytea,
   name_col name,
   oid_col oid,
   tid_col tid,
   timestamp_col timestamp,
   timestamptz_col timestamptz,
   date_col date,
   time_col time,
   timetz_col timetz,
   interval_col interval,
   inet_col inet,
   cidr_col cidr,
   macaddr_col macaddr,
   bit_col bit(11),
   varbit_col varbit(20),
   bool_col bool,
   oidvector_col oidvector,
   money_col money,
   --uuid_col uuid,
   complex_col complex
 ) distributed by (
   int2_col,
   int4_col,
   int8_col,
   float4_col,
   float8_col,
   numeric_col,
   char_col,
   bpchar_col,
   text_col,
   varchar_col,
   bytea_col,
   name_col,
   oid_col,
   tid_col,
   timestamp_col,
   timestamptz_col,
   date_col,
   time_col,
   timetz_col,
   interval_col,
   inet_col,
   cidr_col,
   macaddr_col,
   bit_col,
   varbit_col,
   bool_col,
   oidvector_col,
   money_col,
   --uuid_col,
   complex_col
 );
 -- Verify that all columns are using the legacy hashops
 select attno, opc.opcname from
   (select unnest(distkey) as attno, unnest(distclass) as distclass from gp_distribution_policy
    where localoid='all_legacy_types'::regclass) as d,
   pg_opclass opc
 where opc.oid=distclass
 order by attno;
  attno |         opcname
 -------+-------------------------
      1 | cdbhash_int2_ops
      2 | cdbhash_int4_ops
      3 | cdbhash_int8_ops
      4 | cdbhash_float4_ops
      5 | cdbhash_float8_ops
      6 | cdbhash_numeric_ops
      7 | cdbhash_char_ops
      8 | cdbhash_bpchar_ops
      9 | cdbhash_text_ops
     10 | cdbhash_text_ops
     11 | cdbhash_bytea_ops
     12 | cdbhash_name_ops
     13 | cdbhash_oid_ops
     14 | cdbhash_tid_ops
     15 | cdbhash_timestamp_ops
     16 | cdbhash_timestamptz_ops
     17 | cdbhash_date_ops
     18 | cdbhash_time_ops
     19 | cdbhash_timetz_ops
     20 | cdbhash_interval_ops
     21 | cdbhash_inet_ops
     22 | cdbhash_inet_ops
     23 | cdbhash_macaddr_ops
     24 | cdbhash_bit_ops
     25 | cdbhash_bit_ops
     26 | cdbhash_bool_ops
     27 | cdbhash_oidvector_ops
     28 | cdbhash_cash_ops
     29 | cdbhash_complex_ops
 (29 rows)

 insert into all_legacy_types values (
   '12345',          -- int2
   '12345678',       -- int4
   '1234567890123',  -- int8
   '1.2345',         -- float4
   '1.23456789',     -- float8
   '1.23456789',     -- numeric
   'x',              -- "char"
   'bpchar foobar',  -- bpchar
   'text foobar',     -- text
   'varchar foobar', -- varchar
   '\xdeadbeef',     -- bytea
   'name foobar',    -- name
   '12345',          -- oid
   '(1,1)',          -- tid
   '2018-11-25 01:23:45+02',   -- timestamp
   '2018-11-25 01:23:45+02',   -- timestamptz
   '2018-11-25',     -- date,
   '01:23:45',       -- time,
   '01:23:45+02',    -- timetz,
   '6 years',        -- interval,
   '192.168.1.255/25',   -- inet,
   '10.1.2',         -- cidr,
   '08:00:2b:01:02:03',         -- macaddr,
   B'11011000000',   -- bit,
   B'01010101010',   -- varbit,
   'true',        -- bool,
   '1 2 3 4',        -- oidvector,
   '123.45',        -- money,
   --uuid_col         -- uuid,
   '5 + 3i'         -- complex
 );
 -- Test that CTAS honors the gp_use_legacy_hashops GUC
 -- Note: When ORCA is on, the distribution is RANDOM.
 create table legacy_hashops_ctas as select 1;
 select gpdp.distkey, pgopc.opcname
   from gp_distribution_policy gpdp, pg_opclass pgopc
   where gpdp.localoid='legacy_hashops_ctas'::regclass and pgopc.oid::text = gpdp.distclass::text;
  distkey | opcname
 ---------+---------
 (0 rows)

 set gp_use_legacy_hashops=off;
 --
 -- Test joins between tables using a mix of default and legacy opclasses.
 --
 create table legacy_int (id int4) distributed by (id cdbhash_int4_ops);
 insert into legacy_int values (1), (2), (3);
 create table modern_int (id int4) distributed by (id);
 insert into modern_int values (2), (3), (4);
 analyze modern_int;
 create table modern_text (t text) distributed by (t);
 insert into modern_text values ('foo'), ('1');
 -- Only legacy opclasses used. ORCA can deal with this
 explain (costs off) select * from legacy_int a inner join legacy_int b on a.id = b.id;
                       QUERY PLAN
 -------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.id = b.id)
          ->  Seq Scan on legacy_int a
          ->  Hash
                ->  Seq Scan on legacy_int b
  Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
 (7 rows)

 select * from legacy_int a inner join legacy_int b on a.id = b.id;
  id | id
 ----+----
   1 |  1
   2 |  2
   3 |  3
 (3 rows)

 -- A mixture of legacy and modern opclasses. Needs a Redistribute motion. ORCA
 -- can't handle this, and falls back.
 explain (costs off) select * from legacy_int a inner join modern_int b on a.id = b.id;
                             QUERY PLAN
 ------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.id = b.id)
          ->  Seq Scan on legacy_int a
          ->  Hash
                ->  Redistribute Motion 3:3  (slice2; segments: 3)
                      Hash Key: b.id
                      ->  Seq Scan on modern_int b
  Optimizer: Postgres query optimizer
 (9 rows)

 select * from legacy_int a inner join modern_int b on a.id = b.id;
  id | id
 ----+----
   2 |  2
   3 |  3
 (2 rows)

 -- for the sake of completeness, modern opclasses only. No problem for ORCA.
 explain (costs off) select * from modern_int a inner join modern_int b on a.id = b.id;
                       QUERY PLAN
 -------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.id = b.id)
          ->  Seq Scan on modern_int a
          ->  Hash
                ->  Seq Scan on modern_int b
  Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
 (7 rows)

 select * from modern_int a inner join modern_int b on a.id = b.id;
  id | id
 ----+----
   2 |  2
   3 |  3
   4 |  4
 (3 rows)

 -- In principle, ORCA would create a correct plan for this, because even though it
 -- mixes the legacy and other opclasses, they're not used in join conditions. But
 -- the ORCA translator code is conservative, and falls back if there are any mixed
 -- use, even if it would be safe.
 explain (costs off) select * from legacy_int a inner join modern_text b on a.id::text = b.t;
                          QUERY PLAN
 ------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: ((a.id)::text = b.t)
          ->  Redistribute Motion 3:3  (slice2; segments: 3)
                Hash Key: (a.id)::text
                ->  Seq Scan on legacy_int a
          ->  Hash
                ->  Seq Scan on modern_text b
  Optimizer: Postgres query optimizer
 (9 rows)

 select * from legacy_int a inner join modern_text b on a.id::text = b.t;
  id | t
 ----+---
   1 | 1
 (1 row)

 -- Also test with a domain over a base type, and enums.
 create domain intdom as integer;
 create table legacy_domain_over_int(id intdom) distributed by(id cdbhash_int4_ops);
 insert into legacy_domain_over_int values (1), (2), (3);
 analyze legacy_domain_over_int;
 explain (costs off) select * from legacy_domain_over_int a inner join legacy_domain_over_int b on a.id = b.id;
                                              QUERY PLAN
 ----------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: ((a.id)::integer = (b.id)::integer)
          ->  Seq Scan on legacy_domain_over_int a
          ->  Hash
                ->  Seq Scan on legacy_domain_over_int b
  Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
 (7 rows)

 explain (costs off) select * from legacy_int a inner join legacy_domain_over_int b on a.id = b.id;
                                 QUERY PLAN
 ---------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.id = (b.id)::integer)
          ->  Seq Scan on legacy_int a
          ->  Hash
                ->  Seq Scan on legacy_domain_over_int b
  Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
 (7 rows)

 explain (costs off) select * from modern_int a inner join legacy_domain_over_int b on a.id = b.id;
                             QUERY PLAN
 ------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.id = (b.id)::integer)
          ->  Seq Scan on modern_int a
          ->  Hash
                ->  Redistribute Motion 3:3  (slice2; segments: 3)
                      Hash Key: b.id
                      ->  Seq Scan on legacy_domain_over_int b
  Optimizer: Postgres query optimizer
 (9 rows)

 -- Distributing by enum has been banned, so this test is updated to instead distribute by a dummy int column
 -- Banned because in backup/restore scenarios the data will be in the "wrong" segment as oids for each enum
 -- entry are re-generated and hashing them will result in arbitrary segment assignment.
 create type colors as enum ('red', 'green', 'blue');
 create table legacy_enum(col1 int, color colors) distributed by(col1);
 insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
                  QUERY PLAN
 ---------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: ((a.color)::anyenum = (b.color)::anyenum)
          ->  Seq Scan on legacy_enum a
          ->  Hash
                ->  Broadcast Motion 3:3  (slice2; segments: 3)
                      ->  Seq Scan on legacy_enum b
  Optimizer: Pivotal Optimizer (GPORCA) version 3.41.0
 (8 rows)

 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
  col1 | color | col1 | color
 ------+-------+------+-------
     2 | green |    2 | green
     3 | blue  |    3 | blue
     1 | red   |    1 | red
 (3 rows)

 --
 -- A regression issue that the data is reorganized incorrectly when
 -- gp_use_legacy_hashops has non-default value.
 --
 -- The ALTER TABLE command reorganizes the data by using a temporary table, if
 -- a "distributed by" clause is specified without the opclasses, the default
 -- opclasses will be chosen.  There was a bug that the non-legacy opclasses are
 -- always chosen, regarding the setting of gp_use_legacy_hashops.  However the
 -- table's new opclasses are determined with gp_use_legacy_hashops, so when
 -- gp_use_legacy_hashops is true the data will be incorrectly redistributed.
 --
 -- set the guc to the non-default value
 set gp_use_legacy_hashops to on;
 create table legacy_data_reorg (c1 int) distributed by (c1);
 insert into legacy_data_reorg select i from generate_series(1, 10) i;
 -- verify the opclass and data distribution
 select gp_segment_id, c1 from legacy_data_reorg order by 1, 2;
  gp_segment_id | c1
 ---------------+----
              0 |  1
              0 |  2
              1 |  3
              1 |  4
              1 |  5
              1 |  6
              1 |  7
              2 |  8
              2 |  9
              2 | 10
 (10 rows)

 select dp.localoid::regclass::name as name, oc.opcname
   from gp_distribution_policy dp
   join pg_opclass oc
     on oc.oid::text = dp.distclass::text
  where dp.localoid = 'legacy_data_reorg'::regclass::oid;
        name        |     opcname
 -------------------+------------------
  legacy_data_reorg | cdbhash_int4_ops
 (1 row)

 -- when reorganizing the table we set the distributed-by without an explicit
 -- opclass, so the default one should be chosen according to
 -- gp_use_legacy_hashops.
 alter table legacy_data_reorg set with (reorganize) distributed by (c1);
 -- double-check the opclass and data distribution
 select gp_segment_id, c1 from legacy_data_reorg order by 1, 2;
  gp_segment_id | c1
 ---------------+----
              0 |  1
              0 |  2
              1 |  3
              1 |  4
              1 |  5
              1 |  6
              1 |  7
              2 |  8
              2 |  9
              2 | 10
 (10 rows)

 select dp.localoid::regclass::name as name, oc.opcname
   from gp_distribution_policy dp
   join pg_opclass oc
     on oc.oid::text = dp.distclass::text
  where dp.localoid = 'legacy_data_reorg'::regclass::oid;
        name        |     opcname
 -------------------+------------------
  legacy_data_reorg | cdbhash_int4_ops
 (1 row)

 --
 -- A regression issue similar to previous one, with CTAS.
 --
 -- The default opclasses in CTAS should also be determined with
 -- gp_use_legacy_hashops.
 --
 set gp_use_legacy_hashops=off;
 create table ctastest_off as select 123 as col distributed by (col);
 set gp_use_legacy_hashops=on;
 create table ctastest_on as select 123 as col distributed by (col);
 select dp.localoid::regclass::name as name, oc.opcname
   from gp_distribution_policy dp
   join pg_opclass oc
     on oc.oid::text = dp.distclass::text
  where dp.localoid in ('ctastest_on'::regclass::oid,
                        'ctastest_off'::regclass::oid);
      name     |     opcname
 --------------+------------------
  ctastest_off | int4_ops
  ctastest_on  | cdbhash_int4_ops
 (2 rows)

 set gp_use_legacy_hashops=on;
 create table try_distinct_array (test_char varchar,test_array integer[]);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'test_char' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into try_distinct_array select 'y',string_to_array('1~1','~')::int[];
 analyze try_distinct_array;
 insert into try_distinct_array select 'n',string_to_array('1~1','~')::int[];
 -- Aggregate with grouping column that does not have legacy hashop
 explain (costs off) select distinct test_array from try_distinct_array;
                    QUERY PLAN
 ------------------------------------------------
  HashAggregate
    Group Key: test_array
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  Seq Scan on try_distinct_array
  Optimizer: Pivotal Optimizer (GPORCA)
 (5 rows)

 select distinct test_array from try_distinct_array;
  test_array
 ------------
  {1,1}
 (1 row)

 -- Hash join on column that does not have legacy hashop
 explain (costs off) select * from try_distinct_array a, try_distinct_array b where a.test_array=b.test_array;
                                                QUERY PLAN
 --------------------------------------------------------------------------------------------------------
  Hash Join
    Hash Cond: ((a.test_array)::anyarray = (b.test_array)::anyarray)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  Seq Scan on try_distinct_array a
    ->  Hash
          ->  Gather Motion 3:1  (slice2; segments: 3)
                ->  Seq Scan on try_distinct_array b
  Optimizer: Pivotal Optimizer (GPORCA)
 (8 rows)

 select * from try_distinct_array a, try_distinct_array b where a.test_array=b.test_array;
  test_char | test_array | test_char | test_array
 -----------+------------+-----------+------------
  n         | {1,1}      | y         | {1,1}
  n         | {1,1}      | n         | {1,1}
  y         | {1,1}      | y         | {1,1}
  y         | {1,1}      | n         | {1,1}
 (4 rows)

 -- CTAS should use value of gp_use_legacy_hashops when setting the distribution policy based on an existing table
 set gp_use_legacy_hashops=on;
 create table ctas_base_legacy as select unnest(array[1,2,3]) as col distributed by (col);
 set gp_use_legacy_hashops=off;
 create table ctas_from_legacy as select * from ctas_base_legacy distributed by (col);
 create table ctas_explicit_legacy as select * from ctas_base_legacy distributed by (col cdbhash_int4_ops);
 create table ctas_base_nonlegacy as select unnest(array[1,2,3]) as col distributed by (col);
 set gp_use_legacy_hashops=on;
 create table ctas_from_nonlegacy as select * from ctas_base_nonlegacy distributed by (col);
 create table ctas_explicit_nonlegacy as select * from ctas_base_nonlegacy distributed by (col int4_ops);
 select dp.localoid::regclass as name, opc.opcname
   from gp_distribution_policy dp
   join pg_opclass opc
     on ARRAY[opc.oid]::oidvector = dp.distclass
  where dp.localoid in ('ctas_base_legacy'::regclass,
                        'ctas_from_legacy'::regclass,
                        'ctas_base_nonlegacy'::regclass,
                        'ctas_from_nonlegacy'::regclass,
                        'ctas_explicit_legacy'::regclass,
                        'ctas_explicit_nonlegacy'::regclass);
           name           |     opcname
 -------------------------+------------------
  ctas_explicit_nonlegacy | int4_ops
  ctas_base_nonlegacy     | int4_ops
  ctas_from_legacy        | int4_ops
  ctas_from_nonlegacy     | cdbhash_int4_ops
  ctas_explicit_legacy    | cdbhash_int4_ops
  ctas_base_legacy        | cdbhash_int4_ops
 (6 rows)

 select * from ctas_from_legacy where col=1;
  col
 -----
    1
 (1 row)

 select * from ctas_explicit_legacy where col=1;
  col
 -----
    1
 (1 row)

 select * from ctas_from_nonlegacy where col=1;
  col
 -----
    1
 (1 row)

 select * from ctas_explicit_nonlegacy where col=1;
  col
 -----
    1
 (1 row)
	--
	-- Tests for legacy cdbhash opclasses
	--
	drop schema if exists gpdist_legacy_opclasses;
	create schema gpdist_legacy_opclasses;
	set search_path to gpdist_legacy_opclasses;
	-- Basic sanity check of all the legacy hash opclasses. Create a table that
	-- uses all of them in the distribution key, and insert a value.
	set gp_use_legacy_hashops=on;
	create table all_legacy_types(
	int2_col int2,
	int4_col int4,
	int8_col int8,
	float4_col float4,
	float8_col float8,
	numeric_col numeric,
	char_col "char",
	bpchar_col char(20),
	text_col text,
	varchar_col varchar(20),
	bytea_col bytea,
	name_col name,
	oid_col oid,
	tid_col tid,
	timestamp_col timestamp,
	timestamptz_col timestamptz,
	date_col date,
	time_col time,
	timetz_col timetz,
	interval_col interval,
	inet_col inet,
	cidr_col cidr,
	macaddr_col macaddr,
	bit_col bit(11),
	varbit_col varbit(20),
	bool_col bool,
	oidvector_col oidvector,
	money_col money,
	--uuid_col uuid,
	complex_col complex
	) distributed by (
	int2_col,
	int4_col,
	int8_col,
	float4_col,
	float8_col,
	numeric_col,
	char_col,
	bpchar_col,
	text_col,
	varchar_col,
	bytea_col,
	name_col,
	oid_col,
	tid_col,
	timestamp_col,
	timestamptz_col,
	date_col,
	time_col,
	timetz_col,
	interval_col,
	inet_col,
	cidr_col,
	macaddr_col,
	bit_col,
	varbit_col,
	bool_col,
	oidvector_col,
	money_col,
	--uuid_col,
	complex_col
	);
	-- Verify that all columns are using the legacy hashops
	select attno, opc.opcname from
	(select unnest(distkey) as attno, unnest(distclass) as distclass from gp_distribution_policy
	where localoid='all_legacy_types'::regclass) as d,
	pg_opclass opc
	where opc.oid=distclass
	order by attno;
	attno \| opcname
	-------+-------------------------
	1 \| cdbhash_int2_ops
	2 \| cdbhash_int4_ops
	3 \| cdbhash_int8_ops
	4 \| cdbhash_float4_ops
	5 \| cdbhash_float8_ops
	6 \| cdbhash_numeric_ops
	7 \| cdbhash_char_ops
	8 \| cdbhash_bpchar_ops
	9 \| cdbhash_text_ops
	10 \| cdbhash_text_ops
	11 \| cdbhash_bytea_ops
	12 \| cdbhash_name_ops
	13 \| cdbhash_oid_ops
	14 \| cdbhash_tid_ops
	15 \| cdbhash_timestamp_ops
	16 \| cdbhash_timestamptz_ops
	17 \| cdbhash_date_ops
	18 \| cdbhash_time_ops
	19 \| cdbhash_timetz_ops
	20 \| cdbhash_interval_ops
	21 \| cdbhash_inet_ops
	22 \| cdbhash_inet_ops
	23 \| cdbhash_macaddr_ops
	24 \| cdbhash_bit_ops
	25 \| cdbhash_bit_ops
	26 \| cdbhash_bool_ops
	27 \| cdbhash_oidvector_ops
	28 \| cdbhash_cash_ops
	29 \| cdbhash_complex_ops
	(29 rows)

	insert into all_legacy_types values (
	'12345', -- int2
	'12345678', -- int4
	'1234567890123', -- int8
	'1.2345', -- float4
	'1.23456789', -- float8
	'1.23456789', -- numeric
	'x', -- "char"
	'bpchar foobar', -- bpchar
	'text foobar', -- text
	'varchar foobar', -- varchar
	'\xdeadbeef', -- bytea
	'name foobar', -- name
	'12345', -- oid
	'(1,1)', -- tid
	'2018-11-25 01:23:45+02', -- timestamp
	'2018-11-25 01:23:45+02', -- timestamptz
	'2018-11-25', -- date,
	'01:23:45', -- time,
	'01:23:45+02', -- timetz,
	'6 years', -- interval,
	'192.168.1.255/25', -- inet,
	'10.1.2', -- cidr,
	'08:00:2b:01:02:03', -- macaddr,
	B'11011000000', -- bit,
	B'01010101010', -- varbit,
	'true', -- bool,
	'1 2 3 4', -- oidvector,
	'123.45', -- money,
	--uuid_col -- uuid,
	'5 + 3i' -- complex
	);
	-- Test that CTAS honors the gp_use_legacy_hashops GUC
	-- Note: When ORCA is on, the distribution is RANDOM.
	create table legacy_hashops_ctas as select 1;
	select gpdp.distkey, pgopc.opcname
	from gp_distribution_policy gpdp, pg_opclass pgopc
	where gpdp.localoid='legacy_hashops_ctas'::regclass and pgopc.oid::text = gpdp.distclass::text;
	distkey \| opcname
	---------+---------
	(0 rows)

	set gp_use_legacy_hashops=off;
	--
	-- Test joins between tables using a mix of default and legacy opclasses.
	--
	create table legacy_int (id int4) distributed by (id cdbhash_int4_ops);
	insert into legacy_int values (1), (2), (3);
	create table modern_int (id int4) distributed by (id);
	insert into modern_int values (2), (3), (4);
	analyze modern_int;
	create table modern_text (t text) distributed by (t);
	insert into modern_text values ('foo'), ('1');
	-- Only legacy opclasses used. ORCA can deal with this
	explain (costs off) select * from legacy_int a inner join legacy_int b on a.id = b.id;
	QUERY PLAN
	-------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: (a.id = b.id)
	-> Seq Scan on legacy_int a
	-> Hash
	-> Seq Scan on legacy_int b
	Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
	(7 rows)

	select * from legacy_int a inner join legacy_int b on a.id = b.id;
	id \| id
	----+----
	1 \| 1
	2 \| 2
	3 \| 3
	(3 rows)

	-- A mixture of legacy and modern opclasses. Needs a Redistribute motion. ORCA
	-- can't handle this, and falls back.
	explain (costs off) select * from legacy_int a inner join modern_int b on a.id = b.id;
	QUERY PLAN
	------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: (a.id = b.id)
	-> Seq Scan on legacy_int a
	-> Hash
	-> Redistribute Motion 3:3 (slice2; segments: 3)
	Hash Key: b.id
	-> Seq Scan on modern_int b
	Optimizer: Postgres query optimizer
	(9 rows)

	select * from legacy_int a inner join modern_int b on a.id = b.id;
	id \| id
	----+----
	2 \| 2
	3 \| 3
	(2 rows)

	-- for the sake of completeness, modern opclasses only. No problem for ORCA.
	explain (costs off) select * from modern_int a inner join modern_int b on a.id = b.id;
	QUERY PLAN
	-------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: (a.id = b.id)
	-> Seq Scan on modern_int a
	-> Hash
	-> Seq Scan on modern_int b
	Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
	(7 rows)

	select * from modern_int a inner join modern_int b on a.id = b.id;
	id \| id
	----+----
	2 \| 2
	3 \| 3
	4 \| 4
	(3 rows)

	-- In principle, ORCA would create a correct plan for this, because even though it
	-- mixes the legacy and other opclasses, they're not used in join conditions. But
	-- the ORCA translator code is conservative, and falls back if there are any mixed
	-- use, even if it would be safe.
	explain (costs off) select * from legacy_int a inner join modern_text b on a.id::text = b.t;
	QUERY PLAN
	------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: ((a.id)::text = b.t)
	-> Redistribute Motion 3:3 (slice2; segments: 3)
	Hash Key: (a.id)::text
	-> Seq Scan on legacy_int a
	-> Hash
	-> Seq Scan on modern_text b
	Optimizer: Postgres query optimizer
	(9 rows)

	select * from legacy_int a inner join modern_text b on a.id::text = b.t;
	id \| t
	----+---
	1 \| 1
	(1 row)

	-- Also test with a domain over a base type, and enums.
	create domain intdom as integer;
	create table legacy_domain_over_int(id intdom) distributed by(id cdbhash_int4_ops);
	insert into legacy_domain_over_int values (1), (2), (3);
	analyze legacy_domain_over_int;
	explain (costs off) select * from legacy_domain_over_int a inner join legacy_domain_over_int b on a.id = b.id;
	QUERY PLAN
	----------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: ((a.id)::integer = (b.id)::integer)
	-> Seq Scan on legacy_domain_over_int a
	-> Hash
	-> Seq Scan on legacy_domain_over_int b
	Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
	(7 rows)

	explain (costs off) select * from legacy_int a inner join legacy_domain_over_int b on a.id = b.id;
	QUERY PLAN
	---------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: (a.id = (b.id)::integer)
	-> Seq Scan on legacy_int a
	-> Hash
	-> Seq Scan on legacy_domain_over_int b
	Optimizer: Pivotal Optimizer (GPORCA) version 3.9.0
	(7 rows)

	explain (costs off) select * from modern_int a inner join legacy_domain_over_int b on a.id = b.id;
	QUERY PLAN
	------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: (a.id = (b.id)::integer)
	-> Seq Scan on modern_int a
	-> Hash
	-> Redistribute Motion 3:3 (slice2; segments: 3)
	Hash Key: b.id
	-> Seq Scan on legacy_domain_over_int b
	Optimizer: Postgres query optimizer
	(9 rows)

	-- Distributing by enum has been banned, so this test is updated to instead distribute by a dummy int column
	-- Banned because in backup/restore scenarios the data will be in the "wrong" segment as oids for each enum
	-- entry are re-generated and hashing them will result in arbitrary segment assignment.
	create type colors as enum ('red', 'green', 'blue');
	create table legacy_enum(col1 int, color colors) distributed by(col1);
	insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
	explain (costs off) select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
	QUERY PLAN
	---------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3)
	-> Hash Join
	Hash Cond: ((a.color)::anyenum = (b.color)::anyenum)
	-> Seq Scan on legacy_enum a
	-> Hash
	-> Broadcast Motion 3:3 (slice2; segments: 3)
	-> Seq Scan on legacy_enum b
	Optimizer: Pivotal Optimizer (GPORCA) version 3.41.0
	(8 rows)

	select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
	col1 \| color \| col1 \| color
	------+-------+------+-------
	2 \| green \| 2 \| green
	3 \| blue \| 3 \| blue
	1 \| red \| 1 \| red
	(3 rows)

	--
	-- A regression issue that the data is reorganized incorrectly when
	-- gp_use_legacy_hashops has non-default value.
	--
	-- The ALTER TABLE command reorganizes the data by using a temporary table, if
	-- a "distributed by" clause is specified without the opclasses, the default
	-- opclasses will be chosen. There was a bug that the non-legacy opclasses are
	-- always chosen, regarding the setting of gp_use_legacy_hashops. However the
	-- table's new opclasses are determined with gp_use_legacy_hashops, so when
	-- gp_use_legacy_hashops is true the data will be incorrectly redistributed.
	--
	-- set the guc to the non-default value
	set gp_use_legacy_hashops to on;
	create table legacy_data_reorg (c1 int) distributed by (c1);
	insert into legacy_data_reorg select i from generate_series(1, 10) i;
	-- verify the opclass and data distribution
	select gp_segment_id, c1 from legacy_data_reorg order by 1, 2;
	gp_segment_id \| c1
	---------------+----
	0 \| 1
	0 \| 2
	1 \| 3
	1 \| 4
	1 \| 5
	1 \| 6
	1 \| 7
	2 \| 8
	2 \| 9
	2 \| 10
	(10 rows)

	select dp.localoid::regclass::name as name, oc.opcname
	from gp_distribution_policy dp
	join pg_opclass oc
	on oc.oid::text = dp.distclass::text
	where dp.localoid = 'legacy_data_reorg'::regclass::oid;
	name \| opcname
	-------------------+------------------
	legacy_data_reorg \| cdbhash_int4_ops
	(1 row)

	-- when reorganizing the table we set the distributed-by without an explicit
	-- opclass, so the default one should be chosen according to
	-- gp_use_legacy_hashops.
	alter table legacy_data_reorg set with (reorganize) distributed by (c1);
	-- double-check the opclass and data distribution
	select gp_segment_id, c1 from legacy_data_reorg order by 1, 2;
	gp_segment_id \| c1
	---------------+----
	0 \| 1
	0 \| 2
	1 \| 3
	1 \| 4
	1 \| 5
	1 \| 6
	1 \| 7
	2 \| 8
	2 \| 9
	2 \| 10
	(10 rows)

	select dp.localoid::regclass::name as name, oc.opcname
	from gp_distribution_policy dp
	join pg_opclass oc
	on oc.oid::text = dp.distclass::text
	where dp.localoid = 'legacy_data_reorg'::regclass::oid;
	name \| opcname
	-------------------+------------------
	legacy_data_reorg \| cdbhash_int4_ops
	(1 row)

	--
	-- A regression issue similar to previous one, with CTAS.
	--
	-- The default opclasses in CTAS should also be determined with
	-- gp_use_legacy_hashops.
	--
	set gp_use_legacy_hashops=off;
	create table ctastest_off as select 123 as col distributed by (col);
	set gp_use_legacy_hashops=on;
	create table ctastest_on as select 123 as col distributed by (col);
	select dp.localoid::regclass::name as name, oc.opcname
	from gp_distribution_policy dp
	join pg_opclass oc
	on oc.oid::text = dp.distclass::text
	where dp.localoid in ('ctastest_on'::regclass::oid,
	'ctastest_off'::regclass::oid);
	name \| opcname
	--------------+------------------
	ctastest_off \| int4_ops
	ctastest_on \| cdbhash_int4_ops
	(2 rows)

	set gp_use_legacy_hashops=on;
	create table try_distinct_array (test_char varchar,test_array integer[]);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'test_char' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	insert into try_distinct_array select 'y',string_to_array('1~1','~')::int[];
	analyze try_distinct_array;
	insert into try_distinct_array select 'n',string_to_array('1~1','~')::int[];
	-- Aggregate with grouping column that does not have legacy hashop
	explain (costs off) select distinct test_array from try_distinct_array;
	QUERY PLAN
	------------------------------------------------
	HashAggregate
	Group Key: test_array
	-> Gather Motion 3:1 (slice1; segments: 3)
	-> Seq Scan on try_distinct_array
	Optimizer: Pivotal Optimizer (GPORCA)
	(5 rows)

	select distinct test_array from try_distinct_array;
	test_array
	------------
	{1,1}
	(1 row)

	-- Hash join on column that does not have legacy hashop
	explain (costs off) select * from try_distinct_array a, try_distinct_array b where a.test_array=b.test_array;
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------
	Hash Join
	Hash Cond: ((a.test_array)::anyarray = (b.test_array)::anyarray)
	-> Gather Motion 3:1 (slice1; segments: 3)
	-> Seq Scan on try_distinct_array a
	-> Hash
	-> Gather Motion 3:1 (slice2; segments: 3)
	-> Seq Scan on try_distinct_array b
	Optimizer: Pivotal Optimizer (GPORCA)
	(8 rows)

	select * from try_distinct_array a, try_distinct_array b where a.test_array=b.test_array;
	test_char \| test_array \| test_char \| test_array
	-----------+------------+-----------+------------
	n \| {1,1} \| y \| {1,1}
	n \| {1,1} \| n \| {1,1}
	y \| {1,1} \| y \| {1,1}
	y \| {1,1} \| n \| {1,1}
	(4 rows)

	-- CTAS should use value of gp_use_legacy_hashops when setting the distribution policy based on an existing table
	set gp_use_legacy_hashops=on;
	create table ctas_base_legacy as select unnest(array[1,2,3]) as col distributed by (col);
	set gp_use_legacy_hashops=off;
	create table ctas_from_legacy as select * from ctas_base_legacy distributed by (col);
	create table ctas_explicit_legacy as select * from ctas_base_legacy distributed by (col cdbhash_int4_ops);
	create table ctas_base_nonlegacy as select unnest(array[1,2,3]) as col distributed by (col);
	set gp_use_legacy_hashops=on;
	create table ctas_from_nonlegacy as select * from ctas_base_nonlegacy distributed by (col);
	create table ctas_explicit_nonlegacy as select * from ctas_base_nonlegacy distributed by (col int4_ops);
	select dp.localoid::regclass as name, opc.opcname
	from gp_distribution_policy dp
	join pg_opclass opc
	on ARRAY[opc.oid]::oidvector = dp.distclass
	where dp.localoid in ('ctas_base_legacy'::regclass,
	'ctas_from_legacy'::regclass,
	'ctas_base_nonlegacy'::regclass,
	'ctas_from_nonlegacy'::regclass,
	'ctas_explicit_legacy'::regclass,
	'ctas_explicit_nonlegacy'::regclass);
	name \| opcname
	-------------------------+------------------
	ctas_explicit_nonlegacy \| int4_ops
	ctas_base_nonlegacy \| int4_ops
	ctas_from_legacy \| int4_ops
	ctas_from_nonlegacy \| cdbhash_int4_ops
	ctas_explicit_legacy \| cdbhash_int4_ops
	ctas_base_legacy \| cdbhash_int4_ops
	(6 rows)

	select * from ctas_from_legacy where col=1;
	col
	-----
	1
	(1 row)

	select * from ctas_explicit_legacy where col=1;
	col
	-----
	1
	(1 row)

	select * from ctas_from_nonlegacy where col=1;
	col
	-----
	1
	(1 row)

	select * from ctas_explicit_nonlegacy where col=1;
	col
	-----
	1
	(1 row)