ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q - hive - Git at Google

 SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 SET hive.fetch.task.conversion=none;
 SET hive.cbo.enable=false;
 SET hive.map.aggr=false;
 -- disabling map side aggregation as that can lead to different intermediate record counts

 CREATE TABLE staging_n8(t tinyint,
            si smallint,
            i int,
            b bigint,
            f float,
            d double,
            bo boolean,
            s string,
            ts timestamp,
            `dec` decimal(4,2),
            bin binary)
 ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
 STORED AS TEXTFILE;

 LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging_n8;
 LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging_n8;

 CREATE TABLE orc_ppd_staging_n2(t tinyint,
            si smallint,
            i int,
            b bigint,
            f float,
            d double,
            bo boolean,
            s string,
            c char(50),
            v varchar(50),
            da date,
            `dec` decimal(4,2),
            bin binary)
 STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");

 insert overwrite table orc_ppd_staging_n2 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c,
 cast(s as varchar(50)) as v, cast(ts as date) as da, `dec`, bin from staging_n8 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin;

 -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
 -- which makes it hard to test bloom filters
 insert into orc_ppd_staging_n2 select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging_n8 limit 1;
 insert into orc_ppd_staging_n2 select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging_n8 limit 1;

 CREATE TABLE orc_ppd_n3(t tinyint,
            si smallint,
            i int,
            b bigint,
            f float,
            d double,
            bo boolean,
            s string,
            c char(50),
            v varchar(50),
            da date,
            `dec` decimal(4,2),
            bin binary)
 STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");

 insert overwrite table orc_ppd_n3 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c,
 cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging_n2 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin;

 SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
 SET hive.optimize.index.filter=false;

 -- Row group statistics for column t:
 -- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
 -- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
 -- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19

 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;

 alter table orc_ppd_n3 change column t t smallint;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;

 alter table orc_ppd_n3 change column t t int;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;

 alter table orc_ppd_n3 change column t t bigint;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > 127;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = 55;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = 54;

 alter table orc_ppd_n3 change column t t string;

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > '127';
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 0 (no row groups)
 select count(*) from orc_ppd_n3 where t > '127';

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = '55';
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 1000 (1 row group)
 select count(*) from orc_ppd_n3 where t = '55';

 SET hive.optimize.index.filter=false;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = '54';
 SET hive.optimize.index.filter=true;
 -- INPUT_RECORDS: 2000 (2 row groups)
 select count(*) from orc_ppd_n3 where t = '54';

 SET hive.optimize.index.filter=false;
 -- float tests
 select count(*) from orc_ppd_n3 where f = 74.72;
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where f = 74.72;

 alter table orc_ppd_n3 change column f f double;

 SET hive.optimize.index.filter=false;
 select count(*) from orc_ppd_n3 where f = 74.72000122070312;
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where f = 74.72000122070312;

 alter table orc_ppd_n3 change column f f string;

 SET hive.optimize.index.filter=false;
 select count(*) from orc_ppd_n3 where f = '74.72000122070312';
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where f = '74.72000122070312';

 SET hive.optimize.index.filter=false;
 -- string tests
 select count(*) from orc_ppd_n3 where s = 'bob davidson';
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';

 alter table orc_ppd_n3 change column s s char(50);

 SET hive.optimize.index.filter=false;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';

 alter table orc_ppd_n3 change column s s varchar(50);

 SET hive.optimize.index.filter=false;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';

 alter table orc_ppd_n3 change column s s char(50);

 SET hive.optimize.index.filter=false;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';

 alter table orc_ppd_n3 change column s s string;

 SET hive.optimize.index.filter=false;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where s = 'bob davidson';

 alter table orc_ppd_n3 add columns (boo boolean);

 SET hive.optimize.index.filter=false;
 -- ppd on newly added column
 select count(*) from orc_ppd_n3 where si = 442;
 select count(*) from orc_ppd_n3 where si = 442 or boo is not null or boo = false;
 SET hive.optimize.index.filter=true;
 select count(*) from orc_ppd_n3 where si = 442;
 select count(*) from orc_ppd_n3 where si = 442 or boo is not null or boo = false;
	SET hive.vectorized.execution.enabled=false;
	set hive.mapred.mode=nonstrict;
	SET hive.fetch.task.conversion=none;
	SET hive.cbo.enable=false;
	SET hive.map.aggr=false;
	-- disabling map side aggregation as that can lead to different intermediate record counts

	CREATE TABLE staging_n8(t tinyint,
	si smallint,
	i int,
	b bigint,
	f float,
	d double,
	bo boolean,
	s string,
	ts timestamp,
	`dec` decimal(4,2),
	bin binary)
	ROW FORMAT DELIMITED FIELDS TERMINATED BY '\|'
	STORED AS TEXTFILE;

	LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging_n8;
	LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging_n8;

	CREATE TABLE orc_ppd_staging_n2(t tinyint,
	si smallint,
	i int,
	b bigint,
	f float,
	d double,
	bo boolean,
	s string,
	c char(50),
	v varchar(50),
	da date,
	`dec` decimal(4,2),
	bin binary)
	STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");

	insert overwrite table orc_ppd_staging_n2 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c,
	cast(s as varchar(50)) as v, cast(ts as date) as da, `dec`, bin from staging_n8 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin;

	-- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
	-- which makes it hard to test bloom filters
	insert into orc_ppd_staging_n2 select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging_n8 limit 1;
	insert into orc_ppd_staging_n2 select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging_n8 limit 1;

	CREATE TABLE orc_ppd_n3(t tinyint,
	si smallint,
	i int,
	b bigint,
	f float,
	d double,
	bo boolean,
	s string,
	c char(50),
	v varchar(50),
	da date,
	`dec` decimal(4,2),
	bin binary)
	STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");

	insert overwrite table orc_ppd_n3 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c,
	cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging_n2 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin;

	SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
	SET hive.optimize.index.filter=false;

	-- Row group statistics for column t:
	-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
	-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
	-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19

	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;

	alter table orc_ppd_n3 change column t t smallint;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;

	alter table orc_ppd_n3 change column t t int;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;

	alter table orc_ppd_n3 change column t t bigint;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > 127;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = 55;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = 54;

	alter table orc_ppd_n3 change column t t string;

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > '127';
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 0 (no row groups)
	select count(*) from orc_ppd_n3 where t > '127';

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = '55';
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 1000 (1 row group)
	select count(*) from orc_ppd_n3 where t = '55';

	SET hive.optimize.index.filter=false;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = '54';
	SET hive.optimize.index.filter=true;
	-- INPUT_RECORDS: 2000 (2 row groups)
	select count(*) from orc_ppd_n3 where t = '54';

	SET hive.optimize.index.filter=false;
	-- float tests
	select count(*) from orc_ppd_n3 where f = 74.72;
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where f = 74.72;

	alter table orc_ppd_n3 change column f f double;

	SET hive.optimize.index.filter=false;
	select count(*) from orc_ppd_n3 where f = 74.72000122070312;
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where f = 74.72000122070312;

	alter table orc_ppd_n3 change column f f string;

	SET hive.optimize.index.filter=false;
	select count(*) from orc_ppd_n3 where f = '74.72000122070312';
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where f = '74.72000122070312';

	SET hive.optimize.index.filter=false;
	-- string tests
	select count(*) from orc_ppd_n3 where s = 'bob davidson';
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';

	alter table orc_ppd_n3 change column s s char(50);

	SET hive.optimize.index.filter=false;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';

	alter table orc_ppd_n3 change column s s varchar(50);

	SET hive.optimize.index.filter=false;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';

	alter table orc_ppd_n3 change column s s char(50);

	SET hive.optimize.index.filter=false;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';

	alter table orc_ppd_n3 change column s s string;

	SET hive.optimize.index.filter=false;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where s = 'bob davidson';

	alter table orc_ppd_n3 add columns (boo boolean);

	SET hive.optimize.index.filter=false;
	-- ppd on newly added column
	select count(*) from orc_ppd_n3 where si = 442;
	select count(*) from orc_ppd_n3 where si = 442 or boo is not null or boo = false;
	SET hive.optimize.index.filter=true;
	select count(*) from orc_ppd_n3 where si = 442;
	select count(*) from orc_ppd_n3 where si = 442 or boo is not null or boo = false;