ql/src/test/queries/clientpositive/avro_partitioned.q - hive - Git at Google

 set hive.mapred.mode=nonstrict;
 -- SORT_QUERY_RESULTS
 -- Verify that table scans work with partitioned Avro tables
 CREATE TABLE episodes_n2
 ROW FORMAT
 SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
 STORED AS
 INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
 TBLPROPERTIES ('avro.schema.literal'='{
   "namespace": "testing.hive.avro.serde",
   "name": "episodes_n2",
   "type": "record",
   "fields": [
     {
       "name":"title",
       "type":"string",
       "doc":"episode title"
     },
     {
       "name":"air_date",
       "type":"string",
       "doc":"initial date"
     },
     {
       "name":"doctor",
       "type":"int",
       "doc":"main actor playing the Doctor in episode"
     }
   ]
 }');

 LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes_n2;

 CREATE TABLE episodes_partitioned_n1
 PARTITIONED BY (doctor_pt INT)
 ROW FORMAT
 SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
 STORED AS
 INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
 TBLPROPERTIES ('avro.schema.literal'='{
   "namespace": "testing.hive.avro.serde",
   "name": "episodes_n2",
   "type": "record",
   "fields": [
     {
       "name":"title",
       "type":"string",
       "doc":"episode title"
     },
     {
       "name":"air_date",
       "type":"string",
       "doc":"initial date"
     },
     {
       "name":"doctor",
       "type":"int",
       "doc":"main actor playing the Doctor in episode"
     }
   ]
 }');

 INSERT OVERWRITE TABLE episodes_partitioned_n1 PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n2;

 SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt > 6;

 -- Verify that Fetch works in addition to Map
 SELECT * FROM episodes_partitioned_n1 ORDER BY air_date LIMIT 5;
 -- Fetch w/filter to specific partition
 SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt = 6;
 -- Fetch w/non-existent partition
 SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt = 7 LIMIT 5;
 -- Alter table add an empty partition
 ALTER TABLE episodes_partitioned_n1 ADD PARTITION (doctor_pt=7);
 SELECT COUNT(*) FROM episodes_partitioned_n1;

 -- Verify that reading from an Avro partition works
 -- even if it has an old schema relative to the current table level schema

 -- Create table and store schema in SERDEPROPERTIES
 CREATE TABLE episodes_partitioned_serdeproperties
 PARTITIONED BY (doctor_pt INT)
 ROW FORMAT
 SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
 WITH SERDEPROPERTIES ('avro.schema.literal'='{
   "namespace": "testing.hive.avro.serde",
   "name": "episodes_n2",
   "type": "record",
   "fields": [
     {
       "name":"title",
       "type":"string",
       "doc":"episode title"
     },
     {
       "name":"air_date",
       "type":"string",
       "doc":"initial date"
     },
     {
       "name":"doctor",
       "type":"int",
       "doc":"main actor playing the Doctor in episode"
     }
   ]
 }')
 STORED AS
 INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';

 -- Insert data into a partition
 INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n2;
 set hive.metastore.disallow.incompatible.col.type.changes=false;
 -- Evolve the table schema by adding new array field "cast_and_crew"
 ALTER TABLE episodes_partitioned_serdeproperties
 SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
 WITH SERDEPROPERTIES ('avro.schema.literal'='{
   "namespace": "testing.hive.avro.serde",
   "name": "episodes_n2",
   "type": "record",
   "fields": [
     {
       "name":"cast_and_crew",
       "type":{"type":"array","items":"string"},
       "default":[]
     },
     {
       "name":"title",
       "type":"string",
       "doc":"episode title"
     },
     {
       "name":"air_date",
       "type":"string",
       "doc":"initial date"
     },
     {
       "name":"doctor",
       "type":"int",
       "doc":"main actor playing the Doctor in episode"
     }
   ]
 }');

 -- Try selecting from the evolved table
 SELECT * FROM episodes_partitioned_serdeproperties;
 reset hive.metastore.disallow.incompatible.col.type.changes;
	set hive.mapred.mode=nonstrict;
	-- SORT_QUERY_RESULTS
	-- Verify that table scans work with partitioned Avro tables
	CREATE TABLE episodes_n2
	ROW FORMAT
	SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
	STORED AS
	INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
	OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
	TBLPROPERTIES ('avro.schema.literal'='{
	"namespace": "testing.hive.avro.serde",
	"name": "episodes_n2",
	"type": "record",
	"fields": [
	{
	"name":"title",
	"type":"string",
	"doc":"episode title"
	},
	{
	"name":"air_date",
	"type":"string",
	"doc":"initial date"
	},
	{
	"name":"doctor",
	"type":"int",
	"doc":"main actor playing the Doctor in episode"
	}
	]
	}');

	LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes_n2;

	CREATE TABLE episodes_partitioned_n1
	PARTITIONED BY (doctor_pt INT)
	ROW FORMAT
	SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
	STORED AS
	INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
	OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
	TBLPROPERTIES ('avro.schema.literal'='{
	"namespace": "testing.hive.avro.serde",
	"name": "episodes_n2",
	"type": "record",
	"fields": [
	{
	"name":"title",
	"type":"string",
	"doc":"episode title"
	},
	{
	"name":"air_date",
	"type":"string",
	"doc":"initial date"
	},
	{
	"name":"doctor",
	"type":"int",
	"doc":"main actor playing the Doctor in episode"
	}
	]
	}');

	INSERT OVERWRITE TABLE episodes_partitioned_n1 PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n2;

	SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt > 6;

	-- Verify that Fetch works in addition to Map
	SELECT * FROM episodes_partitioned_n1 ORDER BY air_date LIMIT 5;
	-- Fetch w/filter to specific partition
	SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt = 6;
	-- Fetch w/non-existent partition
	SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt = 7 LIMIT 5;
	-- Alter table add an empty partition
	ALTER TABLE episodes_partitioned_n1 ADD PARTITION (doctor_pt=7);
	SELECT COUNT(*) FROM episodes_partitioned_n1;

	-- Verify that reading from an Avro partition works
	-- even if it has an old schema relative to the current table level schema

	-- Create table and store schema in SERDEPROPERTIES
	CREATE TABLE episodes_partitioned_serdeproperties
	PARTITIONED BY (doctor_pt INT)
	ROW FORMAT
	SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
	WITH SERDEPROPERTIES ('avro.schema.literal'='{
	"namespace": "testing.hive.avro.serde",
	"name": "episodes_n2",
	"type": "record",
	"fields": [
	{
	"name":"title",
	"type":"string",
	"doc":"episode title"
	},
	{
	"name":"air_date",
	"type":"string",
	"doc":"initial date"
	},
	{
	"name":"doctor",
	"type":"int",
	"doc":"main actor playing the Doctor in episode"
	}
	]
	}')
	STORED AS
	INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
	OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';

	-- Insert data into a partition
	INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n2;
	set hive.metastore.disallow.incompatible.col.type.changes=false;
	-- Evolve the table schema by adding new array field "cast_and_crew"
	ALTER TABLE episodes_partitioned_serdeproperties
	SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
	WITH SERDEPROPERTIES ('avro.schema.literal'='{
	"namespace": "testing.hive.avro.serde",
	"name": "episodes_n2",
	"type": "record",
	"fields": [
	{
	"name":"cast_and_crew",
	"type":{"type":"array","items":"string"},
	"default":[]
	},
	{
	"name":"title",
	"type":"string",
	"doc":"episode title"
	},
	{
	"name":"air_date",
	"type":"string",
	"doc":"initial date"
	},
	{
	"name":"doctor",
	"type":"int",
	"doc":"main actor playing the Doctor in episode"
	}
	]
	}');

	-- Try selecting from the evolved table
	SELECT * FROM episodes_partitioned_serdeproperties;
	reset hive.metastore.disallow.incompatible.col.type.changes;