blob: 37a6d951ee82e609fd5608dc7f13f56eb6ad48b1 [file] [log] [blame]
set hive.stats.dbclass=fs;
--
-- Table src
--
DROP TABLE IF EXISTS src;
CREATE TABLE src (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" INTO TABLE src;
ANALYZE TABLE src COMPUTE STATISTICS;
ANALYZE TABLE src COMPUTE STATISTICS FOR COLUMNS key,value;
--
-- Table src1
--
DROP TABLE IF EXISTS src1;
CREATE TABLE src1 (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv3.txt" INTO TABLE src1;
ANALYZE TABLE src1 COMPUTE STATISTICS;
ANALYZE TABLE src1 COMPUTE STATISTICS FOR COLUMNS key,value;
--
-- Table src_json
--
DROP TABLE IF EXISTS src_json;
CREATE TABLE src_json (json STRING COMMENT 'default') STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/json.txt" INTO TABLE src_json;
ANALYZE TABLE src_json COMPUTE STATISTICS;
ANALYZE TABLE src_json COMPUTE STATISTICS FOR COLUMNS json;
--
-- Table src_sequencefile
--
DROP TABLE IF EXISTS src_sequencefile;
CREATE TABLE src_sequencefile (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS SEQUENCEFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.seq" INTO TABLE src_sequencefile;
ANALYZE TABLE src_sequencefile COMPUTE STATISTICS;
ANALYZE TABLE src_sequencefile COMPUTE STATISTICS FOR COLUMNS key,value;
--
-- Table src_thrift
--
DROP TABLE IF EXISTS src_thrift;
CREATE TABLE src_thrift
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer'
WITH SERDEPROPERTIES (
'serialization.class' = 'org.apache.hadoop.hive.serde2.thrift.test.Complex',
'serialization.format' = 'org.apache.thrift.protocol.TBinaryProtocol')
STORED AS SEQUENCEFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/complex.seq" INTO TABLE src_thrift;
ANALYZE TABLE src_thrift COMPUTE STATISTICS;
--
-- Table srcbucket
--
DROP TABLE IF EXISTS srcbucket_tmp;
DROP TABLE IF EXISTS srcbucket;
CREATE TABLE srcbucket_tmp (key INT, value STRING) STORED AS TEXTFILE;
CREATE TABLE srcbucket (key INT, value STRING)
CLUSTERED BY (key) INTO 2 BUCKETS
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket0.txt" INTO TABLE srcbucket_tmp;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket1.txt" INTO TABLE srcbucket_tmp;
INSERT INTO srcbucket SELECT * FROM srcbucket_tmp;
DROP TABLE srcbucket_tmp;
ANALYZE TABLE srcbucket COMPUTE STATISTICS;
ANALYZE TABLE srcbucket COMPUTE STATISTICS FOR COLUMNS key,value;
--
-- Table srcbucket2
--
DROP TABLE IF EXISTS srcbucket_tmp;
DROP TABLE IF EXISTS srcbucket2;
CREATE TABLE srcbucket_tmp (key INT, value STRING);
CREATE TABLE srcbucket2 (key INT, value STRING)
CLUSTERED BY (key) INTO 4 BUCKETS
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket20.txt" INTO TABLE srcbucket_tmp;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket21.txt" INTO TABLE srcbucket_tmp;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket22.txt" INTO TABLE srcbucket_tmp;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket23.txt" INTO TABLE srcbucket_tmp;
INSERT INTO srcbucket2 SELECT * FROM srcbucket_tmp;
DROP TABLE srcbucket_tmp;
ANALYZE TABLE srcbucket2 COMPUTE STATISTICS;
ANALYZE TABLE srcbucket2 COMPUTE STATISTICS FOR COLUMNS key,value;
--
-- Table srcpart
--
DROP TABLE IF EXISTS srcpart;
CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT 'default')
PARTITIONED BY (ds STRING, hr STRING)
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="11");
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="12");
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="11");
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="12");
ANALYZE TABLE srcpart PARTITION(ds, hr) COMPUTE STATISTICS;
ANALYZE TABLE srcpart PARTITION(ds, hr) COMPUTE STATISTICS FOR COLUMNS key,value;
--
-- Table alltypesorc
--
DROP TABLE IF EXISTS alltypesorc;
CREATE TABLE alltypesorc(
ctinyint TINYINT,
csmallint SMALLINT,
cint INT,
cbigint BIGINT,
cfloat FLOAT,
cdouble DOUBLE,
cstring1 STRING,
cstring2 STRING,
ctimestamp1 TIMESTAMP,
ctimestamp2 TIMESTAMP,
cboolean1 BOOLEAN,
cboolean2 BOOLEAN)
STORED AS ORC;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/alltypesorc"
OVERWRITE INTO TABLE alltypesorc;
ANALYZE TABLE alltypesorc COMPUTE STATISTICS;
ANALYZE TABLE alltypesorc COMPUTE STATISTICS FOR COLUMNS ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2;
--
-- Table primitives
--
DROP TABLE IF EXISTS primitives;
CREATE TABLE primitives (
id INT COMMENT 'default',
bool_col BOOLEAN COMMENT 'default',
tinyint_col TINYINT COMMENT 'default',
smallint_col SMALLINT COMMENT 'default',
int_col INT COMMENT 'default',
bigint_col BIGINT COMMENT 'default',
float_col FLOAT COMMENT 'default',
double_col DOUBLE COMMENT 'default',
date_string_col STRING COMMENT 'default',
string_col STRING COMMENT 'default',
timestamp_col TIMESTAMP COMMENT 'default')
PARTITIONED BY (year INT COMMENT 'default', month INT COMMENT 'default')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
ESCAPED BY '\\'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090101.txt"
OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=1);
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090201.txt"
OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=2);
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090301.txt"
OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=3);
LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090401.txt"
OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=4);
--
-- Function qtest_get_java_boolean
--
DROP FUNCTION IF EXISTS qtest_get_java_boolean;
CREATE FUNCTION qtest_get_java_boolean AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaBoolean';
--
-- Table dest1
--
DROP TABLE IF EXISTS dest1;
CREATE TABLE dest1 (key STRING COMMENT 'default', value STRING COMMENT 'default')
STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
--
-- Table dest2
--
DROP TABLE IF EXISTS dest2;
CREATE TABLE dest2 (key STRING COMMENT 'default', value STRING COMMENT 'default')
STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
--
-- Table dest3
--
DROP TABLE IF EXISTS dest3;
CREATE TABLE dest3 (key STRING COMMENT 'default', value STRING COMMENT 'default')
PARTITIONED BY (ds STRING, hr STRING)
STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
ALTER TABLE dest3 ADD PARTITION (ds='2008-04-08',hr='12');
--
-- Table dest4
--
DROP TABLE IF EXISTS dest4;
CREATE TABLE dest4 (key STRING COMMENT 'default', value STRING COMMENT 'default')
STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
--
-- Table dest4_sequencefile
--
DROP TABLE IF EXISTS dest4_sequencefile;
CREATE TABLE dest4_sequencefile (key STRING COMMENT 'default', value STRING COMMENT 'default')
STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileOutputFormat';
--
-- CBO tables
--
drop table if exists cbo_t1;
drop table if exists cbo_t2;
drop table if exists cbo_t3;
drop table if exists src_cbo;
drop table if exists part;
drop table if exists lineitem;
set hive.cbo.enable=true;
create table cbo_t1(key string, value string, c_int int, c_float float, c_boolean boolean) partitioned by (dt string) row format delimited fields terminated by ',' STORED AS TEXTFILE;
create table cbo_t2(key string, value string, c_int int, c_float float, c_boolean boolean) partitioned by (dt string) row format delimited fields terminated by ',' STORED AS TEXTFILE;
create table cbo_t3(key string, value string, c_int int, c_float float, c_boolean boolean) row format delimited fields terminated by ',' STORED AS TEXTFILE;
load data local inpath '../../data/files/cbo_t1.txt' into table cbo_t1 partition (dt='2014');
load data local inpath '../../data/files/cbo_t2.txt' into table cbo_t2 partition (dt='2014');
load data local inpath '../../data/files/cbo_t3.txt' into table cbo_t3;
CREATE TABLE part(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
);
LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part;
CREATE TABLE lineitem (L_ORDERKEY INT,
L_PARTKEY INT,
L_SUPPKEY INT,
L_LINENUMBER INT,
L_QUANTITY DOUBLE,
L_EXTENDEDPRICE DOUBLE,
L_DISCOUNT DOUBLE,
L_TAX DOUBLE,
L_RETURNFLAG STRING,
L_LINESTATUS STRING,
l_shipdate STRING,
L_COMMITDATE STRING,
L_RECEIPTDATE STRING,
L_SHIPINSTRUCT STRING,
L_SHIPMODE STRING,
L_COMMENT STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem;
create table src_cbo as select * from src;
analyze table cbo_t1 partition (dt) compute statistics;
analyze table cbo_t1 compute statistics for columns key, value, c_int, c_float, c_boolean;
analyze table cbo_t2 partition (dt) compute statistics;
analyze table cbo_t2 compute statistics for columns key, value, c_int, c_float, c_boolean;
analyze table cbo_t3 compute statistics;
analyze table cbo_t3 compute statistics for columns key, value, c_int, c_float, c_boolean;
analyze table src_cbo compute statistics;
analyze table src_cbo compute statistics for columns;
analyze table part compute statistics;
analyze table part compute statistics for columns;
analyze table lineitem compute statistics;
analyze table lineitem compute statistics for columns;
reset;
set hive.stats.dbclass=fs;