| -- Tests for Hbase - Load/Extract |
| -- Added April 2014 |
| -- |
| -- @@@ START COPYRIGHT @@@ |
| -- |
| -- Licensed to the Apache Software Foundation (ASF) under one |
| -- or more contributor license agreements. See the NOTICE file |
| -- distributed with this work for additional information |
| -- regarding copyright ownership. The ASF licenses this file |
| -- to you under the Apache License, Version 2.0 (the |
| -- "License"); you may not use this file except in compliance |
| -- with the License. You may obtain a copy of the License at |
| -- |
| -- http://www.apache.org/licenses/LICENSE-2.0 |
| -- |
| -- Unless required by applicable law or agreed to in writing, |
| -- software distributed under the License is distributed on an |
| -- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| -- KIND, either express or implied. See the License for the |
| -- specific language governing permissions and limitations |
| -- under the License. |
| -- |
| -- @@@ END COPYRIGHT @@@ |
| |
| create schema trafodion.hbase; |
| set schema trafodion.hbase; |
| CQD COMP_BOOL_226 'ON'; |
| |
| cqd TRAF_LOAD_TAKE_SNAPSHOT 'on'; |
| |
| obey TEST015(clean_up); |
| |
| |
| log LOG015 clear; |
| |
| |
| obey TEST015(setup); |
| |
| cqd COMPRESSED_INTERNAL_FORMAT 'ON'; |
| cqd COMPRESSED_INTERNAL_FORMAT_BMO 'ON'; |
| cqd COMPRESSED_INTERNAL_FORMAT_DEFRAG_RATIO '100'; |
| cqd HIVE_NUM_ESPS_PER_DATANODE '3'; |
| |
| cqd auto_query_retry_warnings 'ON'; |
| |
| obey TEST015(test_bulk_load_simple); |
| |
| log; |
| |
| obey TEST015(clean_up); |
| exit; |
| |
| ?section clean_up |
| drop index cd_dep_count_IDX; |
| drop index cd_dep_college_count_IDX; |
| drop index cd_dep_count_IDX2; |
| drop index cd_dep_college_count_IDX2; |
| drop table customer_demographics cascade; |
| drop table customer_demographics_salt cascade; |
| drop table customer_address cascade; |
| drop table customer_address_NOPK cascade; |
| drop table t015t1 cascade ; |
| drop table t015t2 cascade; |
| drop table t015t3 cascade; |
| drop table t015t4 cascade; |
| drop table t015t5 cascade; |
| drop table t015t6 cascade; |
| drop table "customer_address_delim" ; |
| |
| ?section setup |
| -------------------------------------------------------------------------- |
| create table customer_demographics |
| ( |
| cd_demo_sk int not null, |
| cd_gender char(1), |
| cd_marital_status char(1), |
| cd_education_status char(20), |
| cd_purchase_estimate int, |
| cd_credit_rating char(10), |
| cd_dep_count int, |
| cd_dep_employed_count int, |
| cd_dep_college_count int, |
| primary key (cd_demo_sk) |
| ); |
| |
| create table customer_demographics_salt |
| ( |
| cd_demo_sk int not null, |
| cd_gender char(1), |
| cd_marital_status char(1), |
| cd_education_status char(20), |
| cd_purchase_estimate int, |
| cd_credit_rating char(10), |
| cd_dep_count int, |
| cd_dep_employed_count int, |
| cd_dep_college_count int, |
| primary key (cd_demo_sk) |
| ) |
| salt using 4 partitions on (cd_demo_sk); |
| |
| |
| create table customer_address |
| ( |
| ca_address_sk int not null, |
| ca_address_id char(16), |
| ca_street_number char(10), |
| ca_street_name varchar(60), |
| ca_street_type char(15), |
| ca_suite_number char(10), |
| ca_city varchar(60), |
| ca_county varchar(30), |
| ca_state char(2), |
| ca_zip char(10), |
| ca_country varchar(30), |
| ca_gmt_offset decimal(5,2), |
| ca_location_type char(20), |
| primary key (ca_address_sk) |
| ); |
| |
| |
| create table customer_address_NOPK |
| ( |
| ca_address_sk int not null, |
| ca_address_id char(16), |
| ca_street_number char(10), |
| ca_street_name varchar(60), |
| ca_street_type char(15), |
| ca_suite_number char(10), |
| ca_city varchar(60), |
| ca_county varchar(30), |
| ca_state char(2), |
| ca_zip char(10), |
| ca_country varchar(30), |
| ca_gmt_offset decimal(5,2), |
| ca_location_type char(20) |
| ); |
| |
| |
| create table t015t1 (a int) ; |
| create table t015t2 (a int) ; |
| create table t015t3 ( a int not null primary key, b int, c int); |
| create table t015t4 ( a int not null primary key, b int, c int) salt using 2 partitions on (a); |
| create table t015t5 ( a int not null , b int, c int) store by (a); |
| create table t015t6 ( a int not null , b int, c int); |
| |
| delete from t015t1; |
| insert into t015t1 values (1),(2),(3),(4),(5); |
| |
| |
| |
| ?section test_bulk_load_simple |
| -------------------------------------------------------------------------- |
| |
| |
| prepare s from load cleanup for table t015t2; |
| explain options 'f' s; |
| execute s; |
| |
| prepare s from load transform into t015t2 select * from t015t1; |
| explain options 'f' s; |
| execute s; |
| |
| |
| prepare s from load complete for table t015t2; |
| explain options 'f' s; |
| execute s; |
| |
| |
| select * from t015t2 order by a; |
| |
| prepare s from load into t015t2 select * from t015t1; |
| explain options 'f' s; |
| execute s; |
| |
| prepare s from load with no recovery into t015t2 select * from t015t1; |
| explain options 'f' s; |
| execute s; |
| |
| --log error rows |
| prepare s from load with log error rows into t015t2 select * from t015t1; |
| explain options 'f' s; |
| execute s; |
| |
| delete from t015t2; |
| insert into t015t2 values (100),(200),(300); |
| select * from t015t2 order by a; |
| load with truncate table into t015t2 select * from t015t1; |
| |
| select * from t015t2 order by a; |
| |
| |
| load transform into t015t3 select a,a,a from t015t1; |
| |
| create index t015t3idx on t015t3(b); |
| |
| load transform into t015t3 select a,a,a from t015t1; |
| |
| |
| drop index t015t3idx ; |
| |
| alter table t015t3 add constraint t015t3_cnst check (b>100); |
| |
| load transform into t015t3 select a,a,a from t015t1; |
| |
| |
| alter table t015t3 drop constraint t015t3_cnst; |
| |
| prepare s3 from load transform into t015t3 select a,a,a from t015t1 <<+ cardinality 10e0 >> ; |
| explain options 'f' s3; |
| log LOG015_plan.log clear; |
| explain s3; |
| log; |
| sh grep "sort_key" LOG015_PLAN.LOG >> LOG015 ; |
| log LOG015; |
| prepare s4 from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e0 >> ; |
| explain options 'f' s4; |
| log; |
| log LOG015_plan.log clear; |
| explain s4; |
| log; |
| sh grep -A 2 "sort_key" LOG015_PLAN.LOG >> LOG015 ; |
| log LOG015; |
| prepare s5 from load transform into t015t5 select a,a,a from t015t1 <<+ cardinality 10e0 >> ; |
| explain options 'f' s5; |
| log; |
| log LOG015_plan.log clear; |
| explain s5; |
| log; |
| sh grep "sort_key" LOG015_PLAN.LOG >> LOG015 ; |
| log LOG015; |
| prepare s6 from load transform into t015t6 select a,a,a from t015t1; |
| explain options 'f' s6; |
| |
| -------------------------------------------------------------------------- |
| |
| select count(*) from hive.hive.customer_address where ca_address_sk <= 5000; |
| select count(*) from customer_address; |
| |
| prepare s from |
| load transform into customer_address |
| select * from hive.hive.customer_address where ca_address_sk <= 5000; |
| explain options 'f' s; |
| |
| load into customer_address |
| select * from hive.hive.customer_address where ca_address_sk <= 5000; |
| |
| select count(*) from customer_address; |
| |
| select [first 20] * from customer_address where ca_address_sk <= 5000 order by ca_address_sk ; |
| |
| -------------------------------------------------------------------------- |
| --select count(*) from hive.hive.customer_address; |
| select count(*) from customer_address_NOPK; |
| |
| prepare s from |
| load transform into customer_address_NOPK |
| select * from hive.hive.customer_address where ca_address_sk <= 5000; |
| explain options 'f' s; |
| |
| load into customer_address_NOPK |
| select * from hive.hive.customer_address where ca_address_sk <= 5000; |
| |
| select count(*) from customer_address_NOPK; |
| |
| select [first 20] * from customer_address_NOPK order by ca_address_sk ; |
| |
| --------------------- |
| |
| select count(*) from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| select count(*) from customer_demographics; |
| |
| prepare s from |
| load transform into customer_demographics |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| explain options 'f' s; |
| |
| log LOG015_plan.log clear; |
| explain s; |
| log; |
| sh grep "ESP_EXCHANGE.*6" LOG015_PLAN.LOG >> LOG015 ; |
| sh grep -A 18 "ESP_EXCHANGE.*6" LOG015_PLAN.LOG | grep partitioning -A 1 >> LOG015 ; |
| sh grep "ESP_EXCHANGE.*2" LOG015_PLAN.LOG >> LOG015 ; |
| sh grep -A 18 "ESP_EXCHANGE.*2" LOG015_PLAN.LOG | grep parent_partitioning -A 1 >> LOG015 ; |
| |
| log LOG015; |
| |
| load into customer_demographics |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| |
| select count(*) from customer_demographics; |
| |
| select [first 20] * from customer_demographics order by cd_demo_sk ; |
| |
| |
| --------------------- |
| select count(*) from hive.hive.customer_demographics where cd_demo_sk <= 5000;; |
| select count(*) from customer_demographics_salt; |
| |
| prepare s from |
| load transform into customer_demographics_salt |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;; |
| explain options 'f' s; |
| |
| log LOG015_plan.log clear; |
| explain s; |
| log; |
| |
| sh grep "ESP_EXCHANGE.*6" LOG015_PLAN.LOG >> LOG015 ; |
| sh grep -A 18 "ESP_EXCHANGE.*6" LOG015_PLAN.LOG | grep partitioning -A 3 >> LOG015 ; |
| sh grep "ESP_EXCHANGE.*2" LOG015_PLAN.LOG >> LOG015 ; |
| sh grep -A 18 "ESP_EXCHANGE.*2" LOG015_PLAN.LOG | grep parent_partitioning -A 3 >> LOG015 ; |
| log LOG015; |
| |
| load into customer_demographics_salt |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;; |
| |
| select count(*) from customer_demographics_salt; |
| |
| select [first 20] * from customer_demographics_salt order by cd_demo_sk ; |
| |
| ---------------- |
| |
| -- with no duplicats option |
| |
| |
| cqd comp_bool_226 'on'; |
| drop table t015t3 cascade; |
| drop table t015t4 cascade; |
| drop table t015t5 cascade; |
| drop table t015t6 cascade; |
| |
| create table t015t3 ( a int not null primary key, b int, c int); |
| create table t015t4 ( a int not null primary key, b int, c int) salt using 2 partitions on (a); |
| create table t015t5 ( a int not null , b int, c int) store by (a); |
| create table t015t6 ( a int not null , b int, c int); |
| delete from t015t1; |
| insert into t015t1 values (25),(24),(23),(7),(5),(5),(2),(10),(11),(12),(13),(14),(15); |
| |
| |
| ----produces error |
| prepare s from load transform into t015t3 select a,a,a from t015t1; |
| explain options 'f' s; |
| execute s; |
| load with no output into t015t3 select a,a,a from t015t1; |
| select * from t015t3 order by a; |
| ---produces eror |
| prepare s from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>; |
| explain options 'f' s; |
| execute s; |
| load with no output into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>; |
| select * from t015t4 order by a; |
| --- |
| prepare s from load transform into t015t5 select a,a,a from t015t1 ; |
| explain options 'f' s; |
| execute s; |
| load with no output into t015t5 select a,a,a from t015t1 ; |
| select * from t015t5 order by a; |
| --- |
| prepare s from load transform into t015t6 select a,a,a from t015t1 ; |
| explain options 'f' s; |
| execute s; |
| load with no output into t015t6 select a,a,a from t015t1 ; |
| select * from t015t6 order by a; |
| ------------------------------ |
| |
| ---- |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON'; |
| prepare s from load transform into t015t3 select a,a,a from t015t1; |
| explain options 'f' s; |
| execute s; |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset; |
| load with no output, no duplicate check into t015t3 select a,a,a from t015t1; |
| select * from t015t3 order by a; |
| --- |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON'; |
| prepare s from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>; |
| explain options 'f' s; |
| execute s; |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset; |
| load with no output, no duplicate check into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>; |
| select * from t015t4 order by a; |
| --- |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON'; |
| prepare s from load transform into t015t5 select a,a,a from t015t1 ; |
| explain options 'f' s; |
| execute s; |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset; |
| load with no output, no duplicate check into t015t5 select a,a,a from t015t1 ; |
| select * from t015t5 order by a; |
| --- |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON'; |
| prepare s from load transform into t015t6 select a,a,a from t015t1 ; |
| explain options 'f' s; |
| execute s; |
| cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset; |
| load with no output, no duplicate check into t015t6 select a,a,a from t015t1 ; |
| select * from t015t4 order by a; |
| |
| |
| --Add tests with indexes on bulkoaded tables |
| drop table customer_demographics_salt cascade; |
| drop table customer_demographics cascade; |
| create table customer_demographics |
| ( |
| cd_demo_sk int not null, |
| cd_gender char(1), |
| cd_marital_status char(1), |
| cd_education_status char(20), |
| cd_purchase_estimate int, |
| cd_credit_rating char(10), |
| cd_dep_count int, |
| cd_dep_employed_count int, |
| cd_dep_college_count int |
| ) store by (cd_demo_sk); |
| |
| create table customer_demographics_salt |
| ( |
| cd_demo_sk int not null, |
| cd_gender char(1), |
| cd_marital_status char(1), |
| cd_education_status char(20), |
| cd_purchase_estimate int, |
| cd_credit_rating char(10), |
| cd_dep_count int, |
| cd_dep_employed_count int, |
| cd_dep_college_count int, |
| primary key (cd_demo_sk) |
| ) |
| salt using 4 partitions on (cd_demo_sk); |
| create index cd_dep_count_IDX on customer_demographics(cd_dep_count); |
| create index cd_dep_college_count_IDX on customer_demographics(cd_dep_college_count); |
| |
| explain options 'f' |
| load transform into customer_demographics |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| |
| load into customer_demographics |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| |
| set parserflags 1; |
| select count(*) from customer_demographics; |
| select count(*) from table(index_table cd_dep_count_IDX); |
| select count(*) from table(index_table cd_dep_college_count_IDX); |
| |
| load with rebuild indexes into customer_demographics |
| select * from hive.hive.customer_demographics where cd_demo_sk > 5000 and cd_demo_sk <= 6000 ; |
| |
| select count(*) from customer_demographics; |
| select count(*) from table(index_table cd_dep_count_IDX); |
| select count(*) from table(index_table cd_dep_college_count_IDX); |
| |
| create index cd_dep_count_IDX2 on customer_demographics_salt(cd_dep_count); |
| create index cd_dep_college_count_IDX2 on customer_demographics_salt(cd_dep_college_count); |
| --------------------------- |
| load into customer_demographics_salt |
| select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| |
| set parserflags 1; |
| select count(*) from customer_demographics_salt; |
| select count(*) from table(index_table cd_dep_count_IDX2); |
| select count(*) from table(index_table cd_dep_college_count_IDX2); |
| |
| drop index cd_dep_count_IDX2; |
| drop index cd_dep_college_count_IDX2; |
| |
| -- --load with upsert using load |
| -- drop table customer_demographics_salt cascade; |
| -- drop table customer_demographics cascade; |
| -- create table customer_demographics |
| -- ( |
| -- cd_demo_sk int not null, |
| -- cd_gender char(1), |
| -- cd_marital_status char(1), |
| -- cd_education_status char(20), |
| -- cd_purchase_estimate int, |
| -- cd_credit_rating char(10), |
| -- cd_dep_count int, |
| -- cd_dep_employed_count int, |
| -- cd_dep_college_count int, |
| -- primary key (cd_demo_sk) |
| -- ); |
| -- |
| -- create table customer_demographics_salt |
| -- ( |
| -- cd_demo_sk int not null, |
| -- cd_gender char(1), |
| -- cd_marital_status char(1), |
| -- cd_education_status char(20), |
| -- cd_purchase_estimate int, |
| -- cd_credit_rating char(10), |
| -- cd_dep_count int, |
| -- cd_dep_employed_count int, |
| -- cd_dep_college_count int, |
| -- primary key (cd_demo_sk) |
| -- ) |
| -- salt using 4 partitions on (cd_demo_sk); |
| -- create index cd_dep_count_IDX on customer_demographics(cd_dep_count) no populate ; |
| -- create index cd_dep_college_count_IDX on customer_demographics(cd_dep_college_count) no populate; |
| -- |
| -- load with upsert using load into customer_demographics |
| -- select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| -- |
| -- set parserflags 1; |
| -- select count(*) from customer_demographics; |
| -- select count(*) from table(index_table cd_dep_count_IDX); |
| -- select count(*) from table(index_table cd_dep_college_count_IDX); |
| -- |
| -- drop index cd_dep_count_IDX; |
| -- drop index cd_dep_college_count_IDX; |
| -- |
| -- create index cd_dep_count_IDX2 on customer_demographics_salt(cd_dep_count) no populate ; |
| -- create index cd_dep_college_count_IDX2 on customer_demographics_salt(cd_dep_college_count) no populate; |
| -- --------------------------- |
| -- load with upsert using load into customer_demographics_salt |
| -- select * from hive.hive.customer_demographics where cd_demo_sk <= 5000; |
| -- |
| -- set parserflags 1; |
| -- select count(*) from customer_demographics_salt; |
| -- select count(*) from table(index_table cd_dep_count_IDX2); |
| -- select count(*) from table(index_table cd_dep_college_count_IDX2); |
| -- |
| -- drop index cd_dep_count_IDX2; |
| -- drop index cd_dep_college_count_IDX2; |
| |
| ----- |
| drop table customer_address cascade; |
| |
| create table "customer_address_delim" |
| ( |
| ca_address_sk int not null, |
| ca_address_id char(16), |
| ca_street_number char(10), |
| ca_street_name varchar(60), |
| ca_street_type char(15), |
| ca_suite_number char(10), |
| ca_city varchar(60), |
| ca_county varchar(30), |
| ca_state char(2), |
| ca_zip char(10), |
| ca_country varchar(30), |
| ca_gmt_offset decimal(5,2), |
| ca_location_type char(20), |
| primary key (ca_address_sk) |
| ) |
| salt using 4 partitions |
| HBASE_OPTIONS (data_block_encoding = 'FAST_DIFF', compression = 'GZ') |
| ; |
| |
| select count(*) from hive.hive.customer_address where ca_address_sk <= 5000; |
| select count(*) from "customer_address_delim"; |
| |
| prepare s from |
| load transform into "customer_address_delim" |
| select * from hive.hive.customer_address where ca_address_sk <= 5000; |
| explain options 'f' s; |
| |
| load into "customer_address_delim" |
| select * from hive.hive.customer_address where ca_address_sk <= 5000; |
| |
| select count(*) from "customer_address_delim"; |
| |
| select [first 20] * from "customer_address_delim" order by ca_address_sk ; |
| |
| cqd attempt_esp_parallelism 'off'; |
| load with no output into t015t4 select a,a,a from t015t1; |
| |
| |
| |
| |
| |
| |
| |