blob: dfac48f971320aca73e65aed37d31c5acea209a7 [file]
-- Tests for Hbase - Load/Extract
-- Added April 2014
--
-- @@@ START COPYRIGHT @@@
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
--
-- @@@ END COPYRIGHT @@@
create schema trafodion.hbase;
set schema trafodion.hbase;
CQD COMP_BOOL_226 'ON';
cqd TRAF_LOAD_TAKE_SNAPSHOT 'on';
obey TEST015(clean_up);
log LOG015 clear;
obey TEST015(setup);
cqd COMPRESSED_INTERNAL_FORMAT 'ON';
cqd COMPRESSED_INTERNAL_FORMAT_BMO 'ON';
cqd COMPRESSED_INTERNAL_FORMAT_DEFRAG_RATIO '100';
cqd HIVE_NUM_ESPS_PER_DATANODE '3';
cqd auto_query_retry_warnings 'ON';
obey TEST015(test_bulk_load_simple);
log;
obey TEST015(clean_up);
exit;
?section clean_up
drop index cd_dep_count_IDX;
drop index cd_dep_college_count_IDX;
drop index cd_dep_count_IDX2;
drop index cd_dep_college_count_IDX2;
drop table customer_demographics cascade;
drop table customer_demographics_salt cascade;
drop table customer_address cascade;
drop table customer_address_NOPK cascade;
drop table t015t1 cascade ;
drop table t015t2 cascade;
drop table t015t3 cascade;
drop table t015t4 cascade;
drop table t015t5 cascade;
drop table t015t6 cascade;
drop table "customer_address_delim" ;
?section setup
--------------------------------------------------------------------------
create table customer_demographics
(
cd_demo_sk int not null,
cd_gender char(1),
cd_marital_status char(1),
cd_education_status char(20),
cd_purchase_estimate int,
cd_credit_rating char(10),
cd_dep_count int,
cd_dep_employed_count int,
cd_dep_college_count int,
primary key (cd_demo_sk)
);
create table customer_demographics_salt
(
cd_demo_sk int not null,
cd_gender char(1),
cd_marital_status char(1),
cd_education_status char(20),
cd_purchase_estimate int,
cd_credit_rating char(10),
cd_dep_count int,
cd_dep_employed_count int,
cd_dep_college_count int,
primary key (cd_demo_sk)
)
salt using 4 partitions on (cd_demo_sk);
create table customer_address
(
ca_address_sk int not null,
ca_address_id char(16),
ca_street_number char(10),
ca_street_name varchar(60),
ca_street_type char(15),
ca_suite_number char(10),
ca_city varchar(60),
ca_county varchar(30),
ca_state char(2),
ca_zip char(10),
ca_country varchar(30),
ca_gmt_offset decimal(5,2),
ca_location_type char(20),
primary key (ca_address_sk)
);
create table customer_address_NOPK
(
ca_address_sk int not null,
ca_address_id char(16),
ca_street_number char(10),
ca_street_name varchar(60),
ca_street_type char(15),
ca_suite_number char(10),
ca_city varchar(60),
ca_county varchar(30),
ca_state char(2),
ca_zip char(10),
ca_country varchar(30),
ca_gmt_offset decimal(5,2),
ca_location_type char(20)
);
create table t015t1 (a int) ;
create table t015t2 (a int) ;
create table t015t3 ( a int not null primary key, b int, c int);
create table t015t4 ( a int not null primary key, b int, c int) salt using 2 partitions on (a);
create table t015t5 ( a int not null , b int, c int) store by (a);
create table t015t6 ( a int not null , b int, c int);
delete from t015t1;
insert into t015t1 values (1),(2),(3),(4),(5);
?section test_bulk_load_simple
--------------------------------------------------------------------------
prepare s from load cleanup for table t015t2;
explain options 'f' s;
execute s;
prepare s from load transform into t015t2 select * from t015t1;
explain options 'f' s;
execute s;
prepare s from load complete for table t015t2;
explain options 'f' s;
execute s;
select * from t015t2 order by a;
prepare s from load into t015t2 select * from t015t1;
explain options 'f' s;
execute s;
prepare s from load with no recovery into t015t2 select * from t015t1;
explain options 'f' s;
execute s;
--log error rows
prepare s from load with log error rows into t015t2 select * from t015t1;
explain options 'f' s;
execute s;
delete from t015t2;
insert into t015t2 values (100),(200),(300);
select * from t015t2 order by a;
load with truncate table into t015t2 select * from t015t1;
select * from t015t2 order by a;
load transform into t015t3 select a,a,a from t015t1;
create index t015t3idx on t015t3(b);
load transform into t015t3 select a,a,a from t015t1;
drop index t015t3idx ;
alter table t015t3 add constraint t015t3_cnst check (b>100);
load transform into t015t3 select a,a,a from t015t1;
alter table t015t3 drop constraint t015t3_cnst;
prepare s3 from load transform into t015t3 select a,a,a from t015t1 <<+ cardinality 10e0 >> ;
explain options 'f' s3;
log LOG015_plan.log clear;
explain s3;
log;
sh grep "sort_key" LOG015_PLAN.LOG >> LOG015 ;
log LOG015;
prepare s4 from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e0 >> ;
explain options 'f' s4;
log;
log LOG015_plan.log clear;
explain s4;
log;
sh grep -A 2 "sort_key" LOG015_PLAN.LOG >> LOG015 ;
log LOG015;
prepare s5 from load transform into t015t5 select a,a,a from t015t1 <<+ cardinality 10e0 >> ;
explain options 'f' s5;
log;
log LOG015_plan.log clear;
explain s5;
log;
sh grep "sort_key" LOG015_PLAN.LOG >> LOG015 ;
log LOG015;
prepare s6 from load transform into t015t6 select a,a,a from t015t1;
explain options 'f' s6;
--------------------------------------------------------------------------
select count(*) from hive.hive.customer_address where ca_address_sk <= 5000;
select count(*) from customer_address;
prepare s from
load transform into customer_address
select * from hive.hive.customer_address where ca_address_sk <= 5000;
explain options 'f' s;
load into customer_address
select * from hive.hive.customer_address where ca_address_sk <= 5000;
select count(*) from customer_address;
select [first 20] * from customer_address where ca_address_sk <= 5000 order by ca_address_sk ;
--------------------------------------------------------------------------
--select count(*) from hive.hive.customer_address;
select count(*) from customer_address_NOPK;
prepare s from
load transform into customer_address_NOPK
select * from hive.hive.customer_address where ca_address_sk <= 5000;
explain options 'f' s;
load into customer_address_NOPK
select * from hive.hive.customer_address where ca_address_sk <= 5000;
select count(*) from customer_address_NOPK;
select [first 20] * from customer_address_NOPK order by ca_address_sk ;
---------------------
select count(*) from hive.hive.customer_demographics where cd_demo_sk <= 5000;
select count(*) from customer_demographics;
prepare s from
load transform into customer_demographics
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
explain options 'f' s;
log LOG015_plan.log clear;
explain s;
log;
sh grep "ESP_EXCHANGE.*6" LOG015_PLAN.LOG >> LOG015 ;
sh grep -A 18 "ESP_EXCHANGE.*6" LOG015_PLAN.LOG | grep partitioning -A 1 >> LOG015 ;
sh grep "ESP_EXCHANGE.*2" LOG015_PLAN.LOG >> LOG015 ;
sh grep -A 18 "ESP_EXCHANGE.*2" LOG015_PLAN.LOG | grep parent_partitioning -A 1 >> LOG015 ;
log LOG015;
load into customer_demographics
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
select count(*) from customer_demographics;
select [first 20] * from customer_demographics order by cd_demo_sk ;
---------------------
select count(*) from hive.hive.customer_demographics where cd_demo_sk <= 5000;;
select count(*) from customer_demographics_salt;
prepare s from
load transform into customer_demographics_salt
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;;
explain options 'f' s;
log LOG015_plan.log clear;
explain s;
log;
sh grep "ESP_EXCHANGE.*6" LOG015_PLAN.LOG >> LOG015 ;
sh grep -A 18 "ESP_EXCHANGE.*6" LOG015_PLAN.LOG | grep partitioning -A 3 >> LOG015 ;
sh grep "ESP_EXCHANGE.*2" LOG015_PLAN.LOG >> LOG015 ;
sh grep -A 18 "ESP_EXCHANGE.*2" LOG015_PLAN.LOG | grep parent_partitioning -A 3 >> LOG015 ;
log LOG015;
load into customer_demographics_salt
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;;
select count(*) from customer_demographics_salt;
select [first 20] * from customer_demographics_salt order by cd_demo_sk ;
----------------
-- with no duplicats option
cqd comp_bool_226 'on';
drop table t015t3 cascade;
drop table t015t4 cascade;
drop table t015t5 cascade;
drop table t015t6 cascade;
create table t015t3 ( a int not null primary key, b int, c int);
create table t015t4 ( a int not null primary key, b int, c int) salt using 2 partitions on (a);
create table t015t5 ( a int not null , b int, c int) store by (a);
create table t015t6 ( a int not null , b int, c int);
delete from t015t1;
insert into t015t1 values (25),(24),(23),(7),(5),(5),(2),(10),(11),(12),(13),(14),(15);
----produces error
prepare s from load transform into t015t3 select a,a,a from t015t1;
explain options 'f' s;
execute s;
load with no output into t015t3 select a,a,a from t015t1;
select * from t015t3 order by a;
---produces eror
prepare s from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>;
explain options 'f' s;
execute s;
load with no output into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>;
select * from t015t4 order by a;
---
prepare s from load transform into t015t5 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
load with no output into t015t5 select a,a,a from t015t1 ;
select * from t015t5 order by a;
---
prepare s from load transform into t015t6 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
load with no output into t015t6 select a,a,a from t015t1 ;
select * from t015t6 order by a;
------------------------------
----
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t3 select a,a,a from t015t1;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t3 select a,a,a from t015t1;
select * from t015t3 order by a;
---
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>;
select * from t015t4 order by a;
---
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t5 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t5 select a,a,a from t015t1 ;
select * from t015t5 order by a;
---
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t6 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t6 select a,a,a from t015t1 ;
select * from t015t4 order by a;
--Add tests with indexes on bulkoaded tables
drop table customer_demographics_salt cascade;
drop table customer_demographics cascade;
create table customer_demographics
(
cd_demo_sk int not null,
cd_gender char(1),
cd_marital_status char(1),
cd_education_status char(20),
cd_purchase_estimate int,
cd_credit_rating char(10),
cd_dep_count int,
cd_dep_employed_count int,
cd_dep_college_count int
) store by (cd_demo_sk);
create table customer_demographics_salt
(
cd_demo_sk int not null,
cd_gender char(1),
cd_marital_status char(1),
cd_education_status char(20),
cd_purchase_estimate int,
cd_credit_rating char(10),
cd_dep_count int,
cd_dep_employed_count int,
cd_dep_college_count int,
primary key (cd_demo_sk)
)
salt using 4 partitions on (cd_demo_sk);
create index cd_dep_count_IDX on customer_demographics(cd_dep_count);
create index cd_dep_college_count_IDX on customer_demographics(cd_dep_college_count);
explain options 'f'
load transform into customer_demographics
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
load into customer_demographics
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
set parserflags 1;
select count(*) from customer_demographics;
select count(*) from table(index_table cd_dep_count_IDX);
select count(*) from table(index_table cd_dep_college_count_IDX);
load with rebuild indexes into customer_demographics
select * from hive.hive.customer_demographics where cd_demo_sk > 5000 and cd_demo_sk <= 6000 ;
select count(*) from customer_demographics;
select count(*) from table(index_table cd_dep_count_IDX);
select count(*) from table(index_table cd_dep_college_count_IDX);
create index cd_dep_count_IDX2 on customer_demographics_salt(cd_dep_count);
create index cd_dep_college_count_IDX2 on customer_demographics_salt(cd_dep_college_count);
---------------------------
load into customer_demographics_salt
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
set parserflags 1;
select count(*) from customer_demographics_salt;
select count(*) from table(index_table cd_dep_count_IDX2);
select count(*) from table(index_table cd_dep_college_count_IDX2);
drop index cd_dep_count_IDX2;
drop index cd_dep_college_count_IDX2;
-- --load with upsert using load
-- drop table customer_demographics_salt cascade;
-- drop table customer_demographics cascade;
-- create table customer_demographics
-- (
-- cd_demo_sk int not null,
-- cd_gender char(1),
-- cd_marital_status char(1),
-- cd_education_status char(20),
-- cd_purchase_estimate int,
-- cd_credit_rating char(10),
-- cd_dep_count int,
-- cd_dep_employed_count int,
-- cd_dep_college_count int,
-- primary key (cd_demo_sk)
-- );
--
-- create table customer_demographics_salt
-- (
-- cd_demo_sk int not null,
-- cd_gender char(1),
-- cd_marital_status char(1),
-- cd_education_status char(20),
-- cd_purchase_estimate int,
-- cd_credit_rating char(10),
-- cd_dep_count int,
-- cd_dep_employed_count int,
-- cd_dep_college_count int,
-- primary key (cd_demo_sk)
-- )
-- salt using 4 partitions on (cd_demo_sk);
-- create index cd_dep_count_IDX on customer_demographics(cd_dep_count) no populate ;
-- create index cd_dep_college_count_IDX on customer_demographics(cd_dep_college_count) no populate;
--
-- load with upsert using load into customer_demographics
-- select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
--
-- set parserflags 1;
-- select count(*) from customer_demographics;
-- select count(*) from table(index_table cd_dep_count_IDX);
-- select count(*) from table(index_table cd_dep_college_count_IDX);
--
-- drop index cd_dep_count_IDX;
-- drop index cd_dep_college_count_IDX;
--
-- create index cd_dep_count_IDX2 on customer_demographics_salt(cd_dep_count) no populate ;
-- create index cd_dep_college_count_IDX2 on customer_demographics_salt(cd_dep_college_count) no populate;
-- ---------------------------
-- load with upsert using load into customer_demographics_salt
-- select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
--
-- set parserflags 1;
-- select count(*) from customer_demographics_salt;
-- select count(*) from table(index_table cd_dep_count_IDX2);
-- select count(*) from table(index_table cd_dep_college_count_IDX2);
--
-- drop index cd_dep_count_IDX2;
-- drop index cd_dep_college_count_IDX2;
-----
drop table customer_address cascade;
create table "customer_address_delim"
(
ca_address_sk int not null,
ca_address_id char(16),
ca_street_number char(10),
ca_street_name varchar(60),
ca_street_type char(15),
ca_suite_number char(10),
ca_city varchar(60),
ca_county varchar(30),
ca_state char(2),
ca_zip char(10),
ca_country varchar(30),
ca_gmt_offset decimal(5,2),
ca_location_type char(20),
primary key (ca_address_sk)
)
salt using 4 partitions
HBASE_OPTIONS (data_block_encoding = 'FAST_DIFF', compression = 'GZ')
;
select count(*) from hive.hive.customer_address where ca_address_sk <= 5000;
select count(*) from "customer_address_delim";
prepare s from
load transform into "customer_address_delim"
select * from hive.hive.customer_address where ca_address_sk <= 5000;
explain options 'f' s;
load into "customer_address_delim"
select * from hive.hive.customer_address where ca_address_sk <= 5000;
select count(*) from "customer_address_delim";
select [first 20] * from "customer_address_delim" order by ca_address_sk ;
cqd attempt_esp_parallelism 'off';
load with no output into t015t4 select a,a,a from t015t1;