blob: 0b2b6fe592c2495387796605346249c6a2839d50 [file] [log] [blame]
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License") you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
DROP TABLE hbase_table_1
CREATE TABLE hbase_table_1(key int, value string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")
DESCRIBE EXTENDED hbase_table_1
key int from deserializer
value string from deserializer
Detailed Table Information Table(tableName:hbase_table_1, dbName:default, owner:testuser1@MINOTAUR.CLOUDERA.COM, createTime:1301900428, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/hbase_table_1, inputFormat:org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat, outputFormat:org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.hbase.HBaseSerDe, parameters:{serialization.format=1, hbase.columns.mapping=cf:string}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[], parameters:{hbase.table.name=hbase_table_0, transient_lastDdlTime=1301900428, storage_handler=org.apache.hadoop.hive.hbase.HBaseStorageHandler}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
select * from hbase_table_1
EXPLAIN FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (% (TOK_TABLE_OR_COL key) 2) 0))))
STAGE DEPENDENCIES:
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-0
Map Reduce
Alias -> Map Operator Tree:
src
TableScan
alias: src
Filter Operator
predicate:
expr: ((key % 2) = 0)
type: boolean
Filter Operator
predicate:
expr: ((key % 2) = 0)
type: boolean
Select Operator
expressions:
expr: key
type: string
expr: value
type: string
outputColumnNames: _col0, _col1
Select Operator
expressions:
expr: UDFToInteger(_col0)
type: int
expr: _col1
type: string
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
GlobalTableId: 1
table:
input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
serde: org.apache.hadoop.hive.hbase.HBaseSerDe
name: default.hbase_table_1
FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0
DROP TABLE hbase_table_2
CREATE EXTERNAL TABLE hbase_table_2(key int, value string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")
EXPLAIN
SELECT Y.*
FROM
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN
(SELECT src.* FROM src) Y
ON (x.key = Y.key)
ORDER BY key, value LIMIT 20
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 20)))
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
x:hbase_table_1
TableScan
alias: hbase_table_1
Select Operator
expressions:
expr: key
type: int
outputColumnNames: _col0
Reduce Output Operator
key expressions:
expr: UDFToDouble(_col0)
type: double
sort order: +
Map-reduce partition columns:
expr: UDFToDouble(_col0)
type: double
tag: 0
y:src
TableScan
alias: src
Select Operator
expressions:
expr: key
type: string
expr: value
type: string
outputColumnNames: _col0, _col1
Reduce Output Operator
key expressions:
expr: UDFToDouble(_col0)
type: double
sort order: +
Map-reduce partition columns:
expr: UDFToDouble(_col0)
type: double
tag: 1
value expressions:
expr: _col0
type: string
expr: _col1
type: string
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
0
1 {VALUE._col0} {VALUE._col1}
handleSkewJoin: false
outputColumnNames: _col2, _col3
Select Operator
expressions:
expr: _col2
type: string
expr: _col3
type: string
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
hdfs://minotaur01.sf.cloudera.com:17020/tmp/hive-testuser1/hive_2011-04-04_00-00-37_447_311787050586995300/-mr-10002
Reduce Output Operator
key expressions:
expr: _col0
type: string
expr: _col1
type: string
sort order: ++
tag: -1
value expressions:
expr: _col0
type: string
expr: _col1
type: string
Reduce Operator Tree:
Extract
Limit
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Stage: Stage-0
Fetch Operator
limit: 20
SELECT Y.*
FROM
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN
(SELECT src.* FROM src) Y
ON (x.key = Y.key)
ORDER BY key, value LIMIT 20
0 val_0
0 val_0
0 val_0
10 val_10
100 val_100
100 val_100
104 val_104
104 val_104
114 val_114
116 val_116
118 val_118
118 val_118
12 val_12
12 val_12
120 val_120
120 val_120
126 val_126
128 val_128
128 val_128
128 val_128
EXPLAIN
SELECT Y.*
FROM
(SELECT hbase_table_1.* FROM hbase_table_1 WHERE hbase_table_1.key > 100) x
JOIN
(SELECT hbase_table_2.* FROM hbase_table_2 WHERE hbase_table_2.key < 120) Y
ON (x.key = Y.key)
ORDER BY key, value
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL hbase_table_1) key) 100)))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_2)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL hbase_table_2) key) 120)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
x:hbase_table_1
TableScan
alias: hbase_table_1
Filter Operator
predicate:
expr: (key > 100)
type: boolean
Filter Operator
predicate:
expr: (key > 100)
type: boolean
Select Operator
expressions:
expr: key
type: int
outputColumnNames: _col0
Reduce Output Operator
key expressions:
expr: _col0
type: int
sort order: +
Map-reduce partition columns:
expr: _col0
type: int
tag: 0
y:hbase_table_2
TableScan
alias: hbase_table_2
Filter Operator
predicate:
expr: (key < 120)
type: boolean
Filter Operator
predicate:
expr: (key < 120)
type: boolean
Select Operator
expressions:
expr: key
type: int
expr: value
type: string
outputColumnNames: _col0, _col1
Reduce Output Operator
key expressions:
expr: _col0
type: int
sort order: +
Map-reduce partition columns:
expr: _col0
type: int
tag: 1
value expressions:
expr: _col0
type: int
expr: _col1
type: string
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
0
1 {VALUE._col0} {VALUE._col1}
handleSkewJoin: false
outputColumnNames: _col2, _col3
Select Operator
expressions:
expr: _col2
type: int
expr: _col3
type: string
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
hdfs://minotaur01.sf.cloudera.com:17020/tmp/hive-testuser1/hive_2011-04-04_00-01-03_920_5397268077686778739/-mr-10002
Reduce Output Operator
key expressions:
expr: _col0
type: int
expr: _col1
type: string
sort order: ++
tag: -1
value expressions:
expr: _col0
type: int
expr: _col1
type: string
Reduce Operator Tree:
Extract
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Stage: Stage-0
Fetch Operator
limit: -1
SELECT Y.*
FROM
(SELECT hbase_table_1.* FROM hbase_table_1 WHERE hbase_table_1.key > 100) x
JOIN
(SELECT hbase_table_2.* FROM hbase_table_2 WHERE hbase_table_2.key < 120) Y
ON (x.key = Y.key)
ORDER BY key,value
104 val_104
114 val_114
116 val_116
118 val_118
DROP TABLE empty_hbase_table
CREATE TABLE empty_hbase_table(key int, value string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
DROP TABLE empty_normal_table
CREATE TABLE empty_normal_table(key int, value string)
select * from (select count(1) as c from empty_normal_table union all select count(1) as c from empty_hbase_table) x order by c
0
0
select * from (select count(1) c from empty_normal_table union all select count(1) as c from hbase_table_1) x order by c
0
155
select * from (select count(1) c from src union all select count(1) as c from empty_hbase_table) x order by c
0
500
select * from (select count(1) c from src union all select count(1) as c from hbase_table_1) x order by c
155
500
CREATE TABLE hbase_table_3(key int, value string, count int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "cf:val,cf2:count"
)
EXPLAIN
INSERT OVERWRITE TABLE hbase_table_3
SELECT x.key, x.value, Y.count
FROM
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN
(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y
ON (x.key = Y.key)
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) count)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Y) count)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
y:src
TableScan
alias: src
Select Operator
expressions:
expr: key
type: string
outputColumnNames: key
Group By Operator
aggregations:
expr: count(key)
bucketGroup: false
keys:
expr: key
type: string
mode: hash
outputColumnNames: _col0, _col1
Reduce Output Operator
key expressions:
expr: _col0
type: string
sort order: +
Map-reduce partition columns:
expr: _col0
type: string
tag: -1
value expressions:
expr: _col1
type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
expr: count(VALUE._col0)
bucketGroup: false
keys:
expr: KEY._col0
type: string
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
expressions:
expr: _col0
type: string
expr: _col1
type: bigint
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Stage: Stage-0
Map Reduce
Alias -> Map Operator Tree:
$INTNAME
Reduce Output Operator
key expressions:
expr: UDFToDouble(_col0)
type: double
sort order: +
Map-reduce partition columns:
expr: UDFToDouble(_col0)
type: double
tag: 1
value expressions:
expr: _col1
type: bigint
x:hbase_table_1
TableScan
alias: hbase_table_1
Select Operator
expressions:
expr: key
type: int
expr: value
type: string
outputColumnNames: _col0, _col1
Reduce Output Operator
key expressions:
expr: UDFToDouble(_col0)
type: double
sort order: +
Map-reduce partition columns:
expr: UDFToDouble(_col0)
type: double
tag: 0
value expressions:
expr: _col0
type: int
expr: _col1
type: string
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
0 {VALUE._col0} {VALUE._col1}
1 {VALUE._col1}
handleSkewJoin: false
outputColumnNames: _col0, _col1, _col3
Select Operator
expressions:
expr: _col0
type: int
expr: _col1
type: string
expr: _col3
type: bigint
outputColumnNames: _col0, _col1, _col2
Select Operator
expressions:
expr: _col0
type: int
expr: _col1
type: string
expr: UDFToInteger(_col2)
type: int
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
GlobalTableId: 1
table:
input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
serde: org.apache.hadoop.hive.hbase.HBaseSerDe
name: default.hbase_table_3
INSERT OVERWRITE TABLE hbase_table_3
SELECT x.key, x.value, Y.count
FROM
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN
(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y
ON (x.key = Y.key)
select count(1) from hbase_table_3
155
select * from hbase_table_3 order by key, value limit 5
0 val_0 3
2 val_2 1
4 val_4 1
8 val_8 1
10 val_10 1
select key, count from hbase_table_3 order by key, count desc limit 5
0 3
2 1
4 1
8 1
10 1
DROP TABLE hbase_table_4
CREATE TABLE hbase_table_4(key int, value1 string, value2 int, value3 int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "a:b,a:c,d:e"
)
INSERT OVERWRITE TABLE hbase_table_4 SELECT key, value, key+1, key+2
FROM src WHERE key=98 OR key=100
SELECT * FROM hbase_table_4 ORDER BY key
98 val_98 99 100
100 val_100 101 102
DROP TABLE hbase_table_5
CREATE EXTERNAL TABLE hbase_table_5(key int, value map<string,string>)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "a:")
TBLPROPERTIES ("hbase.table.name" = "hbase_table_4")
SELECT * FROM hbase_table_5 ORDER BY key
98 {"b":"val_98","c":"99"}
100 {"b":"val_100","c":"101"}
DROP TABLE hbase_table_6
CREATE TABLE hbase_table_6(key int, value map<string,string>)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = ":key,cf:"
)
INSERT OVERWRITE TABLE hbase_table_6 SELECT key, map(value, key) FROM src
WHERE key=98 OR key=100
SELECT * FROM hbase_table_6 ORDER BY key
98 {"val_98":"98"}
100 {"val_100":"100"}
DROP TABLE hbase_table_7
CREATE TABLE hbase_table_7(value map<string,string>, key int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "cf:,:key"
)
INSERT OVERWRITE TABLE hbase_table_7
SELECT map(value, key, upper(value), key+1), key FROM src
WHERE key=98 OR key=100
SELECT * FROM hbase_table_7 ORDER BY key
{"VAL_98":"99.0","val_98":"98"} 98
{"VAL_100":"101.0","val_100":"100"} 100
set hive.hbase.wal.enabled=false
DROP TABLE hbase_table_8
CREATE TABLE hbase_table_8(key int, value1 string, value2 int, value3 int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "a:b,a:c,d:e"
)
INSERT OVERWRITE TABLE hbase_table_8 SELECT key, value, key+1, key+2
FROM src WHERE key=98 OR key=100
SELECT * FROM hbase_table_8 ORDER BY key
98 val_98 99 100
100 val_100 101 102
DROP TABLE hbase_table_1
DROP TABLE hbase_table_2
DROP TABLE hbase_table_3
DROP TABLE hbase_table_4
DROP TABLE hbase_table_5
DROP TABLE hbase_table_6
DROP TABLE hbase_table_7
DROP TABLE hbase_table_8
DROP TABLE empty_hbase_table
DROP TABLE empty_normal_table