blob: 32a9a28da0a7413f5e081b7f876f26a0734a7103 [file] [log] [blame]
====
---- QUERY
# Filter root-level scalar column in file with nested types.
select count(*) from functional_parquet.complextypestbl where id < 1
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Filter root-level scalar column in file with nested types.
select id, int_array.item
from functional_parquet.complextypestbl, complextypestbl.int_array
where id < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Row-group skipping based on min-max stats for collection types.
# Cases tested:
# - collection required vs. not required (use outer joins)
# - collection type: array, map, struct
#
# Array collection.
select id, int_array.item
from functional_parquet.complextypestbl, functional_parquet.complextypestbl.int_array
where int_array.item < -1;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Map collection
select id, int_map.key
from functional_parquet.complextypestbl, functional_parquet.complextypestbl.int_map
where int_map.value < -1;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Multiple nesting, all filtered
select id from functional_parquet.complextypestbl c, c.int_array_array cn, cn.item bottom
where bottom.item < -2;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Multiple nesting, some filtered. Expect some groups filtered, not all. < filter.
select id from functional_parquet.complextypestbl c, c.int_array_array cn, cn.item bottom
where bottom.item < -1;
---- RESULTS
8
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Multiple nesting, some filtered. Expect some groups filtered, not all. = filter.
select id from functional_parquet.complextypestbl c, c.int_array_array cn, cn.item bottom
where bottom.item = -2;
---- RESULTS
8
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Multiple nesting, some filtered. Expect some groups filtered, not all. IN filter.
select id from functional_parquet.complextypestbl c, c.int_array_array cn, cn.item bottom
where bottom.item in (5,6);
---- RESULTS
7
7
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Multiple nesting, some filtered. Expect some groups filtered, not all. complex filter.
select id from functional_parquet.complextypestbl c, c.int_array_array cn, cn.item bottom
where bottom.item > -2 and bottom.item in (-2, -1);
---- RESULTS
8
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Multiple nesting, some filtered. Expect some groups filtered, not all. complex filter.
select distinct id from functional_parquet.complextypestbl c, c.int_array_array cn, cn.item bottom
where bottom.item > 2 and bottom.item not in (4,5,6);
---- RESULTS
1
2
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Multiple nesting, mixed collection type
select id from functional_parquet.complextypestbl c, c.int_map_array cn, cn.item m
where m.value < -2;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Scalar in struct
select id from functional_parquet.complextypestbl c where c.nested_struct.a < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Collection in struct
select id from functional_parquet.complextypestbl c, c.nested_struct.b a
where a.item < -1;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Collection in struct
select id from functional_parquet.complextypestbl c, c.nested_struct.b a
where -1 > a.item;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Deeply nested collection, all required.
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.item a
where a.item.e < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Deeply nested collection, top required, bottom not required.
# TODO: with more aggressive pruning, this should skip groups as well.
# see the false pruning example below for a case that needs the
# more restrictive !empty guard. See org.apache.impala.planner.HdfsScanNode
# for more details on when/why !empty guards are inserted during analysis.
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn
left outer join cn.item a
where a.item.e < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Deeply nested collection, top not required, bottom required (no !empty guard)
select id from functional_parquet.complextypestbl c
left outer join c.nested_struct.c.d cn, cn.item a where a.item.e < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Multiple collections, all required.
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d a, a.item aa,
c.nested_struct.g.value.h.i b where aa.e < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Multiple collections, all required just on filtering path.
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d a,
a.item aa left outer join c.nested_struct.g.value.h.i b where aa.e < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 2
====
---- QUERY
# Multiple collections, not required on filtering path.
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d a
left outer join a.item aa, c.nested_struct.g.value.h.i b where aa.e < -10;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# False pruning example. There is one table that's scanned (complextypestbl).
# As a result, there is one scan. The predicate on the complex type is pushed
# down to the scan, but the field is not required. If we erronenously prune,
# nothing will be returned instead of the expected left-outer-join result.
select count(*) from functional_parquet.complextypestbl c left outer join
(select * from c.int_array where item > 10) v;
---- RESULTS
8
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Similar, but filter is not selective, so the outer nested type is repeated for
# several tuples. Again, no pruning is expected.
select count(*) from functional_parquet.complextypestbl c left outer join
(select * from c.int_array where item > -10) v;
---- RESULTS
12
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
#
# Min-max collection filtering for tpch data.
#
# Array, single level, struct type:
select c.c_custkey from tpch_nested_parquet.customer c, c.c_orders o
where o.o_orderkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Array, multi-level, all required.
select c.c_custkey from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems l
where l.l_partkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Array, single level, optional
select c.c_custkey from tpch_nested_parquet.customer c left outer join c.c_orders o
where o.o_orderkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Array, multi-level, all optional
select c.c_custkey from tpch_nested_parquet.customer c left outer join c.c_orders o
left outer join o.o_lineitems l where l.l_partkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Array, multi-level, optional top
select c.c_custkey from tpch_nested_parquet.customer c left outer join c.c_orders o,
o.o_lineitems l where l.l_partkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Array, multi-level, optional bottom, filter bottom
select c.c_custkey from tpch_nested_parquet.customer c, c.c_orders o left outer join
o.o_lineitems l where l.l_partkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Array, multi-level, optional bottom, filter top
select c.c_custkey from tpch_nested_parquet.customer c, c.c_orders o left outer join
o.o_lineitems l where o.o_orderkey < 0;
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====