blob: 1a7d93418b13bbd27ea76dae130cea844bdf762f [file] [log] [blame]
====
---- QUERY
# Check that ds_kll_quantile returns error for strings that are not serialized sketches.
select ds_kll_quantile(date_string_col, 0.5) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Unable to deserialize sketch
====
---- QUERY
select ds_kll_quantile(ds_kll_sketch(float_col), -0.1) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Rank parameter should be in the range of [0,1]
====
---- QUERY
select ds_kll_quantile(ds_kll_sketch(float_col), 1.1) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Rank parameter should be in the range of [0,1]
====
---- QUERY
select
ds_kll_quantile(ds_kll_sketch(id), 0),
ds_kll_quantile(ds_kll_sketch(tinyint_col), 0),
ds_kll_quantile(ds_kll_sketch(smallint_col), 0),
ds_kll_quantile(ds_kll_sketch(int_col), 0),
ds_kll_quantile(ds_kll_sketch(bigint_col), 0),
ds_kll_quantile(ds_kll_sketch(float_col), 0)
from functional_parquet.alltypestiny;
---- RESULTS
0,0,0,0,0,0
---- TYPES
FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT
====
---- QUERY
select
ds_kll_quantile(ds_kll_sketch(id), 0.5),
ds_kll_quantile(ds_kll_sketch(tinyint_col), 0.5),
ds_kll_quantile(ds_kll_sketch(smallint_col), 0.5),
ds_kll_quantile(ds_kll_sketch(int_col), 0.5),
ds_kll_quantile(ds_kll_sketch(bigint_col), 0.5),
ds_kll_quantile(ds_kll_sketch(float_col), 0.5)
from functional_parquet.alltypestiny;
---- RESULTS
4,1,1,1,10,1.100000023841858
---- TYPES
FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT
====
---- QUERY
select
ds_kll_quantile(ds_kll_sketch(id), 1),
ds_kll_quantile(ds_kll_sketch(tinyint_col), 1),
ds_kll_quantile(ds_kll_sketch(smallint_col), 1),
ds_kll_quantile(ds_kll_sketch(int_col), 1),
ds_kll_quantile(ds_kll_sketch(bigint_col), 1),
ds_kll_quantile(ds_kll_sketch(float_col), 1)
from functional_parquet.alltypestiny;
---- RESULTS
7,1,1,1,10,1.100000023841858
---- TYPES
FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT
====
---- QUERY
select ds_kll_sketch(double_col) from functional_parquet.alltypestiny;
---- CATCH
AnalysisException: No matching function with signature: ds_kll_sketch(DOUBLE)
====
---- QUERY
select ds_kll_sketch(string_col) from functional_parquet.alltypestiny;
---- CATCH
AnalysisException: No matching function with signature: ds_kll_sketch(STRING)
====
---- QUERY
select ds_kll_sketch(timestamp_col) from functional_parquet.alltypestiny;
---- CATCH
AnalysisException: No matching function with signature: ds_kll_sketch(TIMESTAMP)
====
---- QUERY
select ds_kll_sketch(cast(date_string_col as date format 'MM/DD/YYYY'))
from functional_parquet.alltypestiny;
---- CATCH
AnalysisException: No matching function with signature: ds_kll_sketch(DATE)
====
---- QUERY
# Check that ds_kll_quantile() returns null for null inputs.
select ds_kll_quantile(c, 0.5) from functional_parquet.nulltable;
---- RESULTS
NULL
---- TYPES
FLOAT
====
---- QUERY
# Check that ds_kll_sketch() returns null for null inputs.
select ds_kll_sketch(d) from functional_parquet.nulltable;
---- RESULTS
'NULL'
---- TYPES
STRING
====
---- QUERY
# Check that ds_kll_sketch() returns null for empty input.
select ds_kll_sketch(f2) from functional_parquet.emptytable;
---- RESULTS
'NULL'
---- TYPES
STRING
====
---- QUERY
# Write sketches to a table as string and get an estimate from the written sketch.
# Note, the plan is to write sketches as binary instead of strings. For this we have to
# wait for the binary support (IMPALA-9482).
create table sketch_store
(year int, month int, id_sketch string, float_sketch string)
stored as parquet;
insert into sketch_store
select
year,
month,
ds_kll_sketch(id),
ds_kll_sketch(float_col)
from functional_parquet.alltypessmall
group by year, month;
select
year,
month,
ds_kll_quantile(id_sketch, 0.5),
ds_kll_quantile(float_sketch, 0.5)
from sketch_store;
---- RESULTS
2009,1,12,4.400000095367432
2009,2,37,4.400000095367432
2009,3,62,4.400000095367432
2009,4,87,4.400000095367432
---- TYPES
INT,INT,FLOAT,FLOAT
====
---- QUERY
# Check that sketches made by Hive can be read and used for estimating by Impala.
select
ds_kll_quantile(f, 0.5) as f,
ds_kll_quantile(repetitions, 0.5) as r,
ds_kll_quantile(some_nulls, 0.5) as sn,
ds_kll_quantile(all_nulls, 0.5) as an,
ds_kll_quantile(some_nans, 0.5) as snan,
ds_kll_quantile(all_nans, 0.5) as anan
from kll_sketches_from_hive;
---- TYPES
FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT
---- RESULTS
100.1999969482422,25000.099609375,50.90000152587891,NULL,50.5,NULL
====
---- QUERY
# Check that ds_kll_n() returns null for an empty sketch.
select ds_kll_n(ds_kll_sketch(cast(f2 as float))) from functional_parquet.emptytable;
---- RESULTS
NULL
---- TYPES
BIGINT
====
---- QUERY
# Check that ds_kll_n() returns null for a null input.
select ds_kll_n(c) from functional_parquet.nulltable;
---- RESULTS
NULL
---- TYPES
BIGINT
====
---- QUERY
# Check that ds_kll_n() returns error for strings that are not serialized sketches.
select ds_kll_n(date_string_col) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Unable to deserialize sketch
====
---- QUERY
select ds_kll_n(float_sketch) from sketch_store where year=2009 and month=1;
---- RESULTS
25
---- TYPES
BIGINT
====
---- QUERY
# Check that ds_kll_n() works on sketches created by Hive.
select ds_kll_n(f) from kll_sketches_from_hive;
---- RESULTS
6
---- TYPES
BIGINT
====
---- QUERY
select
ds_kll_rank(ds_kll_sketch(int_col), 0),
ds_kll_rank(ds_kll_sketch(int_col), 1.5),
ds_kll_rank(ds_kll_sketch(int_col), 5),
ds_kll_rank(ds_kll_sketch(int_col), 7.5),
ds_kll_rank(ds_kll_sketch(int_col), 10)
from functional_parquet.alltypessmall;
---- RESULTS
0,0.24,0.6,0.84,1
---- TYPES
DOUBLE,DOUBLE,DOUBLE,DOUBLE,DOUBLE
====
---- QUERY
# Check that ds_kll_rank() returns null for an empty sketch.
select ds_kll_rank(ds_kll_sketch(cast(f2 as float)), 10.0) from functional_parquet.emptytable;
---- RESULTS
NULL
---- TYPES
DOUBLE
====
---- QUERY
# Check that ds_kll_rank() returns null for a null input.
select ds_kll_rank(c, 10.0) from functional_parquet.nulltable;
---- RESULTS
NULL
---- TYPES
DOUBLE
====
---- QUERY
# Check that ds_kll_rank() returns error for strings that are not serialized sketches.
select ds_kll_rank(date_string_col, 10.0) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Unable to deserialize sketch
====
---- QUERY
# Check that ds_kll_rank() works on sketches created by Hive.
select
ds_kll_rank(f, 0.0),
ds_kll_rank(f, 10.0),
ds_kll_rank(f, 20.5),
ds_kll_rank(f, 50.6),
ds_kll_rank(f, 100.2001),
ds_kll_rank(f, 123.33),
ds_kll_rank(f, 200.46)
from kll_sketches_from_hive;
---- RESULTS
0,0.1666666666666667,0.3333333333333333,0.5,0.6666666666666666,0.8333333333333334,1
---- TYPES
DOUBLE,DOUBLE,DOUBLE,DOUBLE,DOUBLE,DOUBLE,DOUBLE
====
---- QUERY
# Unions the sketches from sketch_store and checks if the union produces the same result
# as if the whole data was sketched together into a single sketch.
select
ds_kll_quantile(ds_kll_union(id_sketch), 0.5),
ds_kll_quantile(ds_kll_union(float_sketch), 0.5)
from sketch_store;
---- TYPES
FLOAT,FLOAT
---- RESULTS
50,4.400000095367432
====
---- QUERY
# Checks that ds_kll_union() produces NULL for an empty dataset.
select ds_kll_union(field) from functional_parquet.emptytable;
---- TYPES
STRING
---- RESULTS
'NULL'
====
---- QUERY
# Checks that ds_kll_union() produces NULL for NULL inputs.
select ds_kll_union(null_str) from functional_parquet.nullrows;
---- TYPES
STRING
---- RESULTS
'NULL'
====
---- QUERY
# ds_kll_union() returns an error if it receives an invalid serialized sketch.
select ds_kll_union(date_string_col) from functional_parquet.alltypestiny where id=1;
---- CATCH
UDF ERROR: Unable to deserialize sketch.
====
---- QUERY
# Get the same sketches from Impala and Hive and put them into the same table. When we
# get the estimates from the unions of these sketches the expectation is to get the same
# results as if these sketches were used separately to get the estimates.
create table kll_sketches_impala_hive like kll_sketches_from_impala stored as parquet;
insert into kll_sketches_impala_hive select * from kll_sketches_from_hive;
insert into kll_sketches_impala_hive select * from kll_sketches_from_impala;
select
ds_kll_quantile(ds_kll_union(f), 0.5),
ds_kll_quantile(ds_kll_union(repetitions), 0.5),
ds_kll_quantile(ds_kll_union(some_nulls), 0.5),
ds_kll_quantile(ds_kll_union(all_nulls), 0.5),
ds_kll_quantile(ds_kll_union(some_nans), 0.5),
ds_kll_quantile(ds_kll_union(all_nans), 0.5)
from kll_sketches_impala_hive;
---- TYPES
FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT
---- RESULTS
100.1999969482422,25000.099609375,50.90000152587891,NULL,50.5,NULL
====
---- QUERY
# Checks that ds_kll_quantiles_as_string() produces NULL for an empty dataset.
select
ds_kll_quantiles_as_string(ds_kll_sketch(cast(f2 as float)), 0.5)
from functional_parquet.emptytable;
---- TYPES
STRING
---- RESULTS
'NULL'
====
---- QUERY
# Check that ds_kll_quantiles_as_string() returns null for a null input.
select ds_kll_quantiles_as_string(c, 0.5) from functional_parquet.nulltable;
---- RESULTS
'NULL'
---- TYPES
STRING
====
---- QUERY
# Check that ds_kll_quantiles_as_string() returns error for strings that are not
# serialized sketches.
select
ds_kll_quantiles_as_string(date_string_col, 0.5)
from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Unable to deserialize sketch
====
---- QUERY
# Check error when rank is out of range
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, -1, 1, 0.5)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: Error while getting quantiles from DataSketches KLL. Message: Fraction cannot be less than zero or greater than 1.0
====
---- QUERY
# Check error when rank is out of range
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, 1.1, 1, 0.5)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: Error while getting quantiles from DataSketches KLL. Message: Fraction cannot be less than zero or greater than 1.0
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, 0.2, 0.5, 0.8, 1)
from functional_parquet.alltypessmall;
---- RESULTS
'0,1.1,4.4,7.7,9.9'
---- TYPES
STRING
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0.5, 1, 0, 0.2, 0.8)
from functional_parquet.alltypessmall;
---- RESULTS
'4.4,9.9,0,1.1,7.7'
---- TYPES
STRING
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0.5)
from functional_parquet.alltypessmall;
---- RESULTS
'4.4'
---- TYPES
STRING
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, 0.2, NULL, 0.8, 1)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, 0.2, 0.5, 0.8, NULL)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), NULL)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, 0.5, cast('nan' as float), 1)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), 0, 0.5, 1, cast('nan' as float))
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col), cast('nan' as float))
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_quantiles_as_string(ds_kll_sketch(float_col))
from functional_parquet.alltypessmall;
---- CATCH
AnalysisException: No matching function with signature: ds_kll_quantiles_as_string(STRING)
====
---- QUERY
# Checks that ds_kll_pmf_as_string() produces NULL for an empty dataset.
select
ds_kll_pmf_as_string(ds_kll_sketch(cast(f2 as float)), 0.5)
from functional_parquet.emptytable;
---- TYPES
STRING
---- RESULTS
'NULL'
====
---- QUERY
# Check that ds_kll_pmf_as_string() returns null for a null input.
select ds_kll_pmf_as_string(c, 0.5) from functional_parquet.nulltable;
---- RESULTS
'NULL'
---- TYPES
STRING
====
---- QUERY
# Check that ds_kll_pmf_as_string() returns error for strings that are not serialized
# sketches.
select ds_kll_pmf_as_string(date_string_col, 0.5) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Unable to deserialize sketch
====
---- QUERY
select
ds_kll_pmf_as_string(ds_kll_sketch(float_col), 0, 0.5, cast('nan' as float), 1)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_pmf_as_string(ds_kll_sketch(float_col), 0, 0.5, 1, cast('nan' as float))
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_pmf_as_string(ds_kll_sketch(float_col), cast('nan' as float))
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_pmf_as_string(ds_kll_sketch(float_col), 5)
from functional_parquet.alltypessmall;
---- RESULTS
'0.6,0.4'
---- TYPES
STRING
====
---- QUERY
select ds_kll_pmf_as_string(ds_kll_sketch(float_col), 1, 5, 8, 9)
from functional_parquet.alltypessmall;
---- RESULTS
'0.12,0.48,0.24,0.08,0.08'
---- TYPES
STRING
====
---- QUERY
select
ds_kll_pmf_as_string(ds_kll_sketch(float_col), 1, 5, 3)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: Error while running DataSketches KLL function. Message: Values must be unique and monotonically increasing
====
---- QUERY
select
ds_kll_pmf_as_string(ds_kll_sketch(float_col), 1, 5, 5)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: Error while running DataSketches KLL function. Message: Values must be unique and monotonically increasing
====
---- QUERY
# Checks that ds_kll_cdf_as_string() produces NULL for an empty dataset.
select
ds_kll_cdf_as_string(ds_kll_sketch(cast(f2 as float)), 0.5)
from functional_parquet.emptytable;
---- TYPES
STRING
---- RESULTS
'NULL'
====
---- QUERY
# Check that ds_kll_cdf_as_string() returns null for a null input.
select ds_kll_cdf_as_string(c, 0.5) from functional_parquet.nulltable;
---- RESULTS
'NULL'
---- TYPES
STRING
====
---- QUERY
# Check that ds_kll_cdf_as_string() returns error for strings that are not serialized
# sketches.
select ds_kll_cdf_as_string(date_string_col, 0.5) from functional_parquet.alltypestiny;
---- CATCH
UDF ERROR: Unable to deserialize sketch
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), 0, 0.5, cast('nan' as float), 1)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), 0, 0.5, 1, cast('nan' as float))
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), cast('nan' as float))
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), 0, 0.5, NULL, 1)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: NULL or NaN provided in the input list.
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), 5)
from functional_parquet.alltypessmall;
---- RESULTS
'0.6,1'
---- TYPES
STRING
====
---- QUERY
select ds_kll_cdf_as_string(ds_kll_sketch(float_col), 1, 5, 8, 9)
from functional_parquet.alltypessmall;
---- RESULTS
'0.12,0.6,0.84,0.92,1'
---- TYPES
STRING
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), 1, 5, 3)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: Error while running DataSketches KLL function. Message: Values must be unique and monotonically increasing
====
---- QUERY
select
ds_kll_cdf_as_string(ds_kll_sketch(float_col), 1, 5, 5)
from functional_parquet.alltypessmall;
---- CATCH
UDF ERROR: Error while running DataSketches KLL function. Message: Values must be unique and monotonically increasing
====