-- Source blob: 6ab72785feb674da10acff635ff169915891f442 (repository viewer header; not part of the script)
-- Reference for the Theta sketch Hive UDFs used below: https://datasketches.apache.org/docs/Theta/ThetaHiveUDFs.html
-- Fixture: 20 (id, category) rows. Category 'a' carries ids 1-10 and
-- category 'b' carries ids 6-15, so ids 6-10 appear under both categories.
create temporary table theta_input (
  id int,
  category char(1)
);

insert into table theta_input values
  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'),
  (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'),
  (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b');
-- Holds one Theta sketch per category, stored as a binary column so the
-- sketches can be estimated and merged without rescanning theta_input.
create temporary table sketch_intermediate (
  category char(1),
  sketch binary
);

-- ds_theta_sketch is used as an aggregate here: one sketch per category group.
insert into table sketch_intermediate
select
  category,
  ds_theta_sketch(id)
from theta_input
group by category;

-- Per-category distinct-count estimate. theta_input has 10 distinct ids in
-- each category, so each estimate should be about 10 (TODO confirm exact
-- value against the expected output).
-- The explicit ordering keeps the row order stable across runs; without it
-- the order of the returned rows is not guaranteed.
select
  category,
  ds_theta_estimate(sketch)
from sketch_intermediate
order by category;

-- Merge every stored sketch with the aggregate union and estimate the result.
-- The ids across both categories span 1-15, so about 15 is expected
-- (TODO confirm exact value against the expected output).
select ds_theta_estimate(ds_theta_union(sketch))
from sketch_intermediate;
-- Fixture: 10 rows where id1 runs 1-10 and id2 is twice id1 (the even
-- numbers 2-20), so the two columns share the values 2, 4, 6, 8 and 10.
create temporary table sketch_input (
  id1 int,
  id2 int
);

insert into table sketch_input values
  (1, 2), (2, 4), (3, 6), (4, 8), (5, 10),
  (6, 12), (7, 14), (8, 16), (9, 18), (10, 20);
-- Holds a single row with two sketches side by side: one built over all of
-- sketch_input.id1 and one over all of sketch_input.id2.
create temporary table sketch_intermediate2 (
  sketch1 binary,
  sketch2 binary
);

insert into sketch_intermediate2
select
  ds_theta_sketch(id1),
  ds_theta_sketch(id2)
from sketch_input;

-- Estimates for each sketch on its own, then for the two-argument set
-- operations applied to the pair: union, intersection, and exclusion in both
-- argument orders.
-- NOTE(review): ds_theta_exclude(x, y) presumably yields "in x but not in y";
-- confirm against the DataSketches Hive UDF documentation.
select
  ds_theta_estimate(sketch1),
  ds_theta_estimate(sketch2),
  ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)),
  ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)),
  ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)),
  ds_theta_estimate(ds_theta_exclude(sketch2, sketch1))
from sketch_intermediate2;