blob: 18a96562e068fb924effe07ddbf643a55b02df29 [file] [log] [blame]
-- Unit-Testing checklist
-- 1. Make sure the function does the right thing on normal input(s).
-- 2. Make sure the function is non-destructive to the input(s).
-- 3. Make sure the function handles null values in svecs correctly.
-- 4. Make sure the function handles length-1 svecs correctly.
-- 5. Make sure the function handles null svecs properly.
-- 6. For functions that handle float8[] input(s), make sure the two
-- representations of null values are handled properly.
-- \timing
-- \i gp_svec.sql_in
set search_path to "$user",MADLIB_SCHEMA,public;
select MADLIB_SCHEMA.dmin(1000,1000.1);
select MADLIB_SCHEMA.dmin(1000,NULL);
select MADLIB_SCHEMA.dmin(NULL,1000);
select MADLIB_SCHEMA.dmin(NULL,NULL);
select MADLIB_SCHEMA.dmax(1000,1000.1);
select MADLIB_SCHEMA.dmax(1000,NULL);
select MADLIB_SCHEMA.dmax(NULL,1000);
select MADLIB_SCHEMA.dmax(NULL,NULL);
drop table if exists MADLIB_SCHEMA.test_pairs;
create table MADLIB_SCHEMA.test_pairs( id int, a MADLIB_SCHEMA.svec, b MADLIB_SCHEMA.svec ) distributed by (id);
insert into MADLIB_SCHEMA.test_pairs values
(0, '{1,100,1}:{5,0,5}', '{50,50,2}:{1,2,10}'),
(1, '{1,100,1}:{-5,0,-5}', '{50,50,2}:{-1,-2,-10}');
insert into MADLIB_SCHEMA.test_pairs values
-- (10, null, null),
(11, '{1}:{0}', '{1}:{1}'),
(12, '{1}:{5}', '{3}:{-8}'),
(13, '{1}:{0}', '{1}:{NULL}'),
(14, '{1,2,1}:{2,4,2}', '{2,1,1}:{0,3,5}'),
(15, '{1,2,1}:{2,4,2}', '{2,1,1}:{NULL,3,5}');
select id, MADLIB_SCHEMA.svec_count(a,b) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id;
select id, MADLIB_SCHEMA.svec_plus(a,b) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_plus(a,b) = MADLIB_SCHEMA.svec_plus(b,a) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_mult(a,b) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_mult(a,b) = MADLIB_SCHEMA.svec_mult(b,a) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_div(a,b) = MADLIB_SCHEMA.svec_mult(a, MADLIB_SCHEMA.svec_pow(b,(-1)::svec)) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_minus(a, b) = MADLIB_SCHEMA.svec_plus(MADLIB_SCHEMA.svec_mult((-1)::svec,b), a) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_pow(a,2::svec) = MADLIB_SCHEMA.svec_mult(a,a) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.dot(a,b) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id;
select id, MADLIB_SCHEMA.dot(a,b) = MADLIB_SCHEMA.dot(b,a) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id;
select id, MADLIB_SCHEMA.dot(a,b::float8[]) = MADLIB_SCHEMA.dot(b,a::float8[]) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id;
select id, MADLIB_SCHEMA.dot(a::float8[],b) = MADLIB_SCHEMA.dot(b::float8[],a) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id;
select id, MADLIB_SCHEMA.dot(a::float8[],b::float8[]) = MADLIB_SCHEMA.dot(b::float8[],a::float8[]) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id;
select id, MADLIB_SCHEMA.l2norm(a), MADLIB_SCHEMA.l2norm(a::float[]), MADLIB_SCHEMA.l2norm(b), MADLIB_SCHEMA.l2norm(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.l1norm(a), MADLIB_SCHEMA.l1norm(a::float[]), MADLIB_SCHEMA.l1norm(b), MADLIB_SCHEMA.l1norm(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id;
select MADLIB_SCHEMA.svec_plus('{1,2,3}:{4,5,6}', 5::MADLIB_SCHEMA.svec);
select MADLIB_SCHEMA.svec_plus(5::MADLIB_SCHEMA.svec, '{1,2,3}:{4,5,6}');
select MADLIB_SCHEMA.svec_plus(500::MADLIB_SCHEMA.svec, '{1,2,3}:{4,null,6}');
select MADLIB_SCHEMA.svec_div(500::MADLIB_SCHEMA.svec, '{1,2,3}:{4,null,6}');
select MADLIB_SCHEMA.svec_div('{1,2,3}:{4,null,6}', 500::MADLIB_SCHEMA.svec);
-- Test operators between svec and float8[]
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) %*% ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] %*% ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec);
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) / ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] / ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec);
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) * ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] * ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec);
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) + ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] + ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec);
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) - ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] - ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec);
-- these should produce error messages
/*
select '{10000000000000000000}:{1}'::MADLIB_SCHEMA.svec ;
select '{1,null,2}:{2,3,4}'::MADLIB_SCHEMA.svec;
select MADLIB_SCHEMA.svec_count('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select MADLIB_SCHEMA.svec_plus('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select MADLIB_SCHEMA.svec_minus('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select MADLIB_SCHEMA.svec_mult('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select MADLIB_SCHEMA.svec_div('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
*/
select MADLIB_SCHEMA.unnest('{1}:{5}'::MADLIB_SCHEMA.svec);
select MADLIB_SCHEMA.unnest('{1,2,3,4}:{5,6,7,8}'::MADLIB_SCHEMA.svec);
select MADLIB_SCHEMA.unnest('{1,2,3,4}:{5,6,null,8}'::MADLIB_SCHEMA.svec);
select id, MADLIB_SCHEMA.unnest(a),a from MADLIB_SCHEMA.test_pairs where id >= 10 order by id;
select MADLIB_SCHEMA.vec_pivot('{1}:{5}', 2);
select MADLIB_SCHEMA.vec_pivot('{1}:{5}', 5);
select MADLIB_SCHEMA.vec_pivot('{1}:{5}', null);
select MADLIB_SCHEMA.vec_pivot('{1}:{null}', 5);
select MADLIB_SCHEMA.vec_pivot('{1}:{null}', null);
select MADLIB_SCHEMA.vec_pivot('{1,2,3,4}:{5,6,7,8}'::MADLIB_SCHEMA.svec, 2);
select id, MADLIB_SCHEMA.vec_pivot(a, 5), a, MADLIB_SCHEMA.vec_pivot(a,6), a, MADLIB_SCHEMA.vec_pivot(a,2) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.vec_pivot(b, 5), b, MADLIB_SCHEMA.vec_pivot(b,6), b, MADLIB_SCHEMA.vec_pivot(b,null) from MADLIB_SCHEMA.test_pairs order by id;
select MADLIB_SCHEMA.vec_sum('{1}:{-5}'::MADLIB_SCHEMA.svec);
select id, a, MADLIB_SCHEMA.vec_sum(a), a, b, MADLIB_SCHEMA.vec_sum(b), b from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.vec_sum(a) = MADLIB_SCHEMA.vec_sum(a::float8[]) from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.vec_sum(b) = MADLIB_SCHEMA.vec_sum(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id;
select id, a, MADLIB_SCHEMA.vec_median(a), a from MADLIB_SCHEMA.test_pairs order by id;
select id, b, MADLIB_SCHEMA.vec_median(b), b from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.vec_median(a) = MADLIB_SCHEMA.vec_median(a::float8[]),
MADLIB_SCHEMA.vec_median(b) = MADLIB_SCHEMA.vec_median(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id;
select id, a, b, MADLIB_SCHEMA.svec_concat(a,b), a, b from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_concat_replicate(0, b), b from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_concat_replicate(1, b) = b from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_concat_replicate(3, b), b from MADLIB_SCHEMA.test_pairs order by id;
-- select id, MADLIB_SCHEMA.svec_concat_replicate(-2, b), b from MADLIB_SCHEMA.test_pairs order by id; -- this should produce error message
select id, MADLIB_SCHEMA.dimension(a), a, MADLIB_SCHEMA.dimension(b), b from MADLIB_SCHEMA.test_pairs order by id;
select MADLIB_SCHEMA.svec_lapply('sqrt', null);
select id, MADLIB_SCHEMA.svec_lapply('sqrt', MADLIB_SCHEMA.svec_lapply('abs', a)), a from MADLIB_SCHEMA.test_pairs order by id;
select id, MADLIB_SCHEMA.svec_lapply('sqrt', MADLIB_SCHEMA.svec_lapply('abs', b)), b from MADLIB_SCHEMA.test_pairs order by id;
select MADLIB_SCHEMA.svec_append(null::MADLIB_SCHEMA.svec, 220::float8, 20::int8);
select id, MADLIB_SCHEMA.svec_append(a, 50, 100), a, MADLIB_SCHEMA.svec_append(b, null, 50), b from MADLIB_SCHEMA.test_pairs order by id;
select MADLIB_SCHEMA.svec_proj(a,1), a, MADLIB_SCHEMA.svec_proj(b,1), b from MADLIB_SCHEMA.test_pairs order by id;
-- select MADLIB_SCHEMA.svec_proj(a,2), a, MADLIB_SCHEMA.svec_proj(b,2), b from MADLIB_SCHEMA.test_pairs order by id; -- this should result in an appropriate error message
select MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3,69);
select MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 69,3);
select MADLIB_SCHEMA.svec_subvec(a,2,4), a from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) >= 4 order by id;
select MADLIB_SCHEMA.svec_subvec(a,2,MADLIB_SCHEMA.dimension(a)-1), a from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) >= 2 order by id;
-- select MADLIB_SCHEMA.svec_subvec(a,MADLIB_SCHEMA.dimension(a)-1,0), a from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) >= 2 order by id;
select MADLIB_SCHEMA.svec_reverse(a), a, MADLIB_SCHEMA.svec_reverse(b), b from MADLIB_SCHEMA.test_pairs order by id;
select MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3,69) =
MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 69,3));
select MADLIB_SCHEMA.svec_change('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3, '{2,3}:{4,null}');
select MADLIB_SCHEMA.svec_change(a,1,'{1}:{-50}'), a from MADLIB_SCHEMA.test_pairs order by id;
-- Test the multi-concatenation and show sizes compared with a normal array
drop table if exists corpus_proj;
drop table if exists corpus_proj_array;
create table corpus_proj as (select 10000 *|| ('{45,2,35,4,15,1}:{0,1,0,1,0,2}'::MADLIB_SCHEMA.svec) result ) distributed randomly;
create table corpus_proj_array as (select result::float8[] from corpus_proj) distributed randomly;
-- Calculate on-disk size of sparse vector
select pg_size_pretty(pg_total_relation_size('corpus_proj'));
-- Calculate on-disk size of normal array
select pg_size_pretty(pg_total_relation_size('corpus_proj_array'));
-- Calculate L1 norm from sparse vector
select MADLIB_SCHEMA.l1norm(result) from corpus_proj;
-- Calculate L1 norm from float8[]
select MADLIB_SCHEMA.l1norm(result) from corpus_proj_array;
-- Calculate L2 norm from sparse vector
select MADLIB_SCHEMA.l2norm(result) from corpus_proj;
-- Calculate L2 norm from float8[]
select MADLIB_SCHEMA.l2norm(result) from corpus_proj_array;
drop table corpus_proj;
drop table corpus_proj_array;
-- -- Test the pivot operator
drop table if exists pivot_test;
create table pivot_test(a float8) distributed randomly;
insert into pivot_test values (0),(1),(0),(2),(3);
-- select MADLIB_SCHEMA.array_agg(a) from pivot_test;
select MADLIB_SCHEMA.l1norm(MADLIB_SCHEMA.array_agg(a)) from pivot_test;
drop table if exists pivot_test;
-- Answer should be 5
select MADLIB_SCHEMA.vec_median(MADLIB_SCHEMA.array_agg(a)) from (select generate_series(1,9) a) foo;
-- Answer should be a 10-wide vector
-- select MADLIB_SCHEMA.array_agg(a) from (select trunc(random()*10) a,generate_series(1,100000) order by a) foo;
-- Average is 4.50034, median is 5
select MADLIB_SCHEMA.vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::MADLIB_SCHEMA.svec);
select MADLIB_SCHEMA.vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::MADLIB_SCHEMA.svec::float8[]);