| -- Unit-Testing checklist |
| -- 1. Make sure the function does the right thing on normal input(s). |
| -- 2. Make sure the function is non-destructive to the input(s). |
| -- 3. Make sure the function handles null values in svecs correctly. |
| -- 4. Make sure the function handles length-1 svecs correctly. |
| -- 5. Make sure the function handles null svecs properly. |
| -- 6. For functions that handle float8[] input(s), make sure the two |
| -- representations of null values are handled properly. |
| |
| -- \timing |
| -- \i gp_svec.sql_in |
| |
| set search_path to "$user",MADLIB_SCHEMA,public; |
| |
| select MADLIB_SCHEMA.dmin(1000,1000.1); |
| select MADLIB_SCHEMA.dmin(1000,NULL); |
| select MADLIB_SCHEMA.dmin(NULL,1000); |
| select MADLIB_SCHEMA.dmin(NULL,NULL); |
| select MADLIB_SCHEMA.dmax(1000,1000.1); |
| select MADLIB_SCHEMA.dmax(1000,NULL); |
| select MADLIB_SCHEMA.dmax(NULL,1000); |
| select MADLIB_SCHEMA.dmax(NULL,NULL); |
| |
| drop table if exists MADLIB_SCHEMA.test_pairs; |
| create table MADLIB_SCHEMA.test_pairs( id int, a MADLIB_SCHEMA.svec, b MADLIB_SCHEMA.svec ) distributed by (id); |
| insert into MADLIB_SCHEMA.test_pairs values |
| (0, '{1,100,1}:{5,0,5}', '{50,50,2}:{1,2,10}'), |
| (1, '{1,100,1}:{-5,0,-5}', '{50,50,2}:{-1,-2,-10}'); |
| insert into MADLIB_SCHEMA.test_pairs values |
| -- (10, null, null), |
| (11, '{1}:{0}', '{1}:{1}'), |
| (12, '{1}:{5}', '{3}:{-8}'), |
| (13, '{1}:{0}', '{1}:{NULL}'), |
| (14, '{1,2,1}:{2,4,2}', '{2,1,1}:{0,3,5}'), |
| (15, '{1,2,1}:{2,4,2}', '{2,1,1}:{NULL,3,5}'); |
| |
| |
| select id, MADLIB_SCHEMA.svec_count(a,b) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id; |
| select id, MADLIB_SCHEMA.svec_plus(a,b) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_plus(a,b) = MADLIB_SCHEMA.svec_plus(b,a) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_mult(a,b) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_mult(a,b) = MADLIB_SCHEMA.svec_mult(b,a) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_div(a,b) = MADLIB_SCHEMA.svec_mult(a, MADLIB_SCHEMA.svec_pow(b,(-1)::svec)) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_minus(a, b) = MADLIB_SCHEMA.svec_plus(MADLIB_SCHEMA.svec_mult((-1)::svec,b), a) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_pow(a,2::svec) = MADLIB_SCHEMA.svec_mult(a,a) from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select id, MADLIB_SCHEMA.dot(a,b) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id; |
| select id, MADLIB_SCHEMA.dot(a,b) = MADLIB_SCHEMA.dot(b,a) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id; |
| select id, MADLIB_SCHEMA.dot(a,b::float8[]) = MADLIB_SCHEMA.dot(b,a::float8[]) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id; |
| select id, MADLIB_SCHEMA.dot(a::float8[],b) = MADLIB_SCHEMA.dot(b::float8[],a) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id; |
| select id, MADLIB_SCHEMA.dot(a::float8[],b::float8[]) = MADLIB_SCHEMA.dot(b::float8[],a::float8[]) from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) = MADLIB_SCHEMA.dimension(b) order by id; |
| |
| select id, MADLIB_SCHEMA.l2norm(a), MADLIB_SCHEMA.l2norm(a::float[]), MADLIB_SCHEMA.l2norm(b), MADLIB_SCHEMA.l2norm(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.l1norm(a), MADLIB_SCHEMA.l1norm(a::float[]), MADLIB_SCHEMA.l1norm(b), MADLIB_SCHEMA.l1norm(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select MADLIB_SCHEMA.svec_plus('{1,2,3}:{4,5,6}', 5::MADLIB_SCHEMA.svec); |
| select MADLIB_SCHEMA.svec_plus(5::MADLIB_SCHEMA.svec, '{1,2,3}:{4,5,6}'); |
| select MADLIB_SCHEMA.svec_plus(500::MADLIB_SCHEMA.svec, '{1,2,3}:{4,null,6}'); |
| select MADLIB_SCHEMA.svec_div(500::MADLIB_SCHEMA.svec, '{1,2,3}:{4,null,6}'); |
| select MADLIB_SCHEMA.svec_div('{1,2,3}:{4,null,6}', 500::MADLIB_SCHEMA.svec); |
| |
| -- Test operators between svec and float8[] |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) %*% ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[]; |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] %*% ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec); |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) / ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[]; |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] / ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec); |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) * ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[]; |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] * ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec); |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) + ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[]; |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] + ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec); |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec) - ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[]; |
| select ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec)::float8[] - ('{1,2,3,4}:{3,4,5,6}'::MADLIB_SCHEMA.svec); |
| |
| -- these should produce error messages |
| /* |
| select '{10000000000000000000}:{1}'::MADLIB_SCHEMA.svec ; |
| select '{1,null,2}:{2,3,4}'::MADLIB_SCHEMA.svec; |
| select MADLIB_SCHEMA.svec_count('{1,1,1}:{3,4,5}', '{2,2}:{1,3}'); |
| select MADLIB_SCHEMA.svec_plus('{1,1,1}:{3,4,5}', '{2,2}:{1,3}'); |
| select MADLIB_SCHEMA.svec_minus('{1,1,1}:{3,4,5}', '{2,2}:{1,3}'); |
| select MADLIB_SCHEMA.svec_mult('{1,1,1}:{3,4,5}', '{2,2}:{1,3}'); |
| select MADLIB_SCHEMA.svec_div('{1,1,1}:{3,4,5}', '{2,2}:{1,3}'); |
| */ |
| |
| select MADLIB_SCHEMA.unnest('{1}:{5}'::MADLIB_SCHEMA.svec); |
| select MADLIB_SCHEMA.unnest('{1,2,3,4}:{5,6,7,8}'::MADLIB_SCHEMA.svec); |
| select MADLIB_SCHEMA.unnest('{1,2,3,4}:{5,6,null,8}'::MADLIB_SCHEMA.svec); |
| select id, MADLIB_SCHEMA.unnest(a),a from MADLIB_SCHEMA.test_pairs where id >= 10 order by id; |
| |
| select MADLIB_SCHEMA.vec_pivot('{1}:{5}', 2); |
| select MADLIB_SCHEMA.vec_pivot('{1}:{5}', 5); |
| select MADLIB_SCHEMA.vec_pivot('{1}:{5}', null); |
| select MADLIB_SCHEMA.vec_pivot('{1}:{null}', 5); |
| select MADLIB_SCHEMA.vec_pivot('{1}:{null}', null); |
| select MADLIB_SCHEMA.vec_pivot('{1,2,3,4}:{5,6,7,8}'::MADLIB_SCHEMA.svec, 2); |
| select id, MADLIB_SCHEMA.vec_pivot(a, 5), a, MADLIB_SCHEMA.vec_pivot(a,6), a, MADLIB_SCHEMA.vec_pivot(a,2) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.vec_pivot(b, 5), b, MADLIB_SCHEMA.vec_pivot(b,6), b, MADLIB_SCHEMA.vec_pivot(b,null) from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select MADLIB_SCHEMA.vec_sum('{1}:{-5}'::MADLIB_SCHEMA.svec); |
| select id, a, MADLIB_SCHEMA.vec_sum(a), a, b, MADLIB_SCHEMA.vec_sum(b), b from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.vec_sum(a) = MADLIB_SCHEMA.vec_sum(a::float8[]) from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.vec_sum(b) = MADLIB_SCHEMA.vec_sum(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select id, a, MADLIB_SCHEMA.vec_median(a), a from MADLIB_SCHEMA.test_pairs order by id; |
| select id, b, MADLIB_SCHEMA.vec_median(b), b from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.vec_median(a) = MADLIB_SCHEMA.vec_median(a::float8[]), |
| MADLIB_SCHEMA.vec_median(b) = MADLIB_SCHEMA.vec_median(b::float8[]) from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select id, a, b, MADLIB_SCHEMA.svec_concat(a,b), a, b from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select id, MADLIB_SCHEMA.svec_concat_replicate(0, b), b from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_concat_replicate(1, b) = b from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_concat_replicate(3, b), b from MADLIB_SCHEMA.test_pairs order by id; |
| -- select id, MADLIB_SCHEMA.svec_concat_replicate(-2, b), b from MADLIB_SCHEMA.test_pairs order by id; -- this should produce error message |
| |
| select id, MADLIB_SCHEMA.dimension(a), a, MADLIB_SCHEMA.dimension(b), b from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select MADLIB_SCHEMA.svec_lapply('sqrt', null); |
| select id, MADLIB_SCHEMA.svec_lapply('sqrt', MADLIB_SCHEMA.svec_lapply('abs', a)), a from MADLIB_SCHEMA.test_pairs order by id; |
| select id, MADLIB_SCHEMA.svec_lapply('sqrt', MADLIB_SCHEMA.svec_lapply('abs', b)), b from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select MADLIB_SCHEMA.svec_append(null::MADLIB_SCHEMA.svec, 220::float8, 20::int8); |
| select id, MADLIB_SCHEMA.svec_append(a, 50, 100), a, MADLIB_SCHEMA.svec_append(b, null, 50), b from MADLIB_SCHEMA.test_pairs order by id; |
| |
| select MADLIB_SCHEMA.svec_proj(a,1), a, MADLIB_SCHEMA.svec_proj(b,1), b from MADLIB_SCHEMA.test_pairs order by id; |
| -- select MADLIB_SCHEMA.svec_proj(a,2), a, MADLIB_SCHEMA.svec_proj(b,2), b from MADLIB_SCHEMA.test_pairs order by id; -- this should result in an appropriate error message |
| |
| select MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3,69); |
| select MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 69,3); |
| select MADLIB_SCHEMA.svec_subvec(a,2,4), a from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) >= 4 order by id; |
| select MADLIB_SCHEMA.svec_subvec(a,2,MADLIB_SCHEMA.dimension(a)-1), a from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) >= 2 order by id; |
| -- select MADLIB_SCHEMA.svec_subvec(a,MADLIB_SCHEMA.dimension(a)-1,0), a from MADLIB_SCHEMA.test_pairs where MADLIB_SCHEMA.dimension(a) >= 2 order by id; |
| |
| select MADLIB_SCHEMA.svec_reverse(a), a, MADLIB_SCHEMA.svec_reverse(b), b from MADLIB_SCHEMA.test_pairs order by id; |
| select MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3,69) = |
| MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 69,3)); |
| |
| select MADLIB_SCHEMA.svec_change('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3, '{2,3}:{4,null}'); |
| select MADLIB_SCHEMA.svec_change(a,1,'{1}:{-50}'), a from MADLIB_SCHEMA.test_pairs order by id; |
| |
| -- Test the multi-concatenation and show sizes compared with a normal array |
| drop table if exists corpus_proj; |
| drop table if exists corpus_proj_array; |
| create table corpus_proj as (select 10000 *|| ('{45,2,35,4,15,1}:{0,1,0,1,0,2}'::MADLIB_SCHEMA.svec) result ) distributed randomly; |
| create table corpus_proj_array as (select result::float8[] from corpus_proj) distributed randomly; |
| -- Calculate on-disk size of sparse vector |
| select pg_size_pretty(pg_total_relation_size('corpus_proj')); |
| -- Calculate on-disk size of normal array |
| select pg_size_pretty(pg_total_relation_size('corpus_proj_array')); |
| |
| -- Calculate L1 norm from sparse vector |
| select MADLIB_SCHEMA.l1norm(result) from corpus_proj; |
| -- Calculate L1 norm from float8[] |
| select MADLIB_SCHEMA.l1norm(result) from corpus_proj_array; |
| -- Calculate L2 norm from sparse vector |
| select MADLIB_SCHEMA.l2norm(result) from corpus_proj; |
| -- Calculate L2 norm from float8[] |
| select MADLIB_SCHEMA.l2norm(result) from corpus_proj_array; |
| |
| |
| drop table corpus_proj; |
| drop table corpus_proj_array; |
| |
| -- -- Test the pivot operator |
| drop table if exists pivot_test; |
| create table pivot_test(a float8) distributed randomly; |
| insert into pivot_test values (0),(1),(0),(2),(3); |
| -- select MADLIB_SCHEMA.array_agg(a) from pivot_test; |
| select MADLIB_SCHEMA.l1norm(MADLIB_SCHEMA.array_agg(a)) from pivot_test; |
| drop table if exists pivot_test; |
| -- Answer should be 5 |
| select MADLIB_SCHEMA.vec_median(MADLIB_SCHEMA.array_agg(a)) from (select generate_series(1,9) a) foo; |
| -- Answer should be a 10-wide vector |
| -- select MADLIB_SCHEMA.array_agg(a) from (select trunc(random()*10) a,generate_series(1,100000) order by a) foo; |
| -- Average is 4.50034, median is 5 |
| select MADLIB_SCHEMA.vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::MADLIB_SCHEMA.svec); |
| select MADLIB_SCHEMA.vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::MADLIB_SCHEMA.svec::float8[]); |