blob: e8e6f80484245610ffb0ef48cc577542c1d6eb43 [file] [log] [blame]
m4_include(`SQLCommon.m4')
---------------------------------------------------------------------------
-- Rules:
-- ------
-- 1) Any DB objects should be created w/o schema prefix,
-- since this file is executed in a separate schema context.
-- 2) There should be no DROP statements in this script, since
-- all objects created in the default schema will be cleaned-up outside.
---------------------------------------------------------------------------
---------------------------------------------------------------------------
-- Test:
---------------------------------------------------------------------------
select svec_dmin(1000,1000.1);
select svec_dmin(1000,NULL);
select svec_dmin(NULL,1000);
select svec_dmin(NULL,NULL);
select svec_dmax(1000,1000.1);
select svec_dmax(1000,NULL);
select svec_dmax(NULL,1000);
select svec_dmax(NULL,NULL);
create table test_pairs( id int, a svec, b svec );
insert into test_pairs values
(0, '{1,100,1}:{5,0,5}', '{50,50,2}:{1,2,10}'),
(1, '{1,100,1}:{-5,0,-5}', '{50,50,2}:{-1,-2,-10}');
insert into test_pairs values
-- (10, null, null),
(11, '{1}:{0}', '{1}:{1}'),
(12, '{1}:{5}', '{3}:{-8}'),
(13, '{1}:{0}', '{1}:{NULL}'),
(14, '{1,2,1}:{2,4,2}', '{2,1,1}:{0,3,5}'),
(15, '{1,2,1}:{2,4,2}', '{2,1,1}:{NULL,3,5}');
select id, svec_count(a,b) from test_pairs where svec_dimension(a) = svec_dimension(b) order by id;
select id, svec_plus(a,b) from test_pairs order by id;
select id, svec_plus(a,b) = svec_plus(b,a) from test_pairs order by id;
select id, svec_mult(a,b) from test_pairs order by id;
select id, svec_mult(a,b) = svec_mult(b,a) from test_pairs order by id;
select id, svec_div(a,b) = svec_mult(a, svec_pow(b,(-1)::svec)) from test_pairs order by id;
select id, svec_minus(a, b) = svec_plus(svec_mult((-1)::svec,b), a) from test_pairs order by id;
select id, svec_pow(a,2::svec) = svec_mult(a,a) from test_pairs order by id;
select id, svec_dot(a,b) from test_pairs where svec_dimension(a) = svec_dimension(b) order by id;
select id, svec_dot(a,b) = svec_dot(b,a) from test_pairs where svec_dimension(a) = svec_dimension(b) order by id;
select id, svec_dot(a,b::float8[]) = svec_dot(b,a::float8[]) from test_pairs where svec_dimension(a) = svec_dimension(b) order by id;
select id, svec_dot(a::float8[],b) = svec_dot(b::float8[],a) from test_pairs where svec_dimension(a) = svec_dimension(b) order by id;
select id, svec_dot(a::float8[],b::float8[]) = svec_dot(b::float8[],a::float8[]) from test_pairs where svec_dimension(a) = svec_dimension(b) order by id;
select id, svec_l2norm(a), svec_l2norm(a::float[]), svec_l2norm(b), svec_l2norm(b::float8[]) from test_pairs order by id;
select id, svec_l1norm(a), svec_l1norm(a::float[]), svec_l1norm(b), svec_l1norm(b::float8[]) from test_pairs order by id;
select svec_plus('{1,2,3}:{4,5,6}', 5::svec);
select svec_plus(5::svec, '{1,2,3}:{4,5,6}');
select svec_plus(500::svec, '{1,2,3}:{4,null,6}');
select svec_div(500::svec, '{1,2,3}:{4,null,6}');
select svec_div('{1,2,3}:{4,null,6}', 500::svec);
-- Test operators between svec and float8[]
select ('{1,2,3,4}:{3,4,5,6}'::svec) %*% ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] %*% ('{1,2,3,4}:{3,4,5,6}'::svec);
select ('{1,2,3,4}:{3,4,5,6}'::svec) / ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] / ('{1,2,3,4}:{3,4,5,6}'::svec);
select ('{1,2,3,4}:{3,4,5,6}'::svec) * ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] * ('{1,2,3,4}:{3,4,5,6}'::svec);
select ('{1,2,3,4}:{3,4,5,6}'::svec) + ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] + ('{1,2,3,4}:{3,4,5,6}'::svec);
select ('{1,2,3,4}:{3,4,5,6}'::svec) - ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] - ('{1,2,3,4}:{3,4,5,6}'::svec);
-- these should produce error messages
/*
select '{10000000000000000000}:{1}'::svec ;
select '{1,null,2}:{2,3,4}'::svec;
select svec_count('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select svec_plus('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select svec_minus('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select svec_mult('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
select svec_div('{1,1,1}:{3,4,5}', '{2,2}:{1,3}');
*/
select svec_unnest('{1}:{5}'::svec);
select svec_unnest('{1,2,3,4}:{5,6,7,8}'::svec);
select svec_unnest('{1,2,3,4}:{5,6,null,8}'::svec);
select id, svec_unnest(a),a from test_pairs where id >= 10 order by id;
select svec_pivot('{1}:{5}', 2);
select svec_pivot('{1}:{5}', 5);
select svec_pivot('{1}:{5}', null);
select svec_pivot('{1}:{null}', 5);
select svec_pivot('{1}:{null}', null);
select svec_pivot('{1,2,3,4}:{5,6,7,8}'::svec, 2);
select id, svec_pivot(a, 5), a, svec_pivot(a,6), a, svec_pivot(a,2) from test_pairs order by id;
select id, svec_pivot(b, 5), b, svec_pivot(b,6), b, svec_pivot(b,null) from test_pairs order by id;
select svec_elsum('{1}:{-5}'::svec);
select id, a, svec_elsum(a), a, b, svec_elsum(b), b from test_pairs order by id;
select id, svec_elsum(a) = svec_elsum(a::float8[]) from test_pairs order by id;
select id, svec_elsum(b) = svec_elsum(b::float8[]) from test_pairs order by id;
select id, a, svec_median(a), a from test_pairs order by id;
select id, b, svec_median(b), b from test_pairs order by id;
select id, svec_median(a) = svec_median(a::float8[]),
svec_median(b) = svec_median(b::float8[]) from test_pairs order by id;
select id, a, b, svec_concat(a,b), a, b from test_pairs order by id;
select id, svec_concat_replicate(0, b), b from test_pairs order by id;
select id, svec_concat_replicate(1, b) = b from test_pairs order by id;
select id, svec_concat_replicate(3, b), b from test_pairs order by id;
-- select id, svec_concat_replicate(-2, b), b from test_pairs order by id; -- this should produce error message
select id, svec_dimension(a), a, svec_dimension(b), b from test_pairs order by id;
select svec_lapply('sqrt', null);
select id, svec_lapply('sqrt', svec_lapply('abs', a)), a from test_pairs order by id;
select id, svec_lapply('sqrt', svec_lapply('abs', b)), b from test_pairs order by id;
select svec_append(null::svec, 220::float8, 20::int8);
select id, svec_append(a, 50, 100), a, svec_append(b, null, 50), b from test_pairs order by id;
select svec_proj(a,1), a, svec_proj(b,1), b from test_pairs order by id;
-- select svec_proj(a,2), a, svec_proj(b,2), b from test_pairs order by id; -- this should result in an appropriate error message
select svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3,69);
select svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 69,3);
select svec_subvec(a,2,4), a from test_pairs where svec_dimension(a) >= 4 order by id;
select svec_subvec(a,2,svec_dimension(a)-1), a from test_pairs where svec_dimension(a) >= 2 order by id;
-- select svec_subvec(a,svec_dimension(a)-1,0), a from test_pairs where svec_dimension(a) >= 2 order by id;
select svec_reverse(a), a, svec_reverse(b), b from test_pairs order by id;
select svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3,69) =
svec_reverse(svec_subvec('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 69,3));
select svec_change('{1,20,30,10,600,2}:{1,2,3,4,5,6}', 3, '{2,3}:{4,null}');
select svec_change(a,1,'{1}:{-50}'), a from test_pairs order by id;
-- Test the multi-concatenation and show sizes compared with a normal array
create table corpus_proj as (select 10000 *|| ('{45,2,35,4,15,1}:{0,1,0,1,0,2}'::svec) result );
create table corpus_proj_array as (select result::float8[] from corpus_proj);
-- Calculate on-disk size of sparse vector
select pg_size_pretty(pg_total_relation_size('corpus_proj'));
-- Calculate on-disk size of normal array
select pg_size_pretty(pg_total_relation_size('corpus_proj_array'));
-- Calculate L1 norm from sparse vector
select svec_l1norm(result) from corpus_proj;
-- Calculate L1 norm from float8[]
select svec_l1norm(result) from corpus_proj_array;
-- Calculate L2 norm from sparse vector
select svec_l2norm(result) from corpus_proj;
-- Calculate L2 norm from float8[]
select svec_l2norm(result) from corpus_proj_array;
create table svec_svec as (
select
1000 *|| ('{45,2,35,4,15,1}:{0,1,0,1,0,2}'::svec) result1,
1000 *|| ('{35,2,45,4,15,1}:{2,0,1,1,0,4}'::svec) result2 );
-- Calculate L1 norm from two sparse vectors
select l1norm(result1, result2) from svec_svec;
-- Calculate L2 norm from two sparse vectors
select l2norm(result1, result2) from svec_svec;
-- Calculate angle between two sparse vectors
select angle(result1, result2) from svec_svec;
-- Calculate tanimoto distance between two sparse vectors
select tanimoto_distance(result1, result2) from svec_svec;
-- Calculate normalized vectors
select normalize(result) from corpus_proj;
-- Test the pivot operator
create table pivot_test(a float8);
insert into pivot_test values (0),(1),(0),(2),(3);
-- select svec_agg(a) from pivot_test;
select svec_l1norm(svec_agg(a)) from pivot_test;
-- Answer should be 5
select svec_median(svec_agg(a)) from (select generate_series(1,9) a) foo;
-- Answer should be a 10-wide vector
select svec_agg(a) from (select trunc(random()*10) a,generate_series(1,100000) order by a) foo;
-- Average is 4.50034, median is 5
select svec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::svec);
select svec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::svec::float8[]);
-- This vfunction test svec creation from position array
select svec_cast_positions_float8arr('{1,2,4,6,2,5}'::INT8[], '{.2,.3,.4,.5,.3,.1}'::FLOAT8[], 10000, 0.0);
-- test of functions returning positions and values of non-base values
select svec_nonbase_values('{1,2,3,1000,4}:{1,2,3,0,4}'::SVEC, 0.0::float8);
select svec_nonbase_positions('{1,2,3,1000,4}:{1,2,3,0,4}'::SVEC, 0.0::float8);
-- svec conversion to and from string
select svec_to_string('{2,3}:{4,5}');
select svec_from_string('{2,3}:{4,5}');
-- UDA: mean(svec)
create table test_svec (a int, b svec);
select mean(b) from test_svec;
insert into test_svec select 1, '{1,2,3}'::float[]::svec;
insert into test_svec select 2, '{2,2.5,3.1}'::float[]::svec;
insert into test_svec select 3, '{3,3,3.2}'::float[]::svec;
select mean(b) from test_svec;
create table _dictionary (id bigint, dict_terms text[]);
insert into _dictionary values('1', '{am,before,being,bothered,corpus,document,i,in,is,me,never,now,one,really,second,the,third,this,until}');
create table dictionary as select generate_series(0, array_upper(dict_terms, 1) - 1) id, unnest(dict_terms) term from _dictionary;
create table _documents(id bigint, terms text[]);
insert into _documents values
(1,'{this,is,one,document,in,the,corpus}'),
(2,'{i,am,the,second,document,in,the,corpus}'),
(3,'{being,third,never,really,bothered,me,until,now}'),
(4,'{the,document,before,me,is,the,third,document}');
create table documents as select id, unnest term, count(*) from ( select id, unnest(terms) from _documents) s group by id, term;
select * from gen_doc_svecs('output','dictionary' ,'id', 'term', 'documents', 'id', 'term', 'count');
select * from output order by doc_id;
select
assert(count(*) = 2, '__gen_svec for indices bigger than 255')
from
(
select
svec_unnest(
__gen_svec('{7855,8623}', '{1,1}', 8624)
) as b
) a
where not b = 0;
---------------------------------------------------------------------------
-- operators
SELECT assert(NOT '{2,1}:{1,0}'::svec OPERATOR(==) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(==)');
SELECT assert('{500,300,200}:{1,1,1}'::svec OPERATOR(==) '{1000}:{1}'::svec, 'svec OPERATOR(==)');
SELECT assert(NOT '{2,1}:{1,0}'::svec OPERATOR(=) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(=)');
SELECT assert('{500,300,200}:{1,1,1}'::svec OPERATOR(=) '{1000}:{1}'::svec, 'svec OPERATOR(=)');
SELECT assert('{2,1}:{1,0}'::svec OPERATOR(<>) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(<>)');
SELECT assert(NOT '{500,300,200}:{1,1,1}'::svec OPERATOR(<>) '{1000}:{1}'::svec, 'svec OPERATOR(<>)');
--
SELECT assert('{2,1}:{1,0}'::svec OPERATOR(>) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(>)');
SELECT assert(NOT '{2,1}:{1,0}'::svec OPERATOR(>) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(>)');
SELECT assert(NOT '{2,3}:{1,0}'::svec OPERATOR(>) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(>)');
SELECT assert('{2,4}:{1,0}'::svec OPERATOR(>) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(>)');
SELECT assert(NOT '{500,300,200}:{1,1,1}'::svec OPERATOR(>) '{1000}:{1}'::svec, 'svec OPERATOR(>)');
SELECT assert('{500,300,400}:{1,1,1}'::svec OPERATOR(>) '{1000}:{1}'::svec, 'svec OPERATOR(>)');
SELECT assert('{500,300,200,1}:{1,1,1,-1}'::svec OPERATOR(>) '{1000}:{1}'::svec, 'svec OPERATOR(>)');
--
SELECT assert(NOT '{2,1}:{1,0}'::svec OPERATOR(<) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(<)');
SELECT assert('{2,1}:{1,0}'::svec OPERATOR(<) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(<)');
SELECT assert(NOT '{2,3}:{1,0}'::svec OPERATOR(<) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(<)');
SELECT assert(NOT '{2,4}:{1,0}'::svec OPERATOR(<) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(<)');
SELECT assert(NOT '{500,300,200}:{1,1,1}'::svec OPERATOR(<) '{1000}:{1}'::svec, 'svec OPERATOR(<)');
SELECT assert(NOT '{500,300,400}:{1,1,1}'::svec OPERATOR(<) '{1000}:{1}'::svec, 'svec OPERATOR(<)');
SELECT assert(NOT '{500,300,200,1}:{1,1,1,-1}'::svec OPERATOR(<) '{1000}:{1}'::svec, 'svec OPERATOR(<)');
--
SELECT assert('{2,1}:{1,0}'::svec OPERATOR(>=) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(>=)');
SELECT assert(NOT '{2,1}:{1,0}'::svec OPERATOR(>=) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(>=)');
SELECT assert('{2,3}:{1,0}'::svec OPERATOR(>=) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(>=)');
SELECT assert('{2,4}:{1,0}'::svec OPERATOR(>=) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(>=)');
SELECT assert('{500,300,200}:{1,1,1}'::svec OPERATOR(>=) '{1000}:{1}'::svec, 'svec OPERATOR(>=)');
SELECT assert('{500,300,400}:{1,1,1}'::svec OPERATOR(>=) '{1000}:{1}'::svec, 'svec OPERATOR(>=)');
SELECT assert('{500,300,200,1}:{1,1,1,-1}'::svec OPERATOR(>=) '{1000}:{1}'::svec, 'svec OPERATOR(>=)');
--
SELECT assert(NOT '{2,1}:{1,0}'::svec OPERATOR(<=) '{1,1,1,2}:{1,0,1,0}'::svec, 'svec OPERATOR(<=)');
SELECT assert('{2,1}:{1,0}'::svec OPERATOR(<=) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(<=)');
SELECT assert('{2,3}:{1,0}'::svec OPERATOR(<=) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(<=)');
SELECT assert(NOT '{2,4}:{1,0}'::svec OPERATOR(<=) '{1,1,1,2}:{1,1,0,0}'::svec, 'svec OPERATOR(<=)');
SELECT assert('{500,300,200}:{1,1,1}'::svec OPERATOR(<=) '{1000}:{1}'::svec, 'svec OPERATOR(<=)');
SELECT assert(NOT '{500,300,400}:{1,1,1}'::svec OPERATOR(<=) '{1000}:{1}'::svec, 'svec OPERATOR(<=)');
SELECT assert(NOT '{500,300,200,1}:{1,1,1,-1}'::svec OPERATOR(<=) '{1000}:{1}'::svec, 'svec OPERATOR(<=)');
-- MADLIB-642
CREATE TABLE madlib_642(id serial, id2 svec);
INSERT INTO madlib_642(id2) VALUES
(ARRAY[1,1,0]::float8[]::svec),
(ARRAY[1,0,0]::float8[]::svec),
(ARRAY[1,0,1]::float8[]::svec);
SELECT assert(
count(id) = 2,
'Wrong Result of predicates w/ <svec>')
FROM madlib_642
WHERE id2 <> ARRAY[1,1,0]::float8[]::svec;
-- SELECT assert(
-- count(c) = 3,
-- 'Wrong Result of GROUP BY <madlib.svec>')
-- FROM (
-- SELECT id2, count(id) AS c
-- FROM madlib_642
-- GROUP BY id2
-- ) subq;
-- Handling NaNs
-- IEEE754 specifies that NaN should not compare equal to any other
-- floating-point value (including NaN). In order to allow floating-point
-- values to be sorted and used in tree-based indexes, PostgreSQL treats
-- NaN values as equal, and greater than all non-NaN values (including Inf).
SELECT assert('{1,NULL}'::float8[]::svec > '{1,NaN}'::float8[]::svec, 'svec: NULL > NaN fails unexpectedly');
SELECT assert('{1,NaN}'::float8[]::svec > '{1,9999,35}'::float8[]::svec, 'svec: NaN > 9999 fails unexpectedly');
SELECT assert('{1,Infinity}'::float8[]::svec <= '{1,NaN,9999,35}'::float8[]::svec, 'svec: Inf <= NaN fails unexpectedly');
SELECT assert('{1,NULL}'::float8[]::svec = '{1,NULL}'::float8[]::svec, 'svec: NULL = NULL fails unexpectedly');
SELECT assert('{1,NaN}'::float8[]::svec = '{1,NaN}'::float8[]::svec, 'svec: NaN = NaN fails unexpectedly');
-- make sure float8[] has the same behaviors
SELECT assert('{1,NULL}'::float8[] > '{1,NaN}'::float8[], 'float8[]: NULL > NaN fails unexpectedly');
SELECT assert('{1,NaN}'::float8[] > '{1,9999,35}'::float8[], 'float8[]: NaN > 9999 fails unexpectedly');
SELECT assert('{1,Infinity}'::float8[] <= '{1,NaN,9999,35}'::float8[], 'float8[]: Inf <= NaN fails unexpectedly');
SELECT assert('{1,NULL}'::float8[] = '{1,NULL}'::float8[], 'float8[]: NULL = NULL fails unexpectedly');
SELECT assert('{1,NaN}'::float8[] = '{1,NaN}'::float8[], 'float8[]: NaN = NaN fails unexpectedly');