blob: e067d70bff3e65d466fca941500518acd16d0a60 [file] [log] [blame]
-- \set FETCH_COUNT 100
\timing
Timing is on.
drop table if exists test;
DROP TABLE
Time: 1.163 ms
create table test (a int, b svec) DISTRIBUTED BY (a);
CREATE TABLE
Time: 67.692 ms
insert into test (select 1,gp_extract_feature_histogram('{"one","two","three","four","five","six"}','{"twe","four","five","six","one","three","two","one"}'));
INSERT 0 1
Time: 72.360 ms
insert into test (select 2,gp_extract_feature_histogram('{"one","two","three","four","five","six"}','{"the","brown","cat","ran","across","three","dogs"}'));
INSERT 0 1
Time: 4.543 ms
insert into test (select 3,gp_extract_feature_histogram('{"one","two","three","four","five","six"}','{"two","four","five","six","one","three","two","one"}'));
INSERT 0 1
Time: 3.701 ms
-- Test the equals operator (should be only 3 rows)
select a,b::float8[] cross_product_equals from (select a,b from test) foo where b = foo.b order by a;
a | cross_product_equals
---+----------------------
1 | {2,1,1,1,1,1}
2 | {0,0,1,0,0,0}
3 | {2,2,1,1,1,1}
(3 rows)
Time: 4.044 ms
drop table if exists test2;
DROP TABLE
Time: 0.217 ms
create table test2 as select * from test DISTRIBUTED BY (a);
SELECT 3
Time: 64.780 ms
-- Test the plus operator (should be 9 rows)
select (t1.b+t2.b)::float8[] cross_product_sum from test t1, test2 t2 order by t1.a;
cross_product_sum
-------------------
{4,3,2,2,2,2}
{4,2,2,2,2,2}
{2,1,2,1,1,1}
{0,0,2,0,0,0}
{2,1,2,1,1,1}
{2,2,2,1,1,1}
{4,4,2,2,2,2}
{4,3,2,2,2,2}
{2,2,2,1,1,1}
(9 rows)
Time: 7.121 ms
-- Test ORDER BY
select (t1.b+t2.b)::float8[] cross_product_sum, l2norm(t1.b+t2.b) l2norm, (t1.b+t2.b) sparse_vector from test t1, test2 t2 order by 3;
cross_product_sum | l2norm | sparse_vector
-------------------+------------------+---------------------
{0,0,2,0,0,0} | 2 | {2,1,3}:{0,2,0}
{2,1,2,1,1,1} | 3.46410161513775 | {1,1,1,3}:{2,1,2,1}
{2,1,2,1,1,1} | 3.46410161513775 | {1,1,1,3}:{2,1,2,1}
{2,2,2,1,1,1} | 3.87298334620742 | {3,3}:{2,1}
{2,2,2,1,1,1} | 3.87298334620742 | {3,3}:{2,1}
{4,2,2,2,2,2} | 6 | {1,5}:{4,2}
{4,3,2,2,2,2} | 6.40312423743285 | {1,1,4}:{4,3,2}
{4,3,2,2,2,2} | 6.40312423743285 | {1,1,4}:{4,3,2}
{4,4,2,2,2,2} | 6.92820323027551 | {2,4}:{4,2}
(9 rows)
Time: 5.359 ms
select (sum(t1.b))::float8[] as features_sum from test t1;
features_sum
---------------
{4,3,3,2,2,2}
(1 row)
Time: 2.844 ms
-- Test the div operator
select (t1.b/(select sum(b) from test))::float8[] as weights from test t1 order by a;
weights
-------------------------------------------------------
{0.5,0.333333333333333,0.333333333333333,0.5,0.5,0.5}
{0,0,0.333333333333333,0,0,0}
{0.5,0.666666666666667,0.333333333333333,0.5,0.5,0.5}
(3 rows)
Time: 4.229 ms
-- Test the * operator
select t1.b %*% (t1.b/(select sum(b) from test)) as raw_score from test t1 order by a;
raw_score
-------------------
3.16666666666667
0.333333333333333
4.16666666666667
(3 rows)
Time: 4.098 ms
-- Test the * and l2norm operators
select (t1.b %*% (t1.b/(select sum(b) from test))) / (l2norm(t1.b) * l2norm((select sum(b) from test))) as norm_score from test t1 order by a;
norm_score
-------------------
0.15563317594128
0.049147318718299
0.177345110574739
(3 rows)
Time: 6.295 ms
-- Test the ^ and l1norm operators
select ('{1,2}:{20.,10.}'::svec)^('{1}:{3.}'::svec);
?column?
-------------------
{1,2}:{8000,1000}
(1 row)
Time: 1.060 ms
select (t1.b %*% (t1.b/(select sum(b) from test))) / (l1norm(t1.b) * l1norm((select sum(b) from test))) as norm_score from test t1 order by a;
norm_score
--------------------
0.0282738095238095
0.0208333333333333
0.0325520833333333
(3 rows)
Time: 6.715 ms
-- Test the multi-concatenation and show sizes compared with a normal array
drop table if exists corpus_proj;
DROP TABLE
Time: 0.258 ms
drop table if exists corpus_proj_array;
DROP TABLE
Time: 0.225 ms
create table corpus_proj as (select 10000 *|| ('{45,2,35,4,15,1}:{0,1,0,1,0,2}'::svec) result ) distributed randomly;
SELECT 1
Time: 91.366 ms
create table corpus_proj_array as (select result::float8[] from corpus_proj) distributed randomly;
SELECT 1
Time: 318.754 ms
-- Calculate on-disk size of sparse vector
select pg_size_pretty(pg_total_relation_size('corpus_proj'));
pg_size_pretty
----------------
192 kB
(1 row)
Time: 5.393 ms
-- Calculate on-disk size of normal array
select pg_size_pretty(pg_total_relation_size('corpus_proj_array'));
pg_size_pretty
----------------
288 kB
(1 row)
Time: 3.488 ms
\timing
Timing is off.
-- Calculate L1 norm from sparse vector
select l1norm(result) from corpus_proj;
l1norm
--------
80000
(1 row)
-- Calculate L1 norm from float8[]
select l1norm(result) from corpus_proj_array;
l1norm
--------
80000
(1 row)
-- Calculate L2 norm from sparse vector
select l2norm(result) from corpus_proj;
l2norm
------------------
316.227766016838
(1 row)
-- Calculate L2 norm from float8[]
select l2norm(result) from corpus_proj_array;
l2norm
------------------
316.227766016838
(1 row)
drop table corpus_proj;
DROP TABLE
drop table corpus_proj_array;
DROP TABLE
drop table test;
DROP TABLE
drop table test2;
DROP TABLE
-- Test operators between svec and float8[]
select ('{1,2,3,4}:{3,4,5,6}'::svec) %*% ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
?column?
----------
260
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] %*% ('{1,2,3,4}:{3,4,5,6}'::svec);
?column?
----------
260
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec) / ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
?column?
----------
{10}:{1}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] / ('{1,2,3,4}:{3,4,5,6}'::svec);
?column?
----------
{10}:{1}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec) * ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
?column?
------------------------
{1,2,3,4}:{9,16,25,36}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] * ('{1,2,3,4}:{3,4,5,6}'::svec);
?column?
------------------------
{1,2,3,4}:{9,16,25,36}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec) + ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
?column?
-----------------------
{1,2,3,4}:{6,8,10,12}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] + ('{1,2,3,4}:{3,4,5,6}'::svec);
?column?
-----------------------
{1,2,3,4}:{6,8,10,12}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec) - ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
?column?
----------
{10}:{0}
(1 row)
select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] - ('{1,2,3,4}:{3,4,5,6}'::svec);
?column?
----------
{10}:{0}
(1 row)
-- Test the pivot operator in the presence of NULL values
drop table if exists pivot_test;
DROP TABLE
create table pivot_test(a float8) distributed randomly;
CREATE TABLE
insert into pivot_test values (0),(1),(NULL),(2),(3);
INSERT 0 5
select array_agg(a) from pivot_test;
array_agg
---------------------
{1,1,2,1}:{1,2,0,3}
(1 row)
select l1norm(array_agg(a)) from pivot_test;
l1norm
--------
6
(1 row)
drop table if exists pivot_test;
DROP TABLE
-- Answer should be 5
select vec_median(array_agg(a)) from (select generate_series(1,9) a) foo;
vec_median
------------
5
(1 row)
-- Answer should be a 10-wide vector
select array_agg(a) from (select trunc(random()*10) a,generate_series(1,100000) order by a) foo;
array_agg
-------------------------------------------------------------------------------
{9946,10172,10063,9850,9874,10139,9964,9923,10131,9938}:{0,1,2,3,4,5,6,7,8,9}
(1 row)
-- Average is 4.50034, median is 5
select vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::svec);
vec_median
------------
5
(1 row)
select vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::svec::float8[]);
vec_median
------------
5
(1 row)