blob: b43b39f968f5fc57fe11ab758a6e5d77de7e3716 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*//* ----------------------------------------------------------------------- */
-----------------------------------------------------------------------
-- vec2cols
-----------------------------------------------------------------------
-- Create testing dataset
DROP TABLE IF EXISTS dt_golf CASCADE;
CREATE TABLE dt_golf (
id integer NOT NULL,
"OUTLOOK" text,
temperature double precision,
humidity double precision,
"Temp_Humidity" double precision[],
clouds_airquality text[],
windy boolean,
class text,
observation_weight double precision
);
INSERT INTO dt_golf VALUES
(1,'sunny', 85, 85, ARRAY[85, 85],ARRAY['none', 'unhealthy'], 'false','Don''t Play', 5.0),
(2, 'sunny', 80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'], 'true', 'Don''t Play', 5.0),
(3, 'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'], 'false', 'Play', 1.5),
(4, 'rain', 70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'], 'false', 'Play', 1.0),
(5, 'rain', 68, 80, ARRAY[68, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0),
(6, 'rain', 65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0),
(7, 'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5),
(8, 'sunny', 72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'], 'false', 'Don''t Play', 5.0),
(9, 'sunny', 69, 70, ARRAY[69, 70], ARRAY['high', 'good'], 'false', 'Play', 5.0),
(10, 'rain', 75, 80, ARRAY[75, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0),
(11, 'sunny', 75, 70, ARRAY[75, 70], ARRAY['none', 'good'], 'true', 'Play', 5.0),
(12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5),
(13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'], 'false', 'Play', 1.5),
(14, 'rain', 71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0);
-- Call the vec2cols function with different parameters on the 'clouds_airquality' column, to split it up
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'dt_golf',
'out_table',
'clouds_airquality'
);
SELECT f1 from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 2, 'Number of split columns does not match');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'dt_golf',
'out_table',
'clouds_airquality',
NULL,
'id'
);
SELECT f1 from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 3, 'Number of split columns does not match');
SELECT assert ((SELECT clouds_airquality[1] FROM dt_golf WHERE id = 1) = (SELECT f1 FROM out_table WHERE id = 1), 'Split values do not match up');
SELECT assert ((SELECT clouds_airquality[2] FROM dt_golf WHERE id = 1) = (SELECT f2 FROM out_table WHERE id = 1), 'Split values do not match up');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'dt_golf',
'out_table',
'clouds_airquality',
ARRAY['clouds', 'air_quality']
);
SELECT clouds from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 2, 'Number of split columns does not match');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'dt_golf',
'out_table',
'clouds_airquality',
ARRAY['clouds', 'air_quality'],
'"OUTLOOK", id'
);
SELECT clouds from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 4, 'Number of split columns does not match');
SELECT assert ((SELECT clouds_airquality[1] FROM dt_golf WHERE id = 1) = (SELECT clouds FROM out_table WHERE id = 1), 'Split values do not match up');
SELECT assert ((SELECT clouds_airquality[2] FROM dt_golf WHERE id = 1) = (SELECT air_quality FROM out_table WHERE id = 1), 'Split values do not match up');
-- Testing splitting arrays of different lengths into features
DROP TABLE IF EXISTS diff_lengths_test;
CREATE TABLE diff_lengths_test(
"id" INTEGER,
"arr" TEXT[]);
INSERT INTO diff_lengths_test VALUES (1, '{a, b}'), (2, '{c, d}'), (3, '{e, f, g, h}'), (4, '{i}'), (5, '{}');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'diff_lengths_test',
'out_table',
'arr'
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = (SELECT max(array_upper(arr, 1)) from diff_lengths_test), 'Number of split columns does not match');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'diff_lengths_test',
'out_table',
'arr',
ARRAY['a']
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 1, 'Number of split columns does not match');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'diff_lengths_test',
'out_table',
'arr',
ARRAY['a', 'b', 'c']
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 3, 'Number of split columns does not match');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'diff_lengths_test',
'out_table',
'arr',
ARRAY['a', 'b', 'c', 'd', 'e', 'f', 'g']
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 7, 'Number of split columns does not match');
-- Special character tests
DROP TABLE IF EXISTS special_char_check;
CREATE TABLE special_char_check(
"id" INTEGER,
"se$$,''x" TEXT,
"len$,,$'%*()gth" TEXT[],
"rin,$$Ж!#'gs" INTEGER,
",hel~!@#$%^&*()_+{}|:""<>?`-=[]\;',./'," TEXT,
"$$hi//\\*$$" INTEGER,
"**" INTEGER,
"'~d())" TEXT);
INSERT INTO special_char_check VALUES
(1,'M''M',ARRAY['sd''f', 'ab,,c'],6,'sd''f',1,2,'ab,,c'),
(2,'''M''M''',ARRAY['sdf$$sdfk(),', '$$sdlhf$$'],6,'sdf$$sdfk(),',1,2,'$$sdlhf$$'),
(3,'M|$$M',ARRAY['%~(())Ж"', 'sdf'],6,'%~(())Ж"',1,2,'sdf'),
(4,'M,M',ARRAY['sdf', 'sdf'],6,'sdf',1,2,'sdf'),
(5,'M@[}(:*;M',ARRAY['sdf', 'sdf'],6,'sdf',1,2,'sdf'),
(6,'M"M',ARRAY['sdf', 'sdf'],6,'sdf',1,2,'sdf'),
(7,'MЖM',ARRAY['sdf', 'sdf'],6,'sdf',1,2,'sdf');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'special_char_check',
'out_table',
'"len$,,$''%*()gth"',
ARRAY['",cl''oЖu,ds,"', '"air_qu""ality"'],
'*'
);
SELECT ",cl'oЖu,ds,", "air_qu""ality", ",hel~!@#$%^&*()_+{}|:""<>?`-=[]\;',./',", "$$hi//\\*$$", "**", "'~d())" from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 10, 'Number of split columns does not match');
SELECT assert ((SELECT "len$,,$'%*()gth"[1] FROM special_char_check WHERE id = 1) = (SELECT ",cl'oЖu,ds," FROM out_table WHERE id = 1), 'Split values do not match up');
SELECT assert ((SELECT "se$$,''x" FROM special_char_check WHERE id = 6) = 'M"M', 'Incorrect entries in out_table');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'special_char_check',
'out_table',
'"len$,,$''%*()gth"',
ARRAY['",cl''oЖu,ds,"', '"air_qu""ality"'],
$__madlib__$id, "rin,$$Ж!#'gs", ",hel~!@#$%^&*()_+{}|:""<>?`-=[]\;',./',", "$$hi//\\*$$", "**"$__madlib__$
);
SELECT ",cl'oЖu,ds,", "air_qu""ality", id, "rin,$$Ж!#'gs", ",hel~!@#$%^&*()_+{}|:""<>?`-=[]\;',./',", "$$hi//\\*$$", "**" from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 7, 'Number of split columns does not match');
SELECT assert ((SELECT "len$,,$'%*()gth"[1] FROM special_char_check WHERE id = 1) = (SELECT ",cl'oЖu,ds," FROM out_table WHERE id = 1), 'Split values do not match up');
SELECT assert ((SELECT ",hel~!@#$%^&*()_+{}|:""<>?`-=[]\;',./'," FROM special_char_check WHERE id = 1) = 'sd''f', 'Incorrect entries in out_table');
DROP TABLE IF EXISTS out_table;
SELECT vec2cols(
'special_char_check',
'out_table',
'"len$,,$''%*()gth"',
ARRAY['",cl''oЖu,ds,"', '"air_qu""ality"'],
'"**"'
);
SELECT "**", ",cl'oЖu,ds,", "air_qu""ality" from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 3, 'Number of split columns does not match');
-----------------------------------------------------------------------
-- cols2vec
-----------------------------------------------------------------------
DROP TABLE IF EXISTS out_table, out_table_summary;
SELECT cols2vec(
'dt_golf',
'out_table',
'temperature, humidity'
);
SELECT feature_vector from out_table;
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 1, 'Number of output table columns does not match');
DROP TABLE IF EXISTS out_table, out_table_summary;
SELECT cols2vec(
'dt_golf',
'out_table',
'temperature, humidity',
NULL,
'id'
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 2, 'Number of output table columns does not match');
SELECT assert ((SELECT feature_vector from out_table where id = 1) = '{85, 85}', 'Output table values do not match up');
-- All numeric types
DROP TABLE IF EXISTS out_table, out_table_summary;
SELECT cols2vec(
'dt_golf',
'out_table',
'id, temperature, humidity, observation_weight',
NULL,
'*'
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 10, 'Number of output table columns does not match');
SELECT assert ((SELECT feature_vector from out_table where id = 1) = '{1.0, 85.0, 85.0, 5.0}', 'Output table values do not match up');
DROP TABLE IF EXISTS out_table, out_table_summary;
SELECT cols2vec(
'dt_golf',
'out_table',
'windy, temperature, humidity',
'windy',
'*'
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 10, 'Number of output table columns does not match');
SELECT assert ((SELECT feature_vector from out_table where id = 1) = '{85, 85}', 'Output table values do not match up');
DROP TABLE IF EXISTS out_table, out_table_summary;
SELECT cols2vec(
'dt_golf',
'out_table',
'*',
'"OUTLOOK", "Temp_Humidity", clouds_airquality, windy, class, id',
'*'
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 10, 'Number of output table columns does not match');
SELECT assert ((SELECT feature_vector from out_table where id = 1) = '{85, 85, 5}', 'Output table values do not match up');
DROP TABLE IF EXISTS out_table, out_table_summary;
SELECT cols2vec(
'dt_golf',
'out_table',
'windy, temperature, humidity',
'windy, class',
'*'
);
SELECT assert ((SELECT count(*) FROM information_schema.columns WHERE table_name='out_table') = 10, 'Number of output table columns does not match');
SELECT assert ((SELECT feature_vector from out_table where id = 1) = '{85, 85}', 'Output table values do not match up');
-- Special character tests
DROP TABLE IF EXISTS cols2vec_spcl;
CREATE TABLE cols2vec_spcl (
"i,Ж!#'d" bigint,
"lab$$''%*Ж!#''()el" int,
"fe''at1" float,
feat2 float,
"fe'%*()at3" float,
other_col float,
"se''x" TEXT
);
INSERT INTO cols2vec_spcl VALUES
(1, 0, 1, 1, 0.5, 0.9,'M''M'),
(2, 1, 0, 1, 0.3, 0.3,'M$M'),
(3, 0, 0, 0, 0.1, 1.1,'M,M'),
(4, 1, 1, 0, 0.9, 0.4,'M@[}(:*;M'),
(5, 1, 0, 1, 0.85, 0.34,'M@[}(:*;M'),
(6, 1, 0, 1, 0.63, 0.12,'M"M'),
(7, 0, 1, 0, 0.7, 1.4,'MЖM');
DROP TABLE IF EXISTS cols2vec_out, cols2vec_out_summary;
SELECT cols2vec( 'cols2vec_spcl', 'cols2vec_out','"fe''''at1",feat2,"fe''%*()at3"',
NULL, '"i,Ж!#''d","lab$$''''%*Ж!#''''()el"' );
SELECT assert(feature_vector = '{1,1,0.5}', 'Incorrect results for cols2vec')
FROM cols2vec_out
WHERE "i,Ж!#'d" = 1;
DROP TABLE IF EXISTS cols2vec_out, cols2vec_out_summary;
SELECT cols2vec('cols2vec_spcl', 'cols2vec_out','*', '"se''''x","i,Ж!#''d","lab$$''''%*Ж!#''''()el"',
'"i,Ж!#''d","lab$$''''%*Ж!#''''()el"');
SELECT assert(feature_vector = '{1,1,0.5,0.9}',
'Incorrect results for cols2vec')
FROM cols2vec_out
WHERE "i,Ж!#'d" = 1;
-- Type casting tests
DROP TABLE IF EXISTS cols2vec_casting_test;
CREATE TABLE cols2vec_casting_test (
a smallint,
b int,
c bigint,
d decimal,
e numeric,
f real,
g float,
h double precision,
i serial,
j bigserial,
k boolean,
l boolean,
m text,
n varchar,
o character varying,
p char,
q character,
r smallint,
s boolean,
t boolean,
u text[],
v text[],
w integer[],
x text
);
INSERT INTO cols2vec_casting_test VALUES
(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 'true', 'true', 'hello', 'hello', 'hello', 'h', 'h', 1, 'false', 'false', ARRAY['1','sdfsd','serw'], ARRAY['2','sdfsd','serw'], ARRAY[1, 2, 3], '1');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'a, b'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'integer', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'a, b, c'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'bigint', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'a, b, c, d, e, f, g, h, i, j'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'double precision', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'k, l'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'boolean', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'm, n, o, p, q'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'text', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'a, r'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'smallint', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
's, t'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'boolean', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
's'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'boolean', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
's::int, a, b'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'integer', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'm, b::text'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'text', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'x::integer, b'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'integer', 'Incorrect type casting of features for cols2vec');
DROP TABLE IF EXISTS out_table, out_table_summary, out_table2;
SELECT cols2vec(
'cols2vec_casting_test',
'out_table',
'g::smallint, h::smallint'
);
SELECT vec2cols(
'out_table',
'out_table2',
'feature_vector'
);
SELECT assert((select data_type from information_schema.columns where table_name = 'out_table2' limit 1) = 'smallint', 'Incorrect type casting of features for cols2vec');