blob: e4cd578c1f8118caa7ef76bfcf02b4038d0331ff [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/* -----------------------------------------------------------------------------
* Test knn.
*
* -------------------------------------------------------------------------- */
-- Classification training fixture: nine 2-d integer points.
-- Ids 1-5 sit close to the origin with label 1; ids 6-9 are far away with label 0.
DROP TABLE IF EXISTS knn_train_data;
CREATE TABLE knn_train_data (
    id    INTEGER,
    data  INTEGER[],
    label INTEGER
);
COPY knn_train_data (id, data, label) FROM STDIN DELIMITER '|';
1|{1,1}|1
2|{2,2}|1
3|{3,3}|1
4|{4,4}|1
5|{4,5}|1
6|{20,50}|0
7|{10,31}|0
8|{81,13}|0
9|{1,111}|0
\.
-- Regression training fixture: same nine points as knn_train_data, but the
-- label column is floating point (1.0 for ids 1-5, 0.0 for ids 6-9).
DROP TABLE IF EXISTS knn_train_data_reg;
CREATE TABLE knn_train_data_reg (
    id    INTEGER,
    data  INTEGER[],
    label FLOAT
);
COPY knn_train_data_reg (id, data, label) FROM STDIN DELIMITER '|';
1|{1,1}|1.0
2|{2,2}|1.0
3|{3,3}|1.0
4|{4,4}|1.0
5|{4,5}|1.0
6|{20,50}|0.0
7|{10,31}|0.0
8|{81,13}|0.0
9|{1,111}|0.0
\.
-- Test fixture: six unlabeled 2-d integer points queried by the small tests.
DROP TABLE IF EXISTS knn_test_data;
CREATE TABLE knn_test_data (
    id   INTEGER,
    data INTEGER[]
);
COPY knn_test_data (id, data) FROM STDIN DELIMITER '|';
1|{2,1}
2|{2,6}
3|{15,40}
4|{12,1}
5|{2,90}
6|{50,45}
\.
-- Training fixture with the two coordinates held in separate scalar columns
-- (data1, data2) rather than one array column; values mirror knn_train_data.
DROP TABLE IF EXISTS knn_train_data_expr;
CREATE TABLE knn_train_data_expr (
    id    INTEGER,
    data1 INTEGER,
    data2 INTEGER,
    label INTEGER
);
COPY knn_train_data_expr (id, data1, data2, label) FROM STDIN DELIMITER '|';
1| 1 | 1 |1
2| 2 | 2 |1
3| 3 | 3 |1
4| 4 | 4 |1
5| 4 | 5 |1
6| 20 | 50 |0
7| 10 | 31 |0
8| 81 | 13 |0
9| 1 | 111 |0
\.
-- Classification, k=3, squared_dist_norm2 named explicitly.
-- Expects one prediction per test point, compared in test-id order.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification with k=3'
)
FROM madlib_knn_result_classification;
-- Classification with every optional argument left at its default (only k=3
-- is supplied). Checks the neighbour ids and the distances reported for test
-- point 1.
--
-- The unnested values are sorted by the aggregate itself (ORDER BY x). Every
-- row of the subquery has id = 1, so ordering the aggregate by id leaves the
-- element order unspecified, and a subquery ORDER BY is not guaranteed to be
-- preserved into the aggregate.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3
);
SELECT assert(
    array_agg(x ORDER BY x) = '{1,2,3}',
    'Wrong output in classification with k=3'
)
FROM (
    SELECT unnest(k_nearest_neighbours) AS x
    FROM madlib_knn_result_classification
    WHERE id = 1
) y;
SELECT assert(
    array_agg(x ORDER BY x) = '{1,1,5}',
    'Wrong distances in classification with k=3'
)
FROM (
    SELECT unnest(distance) AS x
    FROM madlib_knn_result_classification
    WHERE id = 1
) y;
-- Regression on the float-labelled fixture with k=4 and squared_dist_norm2.
-- Expects one prediction per test point, compared in test-id order.
DROP TABLE IF EXISTS madlib_knn_result_regression;
SELECT knn(
    'knn_train_data_reg', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_regression',
    4, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,1,0.5,1,0.25,0.25}',
    'Wrong output in regression'
)
FROM madlib_knn_result_regression;
-- Regression with k=3 and the tenth argument set to True; checks the
-- neighbour ids reported for test point 1.
--
-- The aggregate is ordered by the unnested value x. All subquery rows share
-- id = 1, so ORDER BY id would not determine the element order, and a subquery
-- ORDER BY is not guaranteed to be preserved into the aggregate.
DROP TABLE IF EXISTS madlib_knn_result_regression;
SELECT knn(
    'knn_train_data_reg', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_regression',
    3, True
);
SELECT assert(
    array_agg(x ORDER BY x) = '{1,2,3}',
    'Wrong output in regression with k=3'
)
FROM (
    SELECT unnest(k_nearest_neighbours) AS x
    FROM madlib_knn_result_regression
    WHERE id = 1
) y;
-- Classification, k=3, distance function passed as NULL.
-- The expected predictions match the explicit squared_dist_norm2 run.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3, FALSE, NULL, FALSE
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification with k=3'
)
FROM madlib_knn_result_classification;

-- Classification, k=3, dist_norm1.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.dist_norm1'
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification with k=3'
)
FROM madlib_knn_result_classification;

-- Classification, k=3, dist_angle. The expected labels differ from the
-- norm-based runs above.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.dist_angle'
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,0,0,1,0,1}',
    'Wrong output in classification with k=3'
)
FROM madlib_knn_result_classification;

-- Classification, k=3, dist_tanimoto.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.dist_tanimoto'
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification with k=3'
)
FROM madlib_knn_result_classification;
-- Regression, k=4, dist_norm1.
DROP TABLE IF EXISTS madlib_knn_result_regression;
SELECT knn(
    'knn_train_data_reg', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_regression',
    4, FALSE, 'MADLIB_SCHEMA.dist_norm1'
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{1,1,0.5,1,0.25,0.25}',
    'Wrong output in regression'
)
FROM madlib_knn_result_regression;

-- Regression, k=4, dist_angle.
DROP TABLE IF EXISTS madlib_knn_result_regression;
SELECT knn(
    'knn_train_data_reg', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_regression',
    4, FALSE, 'MADLIB_SCHEMA.dist_angle'
);
SELECT assert(
    array_agg(prediction ORDER BY id) = '{0.75,0.25,0.25,0.75,0.25,1}',
    'Wrong output in regression'
)
FROM madlib_knn_result_regression;
-- Same calls as the squared_dist_norm2 tests, but with the final argument set
-- to True (presumably the weighted-average switch; confirm against the knn
-- signature). Predictions are cast to numeric before comparison.

-- Classification, k=3.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE
);
SELECT assert(
    array_agg(prediction::numeric ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification with k=3'
)
FROM madlib_knn_result_classification;

-- Regression, k=3.
DROP TABLE IF EXISTS madlib_knn_result_regression;
SELECT knn(
    'knn_train_data_reg', 'data', 'id', 'label',
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_regression',
    3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE
);
SELECT assert(
    array_agg(prediction::numeric ORDER BY id) = '{1,1,0.0408728591876018,1,0,0}',
    'Wrong output in regression'
)
FROM madlib_knn_result_regression;
-- The point-column arguments are array expressions instead of plain column
-- names. Expected predictions equal those of the plain-column runs that also
-- pass True as the final argument.

-- Classification: array slices concatenated on both the train and test side.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data[1:1]||data[2:2]', 'id', 'label',
    'knn_test_data', 'data[1:1]||data[2:2]', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE
);
SELECT assert(
    array_agg(prediction::numeric ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification'
)
FROM madlib_knn_result_classification;

-- Regression: same slice expressions on the float-labelled fixture.
DROP TABLE IF EXISTS madlib_knn_result_regression;
SELECT knn(
    'knn_train_data_reg', 'data[1:1]||data[2:2]', 'id', 'label',
    'knn_test_data', 'data[1:1]||data[2:2]', 'id',
    'madlib_knn_result_regression',
    3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE
);
SELECT assert(
    array_agg(prediction::numeric ORDER BY id) = '{1,1,0.0408728591876018,1,0,0}',
    'Wrong output in regression'
)
FROM madlib_knn_result_regression;

-- Classification: the training array is built from two scalar columns with an
-- ARRAY constructor; the test side still uses the slice expression.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data_expr', 'ARRAY[data1,data2]', 'id', 'label',
    'knn_test_data', 'data[1:1]||data[2:2]', 'id',
    'madlib_knn_result_classification',
    3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE
);
SELECT assert(
    array_agg(prediction::numeric ORDER BY id) = '{1,1,0,1,0,0}',
    'Wrong output in classification'
)
FROM madlib_knn_result_classification;
-- The label column argument is NULL: only the neighbour list of test point 1
-- is checked.
--
-- The aggregate is ordered by the unnested value x. All subquery rows share
-- id = 1, so ORDER BY id would not determine the element order, and a subquery
-- ORDER BY is not guaranteed to be preserved into the aggregate.
DROP TABLE IF EXISTS madlib_knn_result_classification;
SELECT knn(
    'knn_train_data', 'data', 'id', NULL,
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification',
    3
);
SELECT assert(
    array_agg(x ORDER BY x) = '{1,2,3}',
    'Wrong output in classification with k=3'
)
FROM (
    SELECT unnest(k_nearest_neighbours) AS x
    FROM madlib_knn_result_classification
    WHERE id = 1
) y;

-- The zero-argument and help-message overloads must run without raising.
SELECT knn();
SELECT knn('help');
-- Larger training fixture for the kd_tree tests: 100 two-dimensional points
-- stored as double precision arrays, with labels 0 (ids 1-31), 1 (ids 32-81)
-- and 2 (ids 82-100).
DROP TABLE IF EXISTS knn_train_data2;
CREATE TABLE knn_train_data2 (
    id    INTEGER,
    data  DOUBLE PRECISION[],
    label INTEGER
);
COPY knn_train_data2 (id, data, label) FROM STDIN DELIMITER '|';
1|{43983,164834}|0
2|{491231,38953}|0
3|{587484,467668}|0
4|{882448,507209}|0
5|{17326,595844}|0
6|{236408,453230}|0
7|{283929,237605}|0
8|{392623,153808}|0
9|{267864,179054}|0
10|{428486,618138}|0
11|{963752,141363}|0
12|{980623,652584}|0
13|{398411,894748}|0
14|{559681,670919}|0
15|{297984,171933}|0
16|{254190,341966}|0
17|{336766,745420}|0
18|{380918,924250}|0
19|{213087,263365}|0
20|{431458,230413}|0
21|{859208,667865}|0
22|{683642,143136}|0
23|{905470,76265}|0
24|{296944,173333}|0
25|{255319,725429}|0
26|{791471,219070}|0
27|{866791,772094}|0
28|{871653,265202}|0
29|{666841,431334}|0
30|{936120,964824}|0
31|{603267,190309}|0
32|{306790,940033}|1
33|{935729,687708}|1
34|{864282,148815}|1
35|{951072,295739}|1
36|{379228,810280}|1
37|{963604,62869}|1
38|{953416,869073}|1
39|{139133,250360}|1
40|{42406,394452}|1
41|{975789,833877}|1
42|{613521,842579}|1
43|{605970,485173}|1
44|{107780,272810}|1
45|{916507,43900}|1
46|{237634,519773}|1
47|{234208,544424}|1
48|{459805,169937}|1
49|{232131,324086}|1
50|{318751,183202}|1
51|{619825,697978}|1
52|{993482,583428}|1
53|{760847,946898}|1
54|{452501,899980}|1
55|{197257,494907}|1
56|{294431,173045}|1
57|{328783,907951}|1
58|{15624,934752}|1
59|{393124,123404}|1
60|{207562,309630}|1
61|{167303,445196}|1
62|{829402,401511}|1
63|{989619,289207}|1
64|{571447,221749}|1
65|{613292,890198}|1
66|{404951,233116}|1
67|{588176,398433}|1
68|{816544,349023}|1
69|{345330,269045}|1
70|{249002,542587}|1
71|{763951,543433}|1
72|{715632,92734}|1
73|{451384,731255}|1
74|{27485,844507}|1
75|{854659,235047}|1
76|{154137,21962}|1
77|{680243,983539}|1
78|{423473,669861}|1
79|{272745,994920}|1
80|{891610,886037}|1
81|{885117,296561}|1
82|{119153,473293}|2
83|{694994,935696}|2
84|{822315,40323}|2
85|{204741,71317}|2
86|{582910,968691}|2
87|{614749,298541}|2
88|{61424,66132}|2
89|{29796,88909}|2
90|{910639,884455}|2
91|{323956,64775}|2
92|{906416,4198}|2
93|{48314,329888}|2
94|{674059,321058}|2
95|{324807,565669}|2
96|{207094,209924}|2
97|{862229,326247}|2
98|{683217,557222}|2
99|{261943,505531}|2
100|{597545,466683}|2
\.
-- Test fixture for the kd_tree runs against knn_train_data2: five integer
-- query points.
-- Dropped first, like every other fixture in this script, so the script can be
-- rerun in a database where the table already exists.
DROP TABLE IF EXISTS knn_test_data2;
CREATE TABLE knn_test_data2 (
    id   INTEGER NOT NULL,
    data INTEGER[]
);
COPY knn_test_data2 (id, data) FROM STDIN DELIMITER '|';
1|{576848,180455}
2|{435374,191597}
3|{478996,496797}
4|{257729,508791}
5|{585706,168367}
\.
-- kd_tree smoke tests on the 100-row fixture. Each run only asserts that the
-- output table contains at least one row.

-- NULL label column, k=1, parameters depth=2 and leaf_nodes=2.
DROP TABLE IF EXISTS madlib_knn_result_classification_kd;
SELECT knn(
    'knn_train_data2', 'data', 'id', NULL,
    'knn_test_data2', 'data', 'id',
    'madlib_knn_result_classification_kd',
    1, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE,
    'kd_tree', 'depth=2, leaf_nodes=2'
);
SELECT assert(count(*) > 0, 'Wrong output with kd_tree')
FROM madlib_knn_result_classification_kd;

-- With the label column, k=2 and the twelfth argument set to True.
DROP TABLE IF EXISTS madlib_knn_result_classification_kd;
SELECT knn(
    'knn_train_data2', 'data', 'id', 'label',
    'knn_test_data2', 'data', 'id',
    'madlib_knn_result_classification_kd',
    2, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE,
    'kd_tree', 'depth=2, leaf_nodes=2'
);
SELECT assert(count(*) > 0, 'Wrong output with kd_tree')
FROM madlib_knn_result_classification_kd;
-- kd_tree smoke tests on the 9-row fixture with a NULL label column and k=2.
-- Each run only asserts that the output table contains at least one row.

-- Parameters depth=2 and leaf_nodes=1.
DROP TABLE IF EXISTS madlib_knn_result_classification_kd;
SELECT knn(
    'knn_train_data', 'data', 'id', NULL,
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification_kd',
    2, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE,
    'kd_tree', 'depth=2, leaf_nodes=1'
);
SELECT assert(count(*) > 0, 'Wrong output with kd_tree')
FROM madlib_knn_result_classification_kd;

-- Parameters depth=3 and leaf_nodes=2.
DROP TABLE IF EXISTS madlib_knn_result_classification_kd;
SELECT knn(
    'knn_train_data', 'data', 'id', NULL,
    'knn_test_data', 'data', 'id',
    'madlib_knn_result_classification_kd',
    2, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE,
    'kd_tree', 'depth=3, leaf_nodes=2'
);
SELECT assert(count(*) > 0, 'Wrong output with kd_tree')
FROM madlib_knn_result_classification_kd;