| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| /* ----------------------------------------------------------------------------- |
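| * Test knn: classification and regression with several distance functions, |
| * array-expression inputs, a NULL label column, and kd_tree search. |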
| * |
| * -------------------------------------------------------------------------- */ |
| |
| -- Training fixture: nine labelled 2-D integer points. |
| -- Ids 1-5 carry label 1, ids 6-9 carry label 0. |
| DROP TABLE IF EXISTS knn_train_data; |
| CREATE TABLE knn_train_data ( |
|     id    integer, |
|     data  integer[], |
|     label integer |
| ); |
| COPY knn_train_data (id, data, label) FROM stdin DELIMITER '|'; |
| 1|{1,1}|1 |
| 2|{2,2}|1 |
| 3|{3,3}|1 |
| 4|{4,4}|1 |
| 5|{4,5}|1 |
| 6|{20,50}|0 |
| 7|{10,31}|0 |
| 8|{81,13}|0 |
| 9|{1,111}|0 |
| \. |
| -- Regression fixture: the same nine points as knn_train_data, with the |
| -- label stored as a float (1.0 for ids 1-5, 0.0 for ids 6-9). |
| DROP TABLE IF EXISTS knn_train_data_reg; |
| CREATE TABLE knn_train_data_reg ( |
|     id    integer, |
|     data  integer[], |
|     label float |
| ); |
| COPY knn_train_data_reg (id, data, label) FROM stdin DELIMITER '|'; |
| 1|{1,1}|1.0 |
| 2|{2,2}|1.0 |
| 3|{3,3}|1.0 |
| 4|{4,4}|1.0 |
| 5|{4,5}|1.0 |
| 6|{20,50}|0.0 |
| 7|{10,31}|0.0 |
| 8|{81,13}|0.0 |
| 9|{1,111}|0.0 |
| \. |
| -- Test fixture: six unlabelled 2-D integer points to be predicted. |
| DROP TABLE IF EXISTS knn_test_data; |
| CREATE TABLE knn_test_data ( |
|     id   integer, |
|     data integer[] |
| ); |
| COPY knn_test_data (id, data) FROM stdin DELIMITER '|'; |
| 1|{2,1} |
| 2|{2,6} |
| 3|{15,40} |
| 4|{12,1} |
| 5|{2,90} |
| 6|{50,45} |
| \. |
| -- Training fixture with the two coordinates held in separate scalar |
| -- columns (data1, data2) instead of an array; the coordinates and labels |
| -- match knn_train_data. |
| DROP TABLE IF EXISTS knn_train_data_expr; |
| CREATE TABLE knn_train_data_expr ( |
|     id    integer, |
|     data1 integer, |
|     data2 integer, |
|     label integer |
| ); |
| COPY knn_train_data_expr (id, data1, data2, label) FROM stdin DELIMITER '|'; |
| 1| 1 | 1 |1 |
| 2| 2 | 2 |1 |
| 3| 3 | 3 |1 |
| 4| 4 | 4 |1 |
| 5| 4 | 5 |1 |
| 6| 20 | 50 |0 |
| 7| 10 | 31 |0 |
| 8| 81 | 13 |0 |
| 9| 1 | 111 |0 |
| \. |
| |
| |
| |
| -- Classification with squared_dist_norm2 and both boolean flags FALSE: |
| -- check the predicted label of every test point. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification with k=3') |
| FROM madlib_knn_result_classification; |
| |
| -- Classification with only the first nine arguments supplied: check the |
| -- neighbour ids and the distances reported for test point 1. |
| -- The unnested values are aggregated ORDER BY x. Every row of the subquery |
| -- has id = 1, so ordering the aggregate by id would leave the element order |
| -- undefined, and a subquery ORDER BY is not guaranteed to reach array_agg. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3); |
| SELECT assert( |
|     array_agg(x ORDER BY x) = '{1,2,3}', |
|     'Wrong output in classification with k=3') |
| FROM ( |
|     SELECT unnest(k_nearest_neighbours) AS x |
|     FROM madlib_knn_result_classification |
|     WHERE id = 1 |
| ) y; |
| SELECT assert( |
|     array_agg(x ORDER BY x) = '{1,1,5}', |
|     'Wrong distances in classification with k=3') |
| FROM ( |
|     SELECT unnest(distance) AS x |
|     FROM madlib_knn_result_classification |
|     WHERE id = 1 |
| ) y; |
| |
| -- Regression (float labels) with k=4, squared_dist_norm2 and both boolean |
| -- flags FALSE: check the predicted value of every test point. |
| DROP TABLE IF EXISTS madlib_knn_result_regression; |
| SELECT knn( |
|     'knn_train_data_reg', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_regression', |
|     4, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,1,0.5,1,0.25,0.25}', |
|     'Wrong output in regression') |
| FROM madlib_knn_result_regression; |
| |
| -- Regression with k=3 and the tenth argument TRUE: check the neighbour ids |
| -- reported for test point 1. |
| -- The unnested ids are aggregated ORDER BY x. Every row of the subquery has |
| -- id = 1, so ordering the aggregate by id would leave the element order |
| -- undefined. |
| DROP TABLE IF EXISTS madlib_knn_result_regression; |
| SELECT knn( |
|     'knn_train_data_reg', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_regression', |
|     3, TRUE); |
| SELECT assert( |
|     array_agg(x ORDER BY x) = '{1,2,3}', |
|     'Wrong output in regression with k=3') |
| FROM ( |
|     SELECT unnest(k_nearest_neighbours) AS x |
|     FROM madlib_knn_result_regression |
|     WHERE id = 1 |
| ) y; |
| |
| -- Classification, k=3, distance function argument passed as NULL. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, NULL, FALSE); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification with k=3') |
| FROM madlib_knn_result_classification; |
| |
| -- Classification, k=3, dist_norm1. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.dist_norm1'); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification with k=3') |
| FROM madlib_knn_result_classification; |
| |
| -- Classification, k=3, dist_angle (expected labels differ from the other |
| -- distance functions). |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.dist_angle'); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,0,0,1,0,1}', |
|     'Wrong output in classification with k=3') |
| FROM madlib_knn_result_classification; |
| |
| -- Classification, k=3, dist_tanimoto. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.dist_tanimoto'); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification with k=3') |
| FROM madlib_knn_result_classification; |
| |
| -- Regression, k=4, dist_norm1. |
| DROP TABLE IF EXISTS madlib_knn_result_regression; |
| SELECT knn( |
|     'knn_train_data_reg', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_regression', |
|     4, FALSE, 'MADLIB_SCHEMA.dist_norm1'); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{1,1,0.5,1,0.25,0.25}', |
|     'Wrong output in regression') |
| FROM madlib_knn_result_regression; |
| |
| -- Regression, k=4, dist_angle. |
| DROP TABLE IF EXISTS madlib_knn_result_regression; |
| SELECT knn( |
|     'knn_train_data_reg', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_regression', |
|     4, FALSE, 'MADLIB_SCHEMA.dist_angle'); |
| SELECT assert( |
|     array_agg(prediction ORDER BY id) = '{0.75,0.25,0.25,0.75,0.25,1}', |
|     'Wrong output in regression') |
| FROM madlib_knn_result_regression; |
| |
| |
| -- The calls in this group pass TRUE as the twelfth argument |
| -- (presumably weighted averaging; confirm against the knn documentation). |
| -- Predictions are cast to numeric before being compared. |
| |
| -- Classification, k=3, array column as input. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE); |
| SELECT assert( |
|     array_agg(CAST(prediction AS numeric) ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification with k=3') |
| FROM madlib_knn_result_classification; |
| |
| -- Regression, k=3, array column as input. |
| DROP TABLE IF EXISTS madlib_knn_result_regression; |
| SELECT knn( |
|     'knn_train_data_reg', 'data', 'id', 'label', |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_regression', |
|     3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE); |
| SELECT assert( |
|     array_agg(CAST(prediction AS numeric) ORDER BY id) = '{1,1,0.0408728591876018,1,0,0}', |
|     'Wrong output in regression') |
| FROM madlib_knn_result_regression; |
| |
| -- Classification, k=3, array-slice expressions as the point columns of both |
| -- the training and the test table. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data[1:1]||data[2:2]', 'id', 'label', |
|     'knn_test_data', 'data[1:1]||data[2:2]', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE); |
| SELECT assert( |
|     array_agg(CAST(prediction AS numeric) ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification') |
| FROM madlib_knn_result_classification; |
| |
| -- Regression, k=3, array-slice expressions as the point columns. |
| DROP TABLE IF EXISTS madlib_knn_result_regression; |
| SELECT knn( |
|     'knn_train_data_reg', 'data[1:1]||data[2:2]', 'id', 'label', |
|     'knn_test_data', 'data[1:1]||data[2:2]', 'id', |
|     'madlib_knn_result_regression', |
|     3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE); |
| SELECT assert( |
|     array_agg(CAST(prediction AS numeric) ORDER BY id) = '{1,1,0.0408728591876018,1,0,0}', |
|     'Wrong output in regression') |
| FROM madlib_knn_result_regression; |
| |
| -- Classification, k=3, training points built from the scalar columns of |
| -- knn_train_data_expr with an ARRAY constructor expression. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data_expr', 'ARRAY[data1,data2]', 'id', 'label', |
|     'knn_test_data', 'data[1:1]||data[2:2]', 'id', |
|     'madlib_knn_result_classification', |
|     3, FALSE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE); |
| SELECT assert( |
|     array_agg(CAST(prediction AS numeric) ORDER BY id) = '{1,1,0,1,0,0}', |
|     'Wrong output in classification') |
| FROM madlib_knn_result_classification; |
| |
| |
| |
| -- NULL label column: only the neighbour list of test point 1 is checked. |
| -- The unnested ids are aggregated ORDER BY x. Every row of the subquery has |
| -- id = 1, so ordering the aggregate by id would leave the element order |
| -- undefined. |
| DROP TABLE IF EXISTS madlib_knn_result_classification; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', NULL, |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification', |
|     3); |
| SELECT assert( |
|     array_agg(x ORDER BY x) = '{1,2,3}', |
|     'Wrong output in classification with k=3') |
| FROM ( |
|     SELECT unnest(k_nearest_neighbours) AS x |
|     FROM madlib_knn_result_classification |
|     WHERE id = 1 |
| ) y; |
| |
| -- The zero-argument and help forms only need to run without error. |
| SELECT knn(); |
| SELECT knn('help'); |
| |
| |
| |
| -- Larger training fixture used by the kd_tree tests: 100 two-dimensional |
| -- points stored as double precision arrays. |
| -- Labels: 0 for ids 1-31, 1 for ids 32-81, 2 for ids 82-100. |
| DROP TABLE IF EXISTS knn_train_data2; |
| CREATE TABLE knn_train_data2 ( |
|     id    integer, |
|     data  double precision[], |
|     label integer |
| ); |
| COPY knn_train_data2 (id, data, label) FROM stdin DELIMITER '|'; |
| 1|{43983,164834}|0 |
| 2|{491231,38953}|0 |
| 3|{587484,467668}|0 |
| 4|{882448,507209}|0 |
| 5|{17326,595844}|0 |
| 6|{236408,453230}|0 |
| 7|{283929,237605}|0 |
| 8|{392623,153808}|0 |
| 9|{267864,179054}|0 |
| 10|{428486,618138}|0 |
| 11|{963752,141363}|0 |
| 12|{980623,652584}|0 |
| 13|{398411,894748}|0 |
| 14|{559681,670919}|0 |
| 15|{297984,171933}|0 |
| 16|{254190,341966}|0 |
| 17|{336766,745420}|0 |
| 18|{380918,924250}|0 |
| 19|{213087,263365}|0 |
| 20|{431458,230413}|0 |
| 21|{859208,667865}|0 |
| 22|{683642,143136}|0 |
| 23|{905470,76265}|0 |
| 24|{296944,173333}|0 |
| 25|{255319,725429}|0 |
| 26|{791471,219070}|0 |
| 27|{866791,772094}|0 |
| 28|{871653,265202}|0 |
| 29|{666841,431334}|0 |
| 30|{936120,964824}|0 |
| 31|{603267,190309}|0 |
| 32|{306790,940033}|1 |
| 33|{935729,687708}|1 |
| 34|{864282,148815}|1 |
| 35|{951072,295739}|1 |
| 36|{379228,810280}|1 |
| 37|{963604,62869}|1 |
| 38|{953416,869073}|1 |
| 39|{139133,250360}|1 |
| 40|{42406,394452}|1 |
| 41|{975789,833877}|1 |
| 42|{613521,842579}|1 |
| 43|{605970,485173}|1 |
| 44|{107780,272810}|1 |
| 45|{916507,43900}|1 |
| 46|{237634,519773}|1 |
| 47|{234208,544424}|1 |
| 48|{459805,169937}|1 |
| 49|{232131,324086}|1 |
| 50|{318751,183202}|1 |
| 51|{619825,697978}|1 |
| 52|{993482,583428}|1 |
| 53|{760847,946898}|1 |
| 54|{452501,899980}|1 |
| 55|{197257,494907}|1 |
| 56|{294431,173045}|1 |
| 57|{328783,907951}|1 |
| 58|{15624,934752}|1 |
| 59|{393124,123404}|1 |
| 60|{207562,309630}|1 |
| 61|{167303,445196}|1 |
| 62|{829402,401511}|1 |
| 63|{989619,289207}|1 |
| 64|{571447,221749}|1 |
| 65|{613292,890198}|1 |
| 66|{404951,233116}|1 |
| 67|{588176,398433}|1 |
| 68|{816544,349023}|1 |
| 69|{345330,269045}|1 |
| 70|{249002,542587}|1 |
| 71|{763951,543433}|1 |
| 72|{715632,92734}|1 |
| 73|{451384,731255}|1 |
| 74|{27485,844507}|1 |
| 75|{854659,235047}|1 |
| 76|{154137,21962}|1 |
| 77|{680243,983539}|1 |
| 78|{423473,669861}|1 |
| 79|{272745,994920}|1 |
| 80|{891610,886037}|1 |
| 81|{885117,296561}|1 |
| 82|{119153,473293}|2 |
| 83|{694994,935696}|2 |
| 84|{822315,40323}|2 |
| 85|{204741,71317}|2 |
| 86|{582910,968691}|2 |
| 87|{614749,298541}|2 |
| 88|{61424,66132}|2 |
| 89|{29796,88909}|2 |
| 90|{910639,884455}|2 |
| 91|{323956,64775}|2 |
| 92|{906416,4198}|2 |
| 93|{48314,329888}|2 |
| 94|{674059,321058}|2 |
| 95|{324807,565669}|2 |
| 96|{207094,209924}|2 |
| 97|{862229,326247}|2 |
| 98|{683217,557222}|2 |
| 99|{261943,505531}|2 |
| 100|{597545,466683}|2 |
| \. |
| |
| |
| -- Test fixture for the kd_tree tests: five unlabelled 2-D integer points. |
| -- Dropped first, like every other fixture in this file, so the script can |
| -- be re-run in the same database without failing on CREATE TABLE. |
| DROP TABLE IF EXISTS knn_test_data2; |
| CREATE TABLE knn_test_data2 ( |
|     id   integer NOT NULL, |
|     data integer[] |
| ); |
| |
| COPY knn_test_data2 (id, data) FROM stdin DELIMITER '|'; |
| 1|{576848,180455} |
| 2|{435374,191597} |
| 3|{478996,496797} |
| 4|{257729,508791} |
| 5|{585706,168367} |
| \. |
| |
| -- kd_tree search on the 100-point data, NULL label column, k=1. |
| -- Only checks that the output table is non-empty. |
| DROP TABLE IF EXISTS madlib_knn_result_classification_kd; |
| SELECT knn( |
|     'knn_train_data2', 'data', 'id', NULL, |
|     'knn_test_data2', 'data', 'id', |
|     'madlib_knn_result_classification_kd', |
|     1, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE, |
|     'kd_tree', 'depth=2, leaf_nodes=2'); |
| SELECT assert(count(*) > 0, 'Wrong output with kd_tree') |
| FROM madlib_knn_result_classification_kd; |
| |
| -- kd_tree search on the 100-point data, with labels, k=2 and the twelfth |
| -- argument TRUE. Only checks that the output table is non-empty. |
| DROP TABLE IF EXISTS madlib_knn_result_classification_kd; |
| SELECT knn( |
|     'knn_train_data2', 'data', 'id', 'label', |
|     'knn_test_data2', 'data', 'id', |
|     'madlib_knn_result_classification_kd', |
|     2, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', TRUE, |
|     'kd_tree', 'depth=2, leaf_nodes=2'); |
| SELECT assert(count(*) > 0, 'Wrong output with kd_tree') |
| FROM madlib_knn_result_classification_kd; |
| |
| -- kd_tree search on the 9-point data, NULL label column, k=2, |
| -- depth=2 and leaf_nodes=1. Only checks that the output table is non-empty. |
| DROP TABLE IF EXISTS madlib_knn_result_classification_kd; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', NULL, |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification_kd', |
|     2, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE, |
|     'kd_tree', 'depth=2, leaf_nodes=1'); |
| SELECT assert(count(*) > 0, 'Wrong output with kd_tree') |
| FROM madlib_knn_result_classification_kd; |
| |
| -- Same call with depth=3 and leaf_nodes=2. |
| DROP TABLE IF EXISTS madlib_knn_result_classification_kd; |
| SELECT knn( |
|     'knn_train_data', 'data', 'id', NULL, |
|     'knn_test_data', 'data', 'id', |
|     'madlib_knn_result_classification_kd', |
|     2, TRUE, 'MADLIB_SCHEMA.squared_dist_norm2', FALSE, |
|     'kd_tree', 'depth=3, leaf_nodes=2'); |
| SELECT assert(count(*) > 0, 'Wrong output with kd_tree') |
| FROM madlib_knn_result_classification_kd; |