blob: 54fcddf39239752c45d9e3b5037c68519ae5bc7c [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*//* ----------------------------------------------------------------------- */
DROP TABLE IF EXISTS dbscan_train_data;
CREATE TABLE dbscan_train_data (
id_in integer,
data integer[]);
copy dbscan_train_data (id_in, data) FROM stdin delimiter '|';
1|{1,1}
2|{2,2}
3|{3,3}
4|{4,4}
5|{4,8}
6|{17,8}
7|{19,8}
8|{19,9}
9|{19,10}
10|{3,111}
11|{3,112}
12|{3,113}
13|{8,113}
\.
DROP TABLE IF EXISTS out1, out1_summary;
SELECT dbscan('dbscan_train_data','out1','id_in','data',20,4,'squared_dist_norm2','brute');
SELECT assert(count(DISTINCT id_in) = 5, 'Incorrect cluster 0') FROM out1 WHERE cluster_id = 0 and id_in=ANY(ARRAY[1,2,3,4,5]);
SELECT assert(count(DISTINCT id_in) = 4, 'Incorrect cluster 1') FROM out1 WHERE cluster_id = 1 and id_in=ANY(ARRAY[6,7,8,9]);
SELECT assert(dbscan_predict('out1', array[0,0]::double precision[]) = 0, 'Incorrect predict 0');
SELECT assert(dbscan_predict('out1', array[9.1,10.8]::double precision[]) = 1, 'Incorrect predict 1');
SELECT assert(dbscan_predict('out1', array[9,113]::double precision[]) IS NULL, 'Incorrect predict NULL');
DROP TABLE IF EXISTS dbscan_train_data2;
CREATE TABLE dbscan_train_data2 (pid int, points double precision[]);
INSERT INTO dbscan_train_data2 VALUES
(1, '{1, 1}'),
(2, '{2, 1}'),
(3, '{1, 2}'),
(4, '{2, 2}'),
(5, '{3, 5}'),
(6, '{3, 9}'),
(7, '{3, 10}'),
(8, '{4, 10}'),
(9, '{4, 11}'),
(10, '{5, 10}'),
(11, '{7, 10}'),
(12, '{10, 9}'),
(13, '{10, 6}'),
(14, '{9, 5}'),
(15, '{10, 5}'),
(16, '{11, 5}'),
(17, '{9, 4}'),
(18, '{10, 4}'),
(19, '{11, 4}'),
(20, '{10, 3}');
DROP TABLE IF EXISTS dbscan_result, dbscan_result_summary;
SELECT dbscan(
'dbscan_train_data2', -- source table
'dbscan_result', -- output table
'pid', -- point id column
'points', -- data point
1.75, -- epsilon
4, -- min samples
'dist_norm2', -- metric
'brute_force'); -- algorithm
SELECT * FROM dbscan_result ORDER BY pid;
SELECT assert(count(DISTINCT cluster_id) = 3, 'Incorrect cluster count') FROM dbscan_result;