blob: e3ab9723cb49cfda7119d55618d90a722e9874e1 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Implements the outlier detection/prediction algorithm using a DBScan model
#
# INPUT:
# ---------------------------------------------
# X The input Matrix to do outlier detection on.
# clusterModel Model of clusters to predict outliers against.
# eps Maximum distance between two points for one to be considered reachable for the other.
# ---------------------------------------------
#
# OUTPUT:
# ----------------------------------------------
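# cluster The entries of outlierPoints that are equal to 1, with all zero rows removed
# outlierPoints Outlier prediction per row of X: 1 if at least one row of clusterModel is within eps of that row, 0 otherwise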
# ----------------------------------------------
m_dbscanApply = function (Matrix[Double] X, Matrix[Double] clusterModel, Double eps)
  return (Matrix[Double] cluster, Matrix[Double] outlierPoints)
{
  # Flags, for every row of X, whether at least one row of clusterModel lies
  # within distance eps of it.
  #
  # X             rows to classify; must have as many columns as clusterModel
  # clusterModel  reference rows to measure against; must not be empty
  # eps           distance threshold; must not be negative
  #
  # outlierPoints 0/1 column vector with one row per row of X: 1 if some row of
  #               clusterModel is within eps of that row, 0 otherwise
  # cluster       outlierPoints with its zero rows removed
  #
  # Stops with an error if the column counts differ, eps is negative, or
  # clusterModel has no rows.

  num_features_Xtest = ncol(X);
  num_features_model = ncol(clusterModel);
  num_rows_model = nrow(clusterModel);

  if(num_features_Xtest != num_features_model) {
    stop("DBSCAN Outlier: Stopping due to invalid inputs: features need to match");
  }
  # eps == 0 is accepted: zero distances are bumped to a tiny positive value
  # below, so no row is flagged in that case. Only negative values are rejected.
  if(eps < 0) {
    stop("DBSCAN Outlier: Stopping due to invalid inputs: Epsilon (eps) should not be negative");
  }
  if(num_rows_model <= 0) {
    stop("DBSCAN Outlier: Stopping due to invalid inputs: Model is empty");
  }

  # Stack the model rows on top of the rows of X and take all pairwise distances.
  Xall = rbind(clusterModel, X);
  neighbors = dist(Xall);

  # Keep only the block that relates rows of X (rows) to model rows (columns).
  # This block never touches the diagonal of the full distance matrix, so no
  # self-distance handling is required.
  Xtest_dists = neighbors[(num_rows_model+1):nrow(Xall), 1:num_rows_model];

  # A row of X that coincides with a model row has distance exactly 0. Replace
  # those zeros by a tiny positive value so they still pass the `0 < d` test.
  Xtest_dists = replace(target = Xtest_dists, pattern = 0, replacement = 2.225e-307);

  withinEps = ((Xtest_dists <= eps) * (0 < Xtest_dists));
  outlierPoints = rowSums(withinEps) >= 1;
  cluster = removeEmpty(target=outlierPoints, margin="rows", select=outlierPoints);
}