| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # Implements the outlier detection/prediction algorithm using a DBScan model |
| # |
| # INPUT: |
| # --------------------------------------------- |
| # X The input Matrix to do outlier detection on. |
| # clusterModel Model of clusters to predict outliers against. |
| # eps Maximum distance between two points for one to be considered reachable for the other. |
| # --------------------------------------------- |
| # |
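| # OUTPUT: |
| # ---------------------------------------------- |
| # cluster Column vector holding one entry (value 1) for each row of X that has at least one row of clusterModel within eps. |
| # outlierPoints Prediction flags, one 0/1 entry per row of X: 1 if at least one row of clusterModel lies within eps of that row, 0 otherwise. |
| # ---------------------------------------------- |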
| |
m_dbscanApply = function (Matrix[Double] X, Matrix[Double] clusterModel, Double eps)
  return (Matrix[Double] cluster, Matrix[Double] outlierPoints)
{
  # Flags every row of X that has at least one row of clusterModel within
  # distance eps.
  #
  # X             points to score, one point per row
  # clusterModel  reference points, one point per row; must have the same
  #               number of columns as X and at least one row
  # eps           distance threshold; negative values are rejected
  #
  # outlierPoints nrow(X) x 1 vector of 0/1 flags; 1 means at least one model
  #               row is within eps of that row of X.
  #               NOTE(review): despite the name, a 1 marks a point that is
  #               close to the model -- confirm the intended polarity with callers.
  # cluster       the flagged (non-zero) entries of outlierPoints, compacted
  #               with removeEmpty.

  num_features_Xtest = ncol(X);
  num_features_model = ncol(clusterModel);
  num_rows_model = nrow(clusterModel);

  if(num_features_Xtest != num_features_model) { stop("DBSCAN Outlier: Stopping due to invalid inputs: features need to match"); }
  # The guard only rejects negative values, so eps == 0 is accepted; the
  # message states exactly that condition.
  if(eps < 0) { stop("DBSCAN Outlier: Stopping due to invalid inputs: Epsilon (eps) should be greater than or equal to 0"); }
  if(num_rows_model <= 0) { stop("DBSCAN Outlier: Stopping due to invalid inputs: Model is empty"); }

  # Stack the model rows on top of the query rows and let the dist builtin
  # produce all pairwise distances (presumably Euclidean -- defined outside this file).
  Xall = rbind(clusterModel, X);
  neighbors = dist(Xall);

  # Exact-zero distances are bumped to a tiny positive value so that a query
  # point identical to a model point still passes the strict "0 <" test below.
  neighbors = replace(target = neighbors, pattern = 0, replacement = 2.225e-307);

  # Keep only the (query rows) x (model columns) block. This block lies strictly
  # below the diagonal of the full matrix, so no self-distance can appear in it
  # and the diagonal needs no special treatment.
  Xtest_dists = neighbors[(num_rows_model+1):nrow(Xall), 1:num_rows_model];

  # 0/1 indicator per (query, model) pair: positive distance that is at most eps.
  withinEps = ((Xtest_dists <= eps) * (0 < Xtest_dists));

  # A query row is flagged as soon as one model row is within eps.
  outlierPoints = rowSums(withinEps) >= 1;

  # Drop the rows whose flag is 0, leaving only the flagged entries.
  cluster = removeEmpty(target=outlierPoints, margin="rows", select=outlierPoints);
}