blob: 6c364769b536f25e7447b7c6433924128601330f [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Recursive depth-first search which in some cases can run into stack overflow errors in the
# SystemDS runtime. Not recommended, use dfs_iter instead!
#
# INPUT PARAMETERS:
# --------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# --------------------------------------------------------------------------------------------
# GRAPH matrix --- An adjacency matrix.
# start_vertex Integer --- The vertex to start the search from.
# marked matrix --- Column-vector which is used to mark already visited
# vertices. A value of 1 means already visited, 0 means
# unvisited.
# id matrix --- Column-vector which the connected component of each vertex
# is written to.
# current_component Integer --- The component id which is written to each visited vertex'
# entry in the id vector.
#
# Output:
# --------------------------------------------------------------------------------------------
# NAME TYPE MEANING
# --------------------------------------------------------------------------------------------
# id matrix --- Column-vector which the connected component of each vertex is written to.
# marked matrix --- Column-vector which is used to mark already visited vertices.
# A value of 1 means already visited, 0 means unvisited.
# --------------------------------------------------------------------------------------------
dfs = function(Matrix[Double] GRAPH, Integer vertex, Matrix[Double] marked, Matrix[Double] id, Integer count) return (Matrix[Double] id, Matrix[Double] marked) {
marked[vertex,] = 1;
id[vertex,] = count;
for (col in 1:ncol(GRAPH)) {
if (as.scalar(GRAPH[vertex,col])) {
if (!as.scalar(marked[col,])) {
[id, marked] = dfs(GRAPH, col, marked, id, count);
}
}
}
}
# Iterative depth-first search with a manually allocated stack in order to avoid stack overflow
# errors.
#
# INPUT PARAMETERS:
# --------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# --------------------------------------------------------------------------------------------
# GRAPH matrix --- An adjacency matrix.
# start_vertex Integer --- The vertex to start the search from.
# marked matrix --- Column-vector which is used to mark already visited
# vertices. A value of 1 means already visited, 0 means
# unvisited.
# id matrix --- Column-vector which the connected component of each vertex
# is written to.
# current_component Integer --- The component id which is written to each visited vertex'
# entry in the id vector.
#
# Output:
# --------------------------------------------------------------------------------------------
# NAME TYPE MEANING
# --------------------------------------------------------------------------------------------
# id matrix --- Column-vector which the connected component of each vertex is
# written to.
# marked matrix --- Column-vector which is used to mark already visited vertices.
# A value of 1 means already visited, 0 means unvisited.
# --------------------------------------------------------------------------------------------
dfs_iter = function(Matrix[Double] GRAPH, Integer start_vertex, Matrix[Double] marked, Matrix[Double] id, Integer current_component) return (Matrix[Double] id, Matrix[Double] marked) {
nvert = ncol(GRAPH);
stack_idx = matrix(0, rows=nvert, cols=1);
stack_vid = matrix(0, rows=nvert, cols=1);
stack_ptr = 1;
stack_vid[stack_ptr,] = start_vertex;
stack_idx[stack_ptr,] = 1;
while(stack_ptr > 0) {
vertex = as.scalar(stack_vid[stack_ptr,]);
neigh_idx = as.scalar(stack_idx[stack_ptr,]);
if (neigh_idx == 1) {
id[vertex,] = current_component;
marked[vertex,] = 1;
}
# find next not visited neighbour
cont = TRUE;
while (cont) {
if (as.scalar(GRAPH[vertex, neigh_idx]) & !as.scalar(marked[neigh_idx,])) {
cont = FALSE;
}
else {
neigh_idx = neigh_idx + 1;
if (neigh_idx > nvert) {
cont = FALSE;
}
}
}
if (neigh_idx > nvert ) {
stack_ptr = stack_ptr - 1;
}
else if (neigh_idx == nvert & as.scalar(marked[neigh_idx,])) {
stack_ptr = stack_ptr - 1;
}
else {
stack_ptr = stack_ptr + 1;
stack_vid[stack_ptr,] = neigh_idx;
stack_idx[stack_ptr,] = 1;
}
}
}
# Makes all connected components in the adjacency matrix GRAPH fully-connected.
#
# INPUT PARAMETERS:
# --------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# --------------------------------------------------------------------------------------------
# GRAPH matrix --- An adjacency matrix.
#
# Output:
# --------------------------------------------------------------------------------------------
# NAME TYPE MEANING
# --------------------------------------------------------------------------------------------
# GRAPH matrix --- An adjacency matrix with all connected components fully
# connected.
# --------------------------------------------------------------------------------------------
cluster_by_connected_components = function(Matrix[Double] ADJACENCY) return (Matrix[Double] COMPONENTS) {
#TODO could we reuse our components builtin?
rows = nrow(ADJACENCY);
marked = matrix(0, rows=rows, cols=1);
id = matrix(0, rows=nrow(ADJACENCY), cols=1);
count = 1;
for (s in 1:rows) {
if (!as.scalar(marked[s,])) {
#[id, marked] = dfs(ADJACENCY, s, marked, id, count);
[id, marked] = dfs_iter(ADJACENCY, s, marked, id, count);
count = count + 1;
}
}
COMPONENTS = outer(id, t(id), "==");
COMPONENTS = COMPONENTS - diag(diag(COMPONENTS));
}