blob: dfbe8bd700e39f3d44f369d02d915d4052bfe05d [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Computes the connected components of a graph and returns a
# vector indicating the assignment of vertices to components,
# where each component is identified by the maximum vertex ID
# (i.e., row/column position of the input graph)
#
# INPUT:
# -----------------------------------------------------------------------------------------------
# X Location to read the matrix of feature vectors
# Y Location to read the matrix with category labels
# icpt Intercept presence, shifting and rescaling X columns: 0 = no intercept,
# no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X;
# 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
# tol tolerance ("epsilon")
# reg regularization parameter (lambda = 1/C); intercept is not regularized
# maxi max. number of outer (Newton) iterations
# maxii max. number of inner (conjugate gradient) iterations, 0 = no max
# verbose flag specifying if logging information should be printed
# -----------------------------------------------------------------------------------------------
#
# OUTPUT:
# ----------------------------------------------------------------------------------------------------
# betas regression betas as output for prediction
# ----------------------------------------------------------------------------------------------------
m_components = function(Matrix[Double] G, Integer maxi = 0, Boolean verbose = TRUE)
return (Matrix[Double] C)
{
# best effort check for symmetry (not exact but fast)
if( sum(rowSums(G) != t(colSums(G))) > 0 ) {
stop("Connected Components: input graph needs to be "
+ "symmetric but rowSums and colSums don't match up.");
}
# initialize state with vertex ids
c = seq(1,nrow(G));
diff = Inf;
iter = 1;
# iterative computation of connected components
while( diff > 0 & (maxi==0 | iter<=maxi) ) {
u = max(rowMaxs(G * t(c)), c);
diff = sum(u != c)
c = u; # update assignment
if( verbose )
print("Connected components: iter = "+iter+", #diff = "+diff);
iter = iter + 1;
}
C = c;
}