blob: 9b0cfe1ca9fe3d445c13d94c4c65e623ff3f1ad3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.giraph.examples;
import org.apache.giraph.graph.BasicComputation;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.Vertex;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
* Implementation of the HCC algorithm that identifies connected components and
* assigns each vertex its "component identifier" (the smallest vertex id
* in the component)
*
* The idea behind the algorithm is very simple: propagate the smallest
* vertex id along the edges to all vertices of a connected component. The
* number of supersteps needed equals the largest diameter among all
* components, plus 1.
*
* The original Hadoop-based variant of this algorithm was proposed by
* U Kang, Charalampos E. Tsourakakis and Christos Faloutsos in
* "PEGASUS: Mining Peta-Scale Graphs", 2010
*
* http://www.cs.cmu.edu/~ukang/papers/PegasusKAIS.pdf
*/
@Algorithm(
    name = "Connected components",
    description = "Finds connected components of the graph"
)
public class ConnectedComponentsComputation extends
    BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> {
  /**
   * Runs one superstep of minimum-label propagation for a single vertex.
   * In superstep 0 the label is derived from the ids of the adjacent
   * vertices; in every later superstep it is derived from the incoming
   * messages. The vertex votes to halt at the end of every call.
   *
   * @param vertex Vertex whose value holds its current component id
   * @param messages Component ids received from the previous superstep
   * @throws IOException Declared by the overridden method signature
   */
  @Override
  public void compute(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) throws IOException {
    final int startingLabel = vertex.getValue().get();
    if (getSuperstep() == 0) {
      seedFromNeighbors(vertex, startingLabel);
    } else {
      adoptSmallestMessage(vertex, messages, startingLabel);
    }
    vertex.voteToHalt();
  }

  /**
   * Superstep-0 handling: picks the smallest id among the vertex's current
   * value and the target ids of its edges. When that minimum differs from
   * the current value it is stored and sent only to neighbors whose id is
   * larger than it.
   *
   * @param vertex Vertex being processed
   * @param startingLabel Value the vertex held on entry to compute
   * @throws IOException Kept for parity with the caller's signature
   */
  private void seedFromNeighbors(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      int startingLabel) throws IOException {
    int smallest = startingLabel;
    for (Edge<IntWritable, NullWritable> outEdge : vertex.getEdges()) {
      smallest = Math.min(smallest, outEdge.getTargetVertexId().get());
    }
    if (smallest == startingLabel) {
      // No neighbor has a smaller id, so there is nothing to announce
      return;
    }
    vertex.setValue(new IntWritable(smallest));
    for (Edge<IntWritable, NullWritable> outEdge : vertex.getEdges()) {
      IntWritable targetId = outEdge.getTargetVertexId();
      // Neighbors with an id at or below the new label are skipped
      if (targetId.get() > smallest) {
        sendMessage(targetId, vertex.getValue());
      }
    }
  }

  /**
   * Handling for supersteps after the first: takes the minimum over all
   * received ids and, if it is strictly smaller than the current value,
   * stores it and forwards it along every edge.
   *
   * @param vertex Vertex being processed
   * @param messages Component ids received from the previous superstep
   * @param startingLabel Value the vertex held on entry to compute
   * @throws IOException Kept for parity with the caller's signature
   */
  private void adoptSmallestMessage(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages,
      int startingLabel) throws IOException {
    int smallest = startingLabel;
    for (IntWritable received : messages) {
      smallest = Math.min(smallest, received.get());
    }
    if (smallest < startingLabel) {
      vertex.setValue(new IntWritable(smallest));
      sendMessageToAllEdges(vertex, vertex.getValue());
    }
  }
}