| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.giraph.examples; |
| |
| import org.apache.giraph.graph.BasicComputation; |
| import org.apache.giraph.edge.Edge; |
| import org.apache.giraph.graph.Vertex; |
| import org.apache.hadoop.io.IntWritable; |
| import org.apache.hadoop.io.NullWritable; |
| |
| import java.io.IOException; |
| |
| /** |
| * Implementation of the HCC algorithm that identifies connected components and |
| * assigns each vertex its "component identifier" (the smallest vertex id |
| * in the component) |
| * |
| * The idea behind the algorithm is very simple: propagate the smallest |
| * vertex id along the edges to all vertices of a connected component. The |
| * number of supersteps necessary is equal to the length of the maximum |
| * diameter of all components + 1 |
| * |
| * The original Hadoop-based variant of this algorithm was proposed by Kang, |
| * Charalampos, Tsourakakis and Faloutsos in |
| * "PEGASUS: Mining Peta-Scale Graphs", 2010 |
| * |
| * http://www.cs.cmu.edu/~ukang/papers/PegasusKAIS.pdf |
| */ |
| @Algorithm( |
| name = "Connected components", |
| description = "Finds connected components of the graph" |
| ) |
| public class ConnectedComponentsComputation extends |
| BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> { |
| /** |
| * Propagates the smallest vertex id to all neighbors. Will always choose to |
| * halt and only reactivate if a smaller id has been sent to it. |
| * |
| * @param vertex Vertex |
| * @param messages Iterator of messages from the previous superstep. |
| * @throws IOException |
| */ |
| @Override |
| public void compute( |
| Vertex<IntWritable, IntWritable, NullWritable> vertex, |
| Iterable<IntWritable> messages) throws IOException { |
| int currentComponent = vertex.getValue().get(); |
| |
| // First superstep is special, because we can simply look at the neighbors |
| if (getSuperstep() == 0) { |
| for (Edge<IntWritable, NullWritable> edge : vertex.getEdges()) { |
| int neighbor = edge.getTargetVertexId().get(); |
| if (neighbor < currentComponent) { |
| currentComponent = neighbor; |
| } |
| } |
| // Only need to send value if it is not the own id |
| if (currentComponent != vertex.getValue().get()) { |
| vertex.setValue(new IntWritable(currentComponent)); |
| for (Edge<IntWritable, NullWritable> edge : vertex.getEdges()) { |
| IntWritable neighbor = edge.getTargetVertexId(); |
| if (neighbor.get() > currentComponent) { |
| sendMessage(neighbor, vertex.getValue()); |
| } |
| } |
| } |
| |
| vertex.voteToHalt(); |
| return; |
| } |
| |
| boolean changed = false; |
| // did we get a smaller id ? |
| for (IntWritable message : messages) { |
| int candidateComponent = message.get(); |
| if (candidateComponent < currentComponent) { |
| currentComponent = candidateComponent; |
| changed = true; |
| } |
| } |
| |
| // propagate new component id to the neighbors |
| if (changed) { |
| vertex.setValue(new IntWritable(currentComponent)); |
| sendMessageToAllEdges(vertex, vertex.getValue()); |
| } |
| vertex.voteToHalt(); |
| } |
| } |