1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.giraph.examples; 20 21 import org.apache.giraph.graph.BasicComputation; 22 import org.apache.giraph.edge.Edge; 23 import org.apache.giraph.graph.Vertex; 24 import org.apache.hadoop.io.IntWritable; 25 import org.apache.hadoop.io.NullWritable; 26 27 import java.io.IOException; 28 29 /** 30 * Implementation of the HCC algorithm that identifies connected components and 31 * assigns each vertex its "component identifier" (the smallest vertex id 32 * in the component) 33 * 34 * The idea behind the algorithm is very simple: propagate the smallest 35 * vertex id along the edges to all vertices of a connected component. The 36 * number of supersteps necessary is equal to the length of the maximum 37 * diameter of all components + 1 38 * 39 * The original Hadoop-based variant of this algorithm was proposed by Kang, 40 * Charalampos, Tsourakakis and Faloutsos in 41 * "PEGASUS: Mining Peta-Scale Graphs", 2010 42 * 43 * http://www.cs.cmu.edu/~ukang/papers/PegasusKAIS.pdf 44 */ 45 @Algorithm( 46 name = "Connected components", 47 description = "Finds connected components of the graph" 48 ) 49 public class ConnectedComponentsComputation extends 50 BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> { 51 /** 52 * Propagates the smallest vertex id to all neighbors. Will always choose to 53 * halt and only reactivate if a smaller id has been sent to it. 54 * 55 * @param vertex Vertex 56 * @param messages Iterator of messages from the previous superstep. 57 * @throws IOException 58 */ 59 @Override 60 public void compute( 61 Vertex<IntWritable, IntWritable, NullWritable> vertex, 62 Iterable<IntWritable> messages) throws IOException { 63 int currentComponent = vertex.getValue().get(); 64 65 // First superstep is special, because we can simply look at the neighbors 66 if (getSuperstep() == 0) { 67 for (Edge<IntWritable, NullWritable> edge : vertex.getEdges()) { 68 int neighbor = edge.getTargetVertexId().get(); 69 if (neighbor < currentComponent) { 70 currentComponent = neighbor; 71 } 72 } 73 // Only need to send value if it is not the own id 74 if (currentComponent != vertex.getValue().get()) { 75 vertex.setValue(new IntWritable(currentComponent)); 76 for (Edge<IntWritable, NullWritable> edge : vertex.getEdges()) { 77 IntWritable neighbor = edge.getTargetVertexId(); 78 if (neighbor.get() > currentComponent) { 79 sendMessage(neighbor, vertex.getValue()); 80 } 81 } 82 } 83 84 vertex.voteToHalt(); 85 return; 86 } 87 88 boolean changed = false; 89 // did we get a smaller id ? 90 for (IntWritable message : messages) { 91 int candidateComponent = message.get(); 92 if (candidateComponent < currentComponent) { 93 currentComponent = candidateComponent; 94 changed = true; 95 } 96 } 97 98 // propagate new component id to the neighbors 99 if (changed) { 100 vertex.setValue(new IntWritable(currentComponent)); 101 sendMessageToAllEdges(vertex, vertex.getValue()); 102 } 103 vertex.voteToHalt(); 104 } 105 }