1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
package org.apache.giraph.io.formats; |
20 | |
|
21 | |
import org.apache.giraph.bsp.BspInputSplit; |
22 | |
import org.apache.giraph.edge.EdgeFactory; |
23 | |
import org.apache.giraph.edge.OutEdges; |
24 | |
import org.apache.giraph.graph.Vertex; |
25 | |
import org.apache.giraph.io.VertexInputFormat; |
26 | |
import org.apache.giraph.io.VertexReader; |
27 | |
import org.apache.hadoop.conf.Configuration; |
28 | |
import org.apache.hadoop.io.DoubleWritable; |
29 | |
import org.apache.hadoop.io.LongWritable; |
30 | |
import org.apache.hadoop.mapreduce.InputSplit; |
31 | |
import org.apache.hadoop.mapreduce.JobContext; |
32 | |
import org.apache.hadoop.mapreduce.TaskAttemptContext; |
33 | |
import org.apache.log4j.Logger; |
34 | |
|
35 | |
import com.google.common.collect.Sets; |
36 | |
|
37 | |
import java.io.IOException; |
38 | |
import java.util.List; |
39 | |
import java.util.Random; |
40 | |
import java.util.Set; |
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | 0 | public class PseudoRandomVertexInputFormat extends |
49 | |
VertexInputFormat<LongWritable, DoubleWritable, DoubleWritable> { |
50 | 0 | @Override public void checkInputSpecs(Configuration conf) { } |
51 | |
|
52 | |
@Override |
53 | |
public final List<InputSplit> getSplits(final JobContext context, |
54 | |
final int minSplitCountHint) throws IOException, InterruptedException { |
55 | 0 | return PseudoRandomUtils.getSplits(minSplitCountHint); |
56 | |
} |
57 | |
|
58 | |
@Override |
59 | |
public VertexReader<LongWritable, DoubleWritable, DoubleWritable> |
60 | |
createVertexReader(InputSplit split, TaskAttemptContext context) |
61 | |
throws IOException { |
62 | 0 | return new PseudoRandomVertexReader(); |
63 | |
} |
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
private static class PseudoRandomVertexReader extends |
70 | |
VertexReader<LongWritable, DoubleWritable, DoubleWritable> { |
71 | |
|
72 | 0 | private static final Logger LOG = |
73 | 0 | Logger.getLogger(PseudoRandomVertexReader.class); |
74 | |
|
75 | 0 | private long startingVertexId = -1; |
76 | |
|
77 | 0 | private long verticesRead = 0; |
78 | |
|
79 | 0 | private long totalSplitVertices = -1; |
80 | |
|
81 | 0 | private long aggregateVertices = -1; |
82 | |
|
83 | 0 | private int edgesPerVertex = -1; |
84 | |
|
85 | |
private BspInputSplit bspInputSplit; |
86 | |
|
87 | |
private PseudoRandomLocalEdgesHelper localEdgesHelper; |
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | 0 | public PseudoRandomVertexReader() { |
93 | 0 | } |
94 | |
|
95 | |
@Override |
96 | |
public void initialize(InputSplit inputSplit, |
97 | |
TaskAttemptContext context) throws IOException { |
98 | 0 | aggregateVertices = getConf().getLong( |
99 | |
PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, 0); |
100 | 0 | if (aggregateVertices <= 0) { |
101 | 0 | throw new IllegalArgumentException( |
102 | |
PseudoRandomInputFormatConstants.AGGREGATE_VERTICES + " <= 0"); |
103 | |
} |
104 | 0 | if (inputSplit instanceof BspInputSplit) { |
105 | 0 | bspInputSplit = (BspInputSplit) inputSplit; |
106 | 0 | long extraVertices = |
107 | 0 | aggregateVertices % bspInputSplit.getNumSplits(); |
108 | 0 | totalSplitVertices = |
109 | 0 | aggregateVertices / bspInputSplit.getNumSplits(); |
110 | 0 | if (bspInputSplit.getSplitIndex() < extraVertices) { |
111 | 0 | ++totalSplitVertices; |
112 | |
} |
113 | 0 | startingVertexId = (bspInputSplit.getSplitIndex() * |
114 | 0 | (aggregateVertices / bspInputSplit.getNumSplits())) + |
115 | 0 | Math.min(bspInputSplit.getSplitIndex(), |
116 | |
extraVertices); |
117 | 0 | } else { |
118 | 0 | throw new IllegalArgumentException( |
119 | 0 | "initialize: Got " + inputSplit.getClass() + |
120 | |
" instead of " + BspInputSplit.class); |
121 | |
} |
122 | 0 | edgesPerVertex = getConf().getInt( |
123 | |
PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, 0); |
124 | 0 | if (edgesPerVertex <= 0) { |
125 | 0 | throw new IllegalArgumentException( |
126 | |
PseudoRandomInputFormatConstants.EDGES_PER_VERTEX + " <= 0"); |
127 | |
} |
128 | 0 | float minLocalEdgesRatio = getConf().getFloat( |
129 | |
PseudoRandomInputFormatConstants.LOCAL_EDGES_MIN_RATIO, |
130 | |
PseudoRandomInputFormatConstants.LOCAL_EDGES_MIN_RATIO_DEFAULT); |
131 | 0 | localEdgesHelper = new PseudoRandomLocalEdgesHelper(aggregateVertices, |
132 | 0 | minLocalEdgesRatio, getConf()); |
133 | 0 | } |
134 | |
|
135 | |
@Override |
136 | |
public boolean nextVertex() throws IOException, InterruptedException { |
137 | 0 | return totalSplitVertices > verticesRead; |
138 | |
} |
139 | |
|
140 | |
@Override |
141 | |
public Vertex<LongWritable, DoubleWritable, DoubleWritable> |
142 | |
getCurrentVertex() throws IOException, InterruptedException { |
143 | |
Vertex<LongWritable, DoubleWritable, DoubleWritable> |
144 | 0 | vertex = getConf().createVertex(); |
145 | 0 | long vertexId = startingVertexId + verticesRead; |
146 | |
|
147 | |
|
148 | |
|
149 | 0 | Random rand = new Random(vertexId); |
150 | 0 | DoubleWritable vertexValue = new DoubleWritable(rand.nextDouble()); |
151 | |
|
152 | |
|
153 | 0 | OutEdges<LongWritable, DoubleWritable> edges = |
154 | 0 | getConf().createAndInitializeOutEdges(edgesPerVertex); |
155 | 0 | Set<LongWritable> destVertices = Sets.newHashSet(); |
156 | 0 | for (long i = 0; i < edgesPerVertex; ++i) { |
157 | 0 | LongWritable destVertexId = new LongWritable(); |
158 | |
do { |
159 | 0 | destVertexId.set( |
160 | 0 | localEdgesHelper.generateDestVertex(vertexId, rand)); |
161 | 0 | } while (destVertices.contains(destVertexId)); |
162 | 0 | edges.add(EdgeFactory.create(destVertexId, |
163 | 0 | new DoubleWritable(rand.nextDouble()))); |
164 | 0 | destVertices.add(destVertexId); |
165 | |
} |
166 | 0 | vertex.initialize(new LongWritable(vertexId), vertexValue, edges); |
167 | 0 | ++verticesRead; |
168 | 0 | if (LOG.isTraceEnabled()) { |
169 | 0 | LOG.trace("next: Return vertexId=" + |
170 | 0 | vertex.getId().get() + |
171 | 0 | ", vertexValue=" + vertex.getValue() + |
172 | 0 | ", edges=" + vertex.getEdges()); |
173 | |
} |
174 | 0 | return vertex; |
175 | |
} |
176 | |
|
177 | |
@Override |
178 | 0 | public void close() throws IOException { } |
179 | |
|
180 | |
@Override |
181 | |
public float getProgress() throws IOException { |
182 | 0 | return verticesRead * 100.0f / totalSplitVertices; |
183 | |
} |
184 | |
} |
185 | |
} |