1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.giraph.edge; |
19 | |
|
20 | |
import it.unimi.dsi.fastutil.bytes.ByteArrays; |
21 | |
import it.unimi.dsi.fastutil.longs.LongArrayList; |
22 | |
|
23 | |
import java.io.DataInput; |
24 | |
import java.io.DataOutput; |
25 | |
import java.io.IOException; |
26 | |
import java.util.Arrays; |
27 | |
import java.util.BitSet; |
28 | |
import java.util.Iterator; |
29 | |
|
30 | |
import javax.annotation.concurrent.NotThreadSafe; |
31 | |
|
32 | |
import org.apache.giraph.utils.ExtendedByteArrayDataInput; |
33 | |
import org.apache.giraph.utils.ExtendedByteArrayDataOutput; |
34 | |
import org.apache.giraph.utils.ExtendedDataInput; |
35 | |
import org.apache.giraph.utils.ExtendedDataOutput; |
36 | |
import org.apache.giraph.utils.UnsafeByteArrayInputStream; |
37 | |
import org.apache.giraph.utils.UnsafeByteArrayOutputStream; |
38 | |
import org.apache.giraph.utils.Varint; |
39 | |
import org.apache.hadoop.io.LongWritable; |
40 | |
import org.apache.hadoop.io.Writable; |
41 | |
|
42 | |
import com.google.common.base.Preconditions; |
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
@NotThreadSafe |
55 | 0 | public class LongDiffArray implements Writable { |
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
private byte[] compressedData; |
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
private int size; |
68 | |
|
69 | |
|
70 | |
|
71 | |
|
72 | |
private TransientChanges transientData; |
73 | |
|
74 | |
|
75 | |
|
76 | |
|
77 | 0 | private boolean useUnsafeSerialization = true; |
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
public void setUseUnsafeSerialization(boolean useUnsafeSerialization) { |
84 | 0 | this.useUnsafeSerialization = useUnsafeSerialization; |
85 | 0 | } |
86 | |
|
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
public void initialize(int capacity) { |
92 | 0 | reset(); |
93 | 0 | if (capacity > 0) { |
94 | 0 | transientData = new TransientChanges(capacity); |
95 | |
} |
96 | 0 | } |
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
public void initialize() { |
102 | 0 | reset(); |
103 | 0 | } |
104 | |
|
105 | |
|
106 | |
|
107 | |
|
108 | |
|
109 | |
public void add(long id) { |
110 | 0 | checkTransientData(); |
111 | 0 | transientData.add(id); |
112 | 0 | } |
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
public void remove(long id) { |
120 | 0 | checkTransientData(); |
121 | |
|
122 | 0 | if (size > 0) { |
123 | 0 | LongsDiffReader reader = new LongsDiffReader( |
124 | |
compressedData, |
125 | |
useUnsafeSerialization |
126 | |
); |
127 | 0 | for (int i = 0; i < size; i++) { |
128 | 0 | long cur = reader.readNext(); |
129 | 0 | if (cur == id) { |
130 | 0 | transientData.markRemoved(i); |
131 | 0 | } else if (cur > id) { |
132 | 0 | break; |
133 | |
} |
134 | |
} |
135 | |
} |
136 | 0 | transientData.removeAdded(id); |
137 | 0 | } |
138 | |
|
139 | |
|
140 | |
|
141 | |
|
142 | |
|
143 | |
public int size() { |
144 | 0 | int result = size; |
145 | 0 | if (transientData != null) { |
146 | 0 | result += transientData.size(); |
147 | |
} |
148 | 0 | return result; |
149 | |
} |
150 | |
|
151 | |
|
152 | |
|
153 | |
|
154 | |
|
155 | |
public Iterator<LongWritable> iterator() { |
156 | 0 | trim(); |
157 | 0 | return new Iterator<LongWritable>() { |
158 | |
|
159 | |
private int position; |
160 | 0 | private final LongsDiffReader reader = |
161 | 0 | new LongsDiffReader(compressedData, useUnsafeSerialization); |
162 | |
|
163 | |
|
164 | 0 | private final LongWritable reusableLong = new LongWritable(); |
165 | |
|
166 | |
@Override |
167 | |
public boolean hasNext() { |
168 | 0 | return position < size; |
169 | |
} |
170 | |
|
171 | |
@Override |
172 | |
public LongWritable next() { |
173 | 0 | position++; |
174 | 0 | reusableLong.set(reader.readNext()); |
175 | 0 | return reusableLong; |
176 | |
} |
177 | |
|
178 | |
@Override |
179 | |
public void remove() { |
180 | 0 | removeAt(position - 1); |
181 | 0 | } |
182 | |
}; |
183 | |
} |
184 | |
|
185 | |
@Override |
186 | |
public void write(DataOutput out) throws IOException { |
187 | 0 | trim(); |
188 | 0 | Varint.writeUnsignedVarInt(compressedData.length, out); |
189 | 0 | Varint.writeUnsignedVarInt(size, out); |
190 | 0 | out.write(compressedData); |
191 | 0 | } |
192 | |
|
193 | |
@Override |
194 | |
public void readFields(DataInput in) throws IOException { |
195 | 0 | reset(); |
196 | 0 | compressedData = new byte[Varint.readUnsignedVarInt(in)]; |
197 | |
|
198 | |
|
199 | 0 | size = Varint.readUnsignedVarInt(in); |
200 | 0 | in.readFully(compressedData); |
201 | 0 | } |
202 | |
|
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | |
public void trim() { |
208 | 0 | if (transientData == null) { |
209 | |
|
210 | 0 | return; |
211 | |
} |
212 | |
|
213 | |
|
214 | 0 | long[] transientValues = transientData.sortedValues(); |
215 | 0 | int pCompressed = 0; |
216 | 0 | int pTransient = 0; |
217 | |
|
218 | 0 | LongsDiffReader reader = new LongsDiffReader( |
219 | |
compressedData, |
220 | |
useUnsafeSerialization |
221 | |
); |
222 | 0 | LongsDiffWriter writer = new LongsDiffWriter(useUnsafeSerialization); |
223 | |
|
224 | 0 | long curValue = size > 0 ? reader.readNext() : Long.MAX_VALUE; |
225 | |
|
226 | |
|
227 | |
|
228 | |
|
229 | 0 | while (pTransient < transientData.numberOfAddedElements() || |
230 | |
pCompressed < size) { |
231 | 0 | if (pTransient < transientData.numberOfAddedElements() && |
232 | |
curValue >= transientValues[pTransient]) { |
233 | 0 | writer.writeNext(transientValues[pTransient]); |
234 | 0 | pTransient++; |
235 | |
} else { |
236 | 0 | if (!transientData.isRemoved(pCompressed)) { |
237 | 0 | writer.writeNext(curValue); |
238 | |
} |
239 | 0 | pCompressed++; |
240 | 0 | if (pCompressed < size) { |
241 | 0 | curValue = reader.readNext(); |
242 | |
} else { |
243 | 0 | curValue = Long.MAX_VALUE; |
244 | |
} |
245 | |
} |
246 | |
} |
247 | |
|
248 | 0 | compressedData = writer.toByteArray(); |
249 | 0 | size += transientData.size(); |
250 | 0 | transientData = null; |
251 | 0 | } |
252 | |
|
253 | |
|
254 | |
|
255 | |
|
256 | |
|
257 | |
|
258 | |
|
259 | |
private void removeAt(int i) { |
260 | 0 | checkTransientData(); |
261 | 0 | if (i < size) { |
262 | 0 | transientData.markRemoved(i); |
263 | |
} else { |
264 | 0 | transientData.removeAddedAt(i - size); |
265 | |
} |
266 | 0 | } |
267 | |
|
268 | |
|
269 | |
|
270 | |
|
271 | |
private void checkTransientData() { |
272 | 0 | if (transientData == null) { |
273 | 0 | transientData = new TransientChanges(); |
274 | |
} |
275 | 0 | } |
276 | |
|
277 | |
|
278 | |
|
279 | |
|
280 | |
private void reset() { |
281 | 0 | compressedData = ByteArrays.EMPTY_ARRAY; |
282 | 0 | size = 0; |
283 | 0 | transientData = null; |
284 | 0 | } |
285 | |
|
286 | |
|
287 | |
|
288 | |
|
289 | |
private static class LongsDiffReader { |
290 | |
|
291 | |
private final ExtendedDataInput input; |
292 | |
|
293 | |
private long current; |
294 | |
|
295 | 0 | private boolean first = true; |
296 | |
|
297 | |
|
298 | |
|
299 | |
|
300 | |
|
301 | |
|
302 | |
|
303 | 0 | public LongsDiffReader(byte[] compressedData, boolean useUnsafeReader) { |
304 | 0 | if (useUnsafeReader) { |
305 | 0 | input = new UnsafeByteArrayInputStream(compressedData); |
306 | |
} else { |
307 | 0 | input = new ExtendedByteArrayDataInput(compressedData); |
308 | |
} |
309 | 0 | } |
310 | |
|
311 | |
|
312 | |
|
313 | |
|
314 | |
|
315 | |
long readNext() { |
316 | |
try { |
317 | 0 | if (first) { |
318 | 0 | current = input.readLong(); |
319 | 0 | first = false; |
320 | |
} else { |
321 | 0 | current += Varint.readUnsignedVarLong(input); |
322 | |
} |
323 | 0 | return current; |
324 | 0 | } catch (IOException e) { |
325 | 0 | throw new IllegalStateException(e); |
326 | |
} |
327 | |
} |
328 | |
} |
329 | |
|
330 | |
|
331 | |
|
332 | |
|
333 | |
private static class LongsDiffWriter { |
334 | |
|
335 | |
private final ExtendedDataOutput out; |
336 | |
|
337 | |
private long lastWritten; |
338 | |
|
339 | 0 | private boolean first = true; |
340 | |
|
341 | |
|
342 | |
|
343 | |
|
344 | |
|
345 | 0 | public LongsDiffWriter(boolean useUnsafeWriter) { |
346 | 0 | if (useUnsafeWriter) { |
347 | 0 | out = new UnsafeByteArrayOutputStream(); |
348 | |
} else { |
349 | 0 | out = new ExtendedByteArrayDataOutput(); |
350 | |
} |
351 | 0 | } |
352 | |
|
353 | |
|
354 | |
|
355 | |
|
356 | |
|
357 | |
void writeNext(long value) { |
358 | |
try { |
359 | 0 | if (first) { |
360 | 0 | out.writeLong(value); |
361 | 0 | first = false; |
362 | |
} else { |
363 | 0 | Preconditions.checkState(value >= lastWritten, |
364 | |
"Values need to be in order"); |
365 | 0 | Preconditions.checkState((value - lastWritten) >= 0, |
366 | |
"In order to use this class, difference of consecutive IDs " + |
367 | |
"cannot overflow longs"); |
368 | 0 | Varint.writeUnsignedVarLong(value - lastWritten, out); |
369 | |
} |
370 | 0 | lastWritten = value; |
371 | 0 | } catch (IOException e) { |
372 | 0 | throw new IllegalStateException(e); |
373 | 0 | } |
374 | 0 | } |
375 | |
|
376 | |
|
377 | |
|
378 | |
|
379 | |
|
380 | |
byte[] toByteArray() { |
381 | 0 | return out.toByteArray(); |
382 | |
} |
383 | |
} |
384 | |
|
385 | |
|
386 | |
|
387 | |
|
388 | |
|
389 | |
|
390 | |
|
391 | 0 | private static class TransientChanges { |
392 | |
|
393 | |
private final LongArrayList neighborsAdded; |
394 | |
|
395 | 0 | private final BitSet removed = new BitSet(); |
396 | |
|
397 | |
private int removedCount; |
398 | |
|
399 | |
|
400 | |
|
401 | |
|
402 | |
|
403 | 0 | private TransientChanges(int capacity) { |
404 | 0 | neighborsAdded = new LongArrayList(capacity); |
405 | 0 | } |
406 | |
|
407 | |
|
408 | |
|
409 | |
|
410 | 0 | private TransientChanges() { |
411 | 0 | neighborsAdded = new LongArrayList(); |
412 | 0 | } |
413 | |
|
414 | |
|
415 | |
|
416 | |
|
417 | |
|
418 | |
private void add(long value) { |
419 | 0 | neighborsAdded.add(value); |
420 | 0 | } |
421 | |
|
422 | |
|
423 | |
|
424 | |
|
425 | |
|
426 | |
private void markRemoved(int index) { |
427 | 0 | if (!removed.get(index)) { |
428 | 0 | removedCount++; |
429 | 0 | removed.set(index); |
430 | |
} |
431 | 0 | } |
432 | |
|
433 | |
|
434 | |
|
435 | |
|
436 | |
|
437 | |
private void removeAddedAt(int index) { |
438 | |
|
439 | |
|
440 | |
|
441 | 0 | if (index == neighborsAdded.size() - 1) { |
442 | 0 | neighborsAdded.popLong(); |
443 | |
} else { |
444 | 0 | neighborsAdded.set(index, neighborsAdded.popLong()); |
445 | |
} |
446 | 0 | } |
447 | |
|
448 | |
|
449 | |
|
450 | |
|
451 | |
|
452 | |
private int numberOfAddedElements() { |
453 | 0 | return neighborsAdded.size(); |
454 | |
} |
455 | |
|
456 | |
|
457 | |
|
458 | |
|
459 | |
|
460 | |
private void removeAdded(long target) { |
461 | 0 | neighborsAdded.rem(target); |
462 | 0 | } |
463 | |
|
464 | |
|
465 | |
|
466 | |
|
467 | |
|
468 | |
private int size() { |
469 | 0 | return neighborsAdded.size() - removedCount; |
470 | |
} |
471 | |
|
472 | |
|
473 | |
|
474 | |
|
475 | |
|
476 | |
private long[] sortedValues() { |
477 | 0 | long[] ret = neighborsAdded.elements(); |
478 | 0 | Arrays.sort(ret, 0, neighborsAdded.size()); |
479 | 0 | return ret; |
480 | |
} |
481 | |
|
482 | |
|
483 | |
|
484 | |
|
485 | |
|
486 | |
|
487 | |
private boolean isRemoved(int i) { |
488 | 0 | return removed.get(i); |
489 | |
} |
490 | |
} |
491 | |
} |