1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
package org.apache.giraph.ooc.policy; |
20 | |
|
21 | |
import com.google.common.collect.Maps; |
22 | |
import com.sun.management.GarbageCollectionNotificationInfo; |
23 | |
import org.apache.giraph.conf.FloatConfOption; |
24 | |
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; |
25 | |
import org.apache.giraph.ooc.OutOfCoreEngine; |
26 | |
import org.apache.giraph.ooc.OutOfCoreIOStatistics; |
27 | |
import org.apache.giraph.ooc.command.IOCommand; |
28 | |
import org.apache.giraph.ooc.command.LoadPartitionIOCommand; |
29 | |
import org.apache.giraph.ooc.command.WaitIOCommand; |
30 | |
import org.apache.log4j.Logger; |
31 | |
|
32 | |
import java.lang.management.MemoryUsage; |
33 | |
import java.util.Map; |
34 | |
import java.util.concurrent.atomic.AtomicInteger; |
35 | |
import java.util.concurrent.atomic.AtomicLong; |
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | 0 | public class SimpleGCMonitoringOracle implements OutOfCoreOracle { |
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | 0 | public static final FloatConfOption OPTIMAL_MEMORY_PRESSURE = |
67 | |
new FloatConfOption("giraph.optimalMemoryPressure", 0.8f, |
68 | |
"The memory pressure (fraction of used memory) at which the job " + |
69 | |
"shows the optimal GC behavior. This fraction may be dependent " + |
70 | |
"on the GC strategy used in running the job."); |
71 | |
|
72 | |
|
73 | 0 | private static final Logger LOG = |
74 | 0 | Logger.getLogger(SimpleGCMonitoringOracle.class); |
75 | |
|
76 | |
private final float optimalMemoryPressure; |
77 | |
|
78 | |
private final OutOfCoreEngine oocEngine; |
79 | |
|
80 | |
private GCObservation lastGCObservation; |
81 | |
|
82 | 0 | private final AtomicLong desiredDiskToMemoryDataRate = |
83 | |
new AtomicLong(0); |
84 | |
|
85 | 0 | private final Map<IOCommand.IOCommandType, AtomicInteger> commandOccurrences = |
86 | 0 | Maps.newConcurrentMap(); |
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | |
|
93 | |
|
94 | |
public SimpleGCMonitoringOracle(ImmutableClassesGiraphConfiguration conf, |
95 | 0 | OutOfCoreEngine oocEngine) { |
96 | 0 | this.optimalMemoryPressure = OPTIMAL_MEMORY_PRESSURE.get(conf); |
97 | 0 | this.oocEngine = oocEngine; |
98 | 0 | this.lastGCObservation = new GCObservation(-1, 0, 0); |
99 | 0 | for (IOCommand.IOCommandType type : IOCommand.IOCommandType.values()) { |
100 | 0 | commandOccurrences.put(type, new AtomicInteger(0)); |
101 | |
} |
102 | 0 | } |
103 | |
|
104 | |
@Override |
105 | |
public synchronized void gcCompleted(GarbageCollectionNotificationInfo |
106 | |
gcInfo) { |
107 | 0 | long time = System.currentTimeMillis(); |
108 | 0 | Map<String, MemoryUsage> memAfter = gcInfo.getGcInfo() |
109 | 0 | .getMemoryUsageAfterGc(); |
110 | 0 | long usedMemory = 0; |
111 | 0 | long maxMemory = 0; |
112 | 0 | for (MemoryUsage memDetail : memAfter.values()) { |
113 | 0 | usedMemory += memDetail.getUsed(); |
114 | 0 | maxMemory += memDetail.getMax(); |
115 | 0 | } |
116 | 0 | GCObservation observation = new GCObservation(time, usedMemory, maxMemory); |
117 | 0 | if (LOG.isInfoEnabled()) { |
118 | 0 | LOG.info("gcCompleted: GC completed with: " + observation); |
119 | |
} |
120 | |
|
121 | 0 | if (lastGCObservation.isValid()) { |
122 | 0 | long deltaDataRate = |
123 | 0 | lastGCObservation.getDesiredDeltaDataRate(observation); |
124 | 0 | long diskBandwidthEstimate = |
125 | 0 | oocEngine.getIOStatistics().getDiskBandwidth(); |
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | 0 | long dataInjectionRate = desiredDiskToMemoryDataRate.get(); |
131 | 0 | desiredDiskToMemoryDataRate.set(Math.max( |
132 | 0 | Math.min(desiredDiskToMemoryDataRate.get() - deltaDataRate, |
133 | |
diskBandwidthEstimate), -diskBandwidthEstimate)); |
134 | 0 | if (LOG.isInfoEnabled()) { |
135 | 0 | LOG.info("gcCompleted: changing data injection rate from " + |
136 | 0 | String.format("%.2f", dataInjectionRate / 1024.0 / 1024.0) + |
137 | 0 | " to " + String.format("%.2f", desiredDiskToMemoryDataRate.get() / |
138 | |
1024.0 / 1024.0)); |
139 | |
} |
140 | |
} |
141 | 0 | lastGCObservation = observation; |
142 | 0 | } |
143 | |
|
144 | |
@Override |
145 | |
public void startIteration() { |
146 | 0 | } |
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
|
153 | |
|
154 | |
|
155 | |
private long getCurrentDataInjectionRate() { |
156 | 0 | long effectiveBytesTransferred = 0; |
157 | 0 | long effectiveDuration = 0; |
158 | 0 | for (IOCommand.IOCommandType type : IOCommand.IOCommandType.values()) { |
159 | 0 | OutOfCoreIOStatistics.BytesDuration stats = |
160 | 0 | oocEngine.getIOStatistics().getCommandTypeStats(type); |
161 | 0 | int occurrence = commandOccurrences.get(type).get(); |
162 | 0 | long typeBytesTransferred = stats.getBytes(); |
163 | 0 | long typeDuration = stats.getDuration(); |
164 | |
|
165 | |
|
166 | |
|
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | 0 | if (stats.getOccurrence() != 0) { |
172 | 0 | typeBytesTransferred += stats.getBytes() / stats.getOccurrence() * |
173 | |
occurrence; |
174 | 0 | typeDuration += stats.getDuration() / stats.getOccurrence() * |
175 | |
occurrence; |
176 | |
} |
177 | 0 | if (type == IOCommand.IOCommandType.LOAD_PARTITION) { |
178 | 0 | effectiveBytesTransferred += typeBytesTransferred; |
179 | |
} else { |
180 | |
|
181 | 0 | effectiveBytesTransferred -= typeBytesTransferred; |
182 | |
} |
183 | 0 | effectiveDuration += typeDuration; |
184 | |
} |
185 | 0 | if (effectiveDuration == 0) { |
186 | 0 | return 0; |
187 | |
} else { |
188 | 0 | return effectiveBytesTransferred / effectiveDuration; |
189 | |
} |
190 | |
} |
191 | |
|
192 | |
@Override |
193 | |
public IOAction[] getNextIOActions() { |
194 | 0 | long error = (long) (oocEngine.getIOStatistics().getDiskBandwidth() * 0.05); |
195 | 0 | long desiredRate = desiredDiskToMemoryDataRate.get(); |
196 | 0 | long currentRate = getCurrentDataInjectionRate(); |
197 | 0 | if (desiredRate > error) { |
198 | |
|
199 | 0 | if (currentRate > desiredRate + error) { |
200 | |
|
201 | |
|
202 | 0 | return new IOAction[]{ |
203 | |
IOAction.STORE_MESSAGES_AND_BUFFERS, |
204 | |
IOAction.STORE_PROCESSED_PARTITION}; |
205 | 0 | } else if (currentRate < desiredRate - error) { |
206 | |
|
207 | 0 | return new IOAction[]{IOAction.LOAD_PARTITION}; |
208 | |
} else { |
209 | |
|
210 | |
|
211 | |
|
212 | 0 | return new IOAction[]{ |
213 | |
IOAction.STORE_MESSAGES_AND_BUFFERS, |
214 | |
IOAction.STORE_PROCESSED_PARTITION, |
215 | |
IOAction.LOAD_PARTITION}; |
216 | |
} |
217 | 0 | } else if (desiredRate < -error) { |
218 | |
|
219 | 0 | if (currentRate < desiredRate - error) { |
220 | |
|
221 | |
|
222 | 0 | return new IOAction[]{IOAction.LOAD_UNPROCESSED_PARTITION}; |
223 | 0 | } else if (currentRate > desiredRate + error) { |
224 | |
|
225 | 0 | return new IOAction[]{ |
226 | |
IOAction.STORE_MESSAGES_AND_BUFFERS, |
227 | |
IOAction.STORE_PARTITION}; |
228 | |
} else { |
229 | |
|
230 | |
|
231 | 0 | return new IOAction[]{ |
232 | |
IOAction.STORE_MESSAGES_AND_BUFFERS, |
233 | |
IOAction.STORE_PROCESSED_PARTITION, |
234 | |
IOAction.LOAD_UNPROCESSED_PARTITION}; |
235 | |
} |
236 | |
} else { |
237 | |
|
238 | |
|
239 | |
|
240 | 0 | if (currentRate > desiredRate + error) { |
241 | 0 | return new IOAction[]{ |
242 | |
IOAction.STORE_MESSAGES_AND_BUFFERS, |
243 | |
IOAction.STORE_PROCESSED_PARTITION}; |
244 | 0 | } else if (currentRate < desiredRate - error) { |
245 | 0 | return new IOAction[]{IOAction.LOAD_UNPROCESSED_PARTITION}; |
246 | |
} else { |
247 | 0 | return new IOAction[]{ |
248 | |
IOAction.STORE_MESSAGES_AND_BUFFERS, |
249 | |
IOAction.STORE_PROCESSED_PARTITION, |
250 | |
IOAction.LOAD_UNPROCESSED_PARTITION}; |
251 | |
} |
252 | |
} |
253 | |
} |
254 | |
|
255 | |
@Override |
256 | |
public synchronized boolean approve(IOCommand command) { |
257 | 0 | long error = (long) (oocEngine.getIOStatistics().getDiskBandwidth() * 0.05); |
258 | 0 | long desiredRate = desiredDiskToMemoryDataRate.get(); |
259 | 0 | long currentRate = getCurrentDataInjectionRate(); |
260 | |
|
261 | |
|
262 | |
|
263 | 0 | if (currentRate > desiredRate + error && |
264 | |
command instanceof LoadPartitionIOCommand) { |
265 | 0 | return false; |
266 | |
} |
267 | 0 | if (currentRate < desiredRate - error && |
268 | |
!(command instanceof LoadPartitionIOCommand) && |
269 | |
!(command instanceof WaitIOCommand)) { |
270 | 0 | return false; |
271 | |
} |
272 | 0 | commandOccurrences.get(command.getType()).getAndIncrement(); |
273 | 0 | return true; |
274 | |
} |
275 | |
|
276 | |
@Override |
277 | |
public void commandCompleted(IOCommand command) { |
278 | 0 | commandOccurrences.get(command.getType()).getAndDecrement(); |
279 | 0 | } |
280 | |
|
281 | |
|
282 | |
private class GCObservation { |
283 | |
|
284 | |
private long time; |
285 | |
|
286 | |
private long usedMemory; |
287 | |
|
288 | |
private long maxMemory; |
289 | |
|
290 | |
|
291 | |
|
292 | |
|
293 | |
|
294 | |
|
295 | |
|
296 | |
|
297 | 0 | public GCObservation(long time, long usedMemory, long maxMemory) { |
298 | 0 | this.time = time; |
299 | 0 | this.usedMemory = usedMemory; |
300 | 0 | this.maxMemory = maxMemory; |
301 | 0 | } |
302 | |
|
303 | |
|
304 | |
|
305 | |
|
306 | |
|
307 | |
|
308 | |
public boolean isValid() { |
309 | 0 | return time > 0; |
310 | |
} |
311 | |
|
312 | |
|
313 | |
|
314 | |
|
315 | |
|
316 | |
|
317 | |
|
318 | |
|
319 | |
public long getDesiredDeltaDataRate(GCObservation newObservation) { |
320 | 0 | long newUsedMemory = newObservation.usedMemory; |
321 | 0 | long newMaxMemory = newObservation.maxMemory; |
322 | 0 | long lastUsedMemory = usedMemory; |
323 | 0 | long lastMaxMemory = maxMemory; |
324 | |
|
325 | 0 | long scaledMaxMemory = Math.min(lastMaxMemory, newMaxMemory); |
326 | 0 | newUsedMemory = |
327 | |
(long) (((double) scaledMaxMemory / newMaxMemory) * newUsedMemory); |
328 | 0 | lastUsedMemory = |
329 | |
(long) (((double) scaledMaxMemory / lastMaxMemory) * lastUsedMemory); |
330 | 0 | long desiredUsedMemory = (long) (optimalMemoryPressure * scaledMaxMemory); |
331 | 0 | if (LOG.isInfoEnabled()) { |
332 | 0 | LOG.info("getDesiredDeltaDataRate: " + String.format("previous usage " + |
333 | 0 | "= %.2f MB, ", lastUsedMemory / 1024.0 / 1024.0) + String.format( |
334 | 0 | "current usage = %.2f MB, ", newUsedMemory / 1024.0 / 1024.0) + |
335 | 0 | String.format("ideal usage = %.2f MB", desiredUsedMemory / 1024.0 / |
336 | |
1024.0)); |
337 | |
} |
338 | 0 | long interval = newObservation.time - time; |
339 | 0 | if (interval == 0) { |
340 | 0 | interval = 1; |
341 | 0 | LOG.warn("getDesiredDeltaRate: two GC happened almost at the same " + |
342 | |
"time!"); |
343 | |
} |
344 | 0 | long currentDataRate = (long) ((double) (newUsedMemory - |
345 | |
lastUsedMemory) / interval * 1000); |
346 | 0 | long desiredDataRate = (long) ((double) (desiredUsedMemory - |
347 | |
newUsedMemory) / interval * 1000); |
348 | 0 | return currentDataRate - desiredDataRate; |
349 | |
} |
350 | |
|
351 | |
@Override |
352 | |
public String toString() { |
353 | 0 | return String.format("(usedMemory: %.2f MB, maxMemory: %.2f MB at " + |
354 | 0 | "time: %d ms)", usedMemory / 1024.0 / 1024.0, |
355 | 0 | maxMemory / 1024.0 / 1024.0, time); |
356 | |
} |
357 | |
} |
358 | |
} |