1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedInputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.EOFException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.UTFDataFormatException;
29 import java.util.Date;
30 import java.util.LinkedHashSet;
31 import java.util.Set;
32 import java.util.zip.GZIPInputStream;
33
34 import org.apache.lucene.document.Document;
35 import org.apache.lucene.document.Field;
36 import org.apache.lucene.document.FieldType;
37 import org.apache.lucene.index.IndexOptions;
38 import org.apache.lucene.index.IndexWriter;
39 import org.apache.maven.index.ArtifactInfo;
40 import org.apache.maven.index.context.IndexUtils;
41 import org.apache.maven.index.context.IndexingContext;
42
43
44
45
46
47
48 public class IndexDataReader
49 {
50 private final DataInputStream dis;
51
52 public IndexDataReader( final InputStream is )
53 throws IOException
54 {
55
56
57
58 is.mark( 2 );
59 InputStream data;
60 if ( is.read() == 0x1f && is.read() == 0x8b )
61 {
62 is.reset();
63 data = new BufferedInputStream( new GZIPInputStream( is, 1024 * 8 ), 1024 * 8 );
64 }
65 else
66 {
67 is.reset();
68 data = new BufferedInputStream( is, 1024 * 8 );
69 }
70
71 this.dis = new DataInputStream( data );
72 }
73
74 public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
75 throws IOException
76 {
77 long timestamp = readHeader();
78
79 Date date = null;
80
81 if ( timestamp != -1 )
82 {
83 date = new Date( timestamp );
84
85 IndexUtils.updateTimestamp( w.getDirectory(), date );
86 }
87
88 int n = 0;
89
90 Document doc;
91 Set<String> rootGroups = new LinkedHashSet<>();
92 Set<String> allGroups = new LinkedHashSet<>();
93
94 while ( ( doc = readDocument() ) != null )
95 {
96 ArtifactInfo ai = IndexUtils.constructArtifactInfo( doc, context );
97 if ( ai != null )
98 {
99 w.addDocument( IndexUtils.updateDocument( doc, context, false, ai ) );
100
101 rootGroups.add( ai.getRootGroup() );
102 allGroups.add( ai.getGroupId() );
103 }
104 else if ( doc.getField( ArtifactInfo.ALL_GROUPS ) != null
105 || doc.getField( ArtifactInfo.ROOT_GROUPS ) != null )
106 {
107
108 }
109 else
110 {
111 w.addDocument( doc );
112 }
113 n++;
114 }
115
116 w.commit();
117
118 IndexDataReadResult result = new IndexDataReadResult();
119 result.setDocumentCount( n );
120 result.setTimestamp( date );
121 result.setRootGroups( rootGroups );
122 result.setAllGroups( allGroups );
123
124 return result;
125 }
126
127 public long readHeader()
128 throws IOException
129 {
130 final byte hdrbyte = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
131
132 if ( hdrbyte != dis.readByte() )
133 {
134
135 throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
136 }
137
138 return dis.readLong();
139 }
140
141 public Document readDocument()
142 throws IOException
143 {
144 int fieldCount;
145 try
146 {
147 fieldCount = dis.readInt();
148 }
149 catch ( EOFException ex )
150 {
151 return null;
152 }
153
154 Document doc = new Document();
155
156 for ( int i = 0; i < fieldCount; i++ )
157 {
158 doc.add( readField() );
159 }
160
161
162 final Field uinfoField = (Field) doc.getField( ArtifactInfo.UINFO );
163 final String info = doc.get( ArtifactInfo.INFO );
164 if ( uinfoField != null && info != null && !info.isEmpty() )
165 {
166 final String[] splitInfo = ArtifactInfo.FS_PATTERN.split( info );
167 if ( splitInfo.length > 6 )
168 {
169 final String extension = splitInfo[6];
170 final String uinfoString = uinfoField.stringValue();
171 if ( uinfoString.endsWith( ArtifactInfo.FS + ArtifactInfo.NA ) )
172 {
173 uinfoField.setStringValue( uinfoString + ArtifactInfo.FS + ArtifactInfo.nvl( extension ) );
174 }
175 }
176 }
177
178 return doc;
179 }
180
181 private Field readField()
182 throws IOException
183 {
184 int flags = dis.read();
185
186 FieldType fieldType = new FieldType();
187 if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
188 {
189 boolean tokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
190 fieldType.setTokenized( tokenized );
191 }
192 fieldType.setIndexOptions( IndexOptions.DOCS_AND_FREQS_AND_POSITIONS );
193 fieldType.setStored( ( flags & IndexDataWriter.F_STORED ) > 0 );
194
195 String name = dis.readUTF();
196 String value = readUTF( dis );
197
198 return new Field( name, value, fieldType );
199 }
200
201 private static String readUTF( DataInput in )
202 throws IOException
203 {
204 int utflen = in.readInt();
205
206 byte[] bytearr;
207 char[] chararr;
208
209 try
210 {
211 bytearr = new byte[utflen];
212 chararr = new char[utflen];
213 }
214 catch ( OutOfMemoryError e )
215 {
216 throw new IOException( "Index data content is inappropriate (is junk?), leads to OutOfMemoryError!"
217 + " See MINDEXER-28 for more information!", e );
218 }
219
220 int c, char2, char3;
221 int count = 0;
222 int chararrCount = 0;
223
224 in.readFully( bytearr, 0, utflen );
225
226 while ( count < utflen )
227 {
228 c = bytearr[count] & 0xff;
229 if ( c > 127 )
230 {
231 break;
232 }
233 count++;
234 chararr[chararrCount++] = (char) c;
235 }
236
237 while ( count < utflen )
238 {
239 c = bytearr[count] & 0xff;
240 switch ( c >> 4 )
241 {
242 case 0:
243 case 1:
244 case 2:
245 case 3:
246 case 4:
247 case 5:
248 case 6:
249 case 7:
250
251 count++;
252 chararr[chararrCount++] = (char) c;
253 break;
254
255 case 12:
256 case 13:
257
258 count += 2;
259 if ( count > utflen )
260 {
261 throw new UTFDataFormatException( "malformed input: partial character at end" );
262 }
263 char2 = bytearr[count - 1];
264 if ( ( char2 & 0xC0 ) != 0x80 )
265 {
266 throw new UTFDataFormatException( "malformed input around byte " + count );
267 }
268 chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
269 break;
270
271 case 14:
272
273 count += 3;
274 if ( count > utflen )
275 {
276 throw new UTFDataFormatException( "malformed input: partial character at end" );
277 }
278 char2 = bytearr[count - 2];
279 char3 = bytearr[count - 1];
280 if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
281 {
282 throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
283 }
284 chararr[chararrCount++] =
285 (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) ) );
286 break;
287
288 default:
289
290 throw new UTFDataFormatException( "malformed input around byte " + count );
291 }
292 }
293
294
295 return new String( chararr, 0, chararrCount );
296 }
297
298
299
300
301 public static class IndexDataReadResult
302 {
303 private Date timestamp;
304
305 private int documentCount;
306
307 private Set<String> rootGroups;
308
309 private Set<String> allGroups;
310
311 public void setDocumentCount( int documentCount )
312 {
313 this.documentCount = documentCount;
314 }
315
316 public int getDocumentCount()
317 {
318 return documentCount;
319 }
320
321 public void setTimestamp( Date timestamp )
322 {
323 this.timestamp = timestamp;
324 }
325
326 public Date getTimestamp()
327 {
328 return timestamp;
329 }
330
331 public void setRootGroups( Set<String> rootGroups )
332 {
333 this.rootGroups = rootGroups;
334 }
335
336 public Set<String> getRootGroups()
337 {
338 return rootGroups;
339 }
340
341 public void setAllGroups( Set<String> allGroups )
342 {
343 this.allGroups = allGroups;
344 }
345
346 public Set<String> getAllGroups()
347 {
348 return allGroups;
349 }
350
351 }
352
353
354
355
356
357
358
359
360
361
362 public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
363 throws IOException
364 {
365 dis.readByte();
366
367 long timestamp = dis.readLong();
368
369 Date date = null;
370
371 if ( timestamp != -1 )
372 {
373 date = new Date( timestamp );
374 }
375
376 int n = 0;
377
378 Document doc;
379 while ( ( doc = readDocument() ) != null )
380 {
381 visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
382
383 n++;
384 }
385
386 IndexDataReadResult result = new IndexDataReadResult();
387 result.setDocumentCount( n );
388 result.setTimestamp( date );
389 return result;
390 }
391
392
393
394
395 public interface IndexDataReadVisitor
396 {
397
398
399
400
401
402
403 void visitDocument( Document document );
404
405 }
406
407 }