1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package org.codehaus.plexus.util.xml; |
18 | |
|
19 | |
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.MessageFormat;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
public class XmlReader extends Reader |
60 | |
{ |
61 | |
private static final int BUFFER_SIZE = 4096; |
62 | |
|
63 | |
private static final String UTF_8 = "UTF-8"; |
64 | |
|
65 | |
private static final String US_ASCII = "US-ASCII"; |
66 | |
|
67 | |
private static final String UTF_16BE = "UTF-16BE"; |
68 | |
|
69 | |
private static final String UTF_16LE = "UTF-16LE"; |
70 | |
|
71 | |
private static final String UTF_16 = "UTF-16"; |
72 | |
|
73 | |
private static final String EBCDIC = "CP1047"; |
74 | |
|
75 | 1 | private static String _staticDefaultEncoding = null; |
76 | |
|
77 | |
private Reader _reader; |
78 | |
|
79 | |
private String _encoding; |
80 | |
|
81 | |
private String _defaultEncoding; |
82 | |
|
83 | |
|
84 | |
|
85 | |
|
86 | |
|
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
public static void setDefaultEncoding( String encoding ) |
92 | |
{ |
93 | 0 | _staticDefaultEncoding = encoding; |
94 | 0 | } |
95 | |
|
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
|
102 | |
public static String getDefaultEncoding() |
103 | |
{ |
104 | 0 | return _staticDefaultEncoding; |
105 | |
} |
106 | |
|
107 | |
|
108 | |
|
109 | |
|
110 | |
|
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
public XmlReader( File file ) throws IOException |
123 | |
{ |
124 | 30 | this( new FileInputStream( file ) ); |
125 | 30 | } |
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | |
|
138 | |
|
139 | |
|
140 | |
|
141 | |
public XmlReader( InputStream is ) throws IOException |
142 | |
{ |
143 | 30 | this( is, true ); |
144 | 30 | } |
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
|
153 | |
|
154 | |
|
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
|
160 | |
|
161 | |
|
162 | |
|
163 | |
|
164 | |
|
165 | |
|
166 | |
|
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | |
|
173 | |
|
174 | |
|
175 | |
public XmlReader( InputStream is, boolean lenient ) throws IOException, XmlStreamReaderException |
176 | 30 | { |
177 | 30 | _defaultEncoding = _staticDefaultEncoding; |
178 | |
try |
179 | |
{ |
180 | 30 | doRawStream( is, lenient ); |
181 | |
} |
182 | 0 | catch ( XmlStreamReaderException ex ) |
183 | |
{ |
184 | 0 | if ( !lenient ) |
185 | |
{ |
186 | 0 | throw ex; |
187 | |
} |
188 | |
else |
189 | |
{ |
190 | 0 | doLenientDetection( null, ex ); |
191 | |
} |
192 | 30 | } |
193 | 30 | } |
194 | |
|
195 | |
|
196 | |
|
197 | |
|
198 | |
|
199 | |
|
200 | |
|
201 | |
|
202 | |
|
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | |
|
208 | |
|
209 | |
|
210 | |
|
211 | |
|
212 | |
|
213 | |
public XmlReader( URL url ) throws IOException |
214 | |
{ |
215 | 0 | this( url.openConnection() ); |
216 | 0 | } |
217 | |
|
218 | |
|
219 | |
|
220 | |
|
221 | |
|
222 | |
|
223 | |
|
224 | |
|
225 | |
|
226 | |
|
227 | |
|
228 | |
|
229 | |
|
230 | |
|
231 | |
|
232 | |
|
233 | |
|
234 | |
|
235 | |
|
236 | |
public XmlReader( URLConnection conn ) throws IOException |
237 | 0 | { |
238 | 0 | _defaultEncoding = _staticDefaultEncoding; |
239 | 0 | boolean lenient = true; |
240 | 0 | if ( conn instanceof HttpURLConnection ) |
241 | |
{ |
242 | |
try |
243 | |
{ |
244 | 0 | doHttpStream( conn.getInputStream(), conn.getContentType(), lenient ); |
245 | |
} |
246 | 0 | catch ( XmlStreamReaderException ex ) |
247 | |
{ |
248 | 0 | doLenientDetection( conn.getContentType(), ex ); |
249 | 0 | } |
250 | |
} |
251 | 0 | else if ( conn.getContentType() != null ) |
252 | |
{ |
253 | |
try |
254 | |
{ |
255 | 0 | doHttpStream( conn.getInputStream(), conn.getContentType(), lenient ); |
256 | |
} |
257 | 0 | catch ( XmlStreamReaderException ex ) |
258 | |
{ |
259 | 0 | doLenientDetection( conn.getContentType(), ex ); |
260 | 0 | } |
261 | |
} |
262 | |
else |
263 | |
{ |
264 | |
try |
265 | |
{ |
266 | 0 | doRawStream( conn.getInputStream(), lenient ); |
267 | |
} |
268 | 0 | catch ( XmlStreamReaderException ex ) |
269 | |
{ |
270 | 0 | doLenientDetection( null, ex ); |
271 | 0 | } |
272 | |
} |
273 | 0 | } |
274 | |
|
275 | |
|
276 | |
|
277 | |
|
278 | |
|
279 | |
|
280 | |
|
281 | |
|
282 | |
|
283 | |
|
284 | |
|
285 | |
|
286 | |
|
287 | |
|
288 | |
|
289 | |
|
290 | |
|
291 | |
|
292 | |
|
293 | |
public XmlReader( InputStream is, String httpContentType ) throws IOException |
294 | |
{ |
295 | 0 | this( is, httpContentType, true ); |
296 | 0 | } |
297 | |
|
298 | |
|
299 | |
|
300 | |
|
301 | |
|
302 | |
|
303 | |
|
304 | |
|
305 | |
|
306 | |
|
307 | |
|
308 | |
|
309 | |
|
310 | |
|
311 | |
|
312 | |
|
313 | |
|
314 | |
|
315 | |
|
316 | |
|
317 | |
|
318 | |
|
319 | |
|
320 | |
|
321 | |
|
322 | |
|
323 | |
|
324 | |
|
325 | |
|
326 | |
|
327 | |
|
328 | |
|
329 | |
|
330 | |
|
331 | |
|
332 | |
public XmlReader( InputStream is, String httpContentType, boolean lenient, String defaultEncoding ) |
333 | |
throws IOException, XmlStreamReaderException |
334 | 0 | { |
335 | 0 | _defaultEncoding = ( defaultEncoding == null ) ? _staticDefaultEncoding : defaultEncoding; |
336 | |
try |
337 | |
{ |
338 | 0 | doHttpStream( is, httpContentType, lenient ); |
339 | |
} |
340 | 0 | catch ( XmlStreamReaderException ex ) |
341 | |
{ |
342 | 0 | if ( !lenient ) |
343 | |
{ |
344 | 0 | throw ex; |
345 | |
} |
346 | |
else |
347 | |
{ |
348 | 0 | doLenientDetection( httpContentType, ex ); |
349 | |
} |
350 | 0 | } |
351 | 0 | } |
352 | |
|
353 | |
|
354 | |
|
355 | |
|
356 | |
|
357 | |
|
358 | |
|
359 | |
|
360 | |
|
361 | |
|
362 | |
|
363 | |
|
364 | |
|
365 | |
|
366 | |
|
367 | |
|
368 | |
|
369 | |
|
370 | |
|
371 | |
|
372 | |
|
373 | |
|
374 | |
|
375 | |
|
376 | |
|
377 | |
|
378 | |
|
379 | |
|
380 | |
|
381 | |
|
382 | |
|
383 | |
|
384 | |
|
385 | |
|
386 | |
|
387 | |
public XmlReader( InputStream is, String httpContentType, boolean lenient ) throws IOException, XmlStreamReaderException |
388 | |
{ |
389 | 0 | this( is, httpContentType, lenient, null ); |
390 | 0 | } |
391 | |
|
392 | |
private void doLenientDetection( String httpContentType, XmlStreamReaderException ex ) throws IOException |
393 | |
{ |
394 | 0 | if ( httpContentType != null ) |
395 | |
{ |
396 | 0 | if ( httpContentType.startsWith( "text/html" ) ) |
397 | |
{ |
398 | 0 | httpContentType = httpContentType.substring( "text/html".length() ); |
399 | 0 | httpContentType = "text/xml" + httpContentType; |
400 | |
try |
401 | |
{ |
402 | 0 | doHttpStream( ex.getInputStream(), httpContentType, true ); |
403 | 0 | ex = null; |
404 | |
} |
405 | 0 | catch ( XmlStreamReaderException ex2 ) |
406 | |
{ |
407 | 0 | ex = ex2; |
408 | 0 | } |
409 | |
} |
410 | |
} |
411 | 0 | if ( ex != null ) |
412 | |
{ |
413 | 0 | String encoding = ex.getXmlEncoding(); |
414 | 0 | if ( encoding == null ) |
415 | |
{ |
416 | 0 | encoding = ex.getContentTypeEncoding(); |
417 | |
} |
418 | 0 | if ( encoding == null ) |
419 | |
{ |
420 | 0 | encoding = ( _defaultEncoding == null ) ? UTF_8 : _defaultEncoding; |
421 | |
} |
422 | 0 | prepareReader( ex.getInputStream(), encoding ); |
423 | |
} |
424 | 0 | } |
425 | |
|
426 | |
|
427 | |
|
428 | |
|
429 | |
|
430 | |
|
431 | |
|
432 | |
|
433 | |
public String getEncoding() |
434 | |
{ |
435 | 0 | return _encoding; |
436 | |
} |
437 | |
|
438 | |
public int read( char[] buf, int offset, int len ) throws IOException |
439 | |
{ |
440 | 190 | return _reader.read( buf, offset, len ); |
441 | |
} |
442 | |
|
443 | |
|
444 | |
|
445 | |
|
446 | |
|
447 | |
|
448 | |
|
449 | |
|
450 | |
|
451 | |
public void close() throws IOException |
452 | |
{ |
453 | 30 | _reader.close(); |
454 | 30 | } |
455 | |
|
456 | |
private void doRawStream( InputStream is, boolean lenient ) throws IOException |
457 | |
{ |
458 | 30 | BufferedInputStream pis = new BufferedInputStream( is, BUFFER_SIZE ); |
459 | 30 | String bomEnc = getBOMEncoding( pis ); |
460 | 30 | String xmlGuessEnc = getXMLGuessEncoding( pis ); |
461 | 30 | String xmlEnc = getXmlProlog( pis, xmlGuessEnc ); |
462 | 30 | String encoding = calculateRawEncoding( bomEnc, xmlGuessEnc, xmlEnc, pis ); |
463 | 30 | prepareReader( pis, encoding ); |
464 | 30 | } |
465 | |
|
466 | |
private void doHttpStream( InputStream is, String httpContentType, boolean lenient ) throws IOException |
467 | |
{ |
468 | 0 | BufferedInputStream pis = new BufferedInputStream( is, BUFFER_SIZE ); |
469 | 0 | String cTMime = getContentTypeMime( httpContentType ); |
470 | 0 | String cTEnc = getContentTypeEncoding( httpContentType ); |
471 | 0 | String bomEnc = getBOMEncoding( pis ); |
472 | 0 | String xmlGuessEnc = getXMLGuessEncoding( pis ); |
473 | 0 | String xmlEnc = getXmlProlog( pis, xmlGuessEnc ); |
474 | 0 | String encoding = calculateHttpEncoding( cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis, lenient ); |
475 | 0 | prepareReader( pis, encoding ); |
476 | 0 | } |
477 | |
|
478 | |
private void prepareReader( InputStream is, String encoding ) throws IOException |
479 | |
{ |
480 | 30 | _reader = new InputStreamReader( is, encoding ); |
481 | 30 | _encoding = encoding; |
482 | 30 | } |
483 | |
|
484 | |
|
485 | |
private String calculateRawEncoding( String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is ) |
486 | |
throws IOException |
487 | |
{ |
488 | |
String encoding; |
489 | 30 | if ( bomEnc == null ) |
490 | |
{ |
491 | 24 | if ( xmlGuessEnc == null || xmlEnc == null ) |
492 | |
{ |
493 | 9 | encoding = ( _defaultEncoding == null ) ? UTF_8 : _defaultEncoding; |
494 | |
} |
495 | 15 | else if ( xmlEnc.equals( UTF_16 ) && ( xmlGuessEnc.equals( UTF_16BE ) || xmlGuessEnc.equals( UTF_16LE ) ) ) |
496 | |
{ |
497 | 0 | encoding = xmlGuessEnc; |
498 | |
} |
499 | |
else |
500 | |
{ |
501 | 15 | encoding = xmlEnc; |
502 | |
} |
503 | |
} |
504 | 6 | else if ( bomEnc.equals( UTF_8 ) ) |
505 | |
{ |
506 | 0 | if ( xmlGuessEnc != null && !xmlGuessEnc.equals( UTF_8 ) ) |
507 | |
{ |
508 | 0 | throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, |
509 | |
xmlGuessEnc, xmlEnc, is ); |
510 | |
} |
511 | 0 | if ( xmlEnc != null && !xmlEnc.equals( UTF_8 ) ) |
512 | |
{ |
513 | 0 | throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, |
514 | |
xmlGuessEnc, xmlEnc, is ); |
515 | |
} |
516 | 0 | encoding = UTF_8; |
517 | |
} |
518 | 6 | else if ( bomEnc.equals( UTF_16BE ) || bomEnc.equals( UTF_16LE ) ) |
519 | |
{ |
520 | 6 | if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) ) |
521 | |
{ |
522 | 0 | throw new IOException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ) ); |
523 | |
} |
524 | 6 | if ( xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) |
525 | |
{ |
526 | 0 | throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, |
527 | |
xmlGuessEnc, xmlEnc, is ); |
528 | |
} |
529 | 6 | encoding = bomEnc; |
530 | |
} |
531 | |
else |
532 | |
{ |
533 | 0 | throw new XmlStreamReaderException( RAW_EX_2.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, |
534 | |
xmlGuessEnc, xmlEnc, is ); |
535 | |
} |
536 | 30 | return encoding; |
537 | |
} |
538 | |
|
539 | |
|
540 | |
private String calculateHttpEncoding( String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, |
541 | |
String xmlEnc, InputStream is, boolean lenient ) throws IOException |
542 | |
{ |
543 | |
String encoding; |
544 | 0 | if ( lenient & xmlEnc != null ) |
545 | |
{ |
546 | 0 | encoding = xmlEnc; |
547 | |
} |
548 | |
else |
549 | |
{ |
550 | 0 | boolean appXml = isAppXml( cTMime ); |
551 | 0 | boolean textXml = isTextXml( cTMime ); |
552 | 0 | if ( appXml || textXml ) |
553 | |
{ |
554 | 0 | if ( cTEnc == null ) |
555 | |
{ |
556 | 0 | if ( appXml ) |
557 | |
{ |
558 | 0 | encoding = calculateRawEncoding( bomEnc, xmlGuessEnc, xmlEnc, is ); |
559 | |
} |
560 | |
else |
561 | |
{ |
562 | 0 | encoding = ( _defaultEncoding == null ) ? US_ASCII : _defaultEncoding; |
563 | |
} |
564 | |
} |
565 | 0 | else if ( bomEnc != null && ( cTEnc.equals( UTF_16BE ) || cTEnc.equals( UTF_16LE ) ) ) |
566 | |
{ |
567 | 0 | throw new XmlStreamReaderException( HTTP_EX_1.format( new Object[] { cTMime, cTEnc, bomEnc, xmlGuessEnc, |
568 | |
xmlEnc } ), cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, is ); |
569 | |
} |
570 | 0 | else if ( cTEnc.equals( UTF_16 ) ) |
571 | |
{ |
572 | 0 | if ( bomEnc != null && bomEnc.startsWith( UTF_16 ) ) |
573 | |
{ |
574 | 0 | encoding = bomEnc; |
575 | |
} |
576 | |
else |
577 | |
{ |
578 | 0 | throw new XmlStreamReaderException( HTTP_EX_2.format( new Object[] { cTMime, cTEnc, bomEnc, |
579 | |
xmlGuessEnc, xmlEnc } ), cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, is ); |
580 | |
} |
581 | |
} |
582 | |
else |
583 | |
{ |
584 | 0 | encoding = cTEnc; |
585 | |
} |
586 | |
} |
587 | |
else |
588 | |
{ |
589 | 0 | throw new XmlStreamReaderException( HTTP_EX_3.format( new Object[] { cTMime, cTEnc, bomEnc, xmlGuessEnc, |
590 | |
xmlEnc } ), cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, is ); |
591 | |
} |
592 | |
} |
593 | 0 | return encoding; |
594 | |
} |
595 | |
|
596 | |
|
597 | |
private static String getContentTypeMime( String httpContentType ) |
598 | |
{ |
599 | 0 | String mime = null; |
600 | 0 | if ( httpContentType != null ) |
601 | |
{ |
602 | 0 | int i = httpContentType.indexOf( ";" ); |
603 | 0 | mime = ( ( i == -1 ) ? httpContentType : httpContentType.substring( 0, i ) ).trim(); |
604 | |
} |
605 | 0 | return mime; |
606 | |
} |
607 | |
|
608 | 1 | private static final Pattern CHARSET_PATTERN = Pattern.compile( "charset=([.[^; ]]*)" ); |
609 | |
|
610 | |
|
611 | |
private static String getContentTypeEncoding( String httpContentType ) |
612 | |
{ |
613 | 0 | String encoding = null; |
614 | 0 | if ( httpContentType != null ) |
615 | |
{ |
616 | 0 | int i = httpContentType.indexOf( ";" ); |
617 | 0 | if ( i > -1 ) |
618 | |
{ |
619 | 0 | String postMime = httpContentType.substring( i + 1 ); |
620 | 0 | Matcher m = CHARSET_PATTERN.matcher( postMime ); |
621 | 0 | encoding = ( m.find() ) ? m.group( 1 ) : null; |
622 | 0 | encoding = ( encoding != null ) ? encoding.toUpperCase() : null; |
623 | |
} |
624 | |
} |
625 | 0 | return encoding; |
626 | |
} |
627 | |
|
628 | |
|
629 | |
|
630 | |
private static String getBOMEncoding( BufferedInputStream is ) throws IOException |
631 | |
{ |
632 | 30 | String encoding = null; |
633 | 30 | int[] bytes = new int[3]; |
634 | 30 | is.mark( 3 ); |
635 | 30 | bytes[0] = is.read(); |
636 | 30 | bytes[1] = is.read(); |
637 | 30 | bytes[2] = is.read(); |
638 | |
|
639 | 30 | if ( bytes[0] == 0xFE && bytes[1] == 0xFF ) |
640 | |
{ |
641 | 6 | encoding = UTF_16BE; |
642 | 6 | is.reset(); |
643 | 6 | is.read(); |
644 | 6 | is.read(); |
645 | |
} |
646 | 24 | else if ( bytes[0] == 0xFF && bytes[1] == 0xFE ) |
647 | |
{ |
648 | 0 | encoding = UTF_16LE; |
649 | 0 | is.reset(); |
650 | 0 | is.read(); |
651 | 0 | is.read(); |
652 | |
} |
653 | 24 | else if ( bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF ) |
654 | |
{ |
655 | 0 | encoding = UTF_8; |
656 | |
} |
657 | |
else |
658 | |
{ |
659 | 24 | is.reset(); |
660 | |
} |
661 | 30 | return encoding; |
662 | |
} |
663 | |
|
664 | |
|
665 | |
private static String getXMLGuessEncoding( BufferedInputStream is ) throws IOException |
666 | |
{ |
667 | 30 | String encoding = null; |
668 | 30 | int[] bytes = new int[4]; |
669 | 30 | is.mark( 4 ); |
670 | 30 | bytes[0] = is.read(); |
671 | 30 | bytes[1] = is.read(); |
672 | 30 | bytes[2] = is.read(); |
673 | 30 | bytes[3] = is.read(); |
674 | 30 | is.reset(); |
675 | |
|
676 | 30 | if ( bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F ) |
677 | |
{ |
678 | 6 | encoding = UTF_16BE; |
679 | |
} |
680 | 24 | else if ( bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00 ) |
681 | |
{ |
682 | 0 | encoding = UTF_16LE; |
683 | |
} |
684 | 24 | else if ( bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D ) |
685 | |
{ |
686 | 15 | encoding = UTF_8; |
687 | |
} |
688 | 9 | else if ( bytes[0] == 0x4C && bytes[1] == 0x6F && bytes[2] == 0xA7 && bytes[3] == 0x94 ) |
689 | |
{ |
690 | 0 | encoding = EBCDIC; |
691 | |
} |
692 | 30 | return encoding; |
693 | |
} |
694 | |
|
695 | 1 | static final Pattern ENCODING_PATTERN = |
696 | |
Pattern.compile( "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE ); |
697 | |
|
698 | |
|
699 | |
private static String getXmlProlog( BufferedInputStream is, String guessedEnc ) throws IOException |
700 | |
{ |
701 | 30 | String encoding = null; |
702 | 30 | if ( guessedEnc != null ) |
703 | |
{ |
704 | 21 | byte[] bytes = new byte[BUFFER_SIZE]; |
705 | 21 | is.mark( BUFFER_SIZE ); |
706 | 21 | int offset = 0; |
707 | 21 | int max = BUFFER_SIZE; |
708 | 21 | int c = is.read( bytes, offset, max ); |
709 | 21 | int firstGT = -1; |
710 | 21 | String xmlProlog = null; |
711 | 42 | while ( c != -1 && firstGT == -1 && offset < BUFFER_SIZE ) |
712 | |
{ |
713 | 21 | offset += c; |
714 | 21 | max -= c; |
715 | 21 | c = is.read( bytes, offset, max ); |
716 | 21 | xmlProlog = new String( bytes, 0, offset, guessedEnc ); |
717 | 21 | firstGT = xmlProlog.indexOf( '>' ); |
718 | |
} |
719 | 21 | if ( firstGT == -1 ) |
720 | |
{ |
721 | 0 | if ( c == -1 ) |
722 | |
{ |
723 | 0 | throw new IOException( "Unexpected end of XML stream" ); |
724 | |
} |
725 | |
else |
726 | |
{ |
727 | 0 | throw new IOException( "XML prolog or ROOT element not found on first " + offset + " bytes" ); |
728 | |
} |
729 | |
} |
730 | 21 | int bytesRead = offset; |
731 | 21 | if ( bytesRead > 0 ) |
732 | |
{ |
733 | 21 | is.reset(); |
734 | 21 | BufferedReader bReader = new BufferedReader( new StringReader( xmlProlog.substring( 0, firstGT + 1 ) ) ); |
735 | 21 | StringBuffer prolog = new StringBuffer(); |
736 | 21 | String line = bReader.readLine(); |
737 | 42 | while ( line != null ) |
738 | |
{ |
739 | 21 | prolog.append( line ); |
740 | 21 | line = bReader.readLine(); |
741 | |
} |
742 | 21 | Matcher m = ENCODING_PATTERN.matcher( prolog ); |
743 | 21 | if ( m.find() ) |
744 | |
{ |
745 | 21 | encoding = m.group( 1 ).toUpperCase(); |
746 | 21 | encoding = encoding.substring( 1, encoding.length() - 1 ); |
747 | |
} |
748 | |
} |
749 | |
} |
750 | 30 | return encoding; |
751 | |
} |
752 | |
|
753 | |
|
754 | |
private static boolean isAppXml( String mime ) |
755 | |
{ |
756 | 0 | return mime != null |
757 | |
&& ( mime.equals( "application/xml" ) || mime.equals( "application/xml-dtd" ) |
758 | |
|| mime.equals( "application/xml-external-parsed-entity" ) || ( mime.startsWith( "application/" ) && mime.endsWith( "+xml" ) ) ); |
759 | |
} |
760 | |
|
761 | |
|
762 | |
private static boolean isTextXml( String mime ) |
763 | |
{ |
764 | 0 | return mime != null |
765 | |
&& ( mime.equals( "text/xml" ) || mime.equals( "text/xml-external-parsed-entity" ) || ( mime.startsWith( "text/" ) && mime.endsWith( "+xml" ) ) ); |
766 | |
} |
767 | |
|
768 | 1 | private static final MessageFormat RAW_EX_1 = |
769 | |
new MessageFormat( "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch" ); |
770 | |
|
771 | 1 | private static final MessageFormat RAW_EX_2 = |
772 | |
new MessageFormat( "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM" ); |
773 | |
|
774 | 1 | private static final MessageFormat HTTP_EX_1 = |
775 | |
new MessageFormat( |
776 | |
"Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL" ); |
777 | |
|
778 | 1 | private static final MessageFormat HTTP_EX_2 = |
779 | |
new MessageFormat( |
780 | |
"Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch" ); |
781 | |
|
782 | 1 | private static final MessageFormat HTTP_EX_3 = |
783 | |
new MessageFormat( |
784 | |
"Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME" ); |
785 | |
|
786 | |
} |