Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XmlStreamReader |
|
| 1.0;1 |
1 | /* | |
2 | * Copyright 2004 Sun Microsystems, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | * | |
16 | */ | |
17 | package org.codehaus.plexus.util.xml; | |
18 | ||
19 | import java.io.File; | |
20 | import java.io.IOException; | |
21 | import java.io.InputStream; | |
22 | import java.net.URL; | |
23 | import java.net.URLConnection; | |
24 | ||
25 | /** | |
26 | * Character stream that handles (or at least attempts to) all the necessary voodoo to figure out the charset encoding of | |
27 | * the XML document within the stream. | |
28 | * <p> | |
29 | * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a character stream. | |
30 | * <p> | |
31 | * All this has to be done without consuming characters from the stream, otherwise the XML parser will not recognize the | |
32 | * document as a valid XML. This is not 100% true, but it's close enough (UTF-8 BOM is not handled by all parsers right | |
33 | * now, XmlReader handles it and things work in all parsers). | |
34 | * <p> | |
35 | * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and HTTP streams by offering | |
36 | * a wide set of constructors. | |
37 | * <P> | |
38 | * By default the charset encoding detection is lenient; the constructor with the lenient flag can be used for a strict | |
39 | * detection (following HTTP MIME and XML specifications). All this is nicely explained by Mark Pilgrim in his blog, <a | |
40 | * href="http://diveintomark.org/archives/2004/02/13/xml-media-types"> Determining the character encoding of a feed</a>. | |
41 | * <p> | |
42 | * | |
43 | * @author Alejandro Abdelnur | |
44 | * @version revision 1.17 taken on 26/06/2007 from Rome (see https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java) | |
45 | * @since 1.4.4 | |
46 | * @deprecated TO BE REMOVED from here when plexus-utils is upgraded to 1.4.5+ (and prerequisite upgraded to Maven 2.0.6) | |
47 | */ | |
48 | public class XmlStreamReader | |
49 | extends XmlReader | |
50 | { | |
51 | /** | |
52 | * Creates a Reader for a File. | |
53 | * <p> | |
54 | * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also missing defaults to | |
55 | * UTF-8. | |
56 | * <p> | |
57 | * It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. | |
58 | * <p> | |
59 | * | |
60 | * @param file | |
61 | * File to create a Reader from. | |
62 | * @throws IOException | |
63 | * thrown if there is a problem reading the file. | |
64 | * | |
65 | */ | |
66 | public XmlStreamReader( File file ) throws IOException | |
67 | { | |
68 | 30 | super( file ); |
69 | 30 | } |
70 | ||
71 | /** | |
72 | * Creates a Reader for a raw InputStream. | |
73 | * <p> | |
74 | * It follows the same logic used for files. | |
75 | * <p> | |
76 | * It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. | |
77 | * <p> | |
78 | * | |
79 | * @param is | |
80 | * InputStream to create a Reader from. | |
81 | * @throws IOException | |
82 | * thrown if there is a problem reading the stream. | |
83 | * | |
84 | */ | |
85 | public XmlStreamReader( InputStream is ) throws IOException | |
86 | { | |
87 | 0 | super( is ); |
88 | 0 | } |
89 | ||
90 | /** | |
91 | * Creates a Reader for a raw InputStream. | |
92 | * <p> | |
93 | * It follows the same logic used for files. | |
94 | * <p> | |
95 | * If lenient detection is indicated and the detection above fails as per specifications it then attempts the | |
96 | * following: | |
97 | * <p> | |
98 | * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. | |
99 | * <p> | |
100 | * Else if the XML prolog had a charset encoding that encoding is used. | |
101 | * <p> | |
102 | * Else if the content type had a charset encoding that encoding is used. | |
103 | * <p> | |
104 | * Else 'UTF-8' is used. | |
105 | * <p> | |
106 | * If lenient detection is indicated an XmlStreamReaderException is never thrown. | |
107 | * <p> | |
108 | * | |
109 | * @param is | |
110 | * InputStream to create a Reader from. | |
111 | * @param lenient | |
112 | * indicates if the charset encoding detection should be relaxed. | |
113 | * @throws IOException | |
114 | * thrown if there is a problem reading the stream. | |
115 | * @throws XmlStreamReaderException | |
116 | * thrown if the charset encoding could not be determined according to the specs. | |
117 | * | |
118 | */ | |
119 | public XmlStreamReader( InputStream is, boolean lenient ) throws IOException, XmlStreamReaderException | |
120 | { | |
121 | 0 | super( is, lenient ); |
122 | 0 | } |
123 | ||
124 | /** | |
125 | * Creates a Reader using the InputStream of a URL. | |
126 | * <p> | |
127 | * If the URL is not of type HTTP and there is not 'content-type' header in the fetched data it uses the same logic | |
128 | * used for Files. | |
129 | * <p> | |
130 | * If the URL is a HTTP Url or there is a 'content-type' header in the fetched data it uses the same logic used for | |
131 | * an InputStream with content-type. | |
132 | * <p> | |
133 | * It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. | |
134 | * <p> | |
135 | * | |
136 | * @param url | |
137 | * URL to create a Reader from. | |
138 | * @throws IOException | |
139 | * thrown if there is a problem reading the stream of the URL. | |
140 | * | |
141 | */ | |
142 | public XmlStreamReader( URL url ) throws IOException | |
143 | { | |
144 | 0 | super( url ); |
145 | 0 | } |
146 | ||
147 | /** | |
148 | * Creates a Reader using the InputStream of a URLConnection. | |
149 | * <p> | |
150 | * If the URLConnection is not of type HttpURLConnection and there is not 'content-type' header in the fetched data | |
151 | * it uses the same logic used for files. | |
152 | * <p> | |
153 | * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched data it uses the same logic | |
154 | * used for an InputStream with content-type. | |
155 | * <p> | |
156 | * It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. | |
157 | * <p> | |
158 | * | |
159 | * @param conn | |
160 | * URLConnection to create a Reader from. | |
161 | * @throws IOException | |
162 | * thrown if there is a problem reading the stream of the URLConnection. | |
163 | * | |
164 | */ | |
165 | public XmlStreamReader( URLConnection conn ) throws IOException | |
166 | { | |
167 | 0 | super( conn ); |
168 | 0 | } |
169 | ||
170 | /** | |
171 | * Creates a Reader using an InputStream and the associated content-type header. | |
172 | * <p> | |
173 | * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding. If there is not | |
174 | * content-type encoding checks the XML prolog encoding. If there is not XML prolog encoding uses the default | |
175 | * encoding mandated by the content-type MIME type. | |
176 | * <p> | |
177 | * It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. | |
178 | * <p> | |
179 | * | |
180 | * @param is | |
181 | * InputStream to create the reader from. | |
182 | * @param httpContentType | |
183 | * content-type header to use for the resolution of the charset encoding. | |
184 | * @throws IOException | |
185 | * thrown if there is a problem reading the stream. | |
186 | * | |
187 | */ | |
188 | public XmlStreamReader( InputStream is, String httpContentType ) throws IOException | |
189 | { | |
190 | 0 | super( is, httpContentType ); |
191 | 0 | } |
192 | ||
193 | /** | |
194 | * Creates a Reader using an InputStream and the associated content-type header. This constructor is lenient | |
195 | * regarding the encoding detection. | |
196 | * <p> | |
197 | * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding. If there is not | |
198 | * content-type encoding checks the XML prolog encoding. If there is not XML prolog encoding uses the default | |
199 | * encoding mandated by the content-type MIME type. | |
200 | * <p> | |
201 | * If lenient detection is indicated and the detection above fails as per specifications it then attempts the | |
202 | * following: | |
203 | * <p> | |
204 | * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. | |
205 | * <p> | |
206 | * Else if the XML prolog had a charset encoding that encoding is used. | |
207 | * <p> | |
208 | * Else if the content type had a charset encoding that encoding is used. | |
209 | * <p> | |
210 | * Else 'UTF-8' is used. | |
211 | * <p> | |
212 | * If lenient detection is indicated an XmlStreamReaderException is never thrown. | |
213 | * <p> | |
214 | * | |
215 | * @param is | |
216 | * InputStream to create the reader from. | |
217 | * @param httpContentType | |
218 | * content-type header to use for the resolution of the charset encoding. | |
219 | * @param lenient | |
220 | * indicates if the charset encoding detection should be relaxed. | |
221 | * @throws IOException | |
222 | * thrown if there is a problem reading the stream. | |
223 | * @throws XmlStreamReaderException | |
224 | * thrown if the charset encoding could not be determined according to the specs. | |
225 | * | |
226 | */ | |
227 | public XmlStreamReader( InputStream is, String httpContentType, boolean lenient, String defaultEncoding ) | |
228 | throws IOException, XmlStreamReaderException | |
229 | { | |
230 | 0 | super( is, httpContentType, lenient, defaultEncoding ); |
231 | 0 | } |
232 | ||
233 | /** | |
234 | * Creates a Reader using an InputStream and the associated content-type header. This constructor is lenient | |
235 | * regarding the encoding detection. | |
236 | * <p> | |
237 | * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding. If there is not | |
238 | * content-type encoding checks the XML prolog encoding. If there is not XML prolog encoding uses the default | |
239 | * encoding mandated by the content-type MIME type. | |
240 | * <p> | |
241 | * If lenient detection is indicated and the detection above fails as per specifications it then attempts the | |
242 | * following: | |
243 | * <p> | |
244 | * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. | |
245 | * <p> | |
246 | * Else if the XML prolog had a charset encoding that encoding is used. | |
247 | * <p> | |
248 | * Else if the content type had a charset encoding that encoding is used. | |
249 | * <p> | |
250 | * Else 'UTF-8' is used. | |
251 | * <p> | |
252 | * If lenient detection is indicated an XmlStreamReaderException is never thrown. | |
253 | * <p> | |
254 | * | |
255 | * @param is | |
256 | * InputStream to create the reader from. | |
257 | * @param httpContentType | |
258 | * content-type header to use for the resolution of the charset encoding. | |
259 | * @param lenient | |
260 | * indicates if the charset encoding detection should be relaxed. | |
261 | * @throws IOException | |
262 | * thrown if there is a problem reading the stream. | |
263 | * @throws XmlStreamReaderException | |
264 | * thrown if the charset encoding could not be determined according to the specs. | |
265 | * | |
266 | */ | |
267 | public XmlStreamReader( InputStream is, String httpContentType, boolean lenient ) throws IOException, XmlStreamReaderException | |
268 | { | |
269 | 0 | super( is, httpContentType, lenient ); |
270 | 0 | } |
271 | } |