View Javadoc

1   /*
2    * $HeadURL: https://svn.apache.org/repos/asf/httpcomponents/oac.hc3x/trunk/src/java/org/apache/commons/httpclient/ChunkedInputStream.java $
3    * $Revision$
4    * $Date$
5    *
6    * ====================================================================
7    *
8    *  Licensed to the Apache Software Foundation (ASF) under one or more
9    *  contributor license agreements.  See the NOTICE file distributed with
10   *  this work for additional information regarding copyright ownership.
11   *  The ASF licenses this file to You under the Apache License, Version 2.0
12   *  (the "License"); you may not use this file except in compliance with
13   *  the License.  You may obtain a copy of the License at
14   *
15   *      http://www.apache.org/licenses/LICENSE-2.0
16   *
17   *  Unless required by applicable law or agreed to in writing, software
18   *  distributed under the License is distributed on an "AS IS" BASIS,
19   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   *  See the License for the specific language governing permissions and
21   *  limitations under the License.
22   * ====================================================================
23   *
24   * This software consists of voluntary contributions made by many
25   * individuals on behalf of the Apache Software Foundation.  For more
26   * information on the Apache Software Foundation, please see
27   * <http://www.apache.org/>.
28   *
29   */
30  
31  package org.apache.commons.httpclient;
32  
33  import java.io.ByteArrayOutputStream;
34  import java.io.IOException;
35  import java.io.InputStream;
36  
37  import org.apache.commons.httpclient.util.EncodingUtil;
38  import org.apache.commons.httpclient.util.ExceptionUtil;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  
42  
43  /***
44   * <p>Transparently coalesces chunks of a HTTP stream that uses
45   * Transfer-Encoding chunked.</p>
46   *
47   * <p>Note that this class NEVER closes the underlying stream, even when close
48   * gets called.  Instead, it will read until the "end" of its chunking on close,
49   * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
50   * not requiring the client to remember to read the entire contents of the
51   * response.</p>
52   *
53   * @author Ortwin Glueck
54   * @author Sean C. Sullivan
55   * @author Martin Elwin
56   * @author Eric Johnson
57   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
58   * @author Michael Becke
59   * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
60   *
61   * @since 2.0
62   *
63   */
64  public class ChunkedInputStream extends InputStream {
65      /*** The inputstream that we're wrapping */
66      private InputStream in;
67  
68      /*** The chunk size */
69      private int chunkSize;
70  
71      /*** The current position within the current chunk */
72      private int pos;
73  
74      /*** True if we'are at the beginning of stream */
75      private boolean bof = true;
76  
77      /*** True if we've reached the end of stream */
78      private boolean eof = false;
79  
80      /*** True if this stream is closed */
81      private boolean closed = false;
82  
83      /*** The method that this stream came from */
84      private HttpMethod method = null;
85  
86      /*** Log object for this class. */
87      private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);
88  
89      /***
90       * ChunkedInputStream constructor that associates the chunked input stream with a 
91       * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod 
92       * HTTP method} the chunked input stream originates from. If chunked input stream 
93       * contains any footers (trailing headers), they will be added to the associated 
94       * {@link HttpMethod HTTP method}.
95       *
96       * @param in the raw input stream
97       * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.  
98       *
99       * @throws IOException If an IO error occurs
100      */
101     public ChunkedInputStream(
102         final InputStream in, final HttpMethod method) throws IOException {
103             
104     	if (in == null) {
105     		throw new IllegalArgumentException("InputStream parameter may not be null");
106     	}
107         this.in = in;
108         this.method = method;
109         this.pos = 0;
110     }
111 
112     /***
113      * ChunkedInputStream constructor
114      *
115      * @param in the raw input stream
116      *
117      * @throws IOException If an IO error occurs
118      */
119     public ChunkedInputStream(final InputStream in) throws IOException {
120     	this(in, null);
121     }
122     
123     /***
124      * <p> Returns all the data in a chunked stream in coalesced form. A chunk
125      * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
126      * is detected.</p>
127      * 
128      * <p> Trailer headers are read automcatically at the end of the stream and
129      * can be obtained with the getResponseFooters() method.</p>
130      *
131      * @return -1 of the end of the stream has been reached or the next data
132      * byte
133      * @throws IOException If an IO problem occurs
134      * 
135      * @see HttpMethod#getResponseFooters()
136      */
137     public int read() throws IOException {
138 
139         if (closed) {
140             throw new IOException("Attempted read from closed stream.");
141         }
142         if (eof) {
143             return -1;
144         } 
145         if (pos >= chunkSize) {
146             nextChunk();
147             if (eof) { 
148                 return -1;
149             }
150         }
151         pos++;
152         return in.read();
153     }
154 
155     /***
156      * Read some bytes from the stream.
157      * @param b The byte array that will hold the contents from the stream.
158      * @param off The offset into the byte array at which bytes will start to be
159      * placed.
160      * @param len the maximum number of bytes that can be returned.
161      * @return The number of bytes returned or -1 if the end of stream has been
162      * reached.
163      * @see java.io.InputStream#read(byte[], int, int)
164      * @throws IOException if an IO problem occurs.
165      */
166     public int read (byte[] b, int off, int len) throws IOException {
167 
168         if (closed) {
169             throw new IOException("Attempted read from closed stream.");
170         }
171 
172         if (eof) { 
173             return -1;
174         }
175         if (pos >= chunkSize) {
176             nextChunk();
177             if (eof) { 
178                 return -1;
179             }
180         }
181         len = Math.min(len, chunkSize - pos);
182         int count = in.read(b, off, len);
183         pos += count;
184         return count;
185     }
186 
187     /***
188      * Read some bytes from the stream.
189      * @param b The byte array that will hold the contents from the stream.
190      * @return The number of bytes returned or -1 if the end of stream has been
191      * reached.
192      * @see java.io.InputStream#read(byte[])
193      * @throws IOException if an IO problem occurs.
194      */
195     public int read (byte[] b) throws IOException {
196         return read(b, 0, b.length);
197     }
198 
199     /***
200      * Read the CRLF terminator.
201      * @throws IOException If an IO error occurs.
202      */
203     private void readCRLF() throws IOException {
204         int cr = in.read();
205         int lf = in.read();
206         if ((cr != '\r') || (lf != '\n')) { 
207             throw new IOException(
208                 "CRLF expected at end of chunk: " + cr + "/" + lf);
209         }
210     }
211 
212 
213     /***
214      * Read the next chunk.
215      * @throws IOException If an IO error occurs.
216      */
217     private void nextChunk() throws IOException {
218         if (!bof) {
219             readCRLF();
220         }
221         chunkSize = getChunkSizeFromInputStream(in);
222         bof = false;
223         pos = 0;
224         if (chunkSize == 0) {
225             eof = true;
226             parseTrailerHeaders();
227         }
228     }
229 
230     /***
231      * Expects the stream to start with a chunksize in hex with optional
232      * comments after a semicolon. The line must end with a CRLF: "a3; some
233      * comment\r\n" Positions the stream at the start of the next line.
234      *
235      * @param in The new input stream.
236      * @param required <tt>true<tt/> if a valid chunk must be present,
237      *                 <tt>false<tt/> otherwise.
238      * 
239      * @return the chunk size as integer
240      * 
241      * @throws IOException when the chunk size could not be parsed
242      */
243     private static int getChunkSizeFromInputStream(final InputStream in) 
244       throws IOException {
245             
246         ByteArrayOutputStream baos = new ByteArrayOutputStream();
247         // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
248         int state = 0; 
249         while (state != -1) {
250         int b = in.read();
251             if (b == -1) { 
252                 throw new IOException("chunked stream ended unexpectedly");
253             }
254             switch (state) {
255                 case 0: 
256                     switch (b) {
257                         case '\r':
258                             state = 1;
259                             break;
260                         case '\"':
261                             state = 2;
262                             /* fall through */
263                         default:
264                             baos.write(b);
265                     }
266                     break;
267 
268                 case 1:
269                     if (b == '\n') {
270                         state = -1;
271                     } else {
272                         // this was not CRLF
273                         throw new IOException("Protocol violation: Unexpected"
274                             + " single newline character in chunk size");
275                     }
276                     break;
277 
278                 case 2:
279                     switch (b) {
280                         case '//':
281                             b = in.read();
282                             baos.write(b);
283                             break;
284                         case '\"':
285                             state = 0;
286                             /* fall through */
287                         default:
288                             baos.write(b);
289                     }
290                     break;
291                 default: throw new RuntimeException("assertion failed");
292             }
293         }
294 
295         //parse data
296         String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
297         int separator = dataString.indexOf(';');
298         dataString = (separator > 0)
299             ? dataString.substring(0, separator).trim()
300             : dataString.trim();
301 
302         int result;
303         try {
304             result = Integer.parseInt(dataString.trim(), 16);
305         } catch (NumberFormatException e) {
306             throw new IOException ("Bad chunk size: " + dataString);
307         }
308         return result;
309     }
310 
311     /***
312      * Reads and stores the Trailer headers.
313      * @throws IOException If an IO problem occurs
314      */
315     private void parseTrailerHeaders() throws IOException {
316         Header[] footers = null;
317         try {
318             String charset = "US-ASCII";
319             if (this.method != null) {
320                 charset = this.method.getParams().getHttpElementCharset();
321             }
322             footers = HttpParser.parseHeaders(in, charset);
323         } catch(HttpException e) {
324             LOG.error("Error parsing trailer headers", e);
325             IOException ioe = new IOException(e.getMessage());
326             ExceptionUtil.initCause(ioe, e); 
327             throw ioe;
328         }
329         if (this.method != null) {
330             for (int i = 0; i < footers.length; i++) {
331                 this.method.addResponseFooter(footers[i]);
332             }
333         }
334     }
335 
336     /***
337      * Upon close, this reads the remainder of the chunked message,
338      * leaving the underlying socket at a position to start reading the
339      * next response without scanning.
340      * @throws IOException If an IO problem occurs.
341      */
342     public void close() throws IOException {
343         if (!closed) {
344             try {
345                 if (!eof) {
346                     exhaustInputStream(this);
347                 }
348             } finally {
349                 eof = true;
350                 closed = true;
351             }
352         }
353     }
354 
355     /***
356      * Exhaust an input stream, reading until EOF has been encountered.
357      *
358      * <p>Note that this function is intended as a non-public utility.
359      * This is a little weird, but it seemed silly to make a utility
360      * class for this one function, so instead it is just static and
361      * shared that way.</p>
362      *
363      * @param inStream The {@link InputStream} to exhaust.
364      * @throws IOException If an IO problem occurs
365      */
366     static void exhaustInputStream(InputStream inStream) throws IOException {
367         // read and discard the remainder of the message
368         byte buffer[] = new byte[1024];
369         while (inStream.read(buffer) >= 0) {
370             ;
371         }
372     }
373 }