Coverage Report - org.apache.johnzon.core.RFC4627AwareInputStreamReader
 
Classes in this File Line Coverage Branch Coverage Complexity
RFC4627AwareInputStreamReader
100%
44/44
86%
38/44
7,75
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one
 3  
  * or more contributor license agreements. See the NOTICE file
 4  
  * distributed with this work for additional information
 5  
  * regarding copyright ownership. The ASF licenses this file
 6  
  * to you under the Apache License, Version 2.0 (the
 7  
  * "License"); you may not use this file except in compliance
 8  
  * with the License. You may obtain a copy of the License at
 9  
  *
 10  
  * http://www.apache.org/licenses/LICENSE-2.0
 11  
  *
 12  
  * Unless required by applicable law or agreed to in writing,
 13  
  * software distributed under the License is distributed on an
 14  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15  
  * KIND, either express or implied. See the License for the
 16  
  * specific language governing permissions and limitations
 17  
  * under the License.
 18  
  */
 19  
 package org.apache.johnzon.core;
 20  
 
 21  
 import java.io.IOException;
 22  
 import java.io.InputStream;
 23  
 import java.io.InputStreamReader;
 24  
 import java.io.PushbackInputStream;
 25  
 import java.nio.charset.Charset;
 26  
 
 27  
 import javax.json.JsonException;
 28  
 
 29  
 final class RFC4627AwareInputStreamReader extends InputStreamReader {
 30  
 
 31  
     RFC4627AwareInputStreamReader(final InputStream in) {
 32  278
         this(new PushbackInputStream(in,4));
 33  275
     }
 34  
     
 35  
     private RFC4627AwareInputStreamReader(final PushbackInputStream in) {
 36  278
         super(in, getCharset(in).newDecoder());
 37  
        
 38  275
     }
 39  
 
 40  
     /**
 41  
      * According to the Java API "An attempt is made to read as many as len bytes, but a smaller number may be read".
 42  
      * [http://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html#read(byte[],%20int,%20int)]
 43  
      * For this reason we need to ensure that we've read all the bytes that we need out of this stream.
 44  
      */
 45  
     private static byte[] readAllBytes(final PushbackInputStream inputStream) throws IOException {
 46  278
         final int first = inputStream.read();
 47  277
         final int second = inputStream.read();
 48  277
         if(first == -1|| second == -1) {
 49  2
             throw new JsonException("Invalid Json. Valid Json has at least 2 bytes");
 50  
         }
 51  275
         final int third = inputStream.read();
 52  275
         final int fourth = inputStream.read();
 53  275
         if(third == -1) {
 54  2
             return new byte[] { (byte) first, (byte) second };
 55  273
         } else if(fourth == -1) {
 56  1
             return new byte[] { (byte) first, (byte) second, (byte) third };
 57  
         } else {
 58  272
             return new byte[] { (byte) first, (byte) second, (byte) third, (byte) fourth };
 59  
         }
 60  
     }
 61  
 
 62  
     /*
 63  
         * RFC 4627
 64  
 
 65  
           JSON text SHALL be encoded in Unicode.  The default encoding is
 66  
           UTF-8.
 67  
        
 68  
           Since the first two characters of a JSON text will always be ASCII
 69  
           characters [RFC0020], it is possible to determine whether an octet
 70  
           stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
 71  
           at the pattern of nulls in the first four octets.
 72  
 
 73  
           00 00 00 xx  UTF-32BE
 74  
           00 xx 00 xx  UTF-16BE
 75  
           xx 00 00 00  UTF-32LE
 76  
           xx 00 xx 00  UTF-16LE
 77  
           xx xx xx xx  UTF-8
 78  
 
 79  
         */
 80  
 
 81  
     private static Charset getCharset(final PushbackInputStream inputStream) {
 82  278
         Charset charset = Charset.forName("UTF-8");
 83  278
         int bomLength=0;
 84  
         try {
 85  278
             final byte[] utfBytes = readAllBytes(inputStream);
 86  275
             int first = (utfBytes[0] & 0xFF);
 87  275
             int second = (utfBytes[1] & 0xFF);
 88  275
             if (first == 0x00) {
 89  4
                 charset = (second == 0x00) ? Charset.forName("UTF-32BE") : Charset.forName("UTF-16BE");
 90  271
             } else if (utfBytes.length > 2 && second == 0x00) {
 91  3
                 int third = (utfBytes[2] & 0xFF);
 92  3
                 charset = (third  == 0x00) ? Charset.forName("UTF-32LE") : Charset.forName("UTF-16LE");
 93  3
             } else {
 94  
 
 95  
                     /*check BOM
 96  
 
 97  
                     Encoding       hex byte order mark
 98  
                     UTF-8          EF BB BF
 99  
                     UTF-16 (BE)    FE FF
 100  
                     UTF-16 (LE)    FF FE
 101  
                     UTF-32 (BE)    00 00 FE FF
 102  
                     UTF-32 (LE)    FF FE 00 00
 103  
                     */
 104  
 
 105  
                 //We do not check for UTF-32BE because that is already covered above and we
 106  
                 //do not strip its BOM: bomLength stays 0 there, so all read bytes are pushed back.
 107  
 
 108  268
                 if(first == 0xFE && second == 0xFF) {
 109  2
                     charset = Charset.forName("UTF-16BE");
 110  2
                     bomLength=2;
 111  266
                 } else if(first == 0xFF && second == 0xFE) {
 112  3
                     if(utfBytes.length > 3 && (utfBytes[2]&0xff) == 0x00 && (utfBytes[3]&0xff) == 0x00) {
 113  2
                         charset = Charset.forName("UTF-32LE");
 114  2
                         bomLength=4;
 115  
                     }else {
 116  1
                         charset = Charset.forName("UTF-16LE");
 117  1
                         bomLength=2;
 118  
                     }
 119  263
                 } else if (utfBytes.length > 2 && first == 0xEF && second == 0xBB && (utfBytes[2]&0xff) == 0xBF) {
 120  
                     //UTF-8 with BOM
 121  2
                     bomLength=3;
 122  
                 }
 123  
             }
 124  
             //charset is still the UTF-8 default unless one of the branches above replaced it
 125  275
             if(bomLength > 0 && bomLength < 4) {             
 126  
                 //do not unread BOM, only bytes after BOM        
 127  5
                 inputStream.unread(utfBytes,bomLength,utfBytes.length - bomLength);
 128  
             } else {             
 129  
                 //no BOM, or a 4-byte UTF-32LE BOM (bomLength == 4): unread all read bytes
 130  270
                 inputStream.unread(utfBytes);
 131  
             }
 132  
           
 133  
 
 134  1
         } catch (final IOException e) {
 135  1
             throw new JsonException("Unable to detect charset due to "+e.getMessage(), e);
 136  275
         }
 137  
 
 138  275
         return charset;
 139  
     }
 140  
 
 141  
 }