Coverage Report

Coverage Report - org.apache.johnzon.core.RFC4627AwareInputStreamReader

Classes in this File

Line Coverage

Branch Coverage

Complexity

RFC4627AwareInputStreamReader

100%

44/44

86%

38/44

7,75

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership. The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied. See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.johnzon.core;
 
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.PushbackInputStream;
 import java.nio.charset.Charset;
 
 import javax.json.JsonException;
 
 final class RFC4627AwareInputStreamReader extends InputStreamReader {
 
     RFC4627AwareInputStreamReader(final InputStream in) {
         this(new PushbackInputStream(in,4));
     }
     
     private RFC4627AwareInputStreamReader(final PushbackInputStream in) {
         super(in, getCharset(in).newDecoder());
        
     }
 
     /**
      * According to the Java API "An attempt is made to read as many as len bytes, but a smaller number may be read".
      * [http://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html#read(byte[],%20int,%20int)]
      * For this reason we need to ensure that we've read all the bytes that we need out of this stream.
      */
     private static byte[] readAllBytes(final PushbackInputStream inputStream) throws IOException {
         final int first = inputStream.read();
         final int second = inputStream.read();
         if(first == -1|| second == -1) {
             throw new JsonException("Invalid Json. Valid Json has at least 2 bytes");
         }
         final int third = inputStream.read();
         final int fourth = inputStream.read();
         if(third == -1) {
             return new byte[] { (byte) first, (byte) second };
         } else if(fourth == -1) {
             return new byte[] { (byte) first, (byte) second, (byte) third };
         } else {
             return new byte[] { (byte) first, (byte) second, (byte) third, (byte) fourth };
         }
     }
 
     /*
         * RFC 4627
 
           JSON text SHALL be encoded in Unicode.  The default encoding is
           UTF-8.
        
           Since the first two characters of a JSON text will always be ASCII
           characters [RFC0020], it is possible to determine whether an octet
           stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
           at the pattern of nulls in the first four octets.
 
           00 00 00 xx  UTF-32BE
           00 xx 00 xx  UTF-16BE
           xx 00 00 00  UTF-32LE
           xx 00 xx 00  UTF-16LE
           xx xx xx xx  UTF-8
 
         */
 
     private static Charset getCharset(final PushbackInputStream inputStream) {
         Charset charset = Charset.forName("UTF-8");
         int bomLength=0;
         try {
             final byte[] utfBytes = readAllBytes(inputStream);
             int first = (utfBytes[0] & 0xFF);
             int second = (utfBytes[1] & 0xFF);
             if (first == 0x00) {
                 charset = (second == 0x00) ? Charset.forName("UTF-32BE") : Charset.forName("UTF-16BE");
             } else if (utfBytes.length > 2 && second == 0x00) {
                 int third = (utfBytes[2] & 0xFF);
                 charset = (third  == 0x00) ? Charset.forName("UTF-32LE") : Charset.forName("UTF-16LE");
             } else {
 
                     /*check BOM
 
                     Encoding       hex byte order mark
                     UTF-8          EF BB BF
                     UTF-16 (BE)    FE FF
                     UTF-16 (LE)    FF FE
                     UTF-32 (BE)    00 00 FE FF
                     UTF-32 (LE)    FF FE 00 00
                     */
 
                 //We do not check for UTF-32BE because that is already covered above and we
                 //do not to unread anything.
 
                 if(first == 0xFE && second == 0xFF) {
                     charset = Charset.forName("UTF-16BE");
                     bomLength=2;
                 } else if(first == 0xFF && second == 0xFE) {
                     if(utfBytes.length > 3 && (utfBytes[2]&0xff) == 0x00 && (utfBytes[3]&0xff) == 0x00) {
                         charset = Charset.forName("UTF-32LE");
                         bomLength=4;
                     }else {
                         charset = Charset.forName("UTF-16LE");
                         bomLength=2;
                     }
                 } else if (utfBytes.length > 2 && first == 0xEF && second == 0xBB && (utfBytes[2]&0xff) == 0xBF) {
                     //UTF-8 with BOM
                     bomLength=3;
                 }
             }
             //assume UTF8
             if(bomLength > 0 && bomLength < 4) {             
                 //do not unread BOM, only bytes after BOM        
                 inputStream.unread(utfBytes,bomLength,utfBytes.length - bomLength);
             } else {             
                 //no BOM, unread all read bytes
                 inputStream.unread(utfBytes);
             }
           
 
         } catch (final IOException e) {
             throw new JsonException("Unable to detect charset due to "+e.getMessage(), e);
         }
 
         return charset;
     }
 
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one
3		* or more contributor license agreements. See the NOTICE file
4		* distributed with this work for additional information
5		* regarding copyright ownership. The ASF licenses this file
6		* to you under the Apache License, Version 2.0 (the
7		* "License"); you may not use this file except in compliance
8		* with the License. You may obtain a copy of the License at
9		*
10		* http://www.apache.org/licenses/LICENSE-2.0
11		*
12		* Unless required by applicable law or agreed to in writing,
13		* software distributed under the License is distributed on an
14		* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15		* KIND, either express or implied. See the License for the
16		* specific language governing permissions and limitations
17		* under the License.
18		*/
19		package org.apache.johnzon.core;
20
21		import java.io.IOException;
22		import java.io.InputStream;
23		import java.io.InputStreamReader;
24		import java.io.PushbackInputStream;
25		import java.nio.charset.Charset;
26
27		import javax.json.JsonException;
28
29		final class RFC4627AwareInputStreamReader extends InputStreamReader {
30
31		RFC4627AwareInputStreamReader(final InputStream in) {
32	278	this(new PushbackInputStream(in,4));
33	275	}
34
35		private RFC4627AwareInputStreamReader(final PushbackInputStream in) {
36	278	super(in, getCharset(in).newDecoder());
37
38	275	}
39
40		/**
41		* According to the Java API "An attempt is made to read as many as len bytes, but a smaller number may be read".
42		* [http://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html#read(byte[],%20int,%20int)]
43		* For this reason we need to ensure that we've read all the bytes that we need out of this stream.
44		*/
45		private static byte[] readAllBytes(final PushbackInputStream inputStream) throws IOException {
46	278	final int first = inputStream.read();
47	277	final int second = inputStream.read();
48	277	if(first == -1|| second == -1) {
49	2	throw new JsonException("Invalid Json. Valid Json has at least 2 bytes");
50		}
51	275	final int third = inputStream.read();
52	275	final int fourth = inputStream.read();
53	275	if(third == -1) {
54	2	return new byte[] { (byte) first, (byte) second };
55	273	} else if(fourth == -1) {
56	1	return new byte[] { (byte) first, (byte) second, (byte) third };
57		} else {
58	272	return new byte[] { (byte) first, (byte) second, (byte) third, (byte) fourth };
59		}
60		}
61
62		/*
63		* RFC 4627
64
65		JSON text SHALL be encoded in Unicode. The default encoding is
66		UTF-8.
67
68		Since the first two characters of a JSON text will always be ASCII
69		characters [RFC0020], it is possible to determine whether an octet
70		stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
71		at the pattern of nulls in the first four octets.
72
73		00 00 00 xx UTF-32BE
74		00 xx 00 xx UTF-16BE
75		xx 00 00 00 UTF-32LE
76		xx 00 xx 00 UTF-16LE
77		xx xx xx xx UTF-8
78
79		*/
80
81		private static Charset getCharset(final PushbackInputStream inputStream) {
82	278	Charset charset = Charset.forName("UTF-8");
83	278	int bomLength=0;
84		try {
85	278	final byte[] utfBytes = readAllBytes(inputStream);
86	275	int first = (utfBytes[0] & 0xFF);
87	275	int second = (utfBytes[1] & 0xFF);
88	275	if (first == 0x00) {
89	4	charset = (second == 0x00) ? Charset.forName("UTF-32BE") : Charset.forName("UTF-16BE");
90	271	} else if (utfBytes.length > 2 && second == 0x00) {
91	3	int third = (utfBytes[2] & 0xFF);
92	3	charset = (third == 0x00) ? Charset.forName("UTF-32LE") : Charset.forName("UTF-16LE");
93	3	} else {
94
95		/*check BOM
96
97		Encoding hex byte order mark
98		UTF-8 EF BB BF
99		UTF-16 (BE) FE FF
100		UTF-16 (LE) FF FE
101		UTF-32 (BE) 00 00 FE FF
102		UTF-32 (LE) FF FE 00 00
103		*/
104
105		//We do not check for UTF-32BE because that is already covered above and we
106		//do not need to unread anything.
107
108	268	if(first == 0xFE && second == 0xFF) {
109	2	charset = Charset.forName("UTF-16BE");
110	2	bomLength=2;
111	266	} else if(first == 0xFF && second == 0xFE) {
112	3	if(utfBytes.length > 3 && (utfBytes[2]&0xff) == 0x00 && (utfBytes[3]&0xff) == 0x00) {
113	2	charset = Charset.forName("UTF-32LE");
114	2	bomLength=4;
115		}else {
116	1	charset = Charset.forName("UTF-16LE");
117	1	bomLength=2;
118		}
119	263	} else if (utfBytes.length > 2 && first == 0xEF && second == 0xBB && (utfBytes[2]&0xff) == 0xBF) {
120		//UTF-8 with BOM
121	2	bomLength=3;
122		}
123		}
124		//assume UTF8
125	275	if(bomLength > 0 && bomLength < 4) {
126		//do not unread BOM, only bytes after BOM
127	5	inputStream.unread(utfBytes,bomLength,utfBytes.length - bomLength);
128		} else {
129		//no BOM, unread all read bytes
130	270	inputStream.unread(utfBytes);
131		}
132
133
134	1	} catch (final IOException e) {
135	1	throw new JsonException("Unable to detect charset due to "+e.getMessage(), e);
136	275	}
137
138	275	return charset;
139		}
140
141		}