blob: 645cf0e34aac30d2a66eb870ea60e278db14266e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.johnzon.core;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;
import javax.json.JsonException;
/**
 * An {@link InputStreamReader} that chooses its decoder by inspecting the first
 * bytes of the stream, following the null-byte patterns of RFC 4627 section 3
 * and additionally recognising byte order marks (BOMs).
 *
 * <p>Up to four bytes are read for the detection. A recognised BOM is consumed;
 * every other inspected byte is pushed back so that the decoder sees it again.
 */
final class RFC4627AwareInputStreamReader extends InputStreamReader {

    /** Number of leading bytes inspected, and therefore the push-back capacity required. */
    private static final int DETECTION_WINDOW = 4;

    /**
     * Wraps {@code in} so that its leading bytes can be inspected and pushed back.
     *
     * @param in the raw JSON byte stream
     * @throws JsonException if the stream holds fewer than two bytes or cannot be read
     */
    RFC4627AwareInputStreamReader(final InputStream in) {
        this(new PushbackInputStream(in, DETECTION_WINDOW));
    }

    /**
     * Detects the charset first (which consumes and partly pushes back the leading
     * bytes of {@code in}) and then hands the same stream to the superclass.
     */
    private RFC4627AwareInputStreamReader(final PushbackInputStream in) {
        super(in, getCharset(in).newDecoder());
    }

    /**
     * Reads between two and four leading bytes from the stream.
     *
     * <p>According to the Java API of {@code InputStream.read(byte[], int, int)},
     * "An attempt is made to read as many as len bytes, but a smaller number may be read".
     * To be sure that a short array really means "end of stream", the bytes are
     * fetched one at a time with {@code read()}.
     *
     * <p>Reading stops at the first end-of-stream marker; the stream is never
     * asked for more data after it has reported -1.
     *
     * @param inputStream the stream to read from
     * @return an array of length 2, 3 or 4 holding the leading bytes in stream order
     * @throws IOException if the underlying read fails
     * @throws JsonException if fewer than two bytes are available
     */
    private static byte[] readAllBytes(final PushbackInputStream inputStream) throws IOException {
        final int first = inputStream.read();
        final int second = (first == -1) ? -1 : inputStream.read();
        if (first == -1 || second == -1) {
            throw new JsonException("Invalid Json. Valid Json has at least 2 bytes");
        }

        final int third = inputStream.read();
        if (third == -1) {
            return new byte[] { (byte) first, (byte) second };
        }

        final int fourth = inputStream.read();
        if (fourth == -1) {
            return new byte[] { (byte) first, (byte) second, (byte) third };
        }

        return new byte[] { (byte) first, (byte) second, (byte) third, (byte) fourth };
    }

    /*
     * RFC 4627, section 3:
     *
     *   JSON text SHALL be encoded in Unicode. The default encoding is UTF-8.
     *
     *   Since the first two characters of a JSON text will always be ASCII
     *   characters [RFC0020], it is possible to determine whether an octet
     *   stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
     *   at the pattern of nulls in the first four octets.
     *
     *     00 00 00 xx  UTF-32BE
     *     00 xx 00 xx  UTF-16BE
     *     xx 00 00 00  UTF-32LE
     *     xx 00 xx 00  UTF-16LE
     *     xx xx xx xx  UTF-8
     *
     * Byte order marks recognised in addition:
     *
     *     UTF-8        EF BB BF
     *     UTF-16 (BE)  FE FF
     *     UTF-16 (LE)  FF FE
     *     UTF-32 (BE)  00 00 FE FF
     *     UTF-32 (LE)  FF FE 00 00
     */

    /**
     * Determines the charset of the JSON text from its leading bytes and leaves
     * the stream positioned at the first byte after any byte order mark.
     *
     * @param inputStream push-back stream with room for at least the bytes
     *                    returned by {@link #readAllBytes(PushbackInputStream)}
     * @return the detected charset; UTF-8 when no other pattern matches
     * @throws JsonException if the stream is shorter than two bytes or an
     *                       {@link IOException} occurs while reading or pushing back
     */
    private static Charset getCharset(final PushbackInputStream inputStream) {
        Charset charset = Charset.forName("UTF-8");
        int bomLength = 0;
        try {
            final byte[] utfBytes = readAllBytes(inputStream);
            final int first = utfBytes[0] & 0xFF;
            final int second = utfBytes[1] & 0xFF;
            // -1 marks "byte not present"; it can never equal a real byte value.
            final int third = utfBytes.length > 2 ? (utfBytes[2] & 0xFF) : -1;
            final int fourth = utfBytes.length > 3 ? (utfBytes[3] & 0xFF) : -1;

            if (first == 0x00) {
                if (second == 0x00) {
                    charset = Charset.forName("UTF-32BE");
                    // 00 00 FE FF is the UTF-32BE BOM. It is consumed here like
                    // every other BOM: the Charset documentation only spells out
                    // BOM handling for the UTF-16 family, so a UTF-32 decoder
                    // stripping it is not something to rely on.
                    if (third == 0xFE && fourth == 0xFF) {
                        bomLength = 4;
                    }
                } else {
                    charset = Charset.forName("UTF-16BE");
                }
            } else if (utfBytes.length > 2 && second == 0x00) {
                charset = (third == 0x00) ? Charset.forName("UTF-32LE") : Charset.forName("UTF-16LE");
            } else if (first == 0xFE && second == 0xFF) {
                charset = Charset.forName("UTF-16BE");
                bomLength = 2;
            } else if (first == 0xFF && second == 0xFE) {
                // FF FE alone is the UTF-16LE BOM; followed by 00 00 it is the UTF-32LE BOM.
                if (third == 0x00 && fourth == 0x00) {
                    charset = Charset.forName("UTF-32LE");
                    bomLength = 4;
                } else {
                    charset = Charset.forName("UTF-16LE");
                    bomLength = 2;
                }
            } else if (first == 0xEF && second == 0xBB && third == 0xBF) {
                // UTF-8 with BOM; the charset stays at the UTF-8 default.
                bomLength = 3;
            }

            // Push back everything after the BOM. Without a BOM (bomLength == 0)
            // this restores all inspected bytes; with a 4-byte BOM that filled
            // the whole window it pushes back nothing.
            inputStream.unread(utfBytes, bomLength, utfBytes.length - bomLength);
        } catch (final IOException e) {
            throw new JsonException("Unable to detect charset due to " + e.getMessage(), e);
        }
        return charset;
    }
}