blob: 59d95376c4ea5b53635d5b38bb62cbb5cf0c8902 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.modules.properties;
import java.lang.ref.Reference;
import java.lang.ref.WeakReference;
import java.net.URL;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.netbeans.spi.queries.FileEncodingQueryImplementation;
import org.openide.filesystems.FileAttributeEvent;
import org.openide.filesystems.FileChangeListener;
import org.openide.filesystems.FileEvent;
import org.openide.filesystems.FileObject;
import static java.lang.Math.min;
import static java.nio.charset.CoderResult.OVERFLOW;
import static java.nio.charset.CoderResult.UNDERFLOW;
import org.openide.filesystems.FileRenameEvent;
import org.openide.filesystems.FileStateInvalidException;
import org.openide.filesystems.URLMapper;
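/**
 * File-encoding query implementation of the properties module.
 * Unless the queried file is marked to use the project encoding, it provides
 * a charset (named {@code ISO-8859-1}) whose encoder writes characters outside
 * of the printable ASCII range as Unicode escape sequences and whose decoder
 * translates such escape sequences back to characters.
 *
 * @author Marian Petras
 */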
public final class PropertiesEncoding extends FileEncodingQueryImplementation {
static final String PROP_CHARSET_NAME = "ISO-8859-1";
/*
* TO DO:
*
* DECODER
* - leave some characters in the form of escape sequence, e.g. 0x00 - 0x1f
* - allow decoding of supplementary characters (?)
*/
/**
 * Returns the charset to be used for the given file.
 *
 * @param  file  file whose encoding is queried; if it is valid,
 *               it must be a data file (not a folder)
 * @return  a new {@link PropCharset} bound to the file; or {@code null}
 *          if the file's {@code PROPERTY_ENCODING} attribute
 *          is {@code Boolean.TRUE} or if the charset could not be created
 *          because the file is in an invalid state
 *          (NOTE(review): {@code null} presumably lets another encoding
 *          provider, e.g. the project's, decide - confirm against
 *          the {@code FileEncodingQueryImplementation} contract)
 */
@Override
public Charset getEncoding(FileObject file) {
    assert !file.isValid() || file.isData();

    /*
     * The attribute is compared with Boolean.TRUE instead of being cast
     * to Boolean, such that a missing attribute or an attribute holding
     * a value of an unexpected type does not break the encoding query
     * with a ClassCastException - it simply means "not set".
     */
    final Object attribute = file.getAttribute(PropertiesDataNode.PROPERTY_ENCODING);
    if (Boolean.TRUE.equals(attribute)) {
        return null;
    }

    try {
        return new PropCharset(file);
    } catch (FileStateInvalidException ex) {
        /* best effort - no charset can be provided for an invalid file */
        return null;
    }
}
/**
*
*/
static final class PropCharset extends Charset implements FileChangeListener {
private final Reference<FileObject> fileRef;
private URL fileURL;
PropCharset(FileObject file) throws FileStateInvalidException {
super(PROP_CHARSET_NAME, null); //NOI18N
fileRef = new WeakReference<FileObject>(file);
file.addFileChangeListener(this);
updateURL(file);
}
PropCharset() {
super(PROP_CHARSET_NAME, null); //NOI18N
fileRef = null;
}
public boolean contains(Charset charset) {
return true;
}
public CharsetEncoder newEncoder() {
return new PropCharsetEncoder(this);
}
public CharsetDecoder newDecoder() {
long fileSize = (fileRef != null) ? getFileSize() : -1;
return (fileSize > 0l) ? new PropCharsetDecoder(this, fileSize)
: new PropCharsetDecoder(this);
}
private long getFileSize() {
FileObject file = getFile();
return ((file != null) && file.isValid()) ? file.getSize() : 0l;
}
private FileObject getFile() {
FileObject fileObj = fileRef.get();
URL url;
synchronized (this) {
url = fileURL;
}
if ((fileObj == null) && (url != null)) {
fileObj = URLMapper.findFileObject(url);
}
return fileObj;
}
public void fileRenamed(FileRenameEvent fe) {
updateURL(fe.getFile());
}
public void fileChanged(FileEvent fe) {
updateURL(fe.getFile());
}
private synchronized void updateURL(FileObject file) {
try {
fileURL = file.getURL();
} catch (FileStateInvalidException ex) {
fileURL = null;
}
}
public void fileDeleted(FileEvent fe) { }
public void fileAttributeChanged(FileAttributeEvent fe) { }
public void fileDataCreated(FileEvent fe) {
/* this should be never called on plain files (non-directories) */
// assert false;
}
public void fileFolderCreated(FileEvent fe) {
/* this should be never called on plain files (non-directories) */
assert false;
}
}
/**
 * Encoder that writes printable ASCII characters (20h - 7eh) and the CR, LF,
 * Tab and FF characters as single bytes and every other character
 * as a six-byte Unicode escape sequence (a backslash, "u" and four
 * hexadecimal digits).
 * Characters are first collected in an internal input buffer, encoded
 * to an internal output buffer and then copied to the target byte buffer.
 *
 * @author Marian Petras
 */
static final class PropCharsetEncoder extends CharsetEncoder {

    private static final int avgEncodedTokenLen = 3;
    private static final int maxEncodedTokenLen = 6;
    private static final int inBufSize = 8192;
    private static final int outBufSize = inBufSize * avgEncodedTokenLen;

    private static final byte zeroByte = (byte) '0';
    private static final byte[] hexadecimalChars = UtilConvert.hexDigit;

    /** characters read from the input but not encoded yet */
    private final char[] inBuf = new char[inBufSize];
    /** encoded bytes not written to the output yet */
    private final byte[] outBuf = new byte[outBufSize];
    /** number of valid elements in {@link #inBuf} and {@link #outBuf} */
    private int inBufPos, outBufPos;
    /** the input {@code CharBuffer} has been read completely */
    private boolean emptyIn;
    /** the output {@code ByteBuffer} has no space left */
    private boolean fullOut;
    /** {@link #inBuf} holds no characters */
    private boolean emptyInBuf;

    PropCharsetEncoder(Charset charset) {
        super(charset, avgEncodedTokenLen, maxEncodedTokenLen);
    }

    PropCharsetEncoder() {
        super(new PropCharset(), avgEncodedTokenLen, maxEncodedTokenLen);
    }

    {
        implReset();
    }

    /** Empties both internal buffers and clears the state flags. */
    @Override
    protected void implReset() {
        inBufPos = 0;
        outBufPos = 0;
        emptyIn = false;
        fullOut = false;
        emptyInBuf = true;
    }

    /**
     * Repeatedly fills the internal input buffer, encodes it and flushes
     * the internal output buffer until either the output is full
     * or all input has been consumed and encoded.
     */
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        emptyIn = false;
        fullOut = false;
        try {
            boolean refill = true;
            while (true) {
                if (refill) {
                    readIn(in);
                }
                encodeBuf();

                /* everything buffered was encoded and more input may be available */
                refill = emptyInBuf && !emptyIn;
                if (refill) {
                    continue;
                }

                flushOutBuf(out);
                if (fullOut) {
                    return OVERFLOW;
                }
                if (emptyInBuf && emptyIn) {
                    return UNDERFLOW;
                }
            }
        } catch (BufferUnderflowException ex) {
            assert false;       //this should not happen
            return UNDERFLOW;
        } catch (BufferOverflowException ex) {
            assert false;       //this should not happen
            return OVERFLOW;
        }
    }

    @Override
    protected CoderResult implFlush(ByteBuffer out) {
        if (flushOutBuf(out)) {
            return OVERFLOW;
        }
        return UNDERFLOW;
    }

    /**
     * Moves as many characters as possible from the given buffer
     * to {@link #inBuf}. Sets {@link #emptyIn} to {@code true}
     * once the given buffer has no characters left.
     */
    private void readIn(CharBuffer in) {
        if (emptyIn) {
            return;
        }

        final int available = in.remaining();
        if (available == 0) {
            emptyIn = true;
            return;
        }

        final int free = inBuf.length - inBufPos;
        if (free > 0) {
            final int count = min(available, free);
            in.get(inBuf, inBufPos, count);
            inBufPos += count;
            emptyInBuf = false;

            if (count == available) {
                assert in.remaining() == 0;
                emptyIn = true;
            }
        }
    }

    /**
     * Encodes characters from {@link #inBuf} to {@link #outBuf} as long as
     * the output buffer is guaranteed to have room for the longest token;
     * characters that were not encoded are moved to the start
     * of {@link #inBuf}.
     */
    private void encodeBuf() {
        if (emptyInBuf) {
            return;
        }

        final int lastSafeOutPos = outBufSize - maxEncodedTokenLen;
        int consumed = 0;
        for (; (consumed < inBufPos) && (outBufPos <= lastSafeOutPos); consumed++) {
            encodeChar(inBuf[consumed]);
        }

        final int left = inBufPos - consumed;
        if (left > 0) {
            System.arraycopy(inBuf, consumed, inBuf, 0, left);
        }
        inBufPos = left;
        emptyInBuf = (left == 0);
    }

    /**
     * Copies as many bytes as possible from {@link #outBuf} to the given
     * buffer and removes the copied bytes from {@link #outBuf}.
     *
     * @return  {@code true} if the given buffer could not take
     *          all pending bytes, {@code false} otherwise
     */
    private boolean flushOutBuf(ByteBuffer out) {
        if (fullOut) {
            return true;
        }

        final int space = out.remaining();
        if (space == 0) {
            fullOut = true;
            return true;
        }

        if (outBufPos == 0) {
            /* nothing to flush */
            return false;
        }

        final int count = min(space, outBufPos);
        out.put(outBuf, 0, count);

        final int left = outBufPos - count;
        if (left > 0) {
            System.arraycopy(outBuf, count, outBuf, 0, left);
        }
        outBufPos = left;

        if (count == space) {
            assert out.remaining() == 0;
            fullOut = true;
        }
        return left > 0;
    }

    /**
     * Appends the encoded form of the given character to {@link #outBuf}.
     *
     * @return  number of bytes appended (1 or 6)
     */
    private int encodeChar(final char c) {
        final int startPos = outBufPos;

        /*
         * CR, LF, Tab and FF are never translated to escape sequences -
         * such translation might change semantics, which is not desirable
         * (see issue #111530).
         */
        final boolean keptControlChar = (c == '\r') || (c == '\n')
                                        || (c == '\t') || (c == '\f');
        final boolean printableAscii = (c >= '\u0020') && (c <= '\u007e');

        if (keptControlChar || printableAscii) {
            outBuf[outBufPos++] = (byte) c;
        } else {
            final int code = c;
            final boolean needsUpperByte = (c >= '\u0100');

            outBuf[outBufPos++] = (byte) '\\';
            outBuf[outBufPos++] = (byte) 'u';
            outBuf[outBufPos++] = needsUpperByte
                                  ? hexadecimalChars[(code >> 12) & 0x000f]
                                  : zeroByte;
            outBuf[outBufPos++] = needsUpperByte
                                  ? hexadecimalChars[(code >> 8) & 0x000f]
                                  : zeroByte;
            outBuf[outBufPos++] = hexadecimalChars[(code >> 4) & 0x000f];
            outBuf[outBufPos++] = hexadecimalChars[code & 0x000f];
        }
        return outBufPos - startPos;
    }

    /** Resets the encoder and returns the encoded form of a single character. */
    byte[] encodeCharForTests(final char c) {
        reset();
        final int tokenLength = encodeChar(c);
        final byte[] token = new byte[tokenLength];
        System.arraycopy(outBuf, 0, token, 0, tokenLength);
        return token;
    }

    /** Encodes the given string and returns exactly the encoded bytes. */
    byte[] encodeStringForTests(final String s) throws CharacterCodingException {
        final ByteBuffer encoded = encode(CharBuffer.wrap(s));
        final byte[] backingArray = encoded.array();
        final int usedLength = encoded.limit();
        if (usedLength == backingArray.length) {
            return backingArray;
        }
        final byte[] trimmed = new byte[usedLength];
        System.arraycopy(backingArray, 0, trimmed, 0, usedLength);
        return trimmed;
    }

}
/**
 * Decoder that maps each input byte to the character of the same value,
 * except for Unicode escape sequences (a backslash, "u" and four
 * hexadecimal digits): a sequence denoting a value greater than 20h
 * is replaced with the single character it denotes. Escape sequences
 * denoting values up to 20h, malformed or incomplete escape sequences
 * and all other backslash sequences are passed to the output unchanged.
 * <p>
 * Bytes are first collected in an internal input buffer, decoded
 * to an internal output buffer and then copied to the target char buffer.
 */
static final class PropCharsetDecoder extends CharsetDecoder {

    /* must be declared first - it is used by the instance initializer */
    private final Logger log = Logger.getLogger(getClass().getName().replace('$', '.'));

    /** States of the decoding automaton. */
    private static enum State {
        /** no escape sequence is being read */
        INITIAL,
        /** a backslash has just been read */
        BACKSLASH,
        /** a backslash and "u" have been read, hexadecimal digits are expected */
        UNICODE,
    }

    private static final float avgCharsPerByte = 1.00f;
    /*
     * Up to six chars may be written to the output while processing
     * a single byte:
     *  - six chars when the fourth hexadecimal digit completes an escape
     *    sequence that is to be kept in its escaped form
     *  - five chars when a malformed unicode sequence is detected:
     *    if the first five bytes formed a valid sequence
     *    (e.g. <backslash>, "u", "1", "2", "3") and the sixth byte is not
     *    a hexadecimal digit, the first five bytes of the sequence are
     *    transformed to (five) characters and sent to the output.
     *    (The sixth byte is re-read and handled in the next round
     *    of the decoding cycle.)
     */
    private static final float maxCharsPerByte = 6.00f;
    private static final int maxCharsPerByteInt = 6;

    private static final int inBufSize = 8192;
    private static final int outBufSize = inBufSize;

    /** value of {@link #inputSize} meaning that the size is not known */
    private static final int SIZE_UNKNOWN = -1;

    /** size of the input file, or {@link #SIZE_UNKNOWN} if unknown */
    private long inputSize;
    /**
     * number of input bytes decoded so far
     * (a {@code long}, such that it cannot overflow before reaching
     * {@link #inputSize})
     */
    private long bytesDecoded = 0;

    /** bytes read from the input but not decoded yet */
    private final byte[] inBuf = new byte[inBufSize];
    /** decoded chars not written to the output yet */
    private final char[] outBuf = new char[outBufSize];
    /** number of valid elements in {@link #inBuf} and {@link #outBuf} */
    private int inBufPos, outBufPos;
    /** the input {@code ByteBuffer} has been read completely */
    private boolean emptyIn;
    /** the output {@code CharBuffer} has no space left */
    private boolean fullOut;
    /** {@link #inBuf} holds no bytes */
    private boolean emptyInBuf;

    private State state;
    /** number of hexadecimal digits of the current escape sequence read so far */
    private int unicodeBytesRead;
    /** value made of the hexadecimal digits read so far */
    private int unicodeValue;
    /** used when flushing a unicode sequence to the out buffer */
    private char[] unicodeValueChars = new char[4];

    PropCharsetDecoder(Charset charset) {
        this(charset, SIZE_UNKNOWN);
    }

    /**
     * @param  inputSize  number of bytes of the input,
     *                    or {@link #SIZE_UNKNOWN} if not known
     */
    PropCharsetDecoder(Charset charset, long inputSize) {
        super(charset, avgCharsPerByte, maxCharsPerByte);

        /* must be set here - the instance initializer has reset it */
        this.inputSize = inputSize;
    }

    {
        implReset();
    }

    /**
     * Empties the internal buffers and resets the decoding state.
     * The input size is forgotten, too.
     */
    @Override
    protected void implReset() {
        log.finer("");
        log.finer("implReset() called");

        inputSize = SIZE_UNKNOWN;
        bytesDecoded = 0;
        inBufPos = 0;
        outBufPos = 0;
        emptyIn = false;
        fullOut = false;
        emptyInBuf = true;
        state = State.INITIAL;
        unicodeBytesRead = 0;
    }

    /**
     * Repeatedly fills the internal input buffer, decodes it and flushes
     * the internal output buffer until either the output is full
     * or all input has been consumed and decoded.
     * If the input is exhausted in the middle of a backslash sequence and
     * the size of the input is unknown or has been reached, the pending
     * characters of the sequence are sent to the output as they are.
     */
    @Override
    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
        log.finer("");
        log.finer("decodeLoop() called");
        if (log.isLoggable(Level.FINEST)) {
            String inCount = String.format("%5d", in.remaining());
            String outCount = String.format("%5d", out.remaining());
            log.finest(" - input: " + inCount + " bytes");
            log.finest(" - output: " + outCount + " chars");
        }

        emptyIn = false;
        fullOut = false;
        try {
            readInLoop:
            for (;;) {
                readIn(in);
                for (;;) {
                    bytesDecoded += decodeBuf();
                    // assert: if (bytesDecoded == inputSize) then (emptyIn)
                    assert (bytesDecoded != inputSize) || emptyIn;
                    if (emptyInBuf && !emptyIn) {
                        continue readInLoop;
                    } else if (emptyIn && hasPendingCharacters()
                            && ((inputSize == SIZE_UNKNOWN) || (bytesDecoded >= inputSize))) {
                        handlePendingCharacters();
                    }
                    flushOutBuf(out);
                    if (fullOut) {
                        log.finest(" - returning OVERFLOW");
                        return OVERFLOW;
                    } else if (emptyInBuf && emptyIn) {
                        log.finest(" - returning UNDERFLOW");
                        return UNDERFLOW;
                    }
                }
            }
        } catch (BufferUnderflowException ex) {
            assert false;       //this should not happen
            return UNDERFLOW;
        } catch (BufferOverflowException ex) {
            assert false;       //this should not happen
            return OVERFLOW;
        }
    }

    /**
     * Tells whether the decoder is in the middle of a backslash sequence.
     * Used by {@code decodeLoop()} because of a bug in JDK 1.5.x that
     * {@code flush()} is not always called when it should be.
     *
     * @see #handlePendingCharacters()
     */
    private boolean hasPendingCharacters() {
        return state != State.INITIAL;
    }

    /**
     * Sends the characters of an unfinished backslash sequence
     * to {@link #outBuf} and returns to the initial state.
     * It is called from {@code implFlush()} and, because of a bug
     * in JDK 1.5.x that {@code flush()} is not always called when it
     * should be, also from {@code decodeLoop()}.
     *
     * @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">JDK bug 6221056</a>
     * @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4744247">JDK bug 4744247</a>
     */
    private void handlePendingCharacters() {
        log.finer("handlePendingCharacters()");
        if (!hasPendingCharacters()) {
            log.finer(" - no pending characters");
            return;
        }

        switch (state) {
            case INITIAL:
                assert false;
                break;
            case BACKSLASH:
                log.finer(" - backslash pending");
                outBuf[outBufPos++] = '\\';
                break;
            case UNICODE:
                log.finer(" - broken \\u.... sequence pending");
                if (log.isLoggable(Level.FINEST)) {
                    log.finest(" - " + unicodeBytesRead + " unicode value bytes pending");
                }
                assert (unicodeBytesRead >= 0) && (unicodeBytesRead < 4);
                flushUnicodeSequence();
                break;
            default:
                assert false;
                break;
        }
        state = State.INITIAL;
    }

    /**
     * Writes the remaining decoded chars to the given buffer, including
     * the characters of a backslash sequence that was left unfinished
     * at the end of the input.
     *
     * @return  {@code OVERFLOW} if the given buffer could not take all
     *          the chars (the method is then to be called again with more
     *          space available), {@code UNDERFLOW} otherwise
     */
    @Override
    protected CoderResult implFlush(CharBuffer out) {
        log.finer("");
        log.finer("implFlush() called");

        /* the output is full if (and only if) it has NO space remaining */
        fullOut = !out.hasRemaining();
        boolean overflow = flushOutBuf(out);
        if (!overflow && hasPendingCharacters()) {
            /*
             * End of input reached in the middle of a backslash sequence.
             * The outBuf is empty at this point (it has just been flushed
             * completely) so the pending characters surely fit in it.
             */
            handlePendingCharacters();
            overflow = flushOutBuf(out);
        }
        return overflow ? OVERFLOW
                        : UNDERFLOW;
    }

    /**
     * Reads and stores as many bytes from the input buffer as possible.
     * If there are no more bytes available in the input buffer,
     * sets flag variable {@link #emptyIn} to {@code true}.
     */
    private void readIn(ByteBuffer in) {
        log.finer("filling inBuf: ");
        if (emptyIn) {
            log.finer(" - input empty (emptyIn already set)");
            return;
        }
        int inRemaining = in.remaining();
        if (inRemaining == 0) {
            log.finer(" - input empty (emptyIn will be set)");
            emptyIn = true;
            return;
        }
        int bufRemaining = inBuf.length - inBufPos;
        if (bufRemaining == 0) {
            log.finer(" - no space remaining in inBuf");
            /* no space in inBuf */
            return;
        }
        int length = min(inRemaining, bufRemaining);
        if (log.isLoggable(Level.FINER)) {
            log.finer(" - " + length + " bytes will be read");
        }
        in.get(inBuf, inBufPos, length);
        inBufPos += length;
        emptyInBuf = false;
        if (length == inRemaining) {
            assert in.remaining() == 0;
            log.finer(" - all remaining bytes were read (emptyIn will be set)");
            emptyIn = true;
        }
    }

    /**
     * Decodes as many bytes from the internal input buffer as possible,
     * i.e. as long as the internal output buffer is guaranteed to have
     * room for the result of decoding of one more byte.
     * Bytes that were not decoded are moved to the start of the input
     * buffer.
     *
     * @return  number of bytes decoded
     */
    private int decodeBuf() {
        log.finer("decoding inBuf, writing to outBuf");
        if (emptyInBuf) {
            log.finer(" - inBuf is empty - nothing to decode");
            return 0;
        }
        int decodingInBufPos = 0;
        log.finest(" - decoding bytes:");
        log.finest(" - initial state: " + state);
        while ((decodingInBufPos < inBufPos)
                && (outBufPos <= outBufSize - maxCharsPerByteInt)) {
            int decodedChars = decodeByte(inBuf[decodingInBufPos++]);
            if (log.isLoggable(Level.FINEST)) {
                StringBuilder sb = new StringBuilder(60);
                sb.append(" - byte 0x");
                sb.append(hexavalue(inBuf[decodingInBufPos - 1]));
                sb.append(" => ").append(state);
                log.finest(sb.toString());
            }
            if (decodedChars < 0) {
                /* put back the character following the broken sequence: */
                decodingInBufPos--;
                log.finer(" - last byte returned to be processed again");
                unicodeBytesRead = 0;
                unicodeValue = 0;
                state = State.INITIAL;
            }
        }
        int remainder = inBufPos - decodingInBufPos;
        if (remainder != 0) {
            if (log.isLoggable(Level.FINER)) {
                log.finer(" - " + remainder + " bytes will remain in the inBuf");
            }
            System.arraycopy(inBuf, decodingInBufPos,
                             inBuf, 0,
                             remainder);
        } else {
            log.finer(" - all bytes were successfully decoded");
        }
        inBufPos = remainder;
        emptyInBuf = (inBufPos == 0);
        return decodingInBufPos;
    }

    /**
     * Writes as many chars as possible from the {@code outBuf} to the given
     * {@code CharBuffer} and removes the written chars from {@code outBuf}.
     *
     * @return  {@code true} if the given {@code out} buffer is overflown,
     *          {@code false} otherwise
     */
    private boolean flushOutBuf(CharBuffer out) {
        log.finer("flushing outBuf");
        if (outBufPos == 0) {
            /* nothing to flush */
            log.finer(" - outBuf is empty - nothing to flush");
            return false;
        }
        if (fullOut) {          //we know that (outBufPos != 0)
            log.finer(" - output CharBuffer is full (fullOut already set)");
            return true;
        }
        int outRemaining = out.remaining();
        if (outRemaining == 0) {
            log.finer(" - output CharBuffer is full (fullOut will be set)");
            fullOut = true;
            return true;
        }
        int length = min(outRemaining, outBufPos);
        if (log.isLoggable(Level.FINER)) {
            log.finer(" - " + length + " chars will be written");
        }
        out.put(outBuf, 0, length);
        int remainder = outBufPos - length;
        if (remainder != 0) {
            if (log.isLoggable(Level.FINER)) {
                log.finer(" - " + remainder + " bytes will remain in the outBuf");
            }
            System.arraycopy(outBuf, length,
                             outBuf, 0,
                             remainder);
        } else {
            log.finer(" - all bytes were successfully flushed");
        }
        outBufPos = remainder;
        if (length == outRemaining) {
            assert out.remaining() == 0;
            log.finer(" - output CharBuffer is now full (fullOut will be set)");
            fullOut = true;
        }
        return (remainder != 0);
    }

    /** hexadecimal digits; indices 16 - 21 are the uppercase forms of 10 - 15 */
    private static final String hexadecimalChars
                                = "0123456789abcdefABCDEF";         //NOI18N

    /**
     * Processes one input byte - updates the state of the automaton and
     * appends the resulting chars (if any) to {@link #outBuf}.
     *
     * @return  number of chars appended to {@code outBuf}; or {@code -1}
     *          if the byte terminated a malformed unicode sequence
     *          (the sequence has been written to {@code outBuf} but
     *          the byte itself has not been processed and must be
     *          passed to this method again)
     */
    private int decodeByte(final byte b) {
        final int oldPos = outBufPos;
        final int bInt = (b >= 0 ? b : b + 256);    //unsigned value of the byte
        assert (bInt >= 0) && ((bInt & 0xff) == bInt);
        final char bChar = (char) bInt;
        switch (state) {
            case INITIAL:
                if (bChar == '\\') {
                    state = State.BACKSLASH;
                } else {
                    outBuf[outBufPos++] = bChar;
                    /* keep the state at INITIAL */
                }
                break;
            case BACKSLASH:
                if (bChar == 'u') {
                    state = State.UNICODE;
                } else {
                    /* not a unicode sequence - keep both characters */
                    outBuf[outBufPos++] = '\\';
                    outBuf[outBufPos++] = bChar;
                    state = State.INITIAL;
                }
                break;
            case UNICODE:
                boolean malformed = false;
                int index = hexadecimalChars.indexOf(bChar);
                if (index >= 0) {
                    if (index > 15) {       //one of [A-F] used
                        index -= 6;         //transform to lowercase
                    }
                    assert index <= 15;
                    unicodeValue = (unicodeValue << 4) | index;
                    if (++unicodeBytesRead == 4) {
                        if (unicodeValue <= 0x20) {
                            unicodeValueChars[3] = bChar;
                            /*
                             * Do not translate Unicode sequences of value
                             * 20h (space) or less (control characters).
                             * Changing form of these unicode sequences
                             * to one-character form might change
                             * semantics, which is not desirable
                             * (see issue #111530).
                             */
                            flushUnicodeSequence();
                        } else {
                            outBuf[outBufPos++] = (char) unicodeValue;
                        }
                        state = State.INITIAL;
                    } else {
                        unicodeValueChars[unicodeBytesRead - 1] = bChar;
                        /* keep the state at UNICODE */
                    }
                } else {
                    malformed = true;
                    /*
                     * send the malformed unicode sequence to the output
                     */
                    flushUnicodeSequence();
                    state = State.INITIAL;
                }
                if (state != State.UNICODE) {
                    unicodeBytesRead = 0;
                    unicodeValue = 0;
                    if (malformed) {
                        return -1;
                    }
                }
                break;
            default:
                assert false;
                break;
        }
        return outBufPos - oldPos;
    }

    /**
     * Writes the buffered Unicode sequence (possibly incomplete)
     * to the output buffer ({@link #outBuf}).
     * It also resets fields {@link #unicodeBytesRead}
     * and {@link #unicodeValue} to {@code 0}.
     */
    private void flushUnicodeSequence() {
        outBuf[outBufPos++] = '\\';
        outBuf[outBufPos++] = 'u';
        for (int i = 0; i < unicodeBytesRead; i++) {
            outBuf[outBufPos++] = unicodeValueChars[i];
        }
        unicodeBytesRead = 0;
        unicodeValue = 0;
    }

    /**
     * @return  two lowercase hexadecimal digits representing
     *          the unsigned value of the given byte
     */
    private static char[] hexavalue(byte b) {
        final int bInt = (b >= 0 ? b : b + 256);
        char[] result = new char[2];
        result[0] = hexadecimalChars.charAt(bInt / 16);
        result[1] = hexadecimalChars.charAt(bInt % 16);
        return result;
    }

    /** Decodes the given bytes and returns exactly the decoded chars. */
    char[] decodeBytesForTests(final byte[] bytes) throws CharacterCodingException {
        CharBuffer resultBuf = decode(ByteBuffer.wrap(bytes));
        char[] resultBufArray = resultBuf.array();
        int resultBufPos = resultBuf.limit();
        if (resultBufPos == resultBufArray.length) {
            return resultBufArray;
        } else {
            char[] result = new char[resultBufPos];
            System.arraycopy(resultBufArray, 0, result, 0, resultBufPos);
            return result;
        }
    }

}
}