PIVOT-989: Use the correct Charset in StringSerializer.writeObject(). The "getBytes()"
call did not specify a Charset, so it used the platform default (on Windows, usually
windows-1252), which usually differs from the UTF-8 default used by this class.
So, pass the serializer's configured Charset to "getBytes()".
Also, in "trunk", use the StandardCharsets.UTF_8 constant instead of looking the Charset
up by name. Note: this part of the change will not be propagated to "2.0.x" because that
branch still needs to compile under Java 6 (StandardCharsets was added in Java 7).
Update the StringSerializerTest with a specific test of this, and include a hex dump
of the bytes so the encoding can be verified directly (the console output, especially
on Windows, is not helpful).
git-svn-id: https://svn.apache.org/repos/asf/pivot/trunk@1747445 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/core/src/org/apache/pivot/serialization/StringSerializer.java b/core/src/org/apache/pivot/serialization/StringSerializer.java
index 709893b..97264b7 100644
--- a/core/src/org/apache/pivot/serialization/StringSerializer.java
+++ b/core/src/org/apache/pivot/serialization/StringSerializer.java
@@ -23,21 +23,26 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
/**
* Implementation of the {@link Serializer} interface that reads data from and
- * writes data to Java Strings.
+ * writes data to Java Strings. The text data is interpreted using either the
+ * default <code>UTF-8</code> {@link Charset} or a <code>Charset</code> supplied
+ * in the constructor.
+ * <p> Instances of this class are reusable (and thread-safe) because no mutable
+ * instance data is used in the {@link #readObject} and {@link #writeObject}
+ * methods.
*/
public class StringSerializer implements Serializer<String> {
private final Charset charset;
- public static final String DEFAULT_CHARSET_NAME = "UTF-8";
public static final String TEXT_EXTENSION = "txt";
public static final String MIME_TYPE = "text/plain";
public static final int BUFFER_SIZE = 2048;
public StringSerializer() {
- this(Charset.forName(DEFAULT_CHARSET_NAME));
+ this(StandardCharsets.UTF_8);
}
public StringSerializer(Charset charset) {
@@ -53,11 +58,12 @@
}
/**
- * Reads plain text data from an input stream.
+ * Reads plain text data from an input stream, interpreted by the given {@link Charset}.
*
* @param inputStream The input stream from which data will be read.
* @return An instance of {@link String} containing the text read from the
* input stream.
+ * @see #getCharset
*/
@Override
public String readObject(InputStream inputStream) throws IOException, SerializationException {
@@ -88,10 +94,11 @@
}
/**
- * Writes plain text data to an output stream.
+ * Writes plain text data to an output stream, encoded in the given {@link Charset}.
*
* @param text The text to be written to the output stream.
* @param outputStream The output stream to which data will be written.
+ * @see #getCharset
*/
@Override
public void writeObject(String text, OutputStream outputStream) throws IOException,
@@ -106,7 +113,7 @@
try {
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(outputStream);
- bufferedOutputStream.write(text.getBytes());
+ bufferedOutputStream.write(text.getBytes(charset));
bufferedOutputStream.flush();
} catch (IOException exception) {
throw new SerializationException(exception);
diff --git a/core/test/org/apache/pivot/serialization/test/StringSerializerTest.java b/core/test/org/apache/pivot/serialization/test/StringSerializerTest.java
index 92b1e5b..a264cc2 100644
--- a/core/test/org/apache/pivot/serialization/test/StringSerializerTest.java
+++ b/core/test/org/apache/pivot/serialization/test/StringSerializerTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.pivot.serialization.test;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@@ -23,6 +24,8 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import org.apache.pivot.serialization.SerializationException;
import org.apache.pivot.serialization.Serializer;
@@ -30,19 +33,38 @@
import org.junit.Test;
public class StringSerializerTest {
+ private static final Charset UTF_8 = StandardCharsets.UTF_8;
+ // Note: include a real Unicode character to test the UTF-8 encoding
public static final String testString = "// \n" + "// Hello from "
- + StringSerializerTest.class.getName() + "\n" + "// \n";
- public static final byte[] testBytes = testString.getBytes();
+ + StringSerializerTest.class.getSimpleName() + "\n" + "// \u03C0 r square \n"
+ + "// \n";
+ public static final byte[] testBytes = testString.getBytes(UTF_8);
public void log(String msg) {
System.out.println(msg);
}
+ public void logBytes(String msg, byte[] b) {
+ StringBuilder buf = new StringBuilder(b.length * 4);
+ buf.append('[');
+ for (int i = 0; i < b.length; i++) {
+ if (i > 0)
+ buf.append(',');
+ int ib = ((int)b[i]) & 0xFF;
+ String hex = Integer.toHexString(ib).toUpperCase();
+ if (hex.length() < 2)
+ buf.append('0');
+ buf.append(hex);
+ }
+ buf.append(']');
+ log(msg + ": " + buf.toString() + "\n");
+ }
+
@Test
public void readValues() throws IOException, SerializationException {
log("readValues()");
- Serializer<String> serializer = new StringSerializer();
+ Serializer<String> serializer = new StringSerializer(UTF_8);
ByteArrayInputStream inputStream = new ByteArrayInputStream(testBytes);
String result = serializer.readObject(inputStream);
@@ -51,8 +73,10 @@
// dump content, but useful only for text resources ...
String dump = result;
- int dumpLength = dump.getBytes().length;
+ byte[] dumpBytes = dump.getBytes();
+ int dumpLength = dumpBytes.length;
log("Result: " + dumpLength + " bytes \n" + dump);
+ logBytes("Result bytes", dumpBytes);
assertTrue(dumpLength > 0);
}
@@ -60,7 +84,8 @@
@Test
public void writeValues() throws IOException, SerializationException {
log("writeValues()");
-
+log("test string = \"" + testString + "\"");
+ // Note: assume the default Charset for StringSerializer is UTF-8, which we are using here
Serializer<String> serializer = new StringSerializer();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
@@ -69,16 +94,19 @@
outputStream.flush();
outputStream.close();
- String result = outputStream.toString();
+ String result = outputStream.toString(UTF_8.name());
assertNotNull(result);
assertEquals(result, testString);
- // dump content, but useful only for text resources ...
- String dump = result;
- int dumpLength = dump.getBytes().length;
- log("Result: " + dumpLength + " bytes \n" + dump);
+ byte[] resultBytes = outputStream.toByteArray();
+ assertArrayEquals(resultBytes, testBytes);
- assertTrue(dumpLength > 0);
+ // dump content, but useful only for text resources ...
+ log("Result: " + resultBytes.length + " bytes \n" + result);
+ logBytes("Result bytes", resultBytes);
+ logBytes(" Test bytes", testBytes);
+
+ assertTrue(resultBytes.length > 0);
}
}