blob: 9771fd1a966237e2ef02a8285e6755a90f7b5cd0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.Random;
import com.google.common.base.Charsets;
import com.google.common.primitives.Bytes;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/** Unit tests for LargeUTF8. */
public class TestText {
private static final int NUM_ITERATIONS = 100;
private static final Random RANDOM = new Random(1);
private static final int RAND_LEN = -1;
// generate a valid java String
private static String getTestString(int len) throws Exception {
StringBuilder buffer = new StringBuilder();
int length = (len==RAND_LEN) ? RANDOM.nextInt(1000) : len;
while (buffer.length()<length) {
int codePoint = RANDOM.nextInt(Character.MAX_CODE_POINT);
char tmpStr[] = new char[2];
if (Character.isDefined(codePoint)) {
//unpaired surrogate
if (codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT &&
!Character.isHighSurrogate((char)codePoint) &&
!Character.isLowSurrogate((char)codePoint)) {
Character.toChars(codePoint, tmpStr, 0);
buffer.append(tmpStr);
}
}
}
return buffer.toString();
}
public static String getTestString() throws Exception {
return getTestString(RAND_LEN);
}
public static String getLongString() throws Exception {
String str = getTestString();
int length = Short.MAX_VALUE+str.length();
StringBuilder buffer = new StringBuilder();
while(buffer.length()<length)
buffer.append(str);
return buffer.toString();
}
@Test
public void testWritable() throws Exception {
for (int i = 0; i < NUM_ITERATIONS; i++) {
String str;
if (i == 0)
str = getLongString();
else
str = getTestString();
TestWritable.testWritable(new Text(str));
}
}
@Test
public void testCoding() throws Exception {
String before = "Bad \t encoding \t testcase";
Text text = new Text(before);
String after = text.toString();
assertTrue(before.equals(after));
for (int i = 0; i < NUM_ITERATIONS; i++) {
// generate a random string
if (i == 0)
before = getLongString();
else
before = getTestString();
// test string to utf8
ByteBuffer bb = Text.encode(before);
byte[] utf8Text = bb.array();
byte[] utf8Java = before.getBytes("UTF-8");
assertEquals(0, WritableComparator.compareBytes(
utf8Text, 0, bb.limit(),
utf8Java, 0, utf8Java.length));
// test utf8 to string
after = Text.decode(utf8Java);
assertTrue(before.equals(after));
}
}
@Test
public void testIO() throws Exception {
DataOutputBuffer out = new DataOutputBuffer();
DataInputBuffer in = new DataInputBuffer();
for (int i = 0; i < NUM_ITERATIONS; i++) {
// generate a random string
String before;
if (i == 0)
before = getLongString();
else
before = getTestString();
// write it
out.reset();
Text.writeString(out, before);
// test that it reads correctly
in.reset(out.getData(), out.getLength());
String after = Text.readString(in);
assertTrue(before.equals(after));
// Test compatibility with Java's other decoder
int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
String after2 = new String(out.getData(), strLenSize,
out.getLength()-strLenSize, "UTF-8");
assertTrue(before.equals(after2));
}
}
public void doTestLimitedIO(String str, int len) throws IOException {
DataOutputBuffer out = new DataOutputBuffer();
DataInputBuffer in = new DataInputBuffer();
out.reset();
try {
Text.writeString(out, str, len);
fail("expected writeString to fail when told to write a string " +
"that was too long! The string was '" + str + "'");
} catch (IOException e) {
}
Text.writeString(out, str, len + 1);
// test that it reads correctly
in.reset(out.getData(), out.getLength());
in.mark(len);
String after;
try {
after = Text.readString(in, len);
fail("expected readString to fail when told to read a string " +
"that was too long! The string was '" + str + "'");
} catch (IOException e) {
}
in.reset();
after = Text.readString(in, len + 1);
assertTrue(str.equals(after));
}
@Test
public void testLimitedIO() throws Exception {
doTestLimitedIO("abcd", 3);
doTestLimitedIO("foo bar baz", 10);
doTestLimitedIO("1", 0);
}
@Test
public void testCompare() throws Exception {
DataOutputBuffer out1 = new DataOutputBuffer();
DataOutputBuffer out2 = new DataOutputBuffer();
DataOutputBuffer out3 = new DataOutputBuffer();
Text.Comparator comparator = new Text.Comparator();
for (int i=0; i<NUM_ITERATIONS; i++) {
// reset output buffer
out1.reset();
out2.reset();
out3.reset();
// generate two random strings
String str1 = getTestString();
String str2 = getTestString();
if (i == 0) {
str1 = getLongString();
str2 = getLongString();
} else {
str1 = getTestString();
str2 = getTestString();
}
// convert to texts
Text txt1 = new Text(str1);
Text txt2 = new Text(str2);
Text txt3 = new Text(str1);
// serialize them
txt1.write(out1);
txt2.write(out2);
txt3.write(out3);
// compare two strings by looking at their binary formats
int ret1 = comparator.compare(out1.getData(), 0, out1.getLength(),
out2.getData(), 0, out2.getLength());
// compare two strings
int ret2 = txt1.compareTo(txt2);
assertEquals(ret1, ret2);
assertEquals("Equivalence of different txt objects, same content" ,
0,
txt1.compareTo(txt3));
assertEquals("Equvalence of data output buffers",
0,
comparator.compare(out1.getData(), 0, out3.getLength(),
out3.getData(), 0, out3.getLength()));
}
}
@Test
public void testFind() throws Exception {
Text text = new Text("abcd\u20acbdcd\u20ac");
assertTrue(text.find("abd")==-1);
assertTrue(text.find("ac") ==-1);
assertTrue(text.find("\u20ac") == 4);
assertTrue(text.find("\u20ac", 5)==11);
}
@Test
public void testFindAfterUpdatingContents() throws Exception {
Text text = new Text("abcd");
text.set("a".getBytes());
assertEquals(text.getLength(),1);
assertEquals(text.find("a"), 0);
assertEquals(text.find("b"), -1);
}
@Test
public void testValidate() throws Exception {
Text text = new Text("abcd\u20acbdcd\u20ac");
byte [] utf8 = text.getBytes();
int length = text.getLength();
Text.validateUTF8(utf8, 0, length);
}
@Test
public void testClear() throws Exception {
// Test lengths on an empty text object
Text text = new Text();
assertEquals(
"Actual string on an empty text object must be an empty string",
"", text.toString());
assertEquals("Underlying byte array length must be zero",
0, text.getBytes().length);
assertEquals("String's length must be zero",
0, text.getLength());
// Test if clear works as intended
text = new Text("abcd\u20acbdcd\u20ac");
int len = text.getLength();
text.clear();
assertEquals("String must be empty after clear()",
"", text.toString());
assertTrue(
"Length of the byte array must not decrease after clear()",
text.getBytes().length >= len);
assertEquals("Length of the string must be reset to 0 after clear()",
0, text.getLength());
}
@Test
public void testTextText() throws CharacterCodingException {
Text a=new Text("abc");
Text b=new Text("a");
b.set(a);
assertEquals("abc", b.toString());
a.append("xdefgxxx".getBytes(), 1, 4);
assertEquals("modified aliased string", "abc", b.toString());
assertEquals("appended string incorrectly", "abcdefg", a.toString());
// add an extra byte so that capacity = 14 and length = 8
a.append(new byte[]{'d'}, 0, 1);
assertEquals(14, a.getBytes().length);
assertEquals(8, a.copyBytes().length);
}
private class ConcurrentEncodeDecodeThread extends Thread {
public ConcurrentEncodeDecodeThread(String name) {
super(name);
}
@Override
public void run() {
final String name = this.getName();
DataOutputBuffer out = new DataOutputBuffer();
DataInputBuffer in = new DataInputBuffer();
for (int i=0; i < 1000; ++i) {
try {
out.reset();
WritableUtils.writeString(out, name);
in.reset(out.getData(), out.getLength());
String s = WritableUtils.readString(in);
assertEquals("input buffer reset contents = " + name, name, s);
} catch (Exception ioe) {
throw new RuntimeException(ioe);
}
}
}
}
@Test
public void testConcurrentEncodeDecode() throws Exception{
Thread thread1 = new ConcurrentEncodeDecodeThread("apache");
Thread thread2 = new ConcurrentEncodeDecodeThread("hadoop");
thread1.start();
thread2.start();
thread2.join();
thread2.join();
}
@Test
public void testAvroReflect() throws Exception {
AvroTestUtil.testReflect
(new Text("foo"),
"{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.io.Text\"}");
}
/**
*
*/
@Test
public void testCharAt() {
String line = "adsawseeeeegqewgasddga";
Text text = new Text(line);
for (int i = 0; i < line.length(); i++) {
assertTrue("testCharAt error1 !!!", text.charAt(i) == line.charAt(i));
}
assertEquals("testCharAt error2 !!!", -1, text.charAt(-1));
assertEquals("testCharAt error3 !!!", -1, text.charAt(100));
}
/**
* test {@code Text} readFields/write operations
*/
@Test
public void testReadWriteOperations() {
String line = "adsawseeeeegqewgasddga";
byte[] inputBytes = line.getBytes();
inputBytes = Bytes.concat(new byte[] {(byte)22}, inputBytes);
DataInputBuffer in = new DataInputBuffer();
DataOutputBuffer out = new DataOutputBuffer();
Text text = new Text(line);
try {
in.reset(inputBytes, inputBytes.length);
text.readFields(in);
} catch(Exception ex) {
fail("testReadFields error !!!");
}
try {
text.write(out);
} catch(IOException ex) {
} catch(Exception ex) {
fail("testReadWriteOperations error !!!");
}
}
@Test
public void testReadWithKnownLength() throws IOException {
String line = "hello world";
byte[] inputBytes = line.getBytes(Charsets.UTF_8);
DataInputBuffer in = new DataInputBuffer();
Text text = new Text();
in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 5);
assertEquals("hello", text.toString());
// Read longer length, make sure it lengthens
in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 7);
assertEquals("hello w", text.toString());
// Read shorter length, make sure it shortens
in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 2);
assertEquals("he", text.toString());
}
/**
* test {@code Text.bytesToCodePoint(bytes) }
* with {@code BufferUnderflowException}
*
*/
@Test
public void testBytesToCodePoint() {
try {
ByteBuffer bytes = ByteBuffer.wrap(new byte[] {-2, 45, 23, 12, 76, 89});
Text.bytesToCodePoint(bytes);
assertTrue("testBytesToCodePoint error !!!", bytes.position() == 6 );
} catch (BufferUnderflowException ex) {
fail("testBytesToCodePoint unexp exception");
} catch (Exception e) {
fail("testBytesToCodePoint unexp exception");
}
}
@Test
public void testbytesToCodePointWithInvalidUTF() {
try {
Text.bytesToCodePoint(ByteBuffer.wrap(new byte[] {-2}));
fail("testbytesToCodePointWithInvalidUTF error unexp exception !!!");
} catch (BufferUnderflowException ex) {
} catch(Exception e) {
fail("testbytesToCodePointWithInvalidUTF error unexp exception !!!");
}
}
@Test
public void testUtf8Length() {
assertEquals("testUtf8Length1 error !!!",
1, Text.utf8Length(new String(new char[]{(char) 1})));
assertEquals("testUtf8Length127 error !!!",
1, Text.utf8Length(new String(new char[]{(char) 127})));
assertEquals("testUtf8Length128 error !!!",
2, Text.utf8Length(new String(new char[]{(char) 128})));
assertEquals("testUtf8Length193 error !!!",
2, Text.utf8Length(new String(new char[]{(char) 193})));
assertEquals("testUtf8Length225 error !!!",
2, Text.utf8Length(new String(new char[]{(char) 225})));
assertEquals("testUtf8Length254 error !!!",
2, Text.utf8Length(new String(new char[]{(char)254})));
}
}