blob: d235c0e8352a915dff257e5903a7716619126229 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
/**
* Unit tests for {@link AlphabetConverter}.
*/
public class AlphabetConverterTest {
private static Character[] lower_case_english = {' ','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'};
private static Character[] english_and_numbers = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',' ' };
private static Character[] lower_case_english_and_numbers = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ' };
private static Character[] numbers = {'0','1','2','3','4','5','6','7','8','9'};
private static Character[] binary = {'0','1'};
private static Character[] hebrew = {'_', ' ', '\u05e7','\u05e8','\u05d0','\u05d8','\u05d5','\u05df','\u05dd','\u05e4','\u05e9','\u05d3','\u05d2','\u05db','\u05e2','\u05d9','\u05d7','\u05dc','\u05da','\u05e3','\u05d6','\u05e1','\u05d1','\u05d4','\u05e0','\u05de','\u05e6','\u05ea','\u05e5'};
private static Character[] empty = {};
private static Integer[] unicode = {32,35395,35397,36302,36291,35203,35201,35215,35219,35268,97,98,99,100,101,102,103,104,105,106,107,108,109,110,1001,1002,1003,1004,1005};
private static Integer[] lower_case_english_codepoints = {32,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122};
private static Integer[] doNotEncodePoints = {32,97,98,99}; // space, a, b, c
@Rule
public ExpectedException thrown = ExpectedException.none();
@Test
public void encodeFailureTest() throws UnsupportedEncodingException {
thrown.expect(UnsupportedEncodingException.class);
thrown.expectMessage("Couldn't find encoding for '3'");
test(binary, numbers, empty, "3");
}
@Test
public void binaryTest() throws UnsupportedEncodingException {
test(binary, numbers, empty, "0", "1", "10", "11");
test(numbers, binary, empty, "12345", "0");
test(lower_case_english, binary, empty, "abc", "a");
}
@Test
public void hebrewTest() throws UnsupportedEncodingException {
test(hebrew, binary, empty, "\u05d0", "\u05e2", "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
test(hebrew, numbers, empty, "\u05d0", "\u05e2", "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
test(numbers, hebrew, empty, "123456789", "1", "5");
test(lower_case_english, hebrew, empty, "this is a test");
}
@Test
public void doNotEncodeTest() throws UnsupportedEncodingException {
test(english_and_numbers, lower_case_english_and_numbers, lower_case_english, "1", "456", "abc", "ABC", "this will not be converted but THIS WILL");
test(english_and_numbers, lower_case_english_and_numbers, numbers, "1", "456", "abc", "ABC", "this will be converted but 12345 and this will be");
}
private AlphabetConverter createJavadocExample() {
final Character[] original = {'a','b','c','d'};
final Character[] encoding = {'0','1','d'};
final Character[] doNotEncode = {'d'};
return AlphabetConverter.createConverterFromChars(original, encoding, doNotEncode);
}
/*
* Test example in javadocs for consistency
*/
@Test
public void javadocExampleTest() throws UnsupportedEncodingException {
final AlphabetConverter ac = createJavadocExample();
Assert.assertEquals("00", ac.encode("a"));
Assert.assertEquals("01", ac.encode("b"));
Assert.assertEquals("0d", ac.encode("c"));
Assert.assertEquals("d", ac.encode("d"));
Assert.assertEquals("00010dd", ac.encode("abcd"));
}
@Test
public void unexpectedEndwhileDecodingTest() throws UnsupportedEncodingException {
final String toDecode = "00d01d0";
thrown.expect(UnsupportedEncodingException.class);
thrown.expectMessage("Unexpected end of string while decoding " + toDecode);
final AlphabetConverter ac = createJavadocExample();
ac.decode(toDecode);
}
@Test
public void unexpectedStringWhileDecodingTest() throws UnsupportedEncodingException {
final String toDecode = "00XX";
thrown.expect(UnsupportedEncodingException.class);
thrown.expectMessage("Unexpected string without decoding (XX) in " + toDecode);
final AlphabetConverter ac = createJavadocExample();
ac.decode(toDecode);
}
/*
* Test constructor from code points
*/
@Test
public void unicodeTest() throws UnsupportedEncodingException {
final AlphabetConverter ac = AlphabetConverter.createConverter(unicode, lower_case_english_codepoints, doNotEncodePoints);
Assert.assertEquals(2, ac.getEncodedCharLength());
final String original = "\u8a43\u8a45 \u8dce ab \u8dc3 c \u8983";
final String encoded = ac.encode(original);
final String decoded = ac.decode(encoded);
Assert.assertEquals("Encoded '" + original + "' into '" + encoded + "', but decoded into '" + decoded + "'", original, decoded);
}
@Test
public void noEncodingLettersTest() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0");
AlphabetConverter.createConverterFromChars(english_and_numbers, numbers, numbers);
}
@Test
public void onlyOneEncodingLettersTest() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1");
final Character[] numbersPlusUnderscore = Arrays.copyOf(numbers, numbers.length + 1);
numbersPlusUnderscore[numbersPlusUnderscore.length -1] = '_';
AlphabetConverter.createConverterFromChars(english_and_numbers, numbersPlusUnderscore, numbers);
}
@Test
public void missingDoNotEncodeLettersFromEncodingTest() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Can not use 'do not encode' list because encoding alphabet does not contain");
AlphabetConverter.createConverterFromChars(english_and_numbers, lower_case_english, numbers);
}
@Test
public void missingDoNotEncodeLettersFromOriginalTest() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Can not use 'do not encode' list because original alphabet does not contain");
AlphabetConverter.createConverterFromChars(lower_case_english, english_and_numbers, numbers);
}
private void test(final Character[] originalChars, final Character[] encodingChars, final Character[] doNotEncodeChars, final String... strings) throws UnsupportedEncodingException {
final AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originalChars, encodingChars, doNotEncodeChars);
final AlphabetConverter reconstructedAlphabetConverter = AlphabetConverter.createConverterFromMap(ac.getOriginalToEncoded());
Assert.assertEquals(ac, reconstructedAlphabetConverter);
Assert.assertEquals(ac.hashCode(), reconstructedAlphabetConverter.hashCode());
Assert.assertEquals(ac.toString(), reconstructedAlphabetConverter.toString());
Assert.assertEquals(null, ac.encode(null)); // test null conversions
Assert.assertEquals("", ac.encode("")); // test empty conversion
// test all the trial strings
for (final String s : strings) {
final String encoded = ac.encode(s);
// test that only encoding chars are used
final List<Character> originalEncodingChars = Arrays.asList(encodingChars);
for (int i = 0; i < encoded.length(); i++) {
Assert.assertTrue(originalEncodingChars.contains(encoded.charAt(i)));
}
final String decoded = ac.decode(encoded);
// test that only the original alphabet is used after decoding
final List<Character> originalCharsList = Arrays.asList(originalChars);
for (int i = 0; i < decoded.length(); i++) {
Assert.assertTrue(originalCharsList.contains(decoded.charAt(i)));
}
Assert.assertEquals("Encoded '" + s + "' into '" + encoded + "', but decoded into '" + decoded + "'", s, decoded);
}
}
}