| /* |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| */ |
| package org.apache.qpid.proton.codec; |
| |
| import static org.junit.Assert.assertEquals; |
| |
| import java.lang.Character.UnicodeBlock; |
| import java.nio.ByteBuffer; |
| import java.nio.charset.Charset; |
| import java.util.Arrays; |
| import java.util.HashSet; |
| import java.util.Set; |
| |
| import org.apache.qpid.proton.amqp.messaging.AmqpValue; |
| import org.junit.Test; |
| |
| /** |
| * Test the encoding and decoding of {@link StringType} values. |
| */ |
| public class StringTypeTest |
| { |
| private static final Charset CHARSET_UTF8 = Charset.forName("UTF-8"); |
| |
| /** |
| * Loop over all the chars in given {@link UnicodeBlock}s and return a |
| * {@link Set <String>} containing all the possible values as their |
| * {@link String} values. |
| * |
| * @param blocks the {@link UnicodeBlock}s to loop over |
| * @return a {@link Set <String>} containing all the possible values as |
| * {@link String} values |
| */ |
| private static Set<String> getAllStringsFromUnicodeBlocks(final UnicodeBlock... blocks) |
| { |
| final Set<UnicodeBlock> blockSet = new HashSet<UnicodeBlock>(Arrays.asList(blocks)); |
| final Set<String> strings = new HashSet<String>(); |
| for (int codePoint = 0; codePoint <= Character.MAX_CODE_POINT; codePoint++) |
| { |
| if (blockSet.contains(UnicodeBlock.of(codePoint))) |
| { |
| final int charCount = Character.charCount(codePoint); |
| final StringBuilder sb = new StringBuilder( |
| charCount); |
| if (charCount == 1) |
| { |
| sb.append(String.valueOf((char) codePoint)); |
| } |
| else if (charCount == 2) |
| { |
| //TODO: use Character.highSurrogate(codePoint) and Character.lowSurrogate(codePoint) when Java 7 is baseline |
| char highSurrogate = (char) ((codePoint >>> 10) + ('\uD800' - (0x010000 >>> 10))); |
| char lowSurrogate = (char) ((codePoint & 0x3ff) + '\uDC00'); |
| |
| sb.append(highSurrogate); |
| sb.append(lowSurrogate); |
| } |
| else |
| { |
| throw new IllegalArgumentException("Character.charCount of " |
| + charCount + " not supported."); |
| } |
| strings.add(sb.toString()); |
| } |
| } |
| return strings; |
| } |
| |
| |
| /** |
| * Test the encoding and decoding of various complicated Unicode characters |
| * which will end up as "surrogate pairs" when encoded to UTF-8 |
| */ |
| @Test |
| public void calculateUTF8Length() |
| { |
| for (final String input : generateTestData()) |
| { |
| assertEquals("Incorrect string length calculated for string '"+input+"'",input.getBytes(CHARSET_UTF8).length, StringType.calculateUTF8Length(input)); |
| } |
| } |
| |
| /** |
| * Test the encoding and decoding of various Unicode characters |
| */ |
| @Test |
| public void encodeDecodeStrings() |
| { |
| final DecoderImpl decoder = new DecoderImpl(); |
| final EncoderImpl encoder = new EncoderImpl(decoder); |
| AMQPDefinedTypes.registerAllTypes(decoder, encoder); |
| final ByteBuffer bb = ByteBuffer.allocate(16); |
| |
| for (final String input : generateTestData()) |
| { |
| bb.clear(); |
| final AmqpValue inputValue = new AmqpValue(input); |
| encoder.setByteBuffer(bb); |
| encoder.writeObject(inputValue); |
| bb.clear(); |
| decoder.setByteBuffer(bb); |
| final AmqpValue outputValue = (AmqpValue) decoder.readObject(); |
| assertEquals("Failed to round trip String correctly: ", input, outputValue.getValue()); |
| } |
| } |
| |
| // build up some test data with a set of suitable Unicode characters |
| private Set<String> generateTestData() |
| { |
| return new HashSet<String>() |
| { |
| private static final long serialVersionUID = 7331717267070233454L; |
| |
| { |
| // non-surrogate pair blocks |
| addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN, |
| UnicodeBlock.LATIN_1_SUPPLEMENT, |
| UnicodeBlock.GREEK, |
| UnicodeBlock.LETTERLIKE_SYMBOLS)); |
| // blocks with surrogate pairs |
| //TODO: restore others when Java 7 is baseline |
| addAll(getAllStringsFromUnicodeBlocks(/*UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,*/ |
| UnicodeBlock.MUSICAL_SYMBOLS, |
| /*UnicodeBlock.EMOTICONS,*/ |
| /*UnicodeBlock.PLAYING_CARDS,*/ |
| UnicodeBlock.BOX_DRAWING, |
| UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, |
| UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, |
| UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B)); |
| // some additional combinations of characters that could cause problems to the encoder |
| String[] boxDrawing = getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]); |
| String[] halfFullWidthForms = getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new String[0]); |
| for (int i = 0; i < halfFullWidthForms.length; i++) |
| { |
| add(halfFullWidthForms[i] + boxDrawing[i % boxDrawing.length]); |
| } |
| } |
| }; |
| } |
| } |